diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-04-07 06:05:21 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-07 06:05:25 -0400 |
commit | 6c009ecef8cca28c7c09eb16d0802e37915a76e1 (patch) | |
tree | 11c773f780186fdb9fbc9c80a73fb7c8426b1fba /fs | |
parent | 98c2aaf8be5baf7193be37fb28bce8e7327158bc (diff) | |
parent | d508afb437daee7cf07da085b635c44a4ebf9b38 (diff) |
Merge branch 'linus' into perfcounters/core
Merge reason: need the upstream facility added by:
7f1e2ca: hrtimer: fix rq->lock inversion (again)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
48 files changed, 3656 insertions, 1331 deletions
diff --git a/fs/befs/debug.c b/fs/befs/debug.c index b8e304a0661e..622e73775c83 100644 --- a/fs/befs/debug.c +++ b/fs/befs/debug.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/slab.h> | ||
20 | 21 | ||
21 | #endif /* __KERNEL__ */ | 22 | #endif /* __KERNEL__ */ |
22 | 23 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index 5d55a896ff78..6e35762b6169 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -737,7 +737,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
737 | { | 737 | { |
738 | struct buffer_head *bh; | 738 | struct buffer_head *bh; |
739 | struct list_head tmp; | 739 | struct list_head tmp; |
740 | struct address_space *mapping; | 740 | struct address_space *mapping, *prev_mapping = NULL; |
741 | int err = 0, err2; | 741 | int err = 0, err2; |
742 | 742 | ||
743 | INIT_LIST_HEAD(&tmp); | 743 | INIT_LIST_HEAD(&tmp); |
@@ -762,7 +762,18 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
762 | * contents - it is a noop if I/O is still in | 762 | * contents - it is a noop if I/O is still in |
763 | * flight on potentially older contents. | 763 | * flight on potentially older contents. |
764 | */ | 764 | */ |
765 | ll_rw_block(SWRITE_SYNC, 1, &bh); | 765 | ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); |
766 | |||
767 | /* | ||
768 | * Kick off IO for the previous mapping. Note | ||
769 | * that we will not run the very last mapping, | ||
770 | * wait_on_buffer() will do that for us | ||
771 | * through sync_buffer(). | ||
772 | */ | ||
773 | if (prev_mapping && prev_mapping != mapping) | ||
774 | blk_run_address_space(prev_mapping); | ||
775 | prev_mapping = mapping; | ||
776 | |||
766 | brelse(bh); | 777 | brelse(bh); |
767 | spin_lock(lock); | 778 | spin_lock(lock); |
768 | } | 779 | } |
@@ -2957,12 +2968,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
2957 | for (i = 0; i < nr; i++) { | 2968 | for (i = 0; i < nr; i++) { |
2958 | struct buffer_head *bh = bhs[i]; | 2969 | struct buffer_head *bh = bhs[i]; |
2959 | 2970 | ||
2960 | if (rw == SWRITE || rw == SWRITE_SYNC) | 2971 | if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) |
2961 | lock_buffer(bh); | 2972 | lock_buffer(bh); |
2962 | else if (!trylock_buffer(bh)) | 2973 | else if (!trylock_buffer(bh)) |
2963 | continue; | 2974 | continue; |
2964 | 2975 | ||
2965 | if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { | 2976 | if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || |
2977 | rw == SWRITE_SYNC_PLUG) { | ||
2966 | if (test_clear_buffer_dirty(bh)) { | 2978 | if (test_clear_buffer_dirty(bh)) { |
2967 | bh->b_end_io = end_buffer_write_sync; | 2979 | bh->b_end_io = end_buffer_write_sync; |
2968 | get_bh(bh); | 2980 | get_bh(bh); |
@@ -2998,7 +3010,7 @@ int sync_dirty_buffer(struct buffer_head *bh) | |||
2998 | if (test_clear_buffer_dirty(bh)) { | 3010 | if (test_clear_buffer_dirty(bh)) { |
2999 | get_bh(bh); | 3011 | get_bh(bh); |
3000 | bh->b_end_io = end_buffer_write_sync; | 3012 | bh->b_end_io = end_buffer_write_sync; |
3001 | ret = submit_bh(WRITE, bh); | 3013 | ret = submit_bh(WRITE_SYNC, bh); |
3002 | wait_on_buffer(bh); | 3014 | wait_on_buffer(bh); |
3003 | if (buffer_eopnotsupp(bh)) { | 3015 | if (buffer_eopnotsupp(bh)) { |
3004 | clear_buffer_eopnotsupp(bh); | 3016 | clear_buffer_eopnotsupp(bh); |
diff --git a/fs/direct-io.c b/fs/direct-io.c index b6d43908ff7a..da258e7249cc 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1126,7 +1126,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1126 | int acquire_i_mutex = 0; | 1126 | int acquire_i_mutex = 0; |
1127 | 1127 | ||
1128 | if (rw & WRITE) | 1128 | if (rw & WRITE) |
1129 | rw = WRITE_SYNC; | 1129 | rw = WRITE_ODIRECT; |
1130 | 1130 | ||
1131 | if (bdev) | 1131 | if (bdev) |
1132 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); | 1132 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index 8e0cfe44b0fc..fb3c1a21b135 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -28,6 +28,25 @@ config EXT3_FS | |||
28 | To compile this file system support as a module, choose M here: the | 28 | To compile this file system support as a module, choose M here: the |
29 | module will be called ext3. | 29 | module will be called ext3. |
30 | 30 | ||
31 | config EXT3_DEFAULTS_TO_ORDERED | ||
32 | bool "Default to 'data=ordered' in ext3 (legacy option)" | ||
33 | depends on EXT3_FS | ||
34 | help | ||
35 | If a filesystem does not explicitly specify a data ordering | ||
36 | mode, and the journal capability allowed it, ext3 used to | ||
37 | historically default to 'data=ordered'. | ||
38 | |||
39 | That was a rather unfortunate choice, because it leads to all | ||
40 | kinds of latency problems, and the 'data=writeback' mode is more | ||
41 | appropriate these days. | ||
42 | |||
43 | You should probably always answer 'n' here, and if you really | ||
44 | want to use 'data=ordered' mode, set it in the filesystem itself | ||
45 | with 'tune2fs -o journal_data_ordered'. | ||
46 | |||
47 | But if you really want to enable the legacy default, you can do | ||
48 | so by answering 'y' to this question. | ||
49 | |||
31 | config EXT3_FS_XATTR | 50 | config EXT3_FS_XATTR |
32 | bool "Ext3 extended attributes" | 51 | bool "Ext3 extended attributes" |
33 | depends on EXT3_FS | 52 | depends on EXT3_FS |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 9e5b8e387e1e..599dbfe504c3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -44,6 +44,12 @@ | |||
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "namei.h" | 45 | #include "namei.h" |
46 | 46 | ||
47 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED | ||
48 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA | ||
49 | #else | ||
50 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA | ||
51 | #endif | ||
52 | |||
47 | static int ext3_load_journal(struct super_block *, struct ext3_super_block *, | 53 | static int ext3_load_journal(struct super_block *, struct ext3_super_block *, |
48 | unsigned long journal_devnum); | 54 | unsigned long journal_devnum); |
49 | static int ext3_create_journal(struct super_block *, struct ext3_super_block *, | 55 | static int ext3_create_journal(struct super_block *, struct ext3_super_block *, |
@@ -1919,7 +1925,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1919 | cope, else JOURNAL_DATA */ | 1925 | cope, else JOURNAL_DATA */ |
1920 | if (journal_check_available_features | 1926 | if (journal_check_available_features |
1921 | (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) | 1927 | (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) |
1922 | set_opt(sbi->s_mount_opt, ORDERED_DATA); | 1928 | set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); |
1923 | else | 1929 | else |
1924 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); | 1930 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); |
1925 | break; | 1931 | break; |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index f8077b9c8981..a8e8513a78a9 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -351,8 +351,13 @@ void journal_commit_transaction(journal_t *journal) | |||
351 | spin_lock(&journal->j_state_lock); | 351 | spin_lock(&journal->j_state_lock); |
352 | commit_transaction->t_state = T_LOCKED; | 352 | commit_transaction->t_state = T_LOCKED; |
353 | 353 | ||
354 | /* | ||
355 | * Use plugged writes here, since we want to submit several before | ||
356 | * we unplug the device. We don't do explicit unplugging in here, | ||
357 | * instead we rely on sync_buffer() doing the unplug for us. | ||
358 | */ | ||
354 | if (commit_transaction->t_synchronous_commit) | 359 | if (commit_transaction->t_synchronous_commit) |
355 | write_op = WRITE_SYNC; | 360 | write_op = WRITE_SYNC_PLUG; |
356 | spin_lock(&commit_transaction->t_handle_lock); | 361 | spin_lock(&commit_transaction->t_handle_lock); |
357 | while (commit_transaction->t_updates) { | 362 | while (commit_transaction->t_updates) { |
358 | DEFINE_WAIT(wait); | 363 | DEFINE_WAIT(wait); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4ea72377c7a2..073c8c3df7cd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -138,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
138 | set_buffer_ordered(bh); | 138 | set_buffer_ordered(bh); |
139 | barrier_done = 1; | 139 | barrier_done = 1; |
140 | } | 140 | } |
141 | ret = submit_bh(WRITE_SYNC, bh); | 141 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
142 | if (barrier_done) | 142 | if (barrier_done) |
143 | clear_buffer_ordered(bh); | 143 | clear_buffer_ordered(bh); |
144 | 144 | ||
@@ -159,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
159 | lock_buffer(bh); | 159 | lock_buffer(bh); |
160 | set_buffer_uptodate(bh); | 160 | set_buffer_uptodate(bh); |
161 | clear_buffer_dirty(bh); | 161 | clear_buffer_dirty(bh); |
162 | ret = submit_bh(WRITE_SYNC, bh); | 162 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
163 | } | 163 | } |
164 | *cbh = bh; | 164 | *cbh = bh; |
165 | return ret; | 165 | return ret; |
@@ -190,7 +190,7 @@ retry: | |||
190 | set_buffer_uptodate(bh); | 190 | set_buffer_uptodate(bh); |
191 | bh->b_end_io = journal_end_buffer_io_sync; | 191 | bh->b_end_io = journal_end_buffer_io_sync; |
192 | 192 | ||
193 | ret = submit_bh(WRITE_SYNC, bh); | 193 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
194 | if (ret) { | 194 | if (ret) { |
195 | unlock_buffer(bh); | 195 | unlock_buffer(bh); |
196 | return ret; | 196 | return ret; |
@@ -402,8 +402,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
402 | spin_lock(&journal->j_state_lock); | 402 | spin_lock(&journal->j_state_lock); |
403 | commit_transaction->t_state = T_LOCKED; | 403 | commit_transaction->t_state = T_LOCKED; |
404 | 404 | ||
405 | /* | ||
406 | * Use plugged writes here, since we want to submit several before | ||
407 | * we unplug the device. We don't do explicit unplugging in here, | ||
408 | * instead we rely on sync_buffer() doing the unplug for us. | ||
409 | */ | ||
405 | if (commit_transaction->t_synchronous_commit) | 410 | if (commit_transaction->t_synchronous_commit) |
406 | write_op = WRITE_SYNC; | 411 | write_op = WRITE_SYNC_PLUG; |
407 | stats.u.run.rs_wait = commit_transaction->t_max_wait; | 412 | stats.u.run.rs_wait = commit_transaction->t_max_wait; |
408 | stats.u.run.rs_locked = jiffies; | 413 | stats.u.run.rs_locked = jiffies; |
409 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, | 414 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 77ccf8cb0823..043740dde20c 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -38,12 +38,12 @@ static int jffs2_acl_count(size_t size) | |||
38 | size_t s; | 38 | size_t s; |
39 | 39 | ||
40 | size -= sizeof(struct jffs2_acl_header); | 40 | size -= sizeof(struct jffs2_acl_header); |
41 | s = size - 4 * sizeof(struct jffs2_acl_entry_short); | 41 | if (size < 4 * sizeof(struct jffs2_acl_entry_short)) { |
42 | if (s < 0) { | ||
43 | if (size % sizeof(struct jffs2_acl_entry_short)) | 42 | if (size % sizeof(struct jffs2_acl_entry_short)) |
44 | return -1; | 43 | return -1; |
45 | return size / sizeof(struct jffs2_acl_entry_short); | 44 | return size / sizeof(struct jffs2_acl_entry_short); |
46 | } else { | 45 | } else { |
46 | s = size - 4 * sizeof(struct jffs2_acl_entry_short); | ||
47 | if (s % sizeof(struct jffs2_acl_entry)) | 47 | if (s % sizeof(struct jffs2_acl_entry)) |
48 | return -1; | 48 | return -1; |
49 | return s / sizeof(struct jffs2_acl_entry) + 4; | 49 | return s / sizeof(struct jffs2_acl_entry) + 4; |
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index f9211252b5f1..9eff2bdae8a7 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c | |||
@@ -284,10 +284,9 @@ void jffs2_free_inode_cache(struct jffs2_inode_cache *x) | |||
284 | struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) | 284 | struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) |
285 | { | 285 | { |
286 | struct jffs2_xattr_datum *xd; | 286 | struct jffs2_xattr_datum *xd; |
287 | xd = kmem_cache_alloc(xattr_datum_cache, GFP_KERNEL); | 287 | xd = kmem_cache_zalloc(xattr_datum_cache, GFP_KERNEL); |
288 | dbg_memalloc("%p\n", xd); | 288 | dbg_memalloc("%p\n", xd); |
289 | 289 | ||
290 | memset(xd, 0, sizeof(struct jffs2_xattr_datum)); | ||
291 | xd->class = RAWNODE_CLASS_XATTR_DATUM; | 290 | xd->class = RAWNODE_CLASS_XATTR_DATUM; |
292 | xd->node = (void *)xd; | 291 | xd->node = (void *)xd; |
293 | INIT_LIST_HEAD(&xd->xindex); | 292 | INIT_LIST_HEAD(&xd->xindex); |
@@ -303,10 +302,9 @@ void jffs2_free_xattr_datum(struct jffs2_xattr_datum *xd) | |||
303 | struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) | 302 | struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) |
304 | { | 303 | { |
305 | struct jffs2_xattr_ref *ref; | 304 | struct jffs2_xattr_ref *ref; |
306 | ref = kmem_cache_alloc(xattr_ref_cache, GFP_KERNEL); | 305 | ref = kmem_cache_zalloc(xattr_ref_cache, GFP_KERNEL); |
307 | dbg_memalloc("%p\n", ref); | 306 | dbg_memalloc("%p\n", ref); |
308 | 307 | ||
309 | memset(ref, 0, sizeof(struct jffs2_xattr_ref)); | ||
310 | ref->class = RAWNODE_CLASS_XATTR_REF; | 308 | ref->class = RAWNODE_CLASS_XATTR_REF; |
311 | ref->node = (void *)ref; | 309 | ref->node = (void *)ref; |
312 | return ref; | 310 | return ref; |
diff --git a/fs/libfs.c b/fs/libfs.c index 4910a36f516e..cd223190c4e9 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -575,6 +575,21 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, | |||
575 | * possibly a read which collects the result - which is stored in a | 575 | * possibly a read which collects the result - which is stored in a |
576 | * file-local buffer. | 576 | * file-local buffer. |
577 | */ | 577 | */ |
578 | |||
579 | void simple_transaction_set(struct file *file, size_t n) | ||
580 | { | ||
581 | struct simple_transaction_argresp *ar = file->private_data; | ||
582 | |||
583 | BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); | ||
584 | |||
585 | /* | ||
586 | * The barrier ensures that ar->size will really remain zero until | ||
587 | * ar->data is ready for reading. | ||
588 | */ | ||
589 | smp_mb(); | ||
590 | ar->size = n; | ||
591 | } | ||
592 | |||
578 | char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) | 593 | char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) |
579 | { | 594 | { |
580 | struct simple_transaction_argresp *ar; | 595 | struct simple_transaction_argresp *ar; |
@@ -820,6 +835,7 @@ EXPORT_SYMBOL(simple_sync_file); | |||
820 | EXPORT_SYMBOL(simple_unlink); | 835 | EXPORT_SYMBOL(simple_unlink); |
821 | EXPORT_SYMBOL(simple_read_from_buffer); | 836 | EXPORT_SYMBOL(simple_read_from_buffer); |
822 | EXPORT_SYMBOL(memory_read_from_buffer); | 837 | EXPORT_SYMBOL(memory_read_from_buffer); |
838 | EXPORT_SYMBOL(simple_transaction_set); | ||
823 | EXPORT_SYMBOL(simple_transaction_get); | 839 | EXPORT_SYMBOL(simple_transaction_get); |
824 | EXPORT_SYMBOL(simple_transaction_read); | 840 | EXPORT_SYMBOL(simple_transaction_read); |
825 | EXPORT_SYMBOL(simple_transaction_release); | 841 | EXPORT_SYMBOL(simple_transaction_release); |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 763b78a6e9de..83ee34203bd7 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -426,8 +426,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
426 | ret = nlm_granted; | 426 | ret = nlm_granted; |
427 | goto out; | 427 | goto out; |
428 | case -EAGAIN: | 428 | case -EAGAIN: |
429 | /* | ||
430 | * If this is a blocking request for an | ||
431 | * already pending lock request then we need | ||
432 | * to put it back on lockd's block list | ||
433 | */ | ||
434 | if (wait) | ||
435 | break; | ||
429 | ret = nlm_lck_denied; | 436 | ret = nlm_lck_denied; |
430 | break; | 437 | goto out; |
431 | case FILE_LOCK_DEFERRED: | 438 | case FILE_LOCK_DEFERRED: |
432 | if (wait) | 439 | if (wait) |
433 | break; | 440 | break; |
@@ -443,10 +450,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
443 | goto out; | 450 | goto out; |
444 | } | 451 | } |
445 | 452 | ||
446 | ret = nlm_lck_denied; | ||
447 | if (!wait) | ||
448 | goto out; | ||
449 | |||
450 | ret = nlm_lck_blocked; | 453 | ret = nlm_lck_blocked; |
451 | 454 | ||
452 | /* Append to list of blocked */ | 455 | /* Append to list of blocked */ |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 82eaadbff408..6717200923fe 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1228,7 +1228,6 @@ static int nfs_parse_mount_options(char *raw, | |||
1228 | goto out_nomem; | 1228 | goto out_nomem; |
1229 | token = match_token(string, | 1229 | token = match_token(string, |
1230 | nfs_xprt_protocol_tokens, args); | 1230 | nfs_xprt_protocol_tokens, args); |
1231 | kfree(string); | ||
1232 | 1231 | ||
1233 | switch (token) { | 1232 | switch (token) { |
1234 | case Opt_xprt_udp: | 1233 | case Opt_xprt_udp: |
@@ -1258,6 +1257,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1258 | goto out_nomem; | 1257 | goto out_nomem; |
1259 | token = match_token(string, | 1258 | token = match_token(string, |
1260 | nfs_xprt_protocol_tokens, args); | 1259 | nfs_xprt_protocol_tokens, args); |
1260 | kfree(string); | ||
1261 | 1261 | ||
1262 | switch (token) { | 1262 | switch (token) { |
1263 | case Opt_xprt_udp: | 1263 | case Opt_xprt_udp: |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 44d7d04dab95..503b9da159a3 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config NFSD | 1 | config NFSD |
2 | tristate "NFS server support" | 2 | tristate "NFS server support" |
3 | depends on INET | 3 | depends on INET |
4 | depends on FILE_LOCKING | ||
4 | select LOCKD | 5 | select LOCKD |
5 | select SUNRPC | 6 | select SUNRPC |
6 | select EXPORTFS | 7 | select EXPORTFS |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9dbd2eb91281..7c9fe838f038 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/unistd.h> | 18 | #include <linux/unistd.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/major.h> | 20 | #include <linux/major.h> |
21 | #include <linux/magic.h> | ||
21 | 22 | ||
22 | #include <linux/sunrpc/svc.h> | 23 | #include <linux/sunrpc/svc.h> |
23 | #include <linux/nfsd/nfsd.h> | 24 | #include <linux/nfsd/nfsd.h> |
@@ -202,6 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, | |||
202 | struct nfsd3_writeres *resp) | 203 | struct nfsd3_writeres *resp) |
203 | { | 204 | { |
204 | __be32 nfserr; | 205 | __be32 nfserr; |
206 | unsigned long cnt = argp->len; | ||
205 | 207 | ||
206 | dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", | 208 | dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", |
207 | SVCFH_fmt(&argp->fh), | 209 | SVCFH_fmt(&argp->fh), |
@@ -214,9 +216,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, | |||
214 | nfserr = nfsd_write(rqstp, &resp->fh, NULL, | 216 | nfserr = nfsd_write(rqstp, &resp->fh, NULL, |
215 | argp->offset, | 217 | argp->offset, |
216 | rqstp->rq_vec, argp->vlen, | 218 | rqstp->rq_vec, argp->vlen, |
217 | argp->len, | 219 | &cnt, |
218 | &resp->committed); | 220 | &resp->committed); |
219 | resp->count = argp->count; | 221 | resp->count = cnt; |
220 | RETURN_STATUS(nfserr); | 222 | RETURN_STATUS(nfserr); |
221 | } | 223 | } |
222 | 224 | ||
@@ -569,7 +571,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
569 | struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; | 571 | struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; |
570 | 572 | ||
571 | /* Note that we don't care for remote fs's here */ | 573 | /* Note that we don't care for remote fs's here */ |
572 | if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) { | 574 | if (sb->s_magic == MSDOS_SUPER_MAGIC) { |
573 | resp->f_properties = NFS3_FSF_BILLYBOY; | 575 | resp->f_properties = NFS3_FSF_BILLYBOY; |
574 | } | 576 | } |
575 | resp->f_maxfilesize = sb->s_maxbytes; | 577 | resp->f_maxfilesize = sb->s_maxbytes; |
@@ -610,7 +612,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
610 | resp->p_link_max = EXT2_LINK_MAX; | 612 | resp->p_link_max = EXT2_LINK_MAX; |
611 | resp->p_name_max = EXT2_NAME_LEN; | 613 | resp->p_name_max = EXT2_NAME_LEN; |
612 | break; | 614 | break; |
613 | case 0x4d44: /* MSDOS_SUPER_MAGIC */ | 615 | case MSDOS_SUPER_MAGIC: |
614 | resp->p_case_insensitive = 1; | 616 | resp->p_case_insensitive = 1; |
615 | resp->p_case_preserving = 0; | 617 | resp->p_case_preserving = 0; |
616 | break; | 618 | break; |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c464181b5994..290289bd44f7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -218,7 +218,7 @@ static int | |||
218 | encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | 218 | encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) |
219 | { | 219 | { |
220 | __be32 *p; | 220 | __be32 *p; |
221 | int len = cb_rec->cbr_fhlen; | 221 | int len = cb_rec->cbr_fh.fh_size; |
222 | 222 | ||
223 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); | 223 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); |
224 | WRITE32(OP_CB_RECALL); | 224 | WRITE32(OP_CB_RECALL); |
@@ -226,7 +226,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | |||
226 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); | 226 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); |
227 | WRITE32(cb_rec->cbr_trunc); | 227 | WRITE32(cb_rec->cbr_trunc); |
228 | WRITE32(len); | 228 | WRITE32(len); |
229 | WRITEMEM(cb_rec->cbr_fhval, len); | 229 | WRITEMEM(&cb_rec->cbr_fh.fh_base, len); |
230 | return 0; | 230 | return 0; |
231 | } | 231 | } |
232 | 232 | ||
@@ -361,9 +361,8 @@ static struct rpc_program cb_program = { | |||
361 | /* Reference counting, callback cleanup, etc., all look racy as heck. | 361 | /* Reference counting, callback cleanup, etc., all look racy as heck. |
362 | * And why is cb_set an atomic? */ | 362 | * And why is cb_set an atomic? */ |
363 | 363 | ||
364 | static int do_probe_callback(void *data) | 364 | static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) |
365 | { | 365 | { |
366 | struct nfs4_client *clp = data; | ||
367 | struct sockaddr_in addr; | 366 | struct sockaddr_in addr; |
368 | struct nfs4_callback *cb = &clp->cl_callback; | 367 | struct nfs4_callback *cb = &clp->cl_callback; |
369 | struct rpc_timeout timeparms = { | 368 | struct rpc_timeout timeparms = { |
@@ -384,17 +383,10 @@ static int do_probe_callback(void *data) | |||
384 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), | 383 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
385 | .client_name = clp->cl_principal, | 384 | .client_name = clp->cl_principal, |
386 | }; | 385 | }; |
387 | struct rpc_message msg = { | ||
388 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
389 | .rpc_argp = clp, | ||
390 | }; | ||
391 | struct rpc_clnt *client; | 386 | struct rpc_clnt *client; |
392 | int status; | ||
393 | 387 | ||
394 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) { | 388 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) |
395 | status = nfserr_cb_path_down; | 389 | return ERR_PTR(-EINVAL); |
396 | goto out_err; | ||
397 | } | ||
398 | 390 | ||
399 | /* Initialize address */ | 391 | /* Initialize address */ |
400 | memset(&addr, 0, sizeof(addr)); | 392 | memset(&addr, 0, sizeof(addr)); |
@@ -404,9 +396,29 @@ static int do_probe_callback(void *data) | |||
404 | 396 | ||
405 | /* Create RPC client */ | 397 | /* Create RPC client */ |
406 | client = rpc_create(&args); | 398 | client = rpc_create(&args); |
399 | if (IS_ERR(client)) | ||
400 | dprintk("NFSD: couldn't create callback client: %ld\n", | ||
401 | PTR_ERR(client)); | ||
402 | return client; | ||
403 | |||
404 | } | ||
405 | |||
406 | static int do_probe_callback(void *data) | ||
407 | { | ||
408 | struct nfs4_client *clp = data; | ||
409 | struct nfs4_callback *cb = &clp->cl_callback; | ||
410 | struct rpc_message msg = { | ||
411 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
412 | .rpc_argp = clp, | ||
413 | }; | ||
414 | struct rpc_clnt *client; | ||
415 | int status; | ||
416 | |||
417 | client = setup_callback_client(clp); | ||
407 | if (IS_ERR(client)) { | 418 | if (IS_ERR(client)) { |
408 | dprintk("NFSD: couldn't create callback client\n"); | ||
409 | status = PTR_ERR(client); | 419 | status = PTR_ERR(client); |
420 | dprintk("NFSD: couldn't create callback client: %d\n", | ||
421 | status); | ||
410 | goto out_err; | 422 | goto out_err; |
411 | } | 423 | } |
412 | 424 | ||
@@ -422,10 +434,10 @@ static int do_probe_callback(void *data) | |||
422 | out_release_client: | 434 | out_release_client: |
423 | rpc_shutdown_client(client); | 435 | rpc_shutdown_client(client); |
424 | out_err: | 436 | out_err: |
425 | dprintk("NFSD: warning: no callback path to client %.*s\n", | 437 | dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", |
426 | (int)clp->cl_name.len, clp->cl_name.data); | 438 | (int)clp->cl_name.len, clp->cl_name.data, status); |
427 | put_nfs4_client(clp); | 439 | put_nfs4_client(clp); |
428 | return status; | 440 | return 0; |
429 | } | 441 | } |
430 | 442 | ||
431 | /* | 443 | /* |
@@ -451,7 +463,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
451 | 463 | ||
452 | /* | 464 | /* |
453 | * called with dp->dl_count inc'ed. | 465 | * called with dp->dl_count inc'ed. |
454 | * nfs4_lock_state() may or may not have been called. | ||
455 | */ | 466 | */ |
456 | void | 467 | void |
457 | nfsd4_cb_recall(struct nfs4_delegation *dp) | 468 | nfsd4_cb_recall(struct nfs4_delegation *dp) |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9fa60a3ad48c..b2883e9c6381 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -93,6 +93,21 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
93 | open->op_truncate = 0; | 93 | open->op_truncate = 0; |
94 | 94 | ||
95 | if (open->op_create) { | 95 | if (open->op_create) { |
96 | /* FIXME: check session persistence and pnfs flags. | ||
97 | * The nfsv4.1 spec requires the following semantics: | ||
98 | * | ||
99 | * Persistent | pNFS | Server REQUIRED | Client Allowed | ||
100 | * Reply Cache | server | | | ||
101 | * -------------+--------+-----------------+-------------------- | ||
102 | * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1 | ||
103 | * | | | (SHOULD) | ||
104 | * | | and EXCLUSIVE4 | or EXCLUSIVE4 | ||
105 | * | | | (SHOULD NOT) | ||
106 | * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1 | ||
107 | * yes | no | GUARDED4 | GUARDED4 | ||
108 | * yes | yes | GUARDED4 | GUARDED4 | ||
109 | */ | ||
110 | |||
96 | /* | 111 | /* |
97 | * Note: create modes (UNCHECKED,GUARDED...) are the same | 112 | * Note: create modes (UNCHECKED,GUARDED...) are the same |
98 | * in NFSv4 as in v3. | 113 | * in NFSv4 as in v3. |
@@ -103,11 +118,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
103 | (u32 *)open->op_verf.data, | 118 | (u32 *)open->op_verf.data, |
104 | &open->op_truncate, &created); | 119 | &open->op_truncate, &created); |
105 | 120 | ||
106 | /* If we ever decide to use different attrs to store the | 121 | /* |
107 | * verifier in nfsd_create_v3, then we'll need to change this | 122 | * Following rfc 3530 14.2.16, use the returned bitmask |
123 | * to indicate which attributes we used to store the | ||
124 | * verifier: | ||
108 | */ | 125 | */ |
109 | if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) | 126 | if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) |
110 | open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | | 127 | open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | |
111 | FATTR4_WORD1_TIME_MODIFY); | 128 | FATTR4_WORD1_TIME_MODIFY); |
112 | } else { | 129 | } else { |
113 | status = nfsd_lookup(rqstp, current_fh, | 130 | status = nfsd_lookup(rqstp, current_fh, |
@@ -118,13 +135,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
118 | goto out; | 135 | goto out; |
119 | 136 | ||
120 | set_change_info(&open->op_cinfo, current_fh); | 137 | set_change_info(&open->op_cinfo, current_fh); |
121 | |||
122 | /* set reply cache */ | ||
123 | fh_dup2(current_fh, &resfh); | 138 | fh_dup2(current_fh, &resfh); |
124 | open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size; | ||
125 | memcpy(open->op_stateowner->so_replay.rp_openfh, | ||
126 | &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); | ||
127 | 139 | ||
140 | /* set reply cache */ | ||
141 | fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, | ||
142 | &resfh.fh_handle); | ||
128 | if (!created) | 143 | if (!created) |
129 | status = do_open_permission(rqstp, current_fh, open, | 144 | status = do_open_permission(rqstp, current_fh, open, |
130 | NFSD_MAY_NOP); | 145 | NFSD_MAY_NOP); |
@@ -150,10 +165,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ | |||
150 | memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); | 165 | memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); |
151 | 166 | ||
152 | /* set replay cache */ | 167 | /* set replay cache */ |
153 | open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; | 168 | fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, |
154 | memcpy(open->op_stateowner->so_replay.rp_openfh, | 169 | &current_fh->fh_handle); |
155 | &current_fh->fh_handle.fh_base, | |
156 | current_fh->fh_handle.fh_size); | ||
157 | 170 | ||
158 | open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && | 171 | open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && |
159 | (open->op_iattr.ia_size == 0); | 172 | (open->op_iattr.ia_size == 0); |
@@ -164,12 +177,23 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ | |||
164 | return status; | 177 | return status; |
165 | } | 178 | } |
166 | 179 | ||
180 | static void | ||
181 | copy_clientid(clientid_t *clid, struct nfsd4_session *session) | ||
182 | { | ||
183 | struct nfsd4_sessionid *sid = | ||
184 | (struct nfsd4_sessionid *)session->se_sessionid.data; | ||
185 | |||
186 | clid->cl_boot = sid->clientid.cl_boot; | ||
187 | clid->cl_id = sid->clientid.cl_id; | ||
188 | } | ||
167 | 189 | ||
168 | static __be32 | 190 | static __be32 |
169 | nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 191 | nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
170 | struct nfsd4_open *open) | 192 | struct nfsd4_open *open) |
171 | { | 193 | { |
172 | __be32 status; | 194 | __be32 status; |
195 | struct nfsd4_compoundres *resp; | ||
196 | |||
173 | dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", | 197 | dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", |
174 | (int)open->op_fname.len, open->op_fname.data, | 198 | (int)open->op_fname.len, open->op_fname.data, |
175 | open->op_stateowner); | 199 | open->op_stateowner); |
@@ -178,16 +202,19 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
178 | if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) | 202 | if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) |
179 | return nfserr_inval; | 203 | return nfserr_inval; |
180 | 204 | ||
205 | if (nfsd4_has_session(cstate)) | ||
206 | copy_clientid(&open->op_clientid, cstate->session); | ||
207 | |||
181 | nfs4_lock_state(); | 208 | nfs4_lock_state(); |
182 | 209 | ||
183 | /* check seqid for replay. set nfs4_owner */ | 210 | /* check seqid for replay. set nfs4_owner */ |
184 | status = nfsd4_process_open1(open); | 211 | resp = rqstp->rq_resp; |
212 | status = nfsd4_process_open1(&resp->cstate, open); | ||
185 | if (status == nfserr_replay_me) { | 213 | if (status == nfserr_replay_me) { |
186 | struct nfs4_replay *rp = &open->op_stateowner->so_replay; | 214 | struct nfs4_replay *rp = &open->op_stateowner->so_replay; |
187 | fh_put(&cstate->current_fh); | 215 | fh_put(&cstate->current_fh); |
188 | cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; | 216 | fh_copy_shallow(&cstate->current_fh.fh_handle, |
189 | memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, | 217 | &rp->rp_openfh); |
190 | rp->rp_openfh_len); | ||
191 | status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); | 218 | status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); |
192 | if (status) | 219 | if (status) |
193 | dprintk("nfsd4_open: replay failed" | 220 | dprintk("nfsd4_open: replay failed" |
@@ -209,10 +236,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
209 | 236 | ||
210 | switch (open->op_claim_type) { | 237 | switch (open->op_claim_type) { |
211 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: | 238 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: |
212 | status = nfserr_inval; | ||
213 | if (open->op_create) | ||
214 | goto out; | ||
215 | /* fall through */ | ||
216 | case NFS4_OPEN_CLAIM_NULL: | 239 | case NFS4_OPEN_CLAIM_NULL: |
217 | /* | 240 | /* |
218 | * (1) set CURRENT_FH to the file being opened, | 241 | * (1) set CURRENT_FH to the file being opened, |
@@ -455,8 +478,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
455 | if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) | 478 | if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) |
456 | return nfserr_inval; | 479 | return nfserr_inval; |
457 | 480 | ||
458 | getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; | 481 | getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); |
459 | getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; | 482 | getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); |
483 | getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); | ||
460 | 484 | ||
461 | getattr->ga_fhp = &cstate->current_fh; | 485 | getattr->ga_fhp = &cstate->current_fh; |
462 | return nfs_ok; | 486 | return nfs_ok; |
@@ -520,9 +544,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
520 | 544 | ||
521 | nfs4_lock_state(); | 545 | nfs4_lock_state(); |
522 | /* check stateid */ | 546 | /* check stateid */ |
523 | if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh, | 547 | if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid, |
524 | &read->rd_stateid, | 548 | RD_STATE, &read->rd_filp))) { |
525 | CHECK_FH | RD_STATE, &read->rd_filp))) { | ||
526 | dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); | 549 | dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); |
527 | goto out; | 550 | goto out; |
528 | } | 551 | } |
@@ -548,8 +571,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
548 | if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) | 571 | if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) |
549 | return nfserr_inval; | 572 | return nfserr_inval; |
550 | 573 | ||
551 | readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; | 574 | readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); |
552 | readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; | 575 | readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); |
576 | readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); | ||
553 | 577 | ||
554 | if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || | 578 | if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || |
555 | (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) | 579 | (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) |
@@ -653,8 +677,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
653 | 677 | ||
654 | if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { | 678 | if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { |
655 | nfs4_lock_state(); | 679 | nfs4_lock_state(); |
656 | status = nfs4_preprocess_stateid_op(&cstate->current_fh, | 680 | status = nfs4_preprocess_stateid_op(cstate, |
657 | &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); | 681 | &setattr->sa_stateid, WR_STATE, NULL); |
658 | nfs4_unlock_state(); | 682 | nfs4_unlock_state(); |
659 | if (status) { | 683 | if (status) { |
660 | dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); | 684 | dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); |
@@ -685,6 +709,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
685 | struct file *filp = NULL; | 709 | struct file *filp = NULL; |
686 | u32 *p; | 710 | u32 *p; |
687 | __be32 status = nfs_ok; | 711 | __be32 status = nfs_ok; |
712 | unsigned long cnt; | ||
688 | 713 | ||
689 | /* no need to check permission - this will be done in nfsd_write() */ | 714 | /* no need to check permission - this will be done in nfsd_write() */ |
690 | 715 | ||
@@ -692,8 +717,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
692 | return nfserr_inval; | 717 | return nfserr_inval; |
693 | 718 | ||
694 | nfs4_lock_state(); | 719 | nfs4_lock_state(); |
695 | status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid, | 720 | status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); |
696 | CHECK_FH | WR_STATE, &filp); | ||
697 | if (filp) | 721 | if (filp) |
698 | get_file(filp); | 722 | get_file(filp); |
699 | nfs4_unlock_state(); | 723 | nfs4_unlock_state(); |
@@ -703,7 +727,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
703 | return status; | 727 | return status; |
704 | } | 728 | } |
705 | 729 | ||
706 | write->wr_bytes_written = write->wr_buflen; | 730 | cnt = write->wr_buflen; |
707 | write->wr_how_written = write->wr_stable_how; | 731 | write->wr_how_written = write->wr_stable_how; |
708 | p = (u32 *)write->wr_verifier.data; | 732 | p = (u32 *)write->wr_verifier.data; |
709 | *p++ = nfssvc_boot.tv_sec; | 733 | *p++ = nfssvc_boot.tv_sec; |
@@ -711,10 +735,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
711 | 735 | ||
712 | status = nfsd_write(rqstp, &cstate->current_fh, filp, | 736 | status = nfsd_write(rqstp, &cstate->current_fh, filp, |
713 | write->wr_offset, rqstp->rq_vec, write->wr_vlen, | 737 | write->wr_offset, rqstp->rq_vec, write->wr_vlen, |
714 | write->wr_buflen, &write->wr_how_written); | 738 | &cnt, &write->wr_how_written); |
715 | if (filp) | 739 | if (filp) |
716 | fput(filp); | 740 | fput(filp); |
717 | 741 | ||
742 | write->wr_bytes_written = cnt; | ||
743 | |||
718 | if (status == nfserr_symlink) | 744 | if (status == nfserr_symlink) |
719 | status = nfserr_inval; | 745 | status = nfserr_inval; |
720 | return status; | 746 | return status; |
@@ -737,8 +763,9 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
737 | if (status) | 763 | if (status) |
738 | return status; | 764 | return status; |
739 | 765 | ||
740 | if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) | 766 | if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) |
741 | || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) | 767 | || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) |
768 | || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) | ||
742 | return nfserr_attrnotsupp; | 769 | return nfserr_attrnotsupp; |
743 | if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) | 770 | if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) |
744 | || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) | 771 | || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) |
@@ -766,7 +793,8 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
766 | if (status) | 793 | if (status) |
767 | goto out_kfree; | 794 | goto out_kfree; |
768 | 795 | ||
769 | p = buf + 3; | 796 | /* skip bitmap */ |
797 | p = buf + 1 + ntohl(buf[0]); | ||
770 | status = nfserr_not_same; | 798 | status = nfserr_not_same; |
771 | if (ntohl(*p++) != verify->ve_attrlen) | 799 | if (ntohl(*p++) != verify->ve_attrlen) |
772 | goto out_kfree; | 800 | goto out_kfree; |
@@ -813,39 +841,17 @@ static inline void nfsd4_increment_op_stats(u32 opnum) | |||
813 | nfsdstats.nfs4_opcount[opnum]++; | 841 | nfsdstats.nfs4_opcount[opnum]++; |
814 | } | 842 | } |
815 | 843 | ||
816 | static void cstate_free(struct nfsd4_compound_state *cstate) | ||
817 | { | ||
818 | if (cstate == NULL) | ||
819 | return; | ||
820 | fh_put(&cstate->current_fh); | ||
821 | fh_put(&cstate->save_fh); | ||
822 | BUG_ON(cstate->replay_owner); | ||
823 | kfree(cstate); | ||
824 | } | ||
825 | |||
826 | static struct nfsd4_compound_state *cstate_alloc(void) | ||
827 | { | ||
828 | struct nfsd4_compound_state *cstate; | ||
829 | |||
830 | cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL); | ||
831 | if (cstate == NULL) | ||
832 | return NULL; | ||
833 | fh_init(&cstate->current_fh, NFS4_FHSIZE); | ||
834 | fh_init(&cstate->save_fh, NFS4_FHSIZE); | ||
835 | cstate->replay_owner = NULL; | ||
836 | return cstate; | ||
837 | } | ||
838 | |||
839 | typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, | 844 | typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, |
840 | void *); | 845 | void *); |
846 | enum nfsd4_op_flags { | ||
847 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ | ||
848 | ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ | ||
849 | ALLOWED_AS_FIRST_OP = 3 << 0, /* ops required first in compound */ | ||
850 | }; | ||
841 | 851 | ||
842 | struct nfsd4_operation { | 852 | struct nfsd4_operation { |
843 | nfsd4op_func op_func; | 853 | nfsd4op_func op_func; |
844 | u32 op_flags; | 854 | u32 op_flags; |
845 | /* Most ops require a valid current filehandle; a few don't: */ | ||
846 | #define ALLOWED_WITHOUT_FH 1 | ||
847 | /* GETATTR and ops not listed as returning NFS4ERR_MOVED: */ | ||
848 | #define ALLOWED_ON_ABSENT_FS 2 | ||
849 | char *op_name; | 855 | char *op_name; |
850 | }; | 856 | }; |
851 | 857 | ||
@@ -854,6 +860,51 @@ static struct nfsd4_operation nfsd4_ops[]; | |||
854 | static const char *nfsd4_op_name(unsigned opnum); | 860 | static const char *nfsd4_op_name(unsigned opnum); |
855 | 861 | ||
856 | /* | 862 | /* |
863 | * This is a replay of a compound for which no cache entry pages | ||
864 | * were used. Encode the sequence operation, and if cachethis is FALSE | ||
865 | * encode the uncache rep error on the next operation. | ||
866 | */ | ||
867 | static __be32 | ||
868 | nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args, | ||
869 | struct nfsd4_compoundres *resp) | ||
870 | { | ||
871 | struct nfsd4_op *op; | ||
872 | |||
873 | dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__, | ||
874 | resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); | ||
875 | |||
876 | /* Encode the replayed sequence operation */ | ||
877 | BUG_ON(resp->opcnt != 1); | ||
878 | op = &args->ops[resp->opcnt - 1]; | ||
879 | nfsd4_encode_operation(resp, op); | ||
880 | |||
881 | /*return nfserr_retry_uncached_rep in next operation. */ | ||
882 | if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) { | ||
883 | op = &args->ops[resp->opcnt++]; | ||
884 | op->status = nfserr_retry_uncached_rep; | ||
885 | nfsd4_encode_operation(resp, op); | ||
886 | } | ||
887 | return op->status; | ||
888 | } | ||
889 | |||
890 | /* | ||
891 | * Enforce NFSv4.1 COMPOUND ordering rules. | ||
892 | * | ||
893 | * TODO: | ||
894 | * - enforce NFS4ERR_NOT_ONLY_OP, | ||
895 | * - DESTROY_SESSION MUST be the final operation in the COMPOUND request. | ||
896 | */ | ||
897 | static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args) | ||
898 | { | ||
899 | if (args->minorversion && args->opcnt > 0) { | ||
900 | struct nfsd4_op *op = &args->ops[0]; | ||
901 | return (op->status == nfserr_op_illegal) || | ||
902 | (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP); | ||
903 | } | ||
904 | return true; | ||
905 | } | ||
906 | |||
907 | /* | ||
857 | * COMPOUND call. | 908 | * COMPOUND call. |
858 | */ | 909 | */ |
859 | static __be32 | 910 | static __be32 |
@@ -863,12 +914,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
863 | { | 914 | { |
864 | struct nfsd4_op *op; | 915 | struct nfsd4_op *op; |
865 | struct nfsd4_operation *opdesc; | 916 | struct nfsd4_operation *opdesc; |
866 | struct nfsd4_compound_state *cstate = NULL; | 917 | struct nfsd4_compound_state *cstate = &resp->cstate; |
867 | int slack_bytes; | 918 | int slack_bytes; |
868 | __be32 status; | 919 | __be32 status; |
869 | 920 | ||
870 | resp->xbuf = &rqstp->rq_res; | 921 | resp->xbuf = &rqstp->rq_res; |
871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 922 | resp->p = rqstp->rq_res.head[0].iov_base + |
923 | rqstp->rq_res.head[0].iov_len; | ||
872 | resp->tagp = resp->p; | 924 | resp->tagp = resp->p; |
873 | /* reserve space for: taglen, tag, and opcnt */ | 925 | /* reserve space for: taglen, tag, and opcnt */ |
874 | resp->p += 2 + XDR_QUADLEN(args->taglen); | 926 | resp->p += 2 + XDR_QUADLEN(args->taglen); |
@@ -877,18 +929,25 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
877 | resp->tag = args->tag; | 929 | resp->tag = args->tag; |
878 | resp->opcnt = 0; | 930 | resp->opcnt = 0; |
879 | resp->rqstp = rqstp; | 931 | resp->rqstp = rqstp; |
932 | resp->cstate.minorversion = args->minorversion; | ||
933 | resp->cstate.replay_owner = NULL; | ||
934 | fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); | ||
935 | fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); | ||
936 | /* Use the deferral mechanism only for NFSv4.0 compounds */ | ||
937 | rqstp->rq_usedeferral = (args->minorversion == 0); | ||
880 | 938 | ||
881 | /* | 939 | /* |
882 | * According to RFC3010, this takes precedence over all other errors. | 940 | * According to RFC3010, this takes precedence over all other errors. |
883 | */ | 941 | */ |
884 | status = nfserr_minor_vers_mismatch; | 942 | status = nfserr_minor_vers_mismatch; |
885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 943 | if (args->minorversion > nfsd_supported_minorversion) |
886 | goto out; | 944 | goto out; |
887 | 945 | ||
888 | status = nfserr_resource; | 946 | if (!nfs41_op_ordering_ok(args)) { |
889 | cstate = cstate_alloc(); | 947 | op = &args->ops[0]; |
890 | if (cstate == NULL) | 948 | op->status = nfserr_sequence_pos; |
891 | goto out; | 949 | goto encode_op; |
950 | } | ||
892 | 951 | ||
893 | status = nfs_ok; | 952 | status = nfs_ok; |
894 | while (!status && resp->opcnt < args->opcnt) { | 953 | while (!status && resp->opcnt < args->opcnt) { |
@@ -897,7 +956,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
897 | dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", | 956 | dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", |
898 | resp->opcnt, args->opcnt, op->opnum, | 957 | resp->opcnt, args->opcnt, op->opnum, |
899 | nfsd4_op_name(op->opnum)); | 958 | nfsd4_op_name(op->opnum)); |
900 | |||
901 | /* | 959 | /* |
902 | * The XDR decode routines may have pre-set op->status; | 960 | * The XDR decode routines may have pre-set op->status; |
903 | * for example, if there is a miscellaneous XDR error | 961 | * for example, if there is a miscellaneous XDR error |
@@ -938,6 +996,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
938 | BUG_ON(op->status == nfs_ok); | 996 | BUG_ON(op->status == nfs_ok); |
939 | 997 | ||
940 | encode_op: | 998 | encode_op: |
999 | /* Only from SEQUENCE or CREATE_SESSION */ | ||
1000 | if (resp->cstate.status == nfserr_replay_cache) { | ||
1001 | dprintk("%s NFS4.1 replay from cache\n", __func__); | ||
1002 | if (nfsd4_not_cached(resp)) | ||
1003 | status = nfsd4_enc_uncached_replay(args, resp); | ||
1004 | else | ||
1005 | status = op->status; | ||
1006 | goto out; | ||
1007 | } | ||
941 | if (op->status == nfserr_replay_me) { | 1008 | if (op->status == nfserr_replay_me) { |
942 | op->replay = &cstate->replay_owner->so_replay; | 1009 | op->replay = &cstate->replay_owner->so_replay; |
943 | nfsd4_encode_replay(resp, op); | 1010 | nfsd4_encode_replay(resp, op); |
@@ -961,15 +1028,24 @@ encode_op: | |||
961 | 1028 | ||
962 | nfsd4_increment_op_stats(op->opnum); | 1029 | nfsd4_increment_op_stats(op->opnum); |
963 | } | 1030 | } |
1031 | if (!rqstp->rq_usedeferral && status == nfserr_dropit) { | ||
1032 | dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__); | ||
1033 | status = nfserr_jukebox; | ||
1034 | } | ||
964 | 1035 | ||
965 | cstate_free(cstate); | 1036 | resp->cstate.status = status; |
1037 | fh_put(&resp->cstate.current_fh); | ||
1038 | fh_put(&resp->cstate.save_fh); | ||
1039 | BUG_ON(resp->cstate.replay_owner); | ||
966 | out: | 1040 | out: |
967 | nfsd4_release_compoundargs(args); | 1041 | nfsd4_release_compoundargs(args); |
1042 | /* Reset deferral mechanism for RPC deferrals */ | ||
1043 | rqstp->rq_usedeferral = 1; | ||
968 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 1044 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
969 | return status; | 1045 | return status; |
970 | } | 1046 | } |
971 | 1047 | ||
972 | static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | 1048 | static struct nfsd4_operation nfsd4_ops[] = { |
973 | [OP_ACCESS] = { | 1049 | [OP_ACCESS] = { |
974 | .op_func = (nfsd4op_func)nfsd4_access, | 1050 | .op_func = (nfsd4op_func)nfsd4_access, |
975 | .op_name = "OP_ACCESS", | 1051 | .op_name = "OP_ACCESS", |
@@ -1045,7 +1121,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | |||
1045 | .op_name = "OP_PUTFH", | 1121 | .op_name = "OP_PUTFH", |
1046 | }, | 1122 | }, |
1047 | [OP_PUTPUBFH] = { | 1123 | [OP_PUTPUBFH] = { |
1048 | /* unsupported, just for future reference: */ | 1124 | .op_func = (nfsd4op_func)nfsd4_putrootfh, |
1049 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1125 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, |
1050 | .op_name = "OP_PUTPUBFH", | 1126 | .op_name = "OP_PUTPUBFH", |
1051 | }, | 1127 | }, |
@@ -1119,6 +1195,28 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | |||
1119 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1195 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, |
1120 | .op_name = "OP_RELEASE_LOCKOWNER", | 1196 | .op_name = "OP_RELEASE_LOCKOWNER", |
1121 | }, | 1197 | }, |
1198 | |||
1199 | /* NFSv4.1 operations */ | ||
1200 | [OP_EXCHANGE_ID] = { | ||
1201 | .op_func = (nfsd4op_func)nfsd4_exchange_id, | ||
1202 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1203 | .op_name = "OP_EXCHANGE_ID", | ||
1204 | }, | ||
1205 | [OP_CREATE_SESSION] = { | ||
1206 | .op_func = (nfsd4op_func)nfsd4_create_session, | ||
1207 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1208 | .op_name = "OP_CREATE_SESSION", | ||
1209 | }, | ||
1210 | [OP_DESTROY_SESSION] = { | ||
1211 | .op_func = (nfsd4op_func)nfsd4_destroy_session, | ||
1212 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1213 | .op_name = "OP_DESTROY_SESSION", | ||
1214 | }, | ||
1215 | [OP_SEQUENCE] = { | ||
1216 | .op_func = (nfsd4op_func)nfsd4_sequence, | ||
1217 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1218 | .op_name = "OP_SEQUENCE", | ||
1219 | }, | ||
1122 | }; | 1220 | }; |
1123 | 1221 | ||
1124 | static const char *nfsd4_op_name(unsigned opnum) | 1222 | static const char *nfsd4_op_name(unsigned opnum) |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 74f7b67567fd..3444c0052a87 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -182,36 +182,26 @@ out_unlock: | |||
182 | 182 | ||
183 | typedef int (recdir_func)(struct dentry *, struct dentry *); | 183 | typedef int (recdir_func)(struct dentry *, struct dentry *); |
184 | 184 | ||
185 | struct dentry_list { | 185 | struct name_list { |
186 | struct dentry *dentry; | 186 | char name[HEXDIR_LEN]; |
187 | struct list_head list; | 187 | struct list_head list; |
188 | }; | 188 | }; |
189 | 189 | ||
190 | struct dentry_list_arg { | ||
191 | struct list_head dentries; | ||
192 | struct dentry *parent; | ||
193 | }; | ||
194 | |||
195 | static int | 190 | static int |
196 | nfsd4_build_dentrylist(void *arg, const char *name, int namlen, | 191 | nfsd4_build_namelist(void *arg, const char *name, int namlen, |
197 | loff_t offset, u64 ino, unsigned int d_type) | 192 | loff_t offset, u64 ino, unsigned int d_type) |
198 | { | 193 | { |
199 | struct dentry_list_arg *dla = arg; | 194 | struct list_head *names = arg; |
200 | struct list_head *dentries = &dla->dentries; | 195 | struct name_list *entry; |
201 | struct dentry *parent = dla->parent; | ||
202 | struct dentry *dentry; | ||
203 | struct dentry_list *child; | ||
204 | 196 | ||
205 | if (name && isdotent(name, namlen)) | 197 | if (namlen != HEXDIR_LEN - 1) |
206 | return 0; | 198 | return 0; |
207 | dentry = lookup_one_len(name, parent, namlen); | 199 | entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); |
208 | if (IS_ERR(dentry)) | 200 | if (entry == NULL) |
209 | return PTR_ERR(dentry); | ||
210 | child = kmalloc(sizeof(*child), GFP_KERNEL); | ||
211 | if (child == NULL) | ||
212 | return -ENOMEM; | 201 | return -ENOMEM; |
213 | child->dentry = dentry; | 202 | memcpy(entry->name, name, HEXDIR_LEN - 1); |
214 | list_add(&child->list, dentries); | 203 | entry->name[HEXDIR_LEN - 1] = '\0'; |
204 | list_add(&entry->list, names); | ||
215 | return 0; | 205 | return 0; |
216 | } | 206 | } |
217 | 207 | ||
@@ -220,11 +210,9 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
220 | { | 210 | { |
221 | const struct cred *original_cred; | 211 | const struct cred *original_cred; |
222 | struct file *filp; | 212 | struct file *filp; |
223 | struct dentry_list_arg dla = { | 213 | LIST_HEAD(names); |
224 | .parent = dir, | 214 | struct name_list *entry; |
225 | }; | 215 | struct dentry *dentry; |
226 | struct list_head *dentries = &dla.dentries; | ||
227 | struct dentry_list *child; | ||
228 | int status; | 216 | int status; |
229 | 217 | ||
230 | if (!rec_dir_init) | 218 | if (!rec_dir_init) |
@@ -233,31 +221,34 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
233 | status = nfs4_save_creds(&original_cred); | 221 | status = nfs4_save_creds(&original_cred); |
234 | if (status < 0) | 222 | if (status < 0) |
235 | return status; | 223 | return status; |
236 | INIT_LIST_HEAD(dentries); | ||
237 | 224 | ||
238 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, | 225 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, |
239 | current_cred()); | 226 | current_cred()); |
240 | status = PTR_ERR(filp); | 227 | status = PTR_ERR(filp); |
241 | if (IS_ERR(filp)) | 228 | if (IS_ERR(filp)) |
242 | goto out; | 229 | goto out; |
243 | INIT_LIST_HEAD(dentries); | 230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); |
244 | status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); | ||
245 | fput(filp); | 231 | fput(filp); |
246 | while (!list_empty(dentries)) { | 232 | while (!list_empty(&names)) { |
247 | child = list_entry(dentries->next, struct dentry_list, list); | 233 | entry = list_entry(names.next, struct name_list, list); |
248 | status = f(dir, child->dentry); | 234 | |
235 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | ||
236 | if (IS_ERR(dentry)) { | ||
237 | status = PTR_ERR(dentry); | ||
238 | goto out; | ||
239 | } | ||
240 | status = f(dir, dentry); | ||
241 | dput(dentry); | ||
249 | if (status) | 242 | if (status) |
250 | goto out; | 243 | goto out; |
251 | list_del(&child->list); | 244 | list_del(&entry->list); |
252 | dput(child->dentry); | 245 | kfree(entry); |
253 | kfree(child); | ||
254 | } | 246 | } |
255 | out: | 247 | out: |
256 | while (!list_empty(dentries)) { | 248 | while (!list_empty(&names)) { |
257 | child = list_entry(dentries->next, struct dentry_list, list); | 249 | entry = list_entry(names.next, struct name_list, list); |
258 | list_del(&child->list); | 250 | list_del(&entry->list); |
259 | dput(child->dentry); | 251 | kfree(entry); |
260 | kfree(child); | ||
261 | } | 252 | } |
262 | nfs4_reset_creds(original_cred); | 253 | nfs4_reset_creds(original_cred); |
263 | return status; | 254 | return status; |
@@ -353,7 +344,8 @@ purge_old(struct dentry *parent, struct dentry *child) | |||
353 | { | 344 | { |
354 | int status; | 345 | int status; |
355 | 346 | ||
356 | if (nfs4_has_reclaimed_state(child->d_name.name)) | 347 | /* note: we currently use this path only for minorversion 0 */ |
348 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) | ||
357 | return 0; | 349 | return 0; |
358 | 350 | ||
359 | status = nfsd4_clear_clid_dir(parent, child); | 351 | status = nfsd4_clear_clid_dir(parent, child); |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b6f60f48e94b..c65a27b76a9d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -68,6 +68,7 @@ static u32 current_delegid = 1; | |||
68 | static u32 nfs4_init; | 68 | static u32 nfs4_init; |
69 | static stateid_t zerostateid; /* bits all 0 */ | 69 | static stateid_t zerostateid; /* bits all 0 */ |
70 | static stateid_t onestateid; /* bits all 1 */ | 70 | static stateid_t onestateid; /* bits all 1 */ |
71 | static u64 current_sessionid = 1; | ||
71 | 72 | ||
72 | #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) | 73 | #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) |
73 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) | 74 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) |
@@ -75,18 +76,21 @@ static stateid_t onestateid; /* bits all 1 */ | |||
75 | /* forward declarations */ | 76 | /* forward declarations */ |
76 | static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); | 77 | static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); |
77 | static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); | 78 | static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); |
78 | static void release_stateid_lockowners(struct nfs4_stateid *open_stp); | ||
79 | static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; | 79 | static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; |
80 | static void nfs4_set_recdir(char *recdir); | 80 | static void nfs4_set_recdir(char *recdir); |
81 | 81 | ||
82 | /* Locking: | 82 | /* Locking: */ |
83 | * | 83 | |
84 | * client_mutex: | 84 | /* Currently used for almost all code touching nfsv4 state: */ |
85 | * protects clientid_hashtbl[], clientstr_hashtbl[], | ||
86 | * unconfstr_hashtbl[], uncofid_hashtbl[]. | ||
87 | */ | ||
88 | static DEFINE_MUTEX(client_mutex); | 85 | static DEFINE_MUTEX(client_mutex); |
89 | 86 | ||
87 | /* | ||
88 | * Currently used for the del_recall_lru and file hash table. In an | ||
89 | * effort to decrease the scope of the client_mutex, this spinlock may | ||
90 | * eventually cover more: | ||
91 | */ | ||
92 | static DEFINE_SPINLOCK(recall_lock); | ||
93 | |||
90 | static struct kmem_cache *stateowner_slab = NULL; | 94 | static struct kmem_cache *stateowner_slab = NULL; |
91 | static struct kmem_cache *file_slab = NULL; | 95 | static struct kmem_cache *file_slab = NULL; |
92 | static struct kmem_cache *stateid_slab = NULL; | 96 | static struct kmem_cache *stateid_slab = NULL; |
@@ -117,37 +121,23 @@ opaque_hashval(const void *ptr, int nbytes) | |||
117 | return x; | 121 | return x; |
118 | } | 122 | } |
119 | 123 | ||
120 | /* forward declarations */ | ||
121 | static void release_stateowner(struct nfs4_stateowner *sop); | ||
122 | static void release_stateid(struct nfs4_stateid *stp, int flags); | ||
123 | |||
124 | /* | ||
125 | * Delegation state | ||
126 | */ | ||
127 | |||
128 | /* recall_lock protects the del_recall_lru */ | ||
129 | static DEFINE_SPINLOCK(recall_lock); | ||
130 | static struct list_head del_recall_lru; | 124 | static struct list_head del_recall_lru; |
131 | 125 | ||
132 | static void | ||
133 | free_nfs4_file(struct kref *kref) | ||
134 | { | ||
135 | struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref); | ||
136 | list_del(&fp->fi_hash); | ||
137 | iput(fp->fi_inode); | ||
138 | kmem_cache_free(file_slab, fp); | ||
139 | } | ||
140 | |||
141 | static inline void | 126 | static inline void |
142 | put_nfs4_file(struct nfs4_file *fi) | 127 | put_nfs4_file(struct nfs4_file *fi) |
143 | { | 128 | { |
144 | kref_put(&fi->fi_ref, free_nfs4_file); | 129 | if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { |
130 | list_del(&fi->fi_hash); | ||
131 | spin_unlock(&recall_lock); | ||
132 | iput(fi->fi_inode); | ||
133 | kmem_cache_free(file_slab, fi); | ||
134 | } | ||
145 | } | 135 | } |
146 | 136 | ||
147 | static inline void | 137 | static inline void |
148 | get_nfs4_file(struct nfs4_file *fi) | 138 | get_nfs4_file(struct nfs4_file *fi) |
149 | { | 139 | { |
150 | kref_get(&fi->fi_ref); | 140 | atomic_inc(&fi->fi_ref); |
151 | } | 141 | } |
152 | 142 | ||
153 | static int num_delegations; | 143 | static int num_delegations; |
@@ -220,9 +210,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
220 | dp->dl_stateid.si_stateownerid = current_delegid++; | 210 | dp->dl_stateid.si_stateownerid = current_delegid++; |
221 | dp->dl_stateid.si_fileid = 0; | 211 | dp->dl_stateid.si_fileid = 0; |
222 | dp->dl_stateid.si_generation = 0; | 212 | dp->dl_stateid.si_generation = 0; |
223 | dp->dl_fhlen = current_fh->fh_handle.fh_size; | 213 | fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); |
224 | memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base, | ||
225 | current_fh->fh_handle.fh_size); | ||
226 | dp->dl_time = 0; | 214 | dp->dl_time = 0; |
227 | atomic_set(&dp->dl_count, 1); | 215 | atomic_set(&dp->dl_count, 1); |
228 | list_add(&dp->dl_perfile, &fp->fi_delegations); | 216 | list_add(&dp->dl_perfile, &fp->fi_delegations); |
@@ -311,6 +299,291 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; | |||
311 | static struct list_head client_lru; | 299 | static struct list_head client_lru; |
312 | static struct list_head close_lru; | 300 | static struct list_head close_lru; |
313 | 301 | ||
302 | static void unhash_generic_stateid(struct nfs4_stateid *stp) | ||
303 | { | ||
304 | list_del(&stp->st_hash); | ||
305 | list_del(&stp->st_perfile); | ||
306 | list_del(&stp->st_perstateowner); | ||
307 | } | ||
308 | |||
309 | static void free_generic_stateid(struct nfs4_stateid *stp) | ||
310 | { | ||
311 | put_nfs4_file(stp->st_file); | ||
312 | kmem_cache_free(stateid_slab, stp); | ||
313 | } | ||
314 | |||
315 | static void release_lock_stateid(struct nfs4_stateid *stp) | ||
316 | { | ||
317 | unhash_generic_stateid(stp); | ||
318 | locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner); | ||
319 | free_generic_stateid(stp); | ||
320 | } | ||
321 | |||
322 | static void unhash_lockowner(struct nfs4_stateowner *sop) | ||
323 | { | ||
324 | struct nfs4_stateid *stp; | ||
325 | |||
326 | list_del(&sop->so_idhash); | ||
327 | list_del(&sop->so_strhash); | ||
328 | list_del(&sop->so_perstateid); | ||
329 | while (!list_empty(&sop->so_stateids)) { | ||
330 | stp = list_first_entry(&sop->so_stateids, | ||
331 | struct nfs4_stateid, st_perstateowner); | ||
332 | release_lock_stateid(stp); | ||
333 | } | ||
334 | } | ||
335 | |||
336 | static void release_lockowner(struct nfs4_stateowner *sop) | ||
337 | { | ||
338 | unhash_lockowner(sop); | ||
339 | nfs4_put_stateowner(sop); | ||
340 | } | ||
341 | |||
342 | static void | ||
343 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | ||
344 | { | ||
345 | struct nfs4_stateowner *lock_sop; | ||
346 | |||
347 | while (!list_empty(&open_stp->st_lockowners)) { | ||
348 | lock_sop = list_entry(open_stp->st_lockowners.next, | ||
349 | struct nfs4_stateowner, so_perstateid); | ||
350 | /* list_del(&open_stp->st_lockowners); */ | ||
351 | BUG_ON(lock_sop->so_is_open_owner); | ||
352 | release_lockowner(lock_sop); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | static void release_open_stateid(struct nfs4_stateid *stp) | ||
357 | { | ||
358 | unhash_generic_stateid(stp); | ||
359 | release_stateid_lockowners(stp); | ||
360 | nfsd_close(stp->st_vfs_file); | ||
361 | free_generic_stateid(stp); | ||
362 | } | ||
363 | |||
364 | static void unhash_openowner(struct nfs4_stateowner *sop) | ||
365 | { | ||
366 | struct nfs4_stateid *stp; | ||
367 | |||
368 | list_del(&sop->so_idhash); | ||
369 | list_del(&sop->so_strhash); | ||
370 | list_del(&sop->so_perclient); | ||
371 | list_del(&sop->so_perstateid); /* XXX: necessary? */ | ||
372 | while (!list_empty(&sop->so_stateids)) { | ||
373 | stp = list_first_entry(&sop->so_stateids, | ||
374 | struct nfs4_stateid, st_perstateowner); | ||
375 | release_open_stateid(stp); | ||
376 | } | ||
377 | } | ||
378 | |||
379 | static void release_openowner(struct nfs4_stateowner *sop) | ||
380 | { | ||
381 | unhash_openowner(sop); | ||
382 | list_del(&sop->so_close_lru); | ||
383 | nfs4_put_stateowner(sop); | ||
384 | } | ||
385 | |||
386 | static DEFINE_SPINLOCK(sessionid_lock); | ||
387 | #define SESSION_HASH_SIZE 512 | ||
388 | static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; | ||
389 | |||
390 | static inline int | ||
391 | hash_sessionid(struct nfs4_sessionid *sessionid) | ||
392 | { | ||
393 | struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid; | ||
394 | |||
395 | return sid->sequence % SESSION_HASH_SIZE; | ||
396 | } | ||
397 | |||
398 | static inline void | ||
399 | dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) | ||
400 | { | ||
401 | u32 *ptr = (u32 *)(&sessionid->data[0]); | ||
402 | dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); | ||
403 | } | ||
404 | |||
405 | static void | ||
406 | gen_sessionid(struct nfsd4_session *ses) | ||
407 | { | ||
408 | struct nfs4_client *clp = ses->se_client; | ||
409 | struct nfsd4_sessionid *sid; | ||
410 | |||
411 | sid = (struct nfsd4_sessionid *)ses->se_sessionid.data; | ||
412 | sid->clientid = clp->cl_clientid; | ||
413 | sid->sequence = current_sessionid++; | ||
414 | sid->reserved = 0; | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Give the client the number of slots it requests bound by | ||
419 | * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. | ||
420 | * | ||
421 | * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we | ||
422 | * should (up to a point) re-negotiate active sessions and reduce their | ||
423 | * slot usage to make room for new connections. For now we just fail the | ||
424 | * create session. | ||
425 | */ | ||
426 | static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) | ||
427 | { | ||
428 | int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; | ||
429 | |||
430 | spin_lock(&nfsd_serv->sv_lock); | ||
431 | if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) | ||
432 | np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; | ||
433 | nfsd_serv->sv_drc_pages_used += np; | ||
434 | spin_unlock(&nfsd_serv->sv_lock); | ||
435 | |||
436 | if (np <= 0) { | ||
437 | status = nfserr_resource; | ||
438 | fchan->maxreqs = 0; | ||
439 | } else | ||
440 | fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; | ||
441 | |||
442 | return status; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * fchan holds the client values on input, and the server values on output | ||
447 | */ | ||
448 | static int init_forechannel_attrs(struct svc_rqst *rqstp, | ||
449 | struct nfsd4_session *session, | ||
450 | struct nfsd4_channel_attrs *fchan) | ||
451 | { | ||
452 | int status = 0; | ||
453 | __u32 maxcount = svc_max_payload(rqstp); | ||
454 | |||
455 | /* headerpadsz set to zero in encode routine */ | ||
456 | |||
457 | /* Use the client's max request and max response size if possible */ | ||
458 | if (fchan->maxreq_sz > maxcount) | ||
459 | fchan->maxreq_sz = maxcount; | ||
460 | session->se_fmaxreq_sz = fchan->maxreq_sz; | ||
461 | |||
462 | if (fchan->maxresp_sz > maxcount) | ||
463 | fchan->maxresp_sz = maxcount; | ||
464 | session->se_fmaxresp_sz = fchan->maxresp_sz; | ||
465 | |||
466 | /* Set the max response cached size to our default, which is | ||
467 | * small and a multiple of PAGE_SIZE */ | ||
468 | session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; | ||
469 | fchan->maxresp_cached = session->se_fmaxresp_cached; | ||
470 | |||
471 | /* Use the client's maxops if possible */ | ||
472 | if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) | ||
473 | fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; | ||
474 | session->se_fmaxops = fchan->maxops; | ||
475 | |||
476 | /* try to use the client requested number of slots */ | ||
477 | if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) | ||
478 | fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; | ||
479 | |||
480 | /* FIXME: Error means no more DRC pages so the server should | ||
481 | * recover pages from existing sessions. For now fail session | ||
482 | * creation. | ||
483 | */ | ||
484 | status = set_forechannel_maxreqs(fchan); | ||
485 | |||
486 | session->se_fnumslots = fchan->maxreqs; | ||
487 | return status; | ||
488 | } | ||
489 | |||
490 | static int | ||
491 | alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | ||
492 | struct nfsd4_create_session *cses) | ||
493 | { | ||
494 | struct nfsd4_session *new, tmp; | ||
495 | int idx, status = nfserr_resource, slotsize; | ||
496 | |||
497 | memset(&tmp, 0, sizeof(tmp)); | ||
498 | |||
499 | /* FIXME: For now, we just accept the client back channel attributes. */ | ||
500 | status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel); | ||
501 | if (status) | ||
502 | goto out; | ||
503 | |||
504 | /* allocate struct nfsd4_session and slot table in one piece */ | ||
505 | slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot); | ||
506 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); | ||
507 | if (!new) | ||
508 | goto out; | ||
509 | |||
510 | memcpy(new, &tmp, sizeof(*new)); | ||
511 | |||
512 | new->se_client = clp; | ||
513 | gen_sessionid(new); | ||
514 | idx = hash_sessionid(&new->se_sessionid); | ||
515 | memcpy(clp->cl_sessionid.data, new->se_sessionid.data, | ||
516 | NFS4_MAX_SESSIONID_LEN); | ||
517 | |||
518 | new->se_flags = cses->flags; | ||
519 | kref_init(&new->se_ref); | ||
520 | spin_lock(&sessionid_lock); | ||
521 | list_add(&new->se_hash, &sessionid_hashtbl[idx]); | ||
522 | list_add(&new->se_perclnt, &clp->cl_sessions); | ||
523 | spin_unlock(&sessionid_lock); | ||
524 | |||
525 | status = nfs_ok; | ||
526 | out: | ||
527 | return status; | ||
528 | } | ||
529 | |||
530 | /* caller must hold sessionid_lock */ | ||
531 | static struct nfsd4_session * | ||
532 | find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) | ||
533 | { | ||
534 | struct nfsd4_session *elem; | ||
535 | int idx; | ||
536 | |||
537 | dump_sessionid(__func__, sessionid); | ||
538 | idx = hash_sessionid(sessionid); | ||
539 | dprintk("%s: idx is %d\n", __func__, idx); | ||
540 | /* Search in the appropriate list */ | ||
541 | list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { | ||
542 | dump_sessionid("list traversal", &elem->se_sessionid); | ||
543 | if (!memcmp(elem->se_sessionid.data, sessionid->data, | ||
544 | NFS4_MAX_SESSIONID_LEN)) { | ||
545 | return elem; | ||
546 | } | ||
547 | } | ||
548 | |||
549 | dprintk("%s: session not found\n", __func__); | ||
550 | return NULL; | ||
551 | } | ||
552 | |||
553 | /* caller must hold sessionid_lock */ | ||
554 | static void | ||
555 | unhash_session(struct nfsd4_session *ses) | ||
556 | { | ||
557 | list_del(&ses->se_hash); | ||
558 | list_del(&ses->se_perclnt); | ||
559 | } | ||
560 | |||
561 | static void | ||
562 | release_session(struct nfsd4_session *ses) | ||
563 | { | ||
564 | spin_lock(&sessionid_lock); | ||
565 | unhash_session(ses); | ||
566 | spin_unlock(&sessionid_lock); | ||
567 | nfsd4_put_session(ses); | ||
568 | } | ||
569 | |||
570 | static void nfsd4_release_respages(struct page **respages, short resused); | ||
571 | |||
572 | void | ||
573 | free_session(struct kref *kref) | ||
574 | { | ||
575 | struct nfsd4_session *ses; | ||
576 | int i; | ||
577 | |||
578 | ses = container_of(kref, struct nfsd4_session, se_ref); | ||
579 | for (i = 0; i < ses->se_fnumslots; i++) { | ||
580 | struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; | ||
581 | nfsd4_release_respages(e->ce_respages, e->ce_resused); | ||
582 | } | ||
583 | kfree(ses->se_slots); | ||
584 | kfree(ses); | ||
585 | } | ||
586 | |||
314 | static inline void | 587 | static inline void |
315 | renew_client(struct nfs4_client *clp) | 588 | renew_client(struct nfs4_client *clp) |
316 | { | 589 | { |
@@ -330,8 +603,8 @@ STALE_CLIENTID(clientid_t *clid) | |||
330 | { | 603 | { |
331 | if (clid->cl_boot == boot_time) | 604 | if (clid->cl_boot == boot_time) |
332 | return 0; | 605 | return 0; |
333 | dprintk("NFSD stale clientid (%08x/%08x)\n", | 606 | dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", |
334 | clid->cl_boot, clid->cl_id); | 607 | clid->cl_boot, clid->cl_id, boot_time); |
335 | return 1; | 608 | return 1; |
336 | } | 609 | } |
337 | 610 | ||
@@ -376,6 +649,8 @@ static inline void | |||
376 | free_client(struct nfs4_client *clp) | 649 | free_client(struct nfs4_client *clp) |
377 | { | 650 | { |
378 | shutdown_callback_client(clp); | 651 | shutdown_callback_client(clp); |
652 | nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, | ||
653 | clp->cl_slot.sl_cache_entry.ce_resused); | ||
379 | if (clp->cl_cred.cr_group_info) | 654 | if (clp->cl_cred.cr_group_info) |
380 | put_group_info(clp->cl_cred.cr_group_info); | 655 | put_group_info(clp->cl_cred.cr_group_info); |
381 | kfree(clp->cl_principal); | 656 | kfree(clp->cl_principal); |
@@ -420,7 +695,13 @@ expire_client(struct nfs4_client *clp) | |||
420 | list_del(&clp->cl_lru); | 695 | list_del(&clp->cl_lru); |
421 | while (!list_empty(&clp->cl_openowners)) { | 696 | while (!list_empty(&clp->cl_openowners)) { |
422 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); | 697 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); |
423 | release_stateowner(sop); | 698 | release_openowner(sop); |
699 | } | ||
700 | while (!list_empty(&clp->cl_sessions)) { | ||
701 | struct nfsd4_session *ses; | ||
702 | ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, | ||
703 | se_perclnt); | ||
704 | release_session(ses); | ||
424 | } | 705 | } |
425 | put_nfs4_client(clp); | 706 | put_nfs4_client(clp); |
426 | } | 707 | } |
@@ -439,6 +720,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) | |||
439 | INIT_LIST_HEAD(&clp->cl_strhash); | 720 | INIT_LIST_HEAD(&clp->cl_strhash); |
440 | INIT_LIST_HEAD(&clp->cl_openowners); | 721 | INIT_LIST_HEAD(&clp->cl_openowners); |
441 | INIT_LIST_HEAD(&clp->cl_delegations); | 722 | INIT_LIST_HEAD(&clp->cl_delegations); |
723 | INIT_LIST_HEAD(&clp->cl_sessions); | ||
442 | INIT_LIST_HEAD(&clp->cl_lru); | 724 | INIT_LIST_HEAD(&clp->cl_lru); |
443 | return clp; | 725 | return clp; |
444 | } | 726 | } |
@@ -568,25 +850,45 @@ find_unconfirmed_client(clientid_t *clid) | |||
568 | return NULL; | 850 | return NULL; |
569 | } | 851 | } |
570 | 852 | ||
853 | /* | ||
854 | * Return 1 iff clp's clientid establishment method matches the use_exchange_id | ||
855 | * parameter. Matching is based on the fact that at least one of the | ||
856 | * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1 | ||
857 | * | ||
858 | * FIXME: we need to unify the clientid namespaces for nfsv4.x | ||
859 | * and correctly deal with client upgrade/downgrade in EXCHANGE_ID | ||
860 | * and SET_CLIENTID{,_CONFIRM} | ||
861 | */ | ||
862 | static inline int | ||
863 | match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id) | ||
864 | { | ||
865 | bool has_exchange_flags = (clp->cl_exchange_flags != 0); | ||
866 | return use_exchange_id == has_exchange_flags; | ||
867 | } | ||
868 | |||
571 | static struct nfs4_client * | 869 | static struct nfs4_client * |
572 | find_confirmed_client_by_str(const char *dname, unsigned int hashval) | 870 | find_confirmed_client_by_str(const char *dname, unsigned int hashval, |
871 | bool use_exchange_id) | ||
573 | { | 872 | { |
574 | struct nfs4_client *clp; | 873 | struct nfs4_client *clp; |
575 | 874 | ||
576 | list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { | 875 | list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { |
577 | if (same_name(clp->cl_recdir, dname)) | 876 | if (same_name(clp->cl_recdir, dname) && |
877 | match_clientid_establishment(clp, use_exchange_id)) | ||
578 | return clp; | 878 | return clp; |
579 | } | 879 | } |
580 | return NULL; | 880 | return NULL; |
581 | } | 881 | } |
582 | 882 | ||
583 | static struct nfs4_client * | 883 | static struct nfs4_client * |
584 | find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) | 884 | find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, |
885 | bool use_exchange_id) | ||
585 | { | 886 | { |
586 | struct nfs4_client *clp; | 887 | struct nfs4_client *clp; |
587 | 888 | ||
588 | list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { | 889 | list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { |
589 | if (same_name(clp->cl_recdir, dname)) | 890 | if (same_name(clp->cl_recdir, dname) && |
891 | match_clientid_establishment(clp, use_exchange_id)) | ||
590 | return clp; | 892 | return clp; |
591 | } | 893 | } |
592 | return NULL; | 894 | return NULL; |
@@ -685,6 +987,534 @@ out_err: | |||
685 | return; | 987 | return; |
686 | } | 988 | } |
687 | 989 | ||
990 | void | ||
991 | nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) | ||
992 | { | ||
993 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
994 | |||
995 | resp->cstate.statp = statp; | ||
996 | } | ||
997 | |||
998 | /* | ||
999 | * Dereference the result pages. | ||
1000 | */ | ||
1001 | static void | ||
1002 | nfsd4_release_respages(struct page **respages, short resused) | ||
1003 | { | ||
1004 | int i; | ||
1005 | |||
1006 | dprintk("--> %s\n", __func__); | ||
1007 | for (i = 0; i < resused; i++) { | ||
1008 | if (!respages[i]) | ||
1009 | continue; | ||
1010 | put_page(respages[i]); | ||
1011 | respages[i] = NULL; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
1015 | static void | ||
1016 | nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) | ||
1017 | { | ||
1018 | int i; | ||
1019 | |||
1020 | for (i = 0; i < count; i++) { | ||
1021 | topages[i] = frompages[i]; | ||
1022 | if (!topages[i]) | ||
1023 | continue; | ||
1024 | get_page(topages[i]); | ||
1025 | } | ||
1026 | } | ||
1027 | |||
1028 | /* | ||
1029 | * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous | ||
1030 | * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total | ||
1031 | * length of the XDR response is less than se_fmaxresp_cached | ||
1032 | * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages are used for a | ||
1033 | * part of the reply (e.g. readdir). | ||
1034 | * | ||
1035 | * Store the base and length of the rq_res.head[0] page | ||
1036 | * of the NFSv4.1 data, just past the rpc header. | ||
1037 | */ | ||
1038 | void | ||
1039 | nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) | ||
1040 | { | ||
1041 | struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; | ||
1042 | struct svc_rqst *rqstp = resp->rqstp; | ||
1043 | struct nfsd4_compoundargs *args = rqstp->rq_argp; | ||
1044 | struct nfsd4_op *op = &args->ops[resp->opcnt]; | ||
1045 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
1046 | |||
1047 | dprintk("--> %s entry %p\n", __func__, entry); | ||
1048 | |||
1049 | /* Don't cache a failed OP_SEQUENCE. */ | ||
1050 | if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) | ||
1051 | return; | ||
1052 | |||
1053 | nfsd4_release_respages(entry->ce_respages, entry->ce_resused); | ||
1054 | entry->ce_opcnt = resp->opcnt; | ||
1055 | entry->ce_status = resp->cstate.status; | ||
1056 | |||
1057 | /* | ||
1058 | * Don't need a page to cache just the sequence operation - the slot | ||
1059 | * does this for us! | ||
1060 | */ | ||
1061 | |||
1062 | if (nfsd4_not_cached(resp)) { | ||
1063 | entry->ce_resused = 0; | ||
1064 | entry->ce_rpchdrlen = 0; | ||
1065 | dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, | ||
1066 | resp->cstate.slot->sl_cache_entry.ce_cachethis); | ||
1067 | return; | ||
1068 | } | ||
1069 | entry->ce_resused = rqstp->rq_resused; | ||
1070 | if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) | ||
1071 | entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; | ||
1072 | nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, | ||
1073 | entry->ce_resused); | ||
1074 | entry->ce_datav.iov_base = resp->cstate.statp; | ||
1075 | entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - | ||
1076 | (char *)page_address(rqstp->rq_respages[0])); | ||
1077 | /* Current request rpc header length*/ | ||
1078 | entry->ce_rpchdrlen = (char *)resp->cstate.statp - | ||
1079 | (char *)page_address(rqstp->rq_respages[0]); | ||
1080 | } | ||
1081 | |||
1082 | /* | ||
1083 | * We keep the rpc header, but take the nfs reply from the replycache. | ||
1084 | */ | ||
1085 | static int | ||
1086 | nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, | ||
1087 | struct nfsd4_cache_entry *entry) | ||
1088 | { | ||
1089 | struct svc_rqst *rqstp = resp->rqstp; | ||
1090 | struct kvec *resv = &resp->rqstp->rq_res.head[0]; | ||
1091 | int len; | ||
1092 | |||
1093 | /* Current request rpc header length*/ | ||
1094 | len = (char *)resp->cstate.statp - | ||
1095 | (char *)page_address(rqstp->rq_respages[0]); | ||
1096 | if (entry->ce_datav.iov_len + len > PAGE_SIZE) { | ||
1097 | dprintk("%s v41 cached reply too large (%Zd).\n", __func__, | ||
1098 | entry->ce_datav.iov_len); | ||
1099 | return 0; | ||
1100 | } | ||
1101 | /* copy the cached reply nfsd data past the current rpc header */ | ||
1102 | memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, | ||
1103 | entry->ce_datav.iov_len); | ||
1104 | resv->iov_len = len + entry->ce_datav.iov_len; | ||
1105 | return 1; | ||
1106 | } | ||
1107 | |||
1108 | /* | ||
1109 | * Keep the first page of the replay. Copy the NFSv4.1 data from the first | ||
1110 | * cached page. Replace any further replay pages from the cache. | ||
1111 | */ | ||
1112 | __be32 | ||
1113 | nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, | ||
1114 | struct nfsd4_sequence *seq) | ||
1115 | { | ||
1116 | struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; | ||
1117 | __be32 status; | ||
1118 | |||
1119 | dprintk("--> %s entry %p\n", __func__, entry); | ||
1120 | |||
1121 | /* | ||
1122 | * If this is just the sequence operation, we did not keep | ||
1123 | * a page in the cache entry because we can just use the | ||
1124 | * slot info stored in struct nfsd4_sequence that was checked | ||
1125 | * against the slot in nfsd4_sequence(). | ||
1126 | * | ||
1127 | * This occurs when seq->cachethis is FALSE, or when the client | ||
1128 | * session inactivity timer fires and a solo sequence operation | ||
1129 | * is sent (lease renewal). | ||
1130 | */ | ||
1131 | if (seq && nfsd4_not_cached(resp)) { | ||
1132 | seq->maxslots = resp->cstate.session->se_fnumslots; | ||
1133 | return nfs_ok; | ||
1134 | } | ||
1135 | |||
1136 | if (!nfsd41_copy_replay_data(resp, entry)) { | ||
1137 | /* | ||
1138 | * Not enough room to use the replay rpc header, send the | ||
1139 | * cached header. Release all the allocated result pages. | ||
1140 | */ | ||
1141 | svc_free_res_pages(resp->rqstp); | ||
1142 | nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, | ||
1143 | entry->ce_resused); | ||
1144 | } else { | ||
1145 | /* Release all but the first allocated result page */ | ||
1146 | |||
1147 | resp->rqstp->rq_resused--; | ||
1148 | svc_free_res_pages(resp->rqstp); | ||
1149 | |||
1150 | nfsd4_copy_pages(&resp->rqstp->rq_respages[1], | ||
1151 | &entry->ce_respages[1], | ||
1152 | entry->ce_resused - 1); | ||
1153 | } | ||
1154 | |||
1155 | resp->rqstp->rq_resused = entry->ce_resused; | ||
1156 | resp->opcnt = entry->ce_opcnt; | ||
1157 | resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; | ||
1158 | status = entry->ce_status; | ||
1159 | |||
1160 | return status; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * Set the exchange_id flags returned by the server. | ||
1165 | */ | ||
1166 | static void | ||
1167 | nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) | ||
1168 | { | ||
1169 | /* pNFS is not supported */ | ||
1170 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; | ||
1171 | |||
1172 | /* Referrals are supported, Migration is not. */ | ||
1173 | new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; | ||
1174 | |||
1175 | /* set the wire flags to return to client. */ | ||
1176 | clid->flags = new->cl_exchange_flags; | ||
1177 | } | ||
1178 | |||
1179 | __be32 | ||
1180 | nfsd4_exchange_id(struct svc_rqst *rqstp, | ||
1181 | struct nfsd4_compound_state *cstate, | ||
1182 | struct nfsd4_exchange_id *exid) | ||
1183 | { | ||
1184 | struct nfs4_client *unconf, *conf, *new; | ||
1185 | int status; | ||
1186 | unsigned int strhashval; | ||
1187 | char dname[HEXDIR_LEN]; | ||
1188 | nfs4_verifier verf = exid->verifier; | ||
1189 | u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; | ||
1190 | |||
1191 | dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " | ||
1192 | " ip_addr=%u flags %x, spa_how %d\n", | ||
1193 | __func__, rqstp, exid, exid->clname.len, exid->clname.data, | ||
1194 | ip_addr, exid->flags, exid->spa_how); | ||
1195 | |||
1196 | if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) | ||
1197 | return nfserr_inval; | ||
1198 | |||
1199 | /* Currently only support SP4_NONE */ | ||
1200 | switch (exid->spa_how) { | ||
1201 | case SP4_NONE: | ||
1202 | break; | ||
1203 | case SP4_SSV: | ||
1204 | return nfserr_encr_alg_unsupp; | ||
1205 | default: | ||
1206 | BUG(); /* checked by xdr code */ | ||
1207 | case SP4_MACH_CRED: | ||
1208 | return nfserr_serverfault; /* no excuse :-/ */ | ||
1209 | } | ||
1210 | |||
1211 | status = nfs4_make_rec_clidname(dname, &exid->clname); | ||
1212 | |||
1213 | if (status) | ||
1214 | goto error; | ||
1215 | |||
1216 | strhashval = clientstr_hashval(dname); | ||
1217 | |||
1218 | nfs4_lock_state(); | ||
1219 | status = nfs_ok; | ||
1220 | |||
1221 | conf = find_confirmed_client_by_str(dname, strhashval, true); | ||
1222 | if (conf) { | ||
1223 | if (!same_verf(&verf, &conf->cl_verifier)) { | ||
1224 | /* 18.35.4 case 8 */ | ||
1225 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1226 | status = nfserr_not_same; | ||
1227 | goto out; | ||
1228 | } | ||
1229 | /* Client reboot: destroy old state */ | ||
1230 | expire_client(conf); | ||
1231 | goto out_new; | ||
1232 | } | ||
1233 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { | ||
1234 | /* 18.35.4 case 9 */ | ||
1235 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1236 | status = nfserr_perm; | ||
1237 | goto out; | ||
1238 | } | ||
1239 | expire_client(conf); | ||
1240 | goto out_new; | ||
1241 | } | ||
1242 | if (ip_addr != conf->cl_addr && | ||
1243 | !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) { | ||
1244 | /* Client collision. 18.35.4 case 3 */ | ||
1245 | status = nfserr_clid_inuse; | ||
1246 | goto out; | ||
1247 | } | ||
1248 | /* | ||
1249 | * Set bit when the owner id and verifier map to an already | ||
1250 | * confirmed client id (18.35.3). | ||
1251 | */ | ||
1252 | exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; | ||
1253 | |||
1254 | /* | ||
1255 | * Falling into 18.35.4 case 2, possible router replay. | ||
1256 | * Leave confirmed record intact and return same result. | ||
1257 | */ | ||
1258 | copy_verf(conf, &verf); | ||
1259 | new = conf; | ||
1260 | goto out_copy; | ||
1261 | } else { | ||
1262 | /* 18.35.4 case 7 */ | ||
1263 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1264 | status = nfserr_noent; | ||
1265 | goto out; | ||
1266 | } | ||
1267 | } | ||
1268 | |||
1269 | unconf = find_unconfirmed_client_by_str(dname, strhashval, true); | ||
1270 | if (unconf) { | ||
1271 | /* | ||
1272 | * Possible retry or client restart. Per 18.35.4 case 4, | ||
1273 | * a new unconfirmed record should be generated regardless | ||
1274 | * of whether any properties have changed. | ||
1275 | */ | ||
1276 | expire_client(unconf); | ||
1277 | } | ||
1278 | |||
1279 | out_new: | ||
1280 | /* Normal case */ | ||
1281 | new = create_client(exid->clname, dname); | ||
1282 | if (new == NULL) { | ||
1283 | status = nfserr_resource; | ||
1284 | goto out; | ||
1285 | } | ||
1286 | |||
1287 | copy_verf(new, &verf); | ||
1288 | copy_cred(&new->cl_cred, &rqstp->rq_cred); | ||
1289 | new->cl_addr = ip_addr; | ||
1290 | gen_clid(new); | ||
1291 | gen_confirm(new); | ||
1292 | add_to_unconfirmed(new, strhashval); | ||
1293 | out_copy: | ||
1294 | exid->clientid.cl_boot = new->cl_clientid.cl_boot; | ||
1295 | exid->clientid.cl_id = new->cl_clientid.cl_id; | ||
1296 | |||
1297 | new->cl_slot.sl_seqid = 0; | ||
1298 | exid->seqid = 1; | ||
1299 | nfsd4_set_ex_flags(new, exid); | ||
1300 | |||
1301 | dprintk("nfsd4_exchange_id seqid %d flags %x\n", | ||
1302 | new->cl_slot.sl_seqid, new->cl_exchange_flags); | ||
1303 | status = nfs_ok; | ||
1304 | |||
1305 | out: | ||
1306 | nfs4_unlock_state(); | ||
1307 | error: | ||
1308 | dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); | ||
1309 | return status; | ||
1310 | } | ||
1311 | |||
1312 | static int | ||
1313 | check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) | ||
1314 | { | ||
1315 | dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, | ||
1316 | slot->sl_seqid); | ||
1317 | |||
1318 | /* The slot is in use, and no response has been sent. */ | ||
1319 | if (slot->sl_inuse) { | ||
1320 | if (seqid == slot->sl_seqid) | ||
1321 | return nfserr_jukebox; | ||
1322 | else | ||
1323 | return nfserr_seq_misordered; | ||
1324 | } | ||
1325 | /* Normal */ | ||
1326 | if (likely(seqid == slot->sl_seqid + 1)) | ||
1327 | return nfs_ok; | ||
1328 | /* Replay */ | ||
1329 | if (seqid == slot->sl_seqid) | ||
1330 | return nfserr_replay_cache; | ||
1331 | /* Wraparound */ | ||
1332 | if (seqid == 1 && (slot->sl_seqid + 1) == 0) | ||
1333 | return nfs_ok; | ||
1334 | /* Misordered replay or misordered new request */ | ||
1335 | return nfserr_seq_misordered; | ||
1336 | } | ||
1337 | |||
1338 | __be32 | ||
1339 | nfsd4_create_session(struct svc_rqst *rqstp, | ||
1340 | struct nfsd4_compound_state *cstate, | ||
1341 | struct nfsd4_create_session *cr_ses) | ||
1342 | { | ||
1343 | u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; | ||
1344 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1345 | struct nfs4_client *conf, *unconf; | ||
1346 | struct nfsd4_slot *slot = NULL; | ||
1347 | int status = 0; | ||
1348 | |||
1349 | nfs4_lock_state(); | ||
1350 | unconf = find_unconfirmed_client(&cr_ses->clientid); | ||
1351 | conf = find_confirmed_client(&cr_ses->clientid); | ||
1352 | |||
1353 | if (conf) { | ||
1354 | slot = &conf->cl_slot; | ||
1355 | status = check_slot_seqid(cr_ses->seqid, slot); | ||
1356 | if (status == nfserr_replay_cache) { | ||
1357 | dprintk("Got a create_session replay! seqid= %d\n", | ||
1358 | slot->sl_seqid); | ||
1359 | cstate->slot = slot; | ||
1360 | cstate->status = status; | ||
1361 | /* Return the cached reply status */ | ||
1362 | status = nfsd4_replay_cache_entry(resp, NULL); | ||
1363 | goto out; | ||
1364 | } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { | ||
1365 | status = nfserr_seq_misordered; | ||
1366 | dprintk("Sequence misordered!\n"); | ||
1367 | dprintk("Expected seqid= %d but got seqid= %d\n", | ||
1368 | slot->sl_seqid, cr_ses->seqid); | ||
1369 | goto out; | ||
1370 | } | ||
1371 | conf->cl_slot.sl_seqid++; | ||
1372 | } else if (unconf) { | ||
1373 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || | ||
1374 | (ip_addr != unconf->cl_addr)) { | ||
1375 | status = nfserr_clid_inuse; | ||
1376 | goto out; | ||
1377 | } | ||
1378 | |||
1379 | slot = &unconf->cl_slot; | ||
1380 | status = check_slot_seqid(cr_ses->seqid, slot); | ||
1381 | if (status) { | ||
1382 | /* an unconfirmed replay returns misordered */ | ||
1383 | status = nfserr_seq_misordered; | ||
1384 | goto out; | ||
1385 | } | ||
1386 | |||
1387 | slot->sl_seqid++; /* from 0 to 1 */ | ||
1388 | move_to_confirmed(unconf); | ||
1389 | |||
1390 | /* | ||
1391 | * We do not support RDMA or persistent sessions | ||
1392 | */ | ||
1393 | cr_ses->flags &= ~SESSION4_PERSIST; | ||
1394 | cr_ses->flags &= ~SESSION4_RDMA; | ||
1395 | |||
1396 | conf = unconf; | ||
1397 | } else { | ||
1398 | status = nfserr_stale_clientid; | ||
1399 | goto out; | ||
1400 | } | ||
1401 | |||
1402 | status = alloc_init_session(rqstp, conf, cr_ses); | ||
1403 | if (status) | ||
1404 | goto out; | ||
1405 | |||
1406 | memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, | ||
1407 | NFS4_MAX_SESSIONID_LEN); | ||
1408 | cr_ses->seqid = slot->sl_seqid; | ||
1409 | |||
1410 | slot->sl_inuse = true; | ||
1411 | cstate->slot = slot; | ||
1412 | /* Ensure a page is used for the cache */ | ||
1413 | slot->sl_cache_entry.ce_cachethis = 1; | ||
1414 | out: | ||
1415 | nfs4_unlock_state(); | ||
1416 | dprintk("%s returns %d\n", __func__, ntohl(status)); | ||
1417 | return status; | ||
1418 | } | ||
1419 | |||
1420 | __be32 | ||
1421 | nfsd4_destroy_session(struct svc_rqst *r, | ||
1422 | struct nfsd4_compound_state *cstate, | ||
1423 | struct nfsd4_destroy_session *sessionid) | ||
1424 | { | ||
1425 | struct nfsd4_session *ses; | ||
1426 | u32 status = nfserr_badsession; | ||
1427 | |||
1428 | /* Notes: | ||
1429 | * - The confirmed nfs4_client->cl_sessionid holds destroyed sessionid | ||
1430 | * - Should we return nfserr_back_chan_busy if waiting for | ||
1431 | * callbacks on to-be-destroyed session? | ||
1432 | * - Do we need to clear any callback info from previous session? | ||
1433 | */ | ||
1434 | |||
1435 | dump_sessionid(__func__, &sessionid->sessionid); | ||
1436 | spin_lock(&sessionid_lock); | ||
1437 | ses = find_in_sessionid_hashtbl(&sessionid->sessionid); | ||
1438 | if (!ses) { | ||
1439 | spin_unlock(&sessionid_lock); | ||
1440 | goto out; | ||
1441 | } | ||
1442 | |||
1443 | unhash_session(ses); | ||
1444 | spin_unlock(&sessionid_lock); | ||
1445 | |||
1446 | /* wait for callbacks */ | ||
1447 | shutdown_callback_client(ses->se_client); | ||
1448 | nfsd4_put_session(ses); | ||
1449 | status = nfs_ok; | ||
1450 | out: | ||
1451 | dprintk("%s returns %d\n", __func__, ntohl(status)); | ||
1452 | return status; | ||
1453 | } | ||
1454 | |||
1455 | __be32 | ||
1456 | nfsd4_sequence(struct svc_rqst *rqstp, | ||
1457 | struct nfsd4_compound_state *cstate, | ||
1458 | struct nfsd4_sequence *seq) | ||
1459 | { | ||
1460 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1461 | struct nfsd4_session *session; | ||
1462 | struct nfsd4_slot *slot; | ||
1463 | int status; | ||
1464 | |||
1465 | if (resp->opcnt != 1) | ||
1466 | return nfserr_sequence_pos; | ||
1467 | |||
1468 | spin_lock(&sessionid_lock); | ||
1469 | status = nfserr_badsession; | ||
1470 | session = find_in_sessionid_hashtbl(&seq->sessionid); | ||
1471 | if (!session) | ||
1472 | goto out; | ||
1473 | |||
1474 | status = nfserr_badslot; | ||
1475 | if (seq->slotid >= session->se_fnumslots) | ||
1476 | goto out; | ||
1477 | |||
1478 | slot = &session->se_slots[seq->slotid]; | ||
1479 | dprintk("%s: slotid %d\n", __func__, seq->slotid); | ||
1480 | |||
1481 | status = check_slot_seqid(seq->seqid, slot); | ||
1482 | if (status == nfserr_replay_cache) { | ||
1483 | cstate->slot = slot; | ||
1484 | cstate->session = session; | ||
1485 | /* Return the cached reply status and set cstate->status | ||
1486 | * for nfsd4_svc_encode_compoundres processing */ | ||
1487 | status = nfsd4_replay_cache_entry(resp, seq); | ||
1488 | cstate->status = nfserr_replay_cache; | ||
1489 | goto replay_cache; | ||
1490 | } | ||
1491 | if (status) | ||
1492 | goto out; | ||
1493 | |||
1494 | /* Success! bump slot seqid */ | ||
1495 | slot->sl_inuse = true; | ||
1496 | slot->sl_seqid = seq->seqid; | ||
1497 | slot->sl_cache_entry.ce_cachethis = seq->cachethis; | ||
1498 | /* Always set the cache entry cachethis for solo sequence */ | ||
1499 | if (nfsd4_is_solo_sequence(resp)) | ||
1500 | slot->sl_cache_entry.ce_cachethis = 1; | ||
1501 | |||
1502 | cstate->slot = slot; | ||
1503 | cstate->session = session; | ||
1504 | |||
1505 | replay_cache: | ||
1506 | /* Renew the clientid on success and on replay. | ||
1507 | * Hold a session reference until done processing the compound: | ||
1508 | * nfsd4_put_session called only if the cstate slot is set. | ||
1509 | */ | ||
1510 | renew_client(session->se_client); | ||
1511 | nfsd4_get_session(session); | ||
1512 | out: | ||
1513 | spin_unlock(&sessionid_lock); | ||
1514 | dprintk("%s: return %d\n", __func__, ntohl(status)); | ||
1515 | return status; | ||
1516 | } | ||
1517 | |||
688 | __be32 | 1518 | __be32 |
689 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 1519 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
690 | struct nfsd4_setclientid *setclid) | 1520 | struct nfsd4_setclientid *setclid) |
@@ -716,14 +1546,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
716 | strhashval = clientstr_hashval(dname); | 1546 | strhashval = clientstr_hashval(dname); |
717 | 1547 | ||
718 | nfs4_lock_state(); | 1548 | nfs4_lock_state(); |
719 | conf = find_confirmed_client_by_str(dname, strhashval); | 1549 | conf = find_confirmed_client_by_str(dname, strhashval, false); |
720 | if (conf) { | 1550 | if (conf) { |
721 | /* RFC 3530 14.2.33 CASE 0: */ | 1551 | /* RFC 3530 14.2.33 CASE 0: */ |
722 | status = nfserr_clid_inuse; | 1552 | status = nfserr_clid_inuse; |
723 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) | 1553 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { |
724 | || conf->cl_addr != sin->sin_addr.s_addr) { | 1554 | dprintk("NFSD: setclientid: string in use by client" |
725 | dprintk("NFSD: setclientid: string in use by clientat %pI4\n", | 1555 | " at %pI4\n", &conf->cl_addr); |
726 | &conf->cl_addr); | ||
727 | goto out; | 1556 | goto out; |
728 | } | 1557 | } |
729 | } | 1558 | } |
@@ -732,7 +1561,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
732 | * has a description of SETCLIENTID request processing consisting | 1561 | * has a description of SETCLIENTID request processing consisting |
733 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | 1562 | * of 5 bullet points, labeled as CASE0 - CASE4 below. |
734 | */ | 1563 | */ |
735 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 1564 | unconf = find_unconfirmed_client_by_str(dname, strhashval, false); |
736 | status = nfserr_resource; | 1565 | status = nfserr_resource; |
737 | if (!conf) { | 1566 | if (!conf) { |
738 | /* | 1567 | /* |
@@ -887,7 +1716,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
887 | unsigned int hash = | 1716 | unsigned int hash = |
888 | clientstr_hashval(unconf->cl_recdir); | 1717 | clientstr_hashval(unconf->cl_recdir); |
889 | conf = find_confirmed_client_by_str(unconf->cl_recdir, | 1718 | conf = find_confirmed_client_by_str(unconf->cl_recdir, |
890 | hash); | 1719 | hash, false); |
891 | if (conf) { | 1720 | if (conf) { |
892 | nfsd4_remove_clid_dir(conf); | 1721 | nfsd4_remove_clid_dir(conf); |
893 | expire_client(conf); | 1722 | expire_client(conf); |
@@ -923,11 +1752,13 @@ alloc_init_file(struct inode *ino) | |||
923 | 1752 | ||
924 | fp = kmem_cache_alloc(file_slab, GFP_KERNEL); | 1753 | fp = kmem_cache_alloc(file_slab, GFP_KERNEL); |
925 | if (fp) { | 1754 | if (fp) { |
926 | kref_init(&fp->fi_ref); | 1755 | atomic_set(&fp->fi_ref, 1); |
927 | INIT_LIST_HEAD(&fp->fi_hash); | 1756 | INIT_LIST_HEAD(&fp->fi_hash); |
928 | INIT_LIST_HEAD(&fp->fi_stateids); | 1757 | INIT_LIST_HEAD(&fp->fi_stateids); |
929 | INIT_LIST_HEAD(&fp->fi_delegations); | 1758 | INIT_LIST_HEAD(&fp->fi_delegations); |
1759 | spin_lock(&recall_lock); | ||
930 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); | 1760 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); |
1761 | spin_unlock(&recall_lock); | ||
931 | fp->fi_inode = igrab(ino); | 1762 | fp->fi_inode = igrab(ino); |
932 | fp->fi_id = current_fileid++; | 1763 | fp->fi_id = current_fileid++; |
933 | fp->fi_had_conflict = false; | 1764 | fp->fi_had_conflict = false; |
@@ -1037,48 +1868,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str | |||
1037 | return sop; | 1868 | return sop; |
1038 | } | 1869 | } |
1039 | 1870 | ||
1040 | static void | ||
1041 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | ||
1042 | { | ||
1043 | struct nfs4_stateowner *lock_sop; | ||
1044 | |||
1045 | while (!list_empty(&open_stp->st_lockowners)) { | ||
1046 | lock_sop = list_entry(open_stp->st_lockowners.next, | ||
1047 | struct nfs4_stateowner, so_perstateid); | ||
1048 | /* list_del(&open_stp->st_lockowners); */ | ||
1049 | BUG_ON(lock_sop->so_is_open_owner); | ||
1050 | release_stateowner(lock_sop); | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | static void | ||
1055 | unhash_stateowner(struct nfs4_stateowner *sop) | ||
1056 | { | ||
1057 | struct nfs4_stateid *stp; | ||
1058 | |||
1059 | list_del(&sop->so_idhash); | ||
1060 | list_del(&sop->so_strhash); | ||
1061 | if (sop->so_is_open_owner) | ||
1062 | list_del(&sop->so_perclient); | ||
1063 | list_del(&sop->so_perstateid); | ||
1064 | while (!list_empty(&sop->so_stateids)) { | ||
1065 | stp = list_entry(sop->so_stateids.next, | ||
1066 | struct nfs4_stateid, st_perstateowner); | ||
1067 | if (sop->so_is_open_owner) | ||
1068 | release_stateid(stp, OPEN_STATE); | ||
1069 | else | ||
1070 | release_stateid(stp, LOCK_STATE); | ||
1071 | } | ||
1072 | } | ||
1073 | |||
1074 | static void | ||
1075 | release_stateowner(struct nfs4_stateowner *sop) | ||
1076 | { | ||
1077 | unhash_stateowner(sop); | ||
1078 | list_del(&sop->so_close_lru); | ||
1079 | nfs4_put_stateowner(sop); | ||
1080 | } | ||
1081 | |||
1082 | static inline void | 1871 | static inline void |
1083 | init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { | 1872 | init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { |
1084 | struct nfs4_stateowner *sop = open->op_stateowner; | 1873 | struct nfs4_stateowner *sop = open->op_stateowner; |
@@ -1100,30 +1889,13 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * | |||
1100 | stp->st_stateid.si_generation = 0; | 1889 | stp->st_stateid.si_generation = 0; |
1101 | stp->st_access_bmap = 0; | 1890 | stp->st_access_bmap = 0; |
1102 | stp->st_deny_bmap = 0; | 1891 | stp->st_deny_bmap = 0; |
1103 | __set_bit(open->op_share_access, &stp->st_access_bmap); | 1892 | __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK, |
1893 | &stp->st_access_bmap); | ||
1104 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 1894 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); |
1105 | stp->st_openstp = NULL; | 1895 | stp->st_openstp = NULL; |
1106 | } | 1896 | } |
1107 | 1897 | ||
1108 | static void | 1898 | static void |
1109 | release_stateid(struct nfs4_stateid *stp, int flags) | ||
1110 | { | ||
1111 | struct file *filp = stp->st_vfs_file; | ||
1112 | |||
1113 | list_del(&stp->st_hash); | ||
1114 | list_del(&stp->st_perfile); | ||
1115 | list_del(&stp->st_perstateowner); | ||
1116 | if (flags & OPEN_STATE) { | ||
1117 | release_stateid_lockowners(stp); | ||
1118 | stp->st_vfs_file = NULL; | ||
1119 | nfsd_close(filp); | ||
1120 | } else if (flags & LOCK_STATE) | ||
1121 | locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); | ||
1122 | put_nfs4_file(stp->st_file); | ||
1123 | kmem_cache_free(stateid_slab, stp); | ||
1124 | } | ||
1125 | |||
1126 | static void | ||
1127 | move_to_close_lru(struct nfs4_stateowner *sop) | 1899 | move_to_close_lru(struct nfs4_stateowner *sop) |
1128 | { | 1900 | { |
1129 | dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); | 1901 | dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); |
@@ -1160,20 +1932,33 @@ find_file(struct inode *ino) | |||
1160 | unsigned int hashval = file_hashval(ino); | 1932 | unsigned int hashval = file_hashval(ino); |
1161 | struct nfs4_file *fp; | 1933 | struct nfs4_file *fp; |
1162 | 1934 | ||
1935 | spin_lock(&recall_lock); | ||
1163 | list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { | 1936 | list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { |
1164 | if (fp->fi_inode == ino) { | 1937 | if (fp->fi_inode == ino) { |
1165 | get_nfs4_file(fp); | 1938 | get_nfs4_file(fp); |
1939 | spin_unlock(&recall_lock); | ||
1166 | return fp; | 1940 | return fp; |
1167 | } | 1941 | } |
1168 | } | 1942 | } |
1943 | spin_unlock(&recall_lock); | ||
1169 | return NULL; | 1944 | return NULL; |
1170 | } | 1945 | } |
1171 | 1946 | ||
1172 | static inline int access_valid(u32 x) | 1947 | static inline int access_valid(u32 x, u32 minorversion) |
1173 | { | 1948 | { |
1174 | if (x < NFS4_SHARE_ACCESS_READ) | 1949 | if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ) |
1175 | return 0; | 1950 | return 0; |
1176 | if (x > NFS4_SHARE_ACCESS_BOTH) | 1951 | if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH) |
1952 | return 0; | ||
1953 | x &= ~NFS4_SHARE_ACCESS_MASK; | ||
1954 | if (minorversion && x) { | ||
1955 | if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL) | ||
1956 | return 0; | ||
1957 | if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED) | ||
1958 | return 0; | ||
1959 | x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK); | ||
1960 | } | ||
1961 | if (x) | ||
1177 | return 0; | 1962 | return 0; |
1178 | return 1; | 1963 | return 1; |
1179 | } | 1964 | } |
@@ -1409,7 +2194,8 @@ static struct lock_manager_operations nfsd_lease_mng_ops = { | |||
1409 | 2194 | ||
1410 | 2195 | ||
1411 | __be32 | 2196 | __be32 |
1412 | nfsd4_process_open1(struct nfsd4_open *open) | 2197 | nfsd4_process_open1(struct nfsd4_compound_state *cstate, |
2198 | struct nfsd4_open *open) | ||
1413 | { | 2199 | { |
1414 | clientid_t *clientid = &open->op_clientid; | 2200 | clientid_t *clientid = &open->op_clientid; |
1415 | struct nfs4_client *clp = NULL; | 2201 | struct nfs4_client *clp = NULL; |
@@ -1432,10 +2218,13 @@ nfsd4_process_open1(struct nfsd4_open *open) | |||
1432 | return nfserr_expired; | 2218 | return nfserr_expired; |
1433 | goto renew; | 2219 | goto renew; |
1434 | } | 2220 | } |
2221 | /* When sessions are used, skip open sequenceid processing */ | ||
2222 | if (nfsd4_has_session(cstate)) | ||
2223 | goto renew; | ||
1435 | if (!sop->so_confirmed) { | 2224 | if (!sop->so_confirmed) { |
1436 | /* Replace unconfirmed owners without checking for replay. */ | 2225 | /* Replace unconfirmed owners without checking for replay. */ |
1437 | clp = sop->so_client; | 2226 | clp = sop->so_client; |
1438 | release_stateowner(sop); | 2227 | release_openowner(sop); |
1439 | open->op_stateowner = NULL; | 2228 | open->op_stateowner = NULL; |
1440 | goto renew; | 2229 | goto renew; |
1441 | } | 2230 | } |
@@ -1709,6 +2498,7 @@ out: | |||
1709 | __be32 | 2498 | __be32 |
1710 | nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) | 2499 | nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) |
1711 | { | 2500 | { |
2501 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1712 | struct nfs4_file *fp = NULL; | 2502 | struct nfs4_file *fp = NULL; |
1713 | struct inode *ino = current_fh->fh_dentry->d_inode; | 2503 | struct inode *ino = current_fh->fh_dentry->d_inode; |
1714 | struct nfs4_stateid *stp = NULL; | 2504 | struct nfs4_stateid *stp = NULL; |
@@ -1716,7 +2506,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
1716 | __be32 status; | 2506 | __be32 status; |
1717 | 2507 | ||
1718 | status = nfserr_inval; | 2508 | status = nfserr_inval; |
1719 | if (!access_valid(open->op_share_access) | 2509 | if (!access_valid(open->op_share_access, resp->cstate.minorversion) |
1720 | || !deny_valid(open->op_share_deny)) | 2510 | || !deny_valid(open->op_share_deny)) |
1721 | goto out; | 2511 | goto out; |
1722 | /* | 2512 | /* |
@@ -1764,12 +2554,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
1764 | init_stateid(stp, fp, open); | 2554 | init_stateid(stp, fp, open); |
1765 | status = nfsd4_truncate(rqstp, current_fh, open); | 2555 | status = nfsd4_truncate(rqstp, current_fh, open); |
1766 | if (status) { | 2556 | if (status) { |
1767 | release_stateid(stp, OPEN_STATE); | 2557 | release_open_stateid(stp); |
1768 | goto out; | 2558 | goto out; |
1769 | } | 2559 | } |
2560 | if (nfsd4_has_session(&resp->cstate)) | ||
2561 | update_stateid(&stp->st_stateid); | ||
1770 | } | 2562 | } |
1771 | memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); | 2563 | memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); |
1772 | 2564 | ||
2565 | if (nfsd4_has_session(&resp->cstate)) | ||
2566 | open->op_stateowner->so_confirmed = 1; | ||
2567 | |||
1773 | /* | 2568 | /* |
1774 | * Attempt to hand out a delegation. No error return, because the | 2569 | * Attempt to hand out a delegation. No error return, because the |
1775 | * OPEN succeeds even if we fail. | 2570 | * OPEN succeeds even if we fail. |
@@ -1790,7 +2585,8 @@ out: | |||
1790 | * To finish the open response, we just need to set the rflags. | 2585 | * To finish the open response, we just need to set the rflags. |
1791 | */ | 2586 | */ |
1792 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; | 2587 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; |
1793 | if (!open->op_stateowner->so_confirmed) | 2588 | if (!open->op_stateowner->so_confirmed && |
2589 | !nfsd4_has_session(&resp->cstate)) | ||
1794 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; | 2590 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; |
1795 | 2591 | ||
1796 | return status; | 2592 | return status; |
@@ -1898,7 +2694,7 @@ nfs4_laundromat(void) | |||
1898 | } | 2694 | } |
1899 | dprintk("NFSD: purging unused open stateowner (so_id %d)\n", | 2695 | dprintk("NFSD: purging unused open stateowner (so_id %d)\n", |
1900 | sop->so_id); | 2696 | sop->so_id); |
1901 | release_stateowner(sop); | 2697 | release_openowner(sop); |
1902 | } | 2698 | } |
1903 | if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) | 2699 | if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) |
1904 | clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; | 2700 | clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; |
@@ -1983,10 +2779,7 @@ out: | |||
1983 | static inline __be32 | 2779 | static inline __be32 |
1984 | check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | 2780 | check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) |
1985 | { | 2781 | { |
1986 | /* Trying to call delegreturn with a special stateid? Yuch: */ | 2782 | if (ONE_STATEID(stateid) && (flags & RD_STATE)) |
1987 | if (!(flags & (RD_STATE | WR_STATE))) | ||
1988 | return nfserr_bad_stateid; | ||
1989 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) | ||
1990 | return nfs_ok; | 2783 | return nfs_ok; |
1991 | else if (locks_in_grace()) { | 2784 | else if (locks_in_grace()) { |
1992 | /* Answer in remaining cases depends on existence of | 2785 | /* Answer in remaining cases depends on existence of |
@@ -2005,14 +2798,20 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
2005 | * that are not able to provide mandatory locking. | 2798 | * that are not able to provide mandatory locking. |
2006 | */ | 2799 | */ |
2007 | static inline int | 2800 | static inline int |
2008 | io_during_grace_disallowed(struct inode *inode, int flags) | 2801 | grace_disallows_io(struct inode *inode) |
2009 | { | 2802 | { |
2010 | return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) | 2803 | return locks_in_grace() && mandatory_lock(inode); |
2011 | && mandatory_lock(inode); | ||
2012 | } | 2804 | } |
2013 | 2805 | ||
2014 | static int check_stateid_generation(stateid_t *in, stateid_t *ref) | 2806 | static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags) |
2015 | { | 2807 | { |
2808 | /* | ||
2809 | * When sessions are used the stateid generation number is ignored | ||
2810 | * when it is zero. | ||
2811 | */ | ||
2812 | if ((flags & HAS_SESSION) && in->si_generation == 0) | ||
2813 | goto out; | ||
2814 | |||
2016 | /* If the client sends us a stateid from the future, it's buggy: */ | 2815 | /* If the client sends us a stateid from the future, it's buggy: */ |
2017 | if (in->si_generation > ref->si_generation) | 2816 | if (in->si_generation > ref->si_generation) |
2018 | return nfserr_bad_stateid; | 2817 | return nfserr_bad_stateid; |
@@ -2028,74 +2827,77 @@ static int check_stateid_generation(stateid_t *in, stateid_t *ref) | |||
2028 | */ | 2827 | */ |
2029 | if (in->si_generation < ref->si_generation) | 2828 | if (in->si_generation < ref->si_generation) |
2030 | return nfserr_old_stateid; | 2829 | return nfserr_old_stateid; |
2830 | out: | ||
2031 | return nfs_ok; | 2831 | return nfs_ok; |
2032 | } | 2832 | } |
2033 | 2833 | ||
2834 | static int is_delegation_stateid(stateid_t *stateid) | ||
2835 | { | ||
2836 | return stateid->si_fileid == 0; | ||
2837 | } | ||
2838 | |||
2034 | /* | 2839 | /* |
2035 | * Checks for stateid operations | 2840 | * Checks for stateid operations |
2036 | */ | 2841 | */ |
2037 | __be32 | 2842 | __be32 |
2038 | nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) | 2843 | nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, |
2844 | stateid_t *stateid, int flags, struct file **filpp) | ||
2039 | { | 2845 | { |
2040 | struct nfs4_stateid *stp = NULL; | 2846 | struct nfs4_stateid *stp = NULL; |
2041 | struct nfs4_delegation *dp = NULL; | 2847 | struct nfs4_delegation *dp = NULL; |
2042 | stateid_t *stidp; | 2848 | struct svc_fh *current_fh = &cstate->current_fh; |
2043 | struct inode *ino = current_fh->fh_dentry->d_inode; | 2849 | struct inode *ino = current_fh->fh_dentry->d_inode; |
2044 | __be32 status; | 2850 | __be32 status; |
2045 | 2851 | ||
2046 | dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", | ||
2047 | stateid->si_boot, stateid->si_stateownerid, | ||
2048 | stateid->si_fileid, stateid->si_generation); | ||
2049 | if (filpp) | 2852 | if (filpp) |
2050 | *filpp = NULL; | 2853 | *filpp = NULL; |
2051 | 2854 | ||
2052 | if (io_during_grace_disallowed(ino, flags)) | 2855 | if (grace_disallows_io(ino)) |
2053 | return nfserr_grace; | 2856 | return nfserr_grace; |
2054 | 2857 | ||
2858 | if (nfsd4_has_session(cstate)) | ||
2859 | flags |= HAS_SESSION; | ||
2860 | |||
2055 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) | 2861 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) |
2056 | return check_special_stateids(current_fh, stateid, flags); | 2862 | return check_special_stateids(current_fh, stateid, flags); |
2057 | 2863 | ||
2058 | /* STALE STATEID */ | ||
2059 | status = nfserr_stale_stateid; | 2864 | status = nfserr_stale_stateid; |
2060 | if (STALE_STATEID(stateid)) | 2865 | if (STALE_STATEID(stateid)) |
2061 | goto out; | 2866 | goto out; |
2062 | 2867 | ||
2063 | /* BAD STATEID */ | ||
2064 | status = nfserr_bad_stateid; | 2868 | status = nfserr_bad_stateid; |
2065 | if (!stateid->si_fileid) { /* delegation stateid */ | 2869 | if (is_delegation_stateid(stateid)) { |
2066 | if(!(dp = find_delegation_stateid(ino, stateid))) { | 2870 | dp = find_delegation_stateid(ino, stateid); |
2067 | dprintk("NFSD: delegation stateid not found\n"); | 2871 | if (!dp) |
2068 | goto out; | 2872 | goto out; |
2069 | } | 2873 | status = check_stateid_generation(stateid, &dp->dl_stateid, |
2070 | stidp = &dp->dl_stateid; | 2874 | flags); |
2875 | if (status) | ||
2876 | goto out; | ||
2877 | status = nfs4_check_delegmode(dp, flags); | ||
2878 | if (status) | ||
2879 | goto out; | ||
2880 | renew_client(dp->dl_client); | ||
2881 | if (filpp) | ||
2882 | *filpp = dp->dl_vfs_file; | ||
2071 | } else { /* open or lock stateid */ | 2883 | } else { /* open or lock stateid */ |
2072 | if (!(stp = find_stateid(stateid, flags))) { | 2884 | stp = find_stateid(stateid, flags); |
2073 | dprintk("NFSD: open or lock stateid not found\n"); | 2885 | if (!stp) |
2074 | goto out; | 2886 | goto out; |
2075 | } | 2887 | if (nfs4_check_fh(current_fh, stp)) |
2076 | if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) | ||
2077 | goto out; | 2888 | goto out; |
2078 | if (!stp->st_stateowner->so_confirmed) | 2889 | if (!stp->st_stateowner->so_confirmed) |
2079 | goto out; | 2890 | goto out; |
2080 | stidp = &stp->st_stateid; | 2891 | status = check_stateid_generation(stateid, &stp->st_stateid, |
2081 | } | 2892 | flags); |
2082 | status = check_stateid_generation(stateid, stidp); | 2893 | if (status) |
2083 | if (status) | 2894 | goto out; |
2084 | goto out; | 2895 | status = nfs4_check_openmode(stp, flags); |
2085 | if (stp) { | 2896 | if (status) |
2086 | if ((status = nfs4_check_openmode(stp,flags))) | ||
2087 | goto out; | 2897 | goto out; |
2088 | renew_client(stp->st_stateowner->so_client); | 2898 | renew_client(stp->st_stateowner->so_client); |
2089 | if (filpp) | 2899 | if (filpp) |
2090 | *filpp = stp->st_vfs_file; | 2900 | *filpp = stp->st_vfs_file; |
2091 | } else { | ||
2092 | if ((status = nfs4_check_delegmode(dp, flags))) | ||
2093 | goto out; | ||
2094 | renew_client(dp->dl_client); | ||
2095 | if (flags & DELEG_RET) | ||
2096 | unhash_delegation(dp); | ||
2097 | if (filpp) | ||
2098 | *filpp = dp->dl_vfs_file; | ||
2099 | } | 2901 | } |
2100 | status = nfs_ok; | 2902 | status = nfs_ok; |
2101 | out: | 2903 | out: |
@@ -2113,10 +2915,14 @@ setlkflg (int type) | |||
2113 | * Checks for sequence id mutating operations. | 2915 | * Checks for sequence id mutating operations. |
2114 | */ | 2916 | */ |
2115 | static __be32 | 2917 | static __be32 |
2116 | nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) | 2918 | nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, |
2919 | stateid_t *stateid, int flags, | ||
2920 | struct nfs4_stateowner **sopp, | ||
2921 | struct nfs4_stateid **stpp, struct nfsd4_lock *lock) | ||
2117 | { | 2922 | { |
2118 | struct nfs4_stateid *stp; | 2923 | struct nfs4_stateid *stp; |
2119 | struct nfs4_stateowner *sop; | 2924 | struct nfs4_stateowner *sop; |
2925 | struct svc_fh *current_fh = &cstate->current_fh; | ||
2120 | __be32 status; | 2926 | __be32 status; |
2121 | 2927 | ||
2122 | dprintk("NFSD: preprocess_seqid_op: seqid=%d " | 2928 | dprintk("NFSD: preprocess_seqid_op: seqid=%d " |
@@ -2134,6 +2940,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2134 | 2940 | ||
2135 | if (STALE_STATEID(stateid)) | 2941 | if (STALE_STATEID(stateid)) |
2136 | return nfserr_stale_stateid; | 2942 | return nfserr_stale_stateid; |
2943 | |||
2944 | if (nfsd4_has_session(cstate)) | ||
2945 | flags |= HAS_SESSION; | ||
2946 | |||
2137 | /* | 2947 | /* |
2138 | * We return BAD_STATEID if filehandle doesn't match stateid, | 2948 | * We return BAD_STATEID if filehandle doesn't match stateid, |
2139 | the confirmed flag is incorrectly set, or the generation | 2949 | the confirmed flag is incorrectly set, or the generation |
@@ -2166,8 +2976,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2166 | if (lock->lk_is_new) { | 2976 | if (lock->lk_is_new) { |
2167 | if (!sop->so_is_open_owner) | 2977 | if (!sop->so_is_open_owner) |
2168 | return nfserr_bad_stateid; | 2978 | return nfserr_bad_stateid; |
2169 | if (!same_clid(&clp->cl_clientid, lockclid)) | 2979 | if (!(flags & HAS_SESSION) && |
2170 | return nfserr_bad_stateid; | 2980 | !same_clid(&clp->cl_clientid, lockclid)) |
2981 | return nfserr_bad_stateid; | ||
2171 | /* stp is the open stateid */ | 2982 | /* stp is the open stateid */ |
2172 | status = nfs4_check_openmode(stp, lkflg); | 2983 | status = nfs4_check_openmode(stp, lkflg); |
2173 | if (status) | 2984 | if (status) |
@@ -2190,7 +3001,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2190 | * For the moment, we ignore the possibility of | 3001 | * For the moment, we ignore the possibility of |
2191 | * generation number wraparound. | 3002 | * generation number wraparound. |
2192 | */ | 3003 | */ |
2193 | if (seqid != sop->so_seqid) | 3004 | if (!(flags & HAS_SESSION) && seqid != sop->so_seqid) |
2194 | goto check_replay; | 3005 | goto check_replay; |
2195 | 3006 | ||
2196 | if (sop->so_confirmed && flags & CONFIRM) { | 3007 | if (sop->so_confirmed && flags & CONFIRM) { |
@@ -2203,7 +3014,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2203 | " confirmed yet!\n"); | 3014 | " confirmed yet!\n"); |
2204 | return nfserr_bad_stateid; | 3015 | return nfserr_bad_stateid; |
2205 | } | 3016 | } |
2206 | status = check_stateid_generation(stateid, &stp->st_stateid); | 3017 | status = check_stateid_generation(stateid, &stp->st_stateid, flags); |
2207 | if (status) | 3018 | if (status) |
2208 | return status; | 3019 | return status; |
2209 | renew_client(sop->so_client); | 3020 | renew_client(sop->so_client); |
@@ -2239,7 +3050,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2239 | 3050 | ||
2240 | nfs4_lock_state(); | 3051 | nfs4_lock_state(); |
2241 | 3052 | ||
2242 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3053 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2243 | oc->oc_seqid, &oc->oc_req_stateid, | 3054 | oc->oc_seqid, &oc->oc_req_stateid, |
2244 | CONFIRM | OPEN_STATE, | 3055 | CONFIRM | OPEN_STATE, |
2245 | &oc->oc_stateowner, &stp, NULL))) | 3056 | &oc->oc_stateowner, &stp, NULL))) |
@@ -2304,12 +3115,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, | |||
2304 | (int)cstate->current_fh.fh_dentry->d_name.len, | 3115 | (int)cstate->current_fh.fh_dentry->d_name.len, |
2305 | cstate->current_fh.fh_dentry->d_name.name); | 3116 | cstate->current_fh.fh_dentry->d_name.name); |
2306 | 3117 | ||
2307 | if (!access_valid(od->od_share_access) | 3118 | if (!access_valid(od->od_share_access, cstate->minorversion) |
2308 | || !deny_valid(od->od_share_deny)) | 3119 | || !deny_valid(od->od_share_deny)) |
2309 | return nfserr_inval; | 3120 | return nfserr_inval; |
2310 | 3121 | ||
2311 | nfs4_lock_state(); | 3122 | nfs4_lock_state(); |
2312 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3123 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2313 | od->od_seqid, | 3124 | od->od_seqid, |
2314 | &od->od_stateid, | 3125 | &od->od_stateid, |
2315 | OPEN_STATE, | 3126 | OPEN_STATE, |
@@ -2362,7 +3173,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2362 | 3173 | ||
2363 | nfs4_lock_state(); | 3174 | nfs4_lock_state(); |
2364 | /* check close_lru for replay */ | 3175 | /* check close_lru for replay */ |
2365 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3176 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2366 | close->cl_seqid, | 3177 | close->cl_seqid, |
2367 | &close->cl_stateid, | 3178 | &close->cl_stateid, |
2368 | OPEN_STATE | CLOSE_STATE, | 3179 | OPEN_STATE | CLOSE_STATE, |
@@ -2373,7 +3184,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2373 | memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); | 3184 | memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); |
2374 | 3185 | ||
2375 | /* release_stateid() calls nfsd_close() if needed */ | 3186 | /* release_stateid() calls nfsd_close() if needed */ |
2376 | release_stateid(stp, OPEN_STATE); | 3187 | release_open_stateid(stp); |
2377 | 3188 | ||
2378 | /* place unused nfs4_stateowners on so_close_lru list to be | 3189 | /* place unused nfs4_stateowners on so_close_lru list to be |
2379 | * released by the laundromat service after the lease period | 3190 | * released by the laundromat service after the lease period |
@@ -2394,16 +3205,40 @@ __be32 | |||
2394 | nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 3205 | nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
2395 | struct nfsd4_delegreturn *dr) | 3206 | struct nfsd4_delegreturn *dr) |
2396 | { | 3207 | { |
3208 | struct nfs4_delegation *dp; | ||
3209 | stateid_t *stateid = &dr->dr_stateid; | ||
3210 | struct inode *inode; | ||
2397 | __be32 status; | 3211 | __be32 status; |
3212 | int flags = 0; | ||
2398 | 3213 | ||
2399 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) | 3214 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) |
2400 | goto out; | 3215 | return status; |
3216 | inode = cstate->current_fh.fh_dentry->d_inode; | ||
2401 | 3217 | ||
3218 | if (nfsd4_has_session(cstate)) | ||
3219 | flags |= HAS_SESSION; | ||
2402 | nfs4_lock_state(); | 3220 | nfs4_lock_state(); |
2403 | status = nfs4_preprocess_stateid_op(&cstate->current_fh, | 3221 | status = nfserr_bad_stateid; |
2404 | &dr->dr_stateid, DELEG_RET, NULL); | 3222 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) |
2405 | nfs4_unlock_state(); | 3223 | goto out; |
3224 | status = nfserr_stale_stateid; | ||
3225 | if (STALE_STATEID(stateid)) | ||
3226 | goto out; | ||
3227 | status = nfserr_bad_stateid; | ||
3228 | if (!is_delegation_stateid(stateid)) | ||
3229 | goto out; | ||
3230 | dp = find_delegation_stateid(inode, stateid); | ||
3231 | if (!dp) | ||
3232 | goto out; | ||
3233 | status = check_stateid_generation(stateid, &dp->dl_stateid, flags); | ||
3234 | if (status) | ||
3235 | goto out; | ||
3236 | renew_client(dp->dl_client); | ||
3237 | |||
3238 | unhash_delegation(dp); | ||
2406 | out: | 3239 | out: |
3240 | nfs4_unlock_state(); | ||
3241 | |||
2407 | return status; | 3242 | return status; |
2408 | } | 3243 | } |
2409 | 3244 | ||
@@ -2684,11 +3519,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2684 | struct nfs4_file *fp; | 3519 | struct nfs4_file *fp; |
2685 | 3520 | ||
2686 | status = nfserr_stale_clientid; | 3521 | status = nfserr_stale_clientid; |
2687 | if (STALE_CLIENTID(&lock->lk_new_clientid)) | 3522 | if (!nfsd4_has_session(cstate) && |
3523 | STALE_CLIENTID(&lock->lk_new_clientid)) | ||
2688 | goto out; | 3524 | goto out; |
2689 | 3525 | ||
2690 | /* validate and update open stateid and open seqid */ | 3526 | /* validate and update open stateid and open seqid */ |
2691 | status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3527 | status = nfs4_preprocess_seqid_op(cstate, |
2692 | lock->lk_new_open_seqid, | 3528 | lock->lk_new_open_seqid, |
2693 | &lock->lk_new_open_stateid, | 3529 | &lock->lk_new_open_stateid, |
2694 | OPEN_STATE, | 3530 | OPEN_STATE, |
@@ -2715,7 +3551,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2715 | goto out; | 3551 | goto out; |
2716 | } else { | 3552 | } else { |
2717 | /* lock (lock owner + lock stateid) already exists */ | 3553 | /* lock (lock owner + lock stateid) already exists */ |
2718 | status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3554 | status = nfs4_preprocess_seqid_op(cstate, |
2719 | lock->lk_old_lock_seqid, | 3555 | lock->lk_old_lock_seqid, |
2720 | &lock->lk_old_lock_stateid, | 3556 | &lock->lk_old_lock_stateid, |
2721 | LOCK_STATE, | 3557 | LOCK_STATE, |
@@ -2788,7 +3624,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2788 | } | 3624 | } |
2789 | out: | 3625 | out: |
2790 | if (status && lock->lk_is_new && lock_sop) | 3626 | if (status && lock->lk_is_new && lock_sop) |
2791 | release_stateowner(lock_sop); | 3627 | release_lockowner(lock_sop); |
2792 | if (lock->lk_replay_owner) { | 3628 | if (lock->lk_replay_owner) { |
2793 | nfs4_get_stateowner(lock->lk_replay_owner); | 3629 | nfs4_get_stateowner(lock->lk_replay_owner); |
2794 | cstate->replay_owner = lock->lk_replay_owner; | 3630 | cstate->replay_owner = lock->lk_replay_owner; |
@@ -2838,7 +3674,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2838 | nfs4_lock_state(); | 3674 | nfs4_lock_state(); |
2839 | 3675 | ||
2840 | status = nfserr_stale_clientid; | 3676 | status = nfserr_stale_clientid; |
2841 | if (STALE_CLIENTID(&lockt->lt_clientid)) | 3677 | if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) |
2842 | goto out; | 3678 | goto out; |
2843 | 3679 | ||
2844 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { | 3680 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { |
@@ -2911,7 +3747,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2911 | 3747 | ||
2912 | nfs4_lock_state(); | 3748 | nfs4_lock_state(); |
2913 | 3749 | ||
2914 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3750 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2915 | locku->lu_seqid, | 3751 | locku->lu_seqid, |
2916 | &locku->lu_stateid, | 3752 | &locku->lu_stateid, |
2917 | LOCK_STATE, | 3753 | LOCK_STATE, |
@@ -3037,7 +3873,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
3037 | /* unhash_stateowner deletes so_perclient only | 3873 | /* unhash_stateowner deletes so_perclient only |
3038 | * for openowners. */ | 3874 | * for openowners. */ |
3039 | list_del(&sop->so_perclient); | 3875 | list_del(&sop->so_perclient); |
3040 | release_stateowner(sop); | 3876 | release_lockowner(sop); |
3041 | } | 3877 | } |
3042 | out: | 3878 | out: |
3043 | nfs4_unlock_state(); | 3879 | nfs4_unlock_state(); |
@@ -3051,12 +3887,12 @@ alloc_reclaim(void) | |||
3051 | } | 3887 | } |
3052 | 3888 | ||
3053 | int | 3889 | int |
3054 | nfs4_has_reclaimed_state(const char *name) | 3890 | nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) |
3055 | { | 3891 | { |
3056 | unsigned int strhashval = clientstr_hashval(name); | 3892 | unsigned int strhashval = clientstr_hashval(name); |
3057 | struct nfs4_client *clp; | 3893 | struct nfs4_client *clp; |
3058 | 3894 | ||
3059 | clp = find_confirmed_client_by_str(name, strhashval); | 3895 | clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id); |
3060 | return clp ? 1 : 0; | 3896 | return clp ? 1 : 0; |
3061 | } | 3897 | } |
3062 | 3898 | ||
@@ -3153,6 +3989,8 @@ nfs4_state_init(void) | |||
3153 | INIT_LIST_HEAD(&unconf_str_hashtbl[i]); | 3989 | INIT_LIST_HEAD(&unconf_str_hashtbl[i]); |
3154 | INIT_LIST_HEAD(&unconf_id_hashtbl[i]); | 3990 | INIT_LIST_HEAD(&unconf_id_hashtbl[i]); |
3155 | } | 3991 | } |
3992 | for (i = 0; i < SESSION_HASH_SIZE; i++) | ||
3993 | INIT_LIST_HEAD(&sessionid_hashtbl[i]); | ||
3156 | for (i = 0; i < FILE_HASH_SIZE; i++) { | 3994 | for (i = 0; i < FILE_HASH_SIZE; i++) { |
3157 | INIT_LIST_HEAD(&file_hashtbl[i]); | 3995 | INIT_LIST_HEAD(&file_hashtbl[i]); |
3158 | } | 3996 | } |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9250067943d8..b820c311931c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/fs.h> | 45 | #include <linux/fs.h> |
46 | #include <linux/namei.h> | 46 | #include <linux/namei.h> |
47 | #include <linux/vfs.h> | 47 | #include <linux/vfs.h> |
48 | #include <linux/utsname.h> | ||
48 | #include <linux/sunrpc/xdr.h> | 49 | #include <linux/sunrpc/xdr.h> |
49 | #include <linux/sunrpc/svc.h> | 50 | #include <linux/sunrpc/svc.h> |
50 | #include <linux/sunrpc/clnt.h> | 51 | #include <linux/sunrpc/clnt.h> |
@@ -188,6 +189,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | |||
188 | return p; | 189 | return p; |
189 | } | 190 | } |
190 | 191 | ||
192 | static int zero_clientid(clientid_t *clid) | ||
193 | { | ||
194 | return (clid->cl_boot == 0) && (clid->cl_id == 0); | ||
195 | } | ||
196 | |||
191 | static int | 197 | static int |
192 | defer_free(struct nfsd4_compoundargs *argp, | 198 | defer_free(struct nfsd4_compoundargs *argp, |
193 | void (*release)(const void *), void *p) | 199 | void (*release)(const void *), void *p) |
@@ -230,6 +236,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | |||
230 | 236 | ||
231 | bmval[0] = 0; | 237 | bmval[0] = 0; |
232 | bmval[1] = 0; | 238 | bmval[1] = 0; |
239 | bmval[2] = 0; | ||
233 | 240 | ||
234 | READ_BUF(4); | 241 | READ_BUF(4); |
235 | READ32(bmlen); | 242 | READ32(bmlen); |
@@ -241,13 +248,27 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | |||
241 | READ32(bmval[0]); | 248 | READ32(bmval[0]); |
242 | if (bmlen > 1) | 249 | if (bmlen > 1) |
243 | READ32(bmval[1]); | 250 | READ32(bmval[1]); |
251 | if (bmlen > 2) | ||
252 | READ32(bmval[2]); | ||
244 | 253 | ||
245 | DECODE_TAIL; | 254 | DECODE_TAIL; |
246 | } | 255 | } |
247 | 256 | ||
257 | static u32 nfsd_attrmask[] = { | ||
258 | NFSD_WRITEABLE_ATTRS_WORD0, | ||
259 | NFSD_WRITEABLE_ATTRS_WORD1, | ||
260 | NFSD_WRITEABLE_ATTRS_WORD2 | ||
261 | }; | ||
262 | |||
263 | static u32 nfsd41_ex_attrmask[] = { | ||
264 | NFSD_SUPPATTR_EXCLCREAT_WORD0, | ||
265 | NFSD_SUPPATTR_EXCLCREAT_WORD1, | ||
266 | NFSD_SUPPATTR_EXCLCREAT_WORD2 | ||
267 | }; | ||
268 | |||
248 | static __be32 | 269 | static __be32 |
249 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, | 270 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, |
250 | struct nfs4_acl **acl) | 271 | struct iattr *iattr, struct nfs4_acl **acl) |
251 | { | 272 | { |
252 | int expected_len, len = 0; | 273 | int expected_len, len = 0; |
253 | u32 dummy32; | 274 | u32 dummy32; |
@@ -263,9 +284,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia | |||
263 | * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; | 284 | * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; |
264 | * read-only attributes return ERR_INVAL. | 285 | * read-only attributes return ERR_INVAL. |
265 | */ | 286 | */ |
266 | if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) | 287 | if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) || |
288 | (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) || | ||
289 | (bmval[2] & ~nfsd_suppattrs2(argp->minorversion))) | ||
267 | return nfserr_attrnotsupp; | 290 | return nfserr_attrnotsupp; |
268 | if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) | 291 | if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || |
292 | (bmval[2] & ~writable[2])) | ||
269 | return nfserr_inval; | 293 | return nfserr_inval; |
270 | 294 | ||
271 | READ_BUF(4); | 295 | READ_BUF(4); |
@@ -400,6 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia | |||
400 | goto xdr_error; | 424 | goto xdr_error; |
401 | } | 425 | } |
402 | } | 426 | } |
427 | BUG_ON(bmval[2]); /* no such writeable attr supported yet */ | ||
403 | if (len != expected_len) | 428 | if (len != expected_len) |
404 | goto xdr_error; | 429 | goto xdr_error; |
405 | 430 | ||
@@ -493,7 +518,9 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
493 | if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) | 518 | if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) |
494 | return status; | 519 | return status; |
495 | 520 | ||
496 | if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) | 521 | status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask, |
522 | &create->cr_iattr, &create->cr_acl); | ||
523 | if (status) | ||
497 | goto out; | 524 | goto out; |
498 | 525 | ||
499 | DECODE_TAIL; | 526 | DECODE_TAIL; |
@@ -583,6 +610,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) | |||
583 | READ_BUF(lockt->lt_owner.len); | 610 | READ_BUF(lockt->lt_owner.len); |
584 | READMEM(lockt->lt_owner.data, lockt->lt_owner.len); | 611 | READMEM(lockt->lt_owner.data, lockt->lt_owner.len); |
585 | 612 | ||
613 | if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) | ||
614 | return nfserr_inval; | ||
586 | DECODE_TAIL; | 615 | DECODE_TAIL; |
587 | } | 616 | } |
588 | 617 | ||
@@ -652,13 +681,26 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
652 | switch (open->op_createmode) { | 681 | switch (open->op_createmode) { |
653 | case NFS4_CREATE_UNCHECKED: | 682 | case NFS4_CREATE_UNCHECKED: |
654 | case NFS4_CREATE_GUARDED: | 683 | case NFS4_CREATE_GUARDED: |
655 | if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) | 684 | status = nfsd4_decode_fattr(argp, open->op_bmval, |
685 | nfsd_attrmask, &open->op_iattr, &open->op_acl); | ||
686 | if (status) | ||
656 | goto out; | 687 | goto out; |
657 | break; | 688 | break; |
658 | case NFS4_CREATE_EXCLUSIVE: | 689 | case NFS4_CREATE_EXCLUSIVE: |
659 | READ_BUF(8); | 690 | READ_BUF(8); |
660 | COPYMEM(open->op_verf.data, 8); | 691 | COPYMEM(open->op_verf.data, 8); |
661 | break; | 692 | break; |
693 | case NFS4_CREATE_EXCLUSIVE4_1: | ||
694 | if (argp->minorversion < 1) | ||
695 | goto xdr_error; | ||
696 | READ_BUF(8); | ||
697 | COPYMEM(open->op_verf.data, 8); | ||
698 | status = nfsd4_decode_fattr(argp, open->op_bmval, | ||
699 | nfsd41_ex_attrmask, &open->op_iattr, | ||
700 | &open->op_acl); | ||
701 | if (status) | ||
702 | goto out; | ||
703 | break; | ||
662 | default: | 704 | default: |
663 | goto xdr_error; | 705 | goto xdr_error; |
664 | } | 706 | } |
@@ -851,7 +893,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta | |||
851 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); | 893 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); |
852 | if (status) | 894 | if (status) |
853 | return status; | 895 | return status; |
854 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, | 896 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask, |
855 | &setattr->sa_iattr, &setattr->sa_acl); | 897 | &setattr->sa_iattr, &setattr->sa_acl); |
856 | } | 898 | } |
857 | 899 | ||
@@ -993,6 +1035,241 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel | |||
993 | READ_BUF(rlockowner->rl_owner.len); | 1035 | READ_BUF(rlockowner->rl_owner.len); |
994 | READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); | 1036 | READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); |
995 | 1037 | ||
1038 | if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) | ||
1039 | return nfserr_inval; | ||
1040 | DECODE_TAIL; | ||
1041 | } | ||
1042 | |||
1043 | static __be32 | ||
1044 | nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, | ||
1045 | struct nfsd4_exchange_id *exid) | ||
1046 | { | ||
1047 | int dummy; | ||
1048 | DECODE_HEAD; | ||
1049 | |||
1050 | READ_BUF(NFS4_VERIFIER_SIZE); | ||
1051 | COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); | ||
1052 | |||
1053 | READ_BUF(4); | ||
1054 | READ32(exid->clname.len); | ||
1055 | |||
1056 | READ_BUF(exid->clname.len); | ||
1057 | SAVEMEM(exid->clname.data, exid->clname.len); | ||
1058 | |||
1059 | READ_BUF(4); | ||
1060 | READ32(exid->flags); | ||
1061 | |||
1062 | /* Ignore state_protect4_a */ | ||
1063 | READ_BUF(4); | ||
1064 | READ32(exid->spa_how); | ||
1065 | switch (exid->spa_how) { | ||
1066 | case SP4_NONE: | ||
1067 | break; | ||
1068 | case SP4_MACH_CRED: | ||
1069 | /* spo_must_enforce */ | ||
1070 | READ_BUF(4); | ||
1071 | READ32(dummy); | ||
1072 | READ_BUF(dummy * 4); | ||
1073 | p += dummy; | ||
1074 | |||
1075 | /* spo_must_allow */ | ||
1076 | READ_BUF(4); | ||
1077 | READ32(dummy); | ||
1078 | READ_BUF(dummy * 4); | ||
1079 | p += dummy; | ||
1080 | break; | ||
1081 | case SP4_SSV: | ||
1082 | /* ssp_ops */ | ||
1083 | READ_BUF(4); | ||
1084 | READ32(dummy); | ||
1085 | READ_BUF(dummy * 4); | ||
1086 | p += dummy; | ||
1087 | |||
1088 | READ_BUF(4); | ||
1089 | READ32(dummy); | ||
1090 | READ_BUF(dummy * 4); | ||
1091 | p += dummy; | ||
1092 | |||
1093 | /* ssp_hash_algs<> */ | ||
1094 | READ_BUF(4); | ||
1095 | READ32(dummy); | ||
1096 | READ_BUF(dummy); | ||
1097 | p += XDR_QUADLEN(dummy); | ||
1098 | |||
1099 | /* ssp_encr_algs<> */ | ||
1100 | READ_BUF(4); | ||
1101 | READ32(dummy); | ||
1102 | READ_BUF(dummy); | ||
1103 | p += XDR_QUADLEN(dummy); | ||
1104 | |||
1105 | /* ssp_window and ssp_num_gss_handles */ | ||
1106 | READ_BUF(8); | ||
1107 | READ32(dummy); | ||
1108 | READ32(dummy); | ||
1109 | break; | ||
1110 | default: | ||
1111 | goto xdr_error; | ||
1112 | } | ||
1113 | |||
1114 | /* Ignore Implementation ID */ | ||
1115 | READ_BUF(4); /* nfs_impl_id4 array length */ | ||
1116 | READ32(dummy); | ||
1117 | |||
1118 | if (dummy > 1) | ||
1119 | goto xdr_error; | ||
1120 | |||
1121 | if (dummy == 1) { | ||
1122 | /* nii_domain */ | ||
1123 | READ_BUF(4); | ||
1124 | READ32(dummy); | ||
1125 | READ_BUF(dummy); | ||
1126 | p += XDR_QUADLEN(dummy); | ||
1127 | |||
1128 | /* nii_name */ | ||
1129 | READ_BUF(4); | ||
1130 | READ32(dummy); | ||
1131 | READ_BUF(dummy); | ||
1132 | p += XDR_QUADLEN(dummy); | ||
1133 | |||
1134 | /* nii_date */ | ||
1135 | READ_BUF(12); | ||
1136 | p += 3; | ||
1137 | } | ||
1138 | DECODE_TAIL; | ||
1139 | } | ||
1140 | |||
1141 | static __be32 | ||
1142 | nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, | ||
1143 | struct nfsd4_create_session *sess) | ||
1144 | { | ||
1145 | DECODE_HEAD; | ||
1146 | |||
1147 | u32 dummy; | ||
1148 | char *machine_name; | ||
1149 | int i; | ||
1150 | int nr_secflavs; | ||
1151 | |||
1152 | READ_BUF(16); | ||
1153 | COPYMEM(&sess->clientid, 8); | ||
1154 | READ32(sess->seqid); | ||
1155 | READ32(sess->flags); | ||
1156 | |||
1157 | /* Fore channel attrs */ | ||
1158 | READ_BUF(28); | ||
1159 | READ32(dummy); /* headerpadsz is always 0 */ | ||
1160 | READ32(sess->fore_channel.maxreq_sz); | ||
1161 | READ32(sess->fore_channel.maxresp_sz); | ||
1162 | READ32(sess->fore_channel.maxresp_cached); | ||
1163 | READ32(sess->fore_channel.maxops); | ||
1164 | READ32(sess->fore_channel.maxreqs); | ||
1165 | READ32(sess->fore_channel.nr_rdma_attrs); | ||
1166 | if (sess->fore_channel.nr_rdma_attrs == 1) { | ||
1167 | READ_BUF(4); | ||
1168 | READ32(sess->fore_channel.rdma_attrs); | ||
1169 | } else if (sess->fore_channel.nr_rdma_attrs > 1) { | ||
1170 | dprintk("Too many fore channel attr bitmaps!\n"); | ||
1171 | goto xdr_error; | ||
1172 | } | ||
1173 | |||
1174 | /* Back channel attrs */ | ||
1175 | READ_BUF(28); | ||
1176 | READ32(dummy); /* headerpadsz is always 0 */ | ||
1177 | READ32(sess->back_channel.maxreq_sz); | ||
1178 | READ32(sess->back_channel.maxresp_sz); | ||
1179 | READ32(sess->back_channel.maxresp_cached); | ||
1180 | READ32(sess->back_channel.maxops); | ||
1181 | READ32(sess->back_channel.maxreqs); | ||
1182 | READ32(sess->back_channel.nr_rdma_attrs); | ||
1183 | if (sess->back_channel.nr_rdma_attrs == 1) { | ||
1184 | READ_BUF(4); | ||
1185 | READ32(sess->back_channel.rdma_attrs); | ||
1186 | } else if (sess->back_channel.nr_rdma_attrs > 1) { | ||
1187 | dprintk("Too many back channel attr bitmaps!\n"); | ||
1188 | goto xdr_error; | ||
1189 | } | ||
1190 | |||
1191 | READ_BUF(8); | ||
1192 | READ32(sess->callback_prog); | ||
1193 | |||
1194 | /* callback_sec_params4 */ | ||
1195 | READ32(nr_secflavs); | ||
1196 | for (i = 0; i < nr_secflavs; ++i) { | ||
1197 | READ_BUF(4); | ||
1198 | READ32(dummy); | ||
1199 | switch (dummy) { | ||
1200 | case RPC_AUTH_NULL: | ||
1201 | /* Nothing to read */ | ||
1202 | break; | ||
1203 | case RPC_AUTH_UNIX: | ||
1204 | READ_BUF(8); | ||
1205 | /* stamp */ | ||
1206 | READ32(dummy); | ||
1207 | |||
1208 | /* machine name */ | ||
1209 | READ32(dummy); | ||
1210 | READ_BUF(dummy); | ||
1211 | SAVEMEM(machine_name, dummy); | ||
1212 | |||
1213 | /* uid, gid */ | ||
1214 | READ_BUF(8); | ||
1215 | READ32(sess->uid); | ||
1216 | READ32(sess->gid); | ||
1217 | |||
1218 | /* more gids */ | ||
1219 | READ_BUF(4); | ||
1220 | READ32(dummy); | ||
1221 | READ_BUF(dummy * 4); | ||
1222 | for (i = 0; i < dummy; ++i) | ||
1223 | READ32(dummy); | ||
1224 | break; | ||
1225 | case RPC_AUTH_GSS: | ||
1226 | dprintk("RPC_AUTH_GSS callback secflavor " | ||
1227 | "not supported!\n"); | ||
1228 | READ_BUF(8); | ||
1229 | /* gcbp_service */ | ||
1230 | READ32(dummy); | ||
1231 | /* gcbp_handle_from_server */ | ||
1232 | READ32(dummy); | ||
1233 | READ_BUF(dummy); | ||
1234 | p += XDR_QUADLEN(dummy); | ||
1235 | /* gcbp_handle_from_client */ | ||
1236 | READ_BUF(4); | ||
1237 | READ32(dummy); | ||
1238 | READ_BUF(dummy); | ||
1239 | p += XDR_QUADLEN(dummy); | ||
1240 | break; | ||
1241 | default: | ||
1242 | dprintk("Illegal callback secflavor\n"); | ||
1243 | return nfserr_inval; | ||
1244 | } | ||
1245 | } | ||
1246 | DECODE_TAIL; | ||
1247 | } | ||
1248 | |||
1249 | static __be32 | ||
1250 | nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, | ||
1251 | struct nfsd4_destroy_session *destroy_session) | ||
1252 | { | ||
1253 | DECODE_HEAD; | ||
1254 | READ_BUF(NFS4_MAX_SESSIONID_LEN); | ||
1255 | COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
1256 | |||
1257 | DECODE_TAIL; | ||
1258 | } | ||
1259 | |||
1260 | static __be32 | ||
1261 | nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, | ||
1262 | struct nfsd4_sequence *seq) | ||
1263 | { | ||
1264 | DECODE_HEAD; | ||
1265 | |||
1266 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); | ||
1267 | COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
1268 | READ32(seq->seqid); | ||
1269 | READ32(seq->slotid); | ||
1270 | READ32(seq->maxslots); | ||
1271 | READ32(seq->cachethis); | ||
1272 | |||
996 | DECODE_TAIL; | 1273 | DECODE_TAIL; |
997 | } | 1274 | } |
998 | 1275 | ||
@@ -1005,7 +1282,7 @@ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) | |||
1005 | static __be32 | 1282 | static __be32 |
1006 | nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) | 1283 | nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) |
1007 | { | 1284 | { |
1008 | return nfserr_opnotsupp; | 1285 | return nfserr_notsupp; |
1009 | } | 1286 | } |
1010 | 1287 | ||
1011 | typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); | 1288 | typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); |
@@ -1031,7 +1308,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1031 | [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, | 1308 | [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, |
1032 | [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, | 1309 | [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, |
1033 | [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, | 1310 | [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, |
1034 | [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, | 1311 | [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, |
1035 | [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, | 1312 | [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, |
1036 | [OP_READ] = (nfsd4_dec)nfsd4_decode_read, | 1313 | [OP_READ] = (nfsd4_dec)nfsd4_decode_read, |
1037 | [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, | 1314 | [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, |
@@ -1050,6 +1327,67 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1050 | [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, | 1327 | [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, |
1051 | }; | 1328 | }; |
1052 | 1329 | ||
1330 | static nfsd4_dec nfsd41_dec_ops[] = { | ||
1331 | [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access, | ||
1332 | [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close, | ||
1333 | [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit, | ||
1334 | [OP_CREATE] (nfsd4_dec)nfsd4_decode_create, | ||
1335 | [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1336 | [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn, | ||
1337 | [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr, | ||
1338 | [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1339 | [OP_LINK] (nfsd4_dec)nfsd4_decode_link, | ||
1340 | [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock, | ||
1341 | [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt, | ||
1342 | [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku, | ||
1343 | [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup, | ||
1344 | [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop, | ||
1345 | [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify, | ||
1346 | [OP_OPEN] (nfsd4_dec)nfsd4_decode_open, | ||
1347 | [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1348 | [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1349 | [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade, | ||
1350 | [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh, | ||
1351 | [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1352 | [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1353 | [OP_READ] (nfsd4_dec)nfsd4_decode_read, | ||
1354 | [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir, | ||
1355 | [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop, | ||
1356 | [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove, | ||
1357 | [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename, | ||
1358 | [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1359 | [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1360 | [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1361 | [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo, | ||
1362 | [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr, | ||
1363 | [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1364 | [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp, | ||
1365 | [OP_VERIFY] (nfsd4_dec)nfsd4_decode_verify, | ||
1366 | [OP_WRITE] (nfsd4_dec)nfsd4_decode_write, | ||
1367 | [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1368 | |||
1369 | /* new operations for NFSv4.1 */ | ||
1370 | [OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1371 | [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp, | ||
1372 | [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id, | ||
1373 | [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session, | ||
1374 | [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session, | ||
1375 | [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1376 | [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1377 | [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1378 | [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1379 | [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1380 | [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1381 | [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1382 | [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1383 | [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence, | ||
1384 | [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1385 | [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1386 | [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1387 | [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1388 | [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1389 | }; | ||
1390 | |||
1053 | struct nfsd4_minorversion_ops { | 1391 | struct nfsd4_minorversion_ops { |
1054 | nfsd4_dec *decoders; | 1392 | nfsd4_dec *decoders; |
1055 | int nops; | 1393 | int nops; |
@@ -1057,6 +1395,7 @@ struct nfsd4_minorversion_ops { | |||
1057 | 1395 | ||
1058 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { | 1396 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { |
1059 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, | 1397 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, |
1398 | [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, | ||
1060 | }; | 1399 | }; |
1061 | 1400 | ||
1062 | static __be32 | 1401 | static __be32 |
@@ -1412,6 +1751,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1412 | { | 1751 | { |
1413 | u32 bmval0 = bmval[0]; | 1752 | u32 bmval0 = bmval[0]; |
1414 | u32 bmval1 = bmval[1]; | 1753 | u32 bmval1 = bmval[1]; |
1754 | u32 bmval2 = bmval[2]; | ||
1415 | struct kstat stat; | 1755 | struct kstat stat; |
1416 | struct svc_fh tempfh; | 1756 | struct svc_fh tempfh; |
1417 | struct kstatfs statfs; | 1757 | struct kstatfs statfs; |
@@ -1425,12 +1765,16 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1425 | int err; | 1765 | int err; |
1426 | int aclsupport = 0; | 1766 | int aclsupport = 0; |
1427 | struct nfs4_acl *acl = NULL; | 1767 | struct nfs4_acl *acl = NULL; |
1768 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1769 | u32 minorversion = resp->cstate.minorversion; | ||
1428 | 1770 | ||
1429 | BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); | 1771 | BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); |
1430 | BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); | 1772 | BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); |
1431 | BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); | 1773 | BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion)); |
1774 | BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); | ||
1432 | 1775 | ||
1433 | if (exp->ex_fslocs.migrated) { | 1776 | if (exp->ex_fslocs.migrated) { |
1777 | BUG_ON(bmval[2]); | ||
1434 | status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); | 1778 | status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); |
1435 | if (status) | 1779 | if (status) |
1436 | goto out; | 1780 | goto out; |
@@ -1476,22 +1820,42 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1476 | if ((buflen -= 16) < 0) | 1820 | if ((buflen -= 16) < 0) |
1477 | goto out_resource; | 1821 | goto out_resource; |
1478 | 1822 | ||
1479 | WRITE32(2); | 1823 | if (unlikely(bmval2)) { |
1480 | WRITE32(bmval0); | 1824 | WRITE32(3); |
1481 | WRITE32(bmval1); | 1825 | WRITE32(bmval0); |
1826 | WRITE32(bmval1); | ||
1827 | WRITE32(bmval2); | ||
1828 | } else if (likely(bmval1)) { | ||
1829 | WRITE32(2); | ||
1830 | WRITE32(bmval0); | ||
1831 | WRITE32(bmval1); | ||
1832 | } else { | ||
1833 | WRITE32(1); | ||
1834 | WRITE32(bmval0); | ||
1835 | } | ||
1482 | attrlenp = p++; /* to be backfilled later */ | 1836 | attrlenp = p++; /* to be backfilled later */ |
1483 | 1837 | ||
1484 | if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { | 1838 | if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { |
1485 | u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; | 1839 | u32 word0 = nfsd_suppattrs0(minorversion); |
1840 | u32 word1 = nfsd_suppattrs1(minorversion); | ||
1841 | u32 word2 = nfsd_suppattrs2(minorversion); | ||
1842 | |||
1486 | if ((buflen -= 12) < 0) | 1843 | if ((buflen -= 12) < 0) |
1487 | goto out_resource; | 1844 | goto out_resource; |
1488 | if (!aclsupport) | 1845 | if (!aclsupport) |
1489 | word0 &= ~FATTR4_WORD0_ACL; | 1846 | word0 &= ~FATTR4_WORD0_ACL; |
1490 | if (!exp->ex_fslocs.locations) | 1847 | if (!exp->ex_fslocs.locations) |
1491 | word0 &= ~FATTR4_WORD0_FS_LOCATIONS; | 1848 | word0 &= ~FATTR4_WORD0_FS_LOCATIONS; |
1492 | WRITE32(2); | 1849 | if (!word2) { |
1493 | WRITE32(word0); | 1850 | WRITE32(2); |
1494 | WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); | 1851 | WRITE32(word0); |
1852 | WRITE32(word1); | ||
1853 | } else { | ||
1854 | WRITE32(3); | ||
1855 | WRITE32(word0); | ||
1856 | WRITE32(word1); | ||
1857 | WRITE32(word2); | ||
1858 | } | ||
1495 | } | 1859 | } |
1496 | if (bmval0 & FATTR4_WORD0_TYPE) { | 1860 | if (bmval0 & FATTR4_WORD0_TYPE) { |
1497 | if ((buflen -= 4) < 0) | 1861 | if ((buflen -= 4) < 0) |
@@ -1801,6 +2165,13 @@ out_acl: | |||
1801 | } | 2165 | } |
1802 | WRITE64(stat.ino); | 2166 | WRITE64(stat.ino); |
1803 | } | 2167 | } |
2168 | if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { | ||
2169 | WRITE32(3); | ||
2170 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); | ||
2171 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); | ||
2172 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); | ||
2173 | } | ||
2174 | |||
1804 | *attrlenp = htonl((char *)p - (char *)attrlenp - 4); | 2175 | *attrlenp = htonl((char *)p - (char *)attrlenp - 4); |
1805 | *countp = p - buffer; | 2176 | *countp = p - buffer; |
1806 | status = nfs_ok; | 2177 | status = nfs_ok; |
@@ -2572,6 +2943,143 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w | |||
2572 | } | 2943 | } |
2573 | 2944 | ||
2574 | static __be32 | 2945 | static __be32 |
2946 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, | ||
2947 | struct nfsd4_exchange_id *exid) | ||
2948 | { | ||
2949 | ENCODE_HEAD; | ||
2950 | char *major_id; | ||
2951 | char *server_scope; | ||
2952 | int major_id_sz; | ||
2953 | int server_scope_sz; | ||
2954 | uint64_t minor_id = 0; | ||
2955 | |||
2956 | if (nfserr) | ||
2957 | return nfserr; | ||
2958 | |||
2959 | major_id = utsname()->nodename; | ||
2960 | major_id_sz = strlen(major_id); | ||
2961 | server_scope = utsname()->nodename; | ||
2962 | server_scope_sz = strlen(server_scope); | ||
2963 | |||
2964 | RESERVE_SPACE( | ||
2965 | 8 /* eir_clientid */ + | ||
2966 | 4 /* eir_sequenceid */ + | ||
2967 | 4 /* eir_flags */ + | ||
2968 | 4 /* spr_how (SP4_NONE) */ + | ||
2969 | 8 /* so_minor_id */ + | ||
2970 | 4 /* so_major_id.len */ + | ||
2971 | (XDR_QUADLEN(major_id_sz) * 4) + | ||
2972 | 4 /* eir_server_scope.len */ + | ||
2973 | (XDR_QUADLEN(server_scope_sz) * 4) + | ||
2974 | 4 /* eir_server_impl_id.count (0) */); | ||
2975 | |||
2976 | WRITEMEM(&exid->clientid, 8); | ||
2977 | WRITE32(exid->seqid); | ||
2978 | WRITE32(exid->flags); | ||
2979 | |||
2980 | /* state_protect4_r. Currently only support SP4_NONE */ | ||
2981 | BUG_ON(exid->spa_how != SP4_NONE); | ||
2982 | WRITE32(exid->spa_how); | ||
2983 | |||
2984 | /* The server_owner struct */ | ||
2985 | WRITE64(minor_id); /* Minor id */ | ||
2986 | /* major id */ | ||
2987 | WRITE32(major_id_sz); | ||
2988 | WRITEMEM(major_id, major_id_sz); | ||
2989 | |||
2990 | /* Server scope */ | ||
2991 | WRITE32(server_scope_sz); | ||
2992 | WRITEMEM(server_scope, server_scope_sz); | ||
2993 | |||
2994 | /* Implementation id */ | ||
2995 | WRITE32(0); /* zero length nfs_impl_id4 array */ | ||
2996 | ADJUST_ARGS(); | ||
2997 | return 0; | ||
2998 | } | ||
2999 | |||
3000 | static __be32 | ||
3001 | nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, | ||
3002 | struct nfsd4_create_session *sess) | ||
3003 | { | ||
3004 | ENCODE_HEAD; | ||
3005 | |||
3006 | if (nfserr) | ||
3007 | return nfserr; | ||
3008 | |||
3009 | RESERVE_SPACE(24); | ||
3010 | WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
3011 | WRITE32(sess->seqid); | ||
3012 | WRITE32(sess->flags); | ||
3013 | ADJUST_ARGS(); | ||
3014 | |||
3015 | RESERVE_SPACE(28); | ||
3016 | WRITE32(0); /* headerpadsz */ | ||
3017 | WRITE32(sess->fore_channel.maxreq_sz); | ||
3018 | WRITE32(sess->fore_channel.maxresp_sz); | ||
3019 | WRITE32(sess->fore_channel.maxresp_cached); | ||
3020 | WRITE32(sess->fore_channel.maxops); | ||
3021 | WRITE32(sess->fore_channel.maxreqs); | ||
3022 | WRITE32(sess->fore_channel.nr_rdma_attrs); | ||
3023 | ADJUST_ARGS(); | ||
3024 | |||
3025 | if (sess->fore_channel.nr_rdma_attrs) { | ||
3026 | RESERVE_SPACE(4); | ||
3027 | WRITE32(sess->fore_channel.rdma_attrs); | ||
3028 | ADJUST_ARGS(); | ||
3029 | } | ||
3030 | |||
3031 | RESERVE_SPACE(28); | ||
3032 | WRITE32(0); /* headerpadsz */ | ||
3033 | WRITE32(sess->back_channel.maxreq_sz); | ||
3034 | WRITE32(sess->back_channel.maxresp_sz); | ||
3035 | WRITE32(sess->back_channel.maxresp_cached); | ||
3036 | WRITE32(sess->back_channel.maxops); | ||
3037 | WRITE32(sess->back_channel.maxreqs); | ||
3038 | WRITE32(sess->back_channel.nr_rdma_attrs); | ||
3039 | ADJUST_ARGS(); | ||
3040 | |||
3041 | if (sess->back_channel.nr_rdma_attrs) { | ||
3042 | RESERVE_SPACE(4); | ||
3043 | WRITE32(sess->back_channel.rdma_attrs); | ||
3044 | ADJUST_ARGS(); | ||
3045 | } | ||
3046 | return 0; | ||
3047 | } | ||
3048 | |||
3049 | static __be32 | ||
3050 | nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, | ||
3051 | struct nfsd4_destroy_session *destroy_session) | ||
3052 | { | ||
3053 | return nfserr; | ||
3054 | } | ||
3055 | |||
3056 | __be32 | ||
3057 | nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | ||
3058 | struct nfsd4_sequence *seq) | ||
3059 | { | ||
3060 | ENCODE_HEAD; | ||
3061 | |||
3062 | if (nfserr) | ||
3063 | return nfserr; | ||
3064 | |||
3065 | RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); | ||
3066 | WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
3067 | WRITE32(seq->seqid); | ||
3068 | WRITE32(seq->slotid); | ||
3069 | WRITE32(seq->maxslots); | ||
3070 | /* | ||
3071 | * FIXME: for now: | ||
3072 | * target_maxslots = maxslots | ||
3073 | * status_flags = 0 | ||
3074 | */ | ||
3075 | WRITE32(seq->maxslots); | ||
3076 | WRITE32(0); | ||
3077 | |||
3078 | ADJUST_ARGS(); | ||
3079 | return 0; | ||
3080 | } | ||
3081 | |||
3082 | static __be32 | ||
2575 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) | 3083 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) |
2576 | { | 3084 | { |
2577 | return nfserr; | 3085 | return nfserr; |
@@ -2579,6 +3087,11 @@ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) | |||
2579 | 3087 | ||
2580 | typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); | 3088 | typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); |
2581 | 3089 | ||
3090 | /* | ||
3091 | * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 | ||
3092 | * since we don't need to filter out obsolete ops as this is | ||
3093 | * done in the decoding phase. | ||
3094 | */ | ||
2582 | static nfsd4_enc nfsd4_enc_ops[] = { | 3095 | static nfsd4_enc nfsd4_enc_ops[] = { |
2583 | [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, | 3096 | [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, |
2584 | [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, | 3097 | [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, |
@@ -2617,8 +3130,77 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
2617 | [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, | 3130 | [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, |
2618 | [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, | 3131 | [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, |
2619 | [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, | 3132 | [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, |
3133 | |||
3134 | /* NFSv4.1 operations */ | ||
3135 | [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, | ||
3136 | [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop, | ||
3137 | [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, | ||
3138 | [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, | ||
3139 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, | ||
3140 | [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | ||
3141 | [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | ||
3142 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, | ||
3143 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, | ||
3144 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, | ||
3145 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, | ||
3146 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, | ||
3147 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop, | ||
3148 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, | ||
3149 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, | ||
3150 | [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | ||
3151 | [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | ||
3152 | [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, | ||
3153 | [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, | ||
2620 | }; | 3154 | }; |
2621 | 3155 | ||
3156 | /* | ||
3157 | * Calculate the total amount of memory that the compound response has taken | ||
3158 | * after encoding the current operation. | ||
3159 | * | ||
3160 | * pad: add on 8 bytes for the next operation's op_code and status so that | ||
3161 | * there is room to cache a failure on the next operation. | ||
3162 | * | ||
3163 | * Compare this length to the session se_fmaxresp_cached. | ||
3164 | * | ||
3165 | * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so | ||
3166 | * will be at least a page and will therefore hold the xdr_buf head. | ||
3167 | */ | ||
3168 | static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) | ||
3169 | { | ||
3170 | int status = 0; | ||
3171 | struct xdr_buf *xb = &resp->rqstp->rq_res; | ||
3172 | struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; | ||
3173 | struct nfsd4_session *session = NULL; | ||
3174 | struct nfsd4_slot *slot = resp->cstate.slot; | ||
3175 | u32 length, tlen = 0, pad = 8; | ||
3176 | |||
3177 | if (!nfsd4_has_session(&resp->cstate)) | ||
3178 | return status; | ||
3179 | |||
3180 | session = resp->cstate.session; | ||
3181 | if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) | ||
3182 | return status; | ||
3183 | |||
3184 | if (resp->opcnt >= args->opcnt) | ||
3185 | pad = 0; /* this is the last operation */ | ||
3186 | |||
3187 | if (xb->page_len == 0) { | ||
3188 | length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; | ||
3189 | } else { | ||
3190 | if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) | ||
3191 | tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; | ||
3192 | |||
3193 | length = xb->head[0].iov_len + xb->page_len + tlen + pad; | ||
3194 | } | ||
3195 | dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, | ||
3196 | length, xb->page_len, tlen, pad); | ||
3197 | |||
3198 | if (length <= session->se_fmaxresp_cached) | ||
3199 | return status; | ||
3200 | else | ||
3201 | return nfserr_rep_too_big_to_cache; | ||
3202 | } | ||
3203 | |||
2622 | void | 3204 | void |
2623 | nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | 3205 | nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) |
2624 | { | 3206 | { |
@@ -2635,6 +3217,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | |||
2635 | BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || | 3217 | BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || |
2636 | !nfsd4_enc_ops[op->opnum]); | 3218 | !nfsd4_enc_ops[op->opnum]); |
2637 | op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); | 3219 | op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); |
3220 | /* nfsd4_check_drc_limit guarantees enough room for error status */ | ||
3221 | if (!op->status && nfsd4_check_drc_limit(resp)) | ||
3222 | op->status = nfserr_rep_too_big_to_cache; | ||
2638 | status: | 3223 | status: |
2639 | /* | 3224 | /* |
2640 | * Note: We write the status directly, instead of using WRITE32(), | 3225 | * Note: We write the status directly, instead of using WRITE32(), |
@@ -2735,6 +3320,18 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
2735 | iov = &rqstp->rq_res.head[0]; | 3320 | iov = &rqstp->rq_res.head[0]; |
2736 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; | 3321 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; |
2737 | BUG_ON(iov->iov_len > PAGE_SIZE); | 3322 | BUG_ON(iov->iov_len > PAGE_SIZE); |
3323 | if (nfsd4_has_session(&resp->cstate)) { | ||
3324 | if (resp->cstate.status == nfserr_replay_cache && | ||
3325 | !nfsd4_not_cached(resp)) { | ||
3326 | iov->iov_len = resp->cstate.iovlen; | ||
3327 | } else { | ||
3328 | nfsd4_store_cache_entry(resp); | ||
3329 | dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); | ||
3330 | resp->cstate.slot->sl_inuse = 0; | ||
3331 | } | ||
3332 | if (resp->cstate.session) | ||
3333 | nfsd4_put_session(resp->cstate.session); | ||
3334 | } | ||
2738 | return 1; | 3335 | return 1; |
2739 | } | 3336 | } |
2740 | 3337 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a4ed8644d69c..af16849d243a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -60,6 +60,7 @@ enum { | |||
60 | NFSD_FO_UnlockFS, | 60 | NFSD_FO_UnlockFS, |
61 | NFSD_Threads, | 61 | NFSD_Threads, |
62 | NFSD_Pool_Threads, | 62 | NFSD_Pool_Threads, |
63 | NFSD_Pool_Stats, | ||
63 | NFSD_Versions, | 64 | NFSD_Versions, |
64 | NFSD_Ports, | 65 | NFSD_Ports, |
65 | NFSD_MaxBlkSize, | 66 | NFSD_MaxBlkSize, |
@@ -172,6 +173,16 @@ static const struct file_operations exports_operations = { | |||
172 | .owner = THIS_MODULE, | 173 | .owner = THIS_MODULE, |
173 | }; | 174 | }; |
174 | 175 | ||
176 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); | ||
177 | |||
178 | static struct file_operations pool_stats_operations = { | ||
179 | .open = nfsd_pool_stats_open, | ||
180 | .read = seq_read, | ||
181 | .llseek = seq_lseek, | ||
182 | .release = seq_release, | ||
183 | .owner = THIS_MODULE, | ||
184 | }; | ||
185 | |||
175 | /*----------------------------------------------------------------------------*/ | 186 | /*----------------------------------------------------------------------------*/ |
176 | /* | 187 | /* |
177 | * payload - write methods | 188 | * payload - write methods |
@@ -781,8 +792,9 @@ out_free: | |||
781 | static ssize_t __write_versions(struct file *file, char *buf, size_t size) | 792 | static ssize_t __write_versions(struct file *file, char *buf, size_t size) |
782 | { | 793 | { |
783 | char *mesg = buf; | 794 | char *mesg = buf; |
784 | char *vers, sign; | 795 | char *vers, *minorp, sign; |
785 | int len, num; | 796 | int len, num; |
797 | unsigned minor; | ||
786 | ssize_t tlen = 0; | 798 | ssize_t tlen = 0; |
787 | char *sep; | 799 | char *sep; |
788 | 800 | ||
@@ -803,9 +815,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) | |||
803 | do { | 815 | do { |
804 | sign = *vers; | 816 | sign = *vers; |
805 | if (sign == '+' || sign == '-') | 817 | if (sign == '+' || sign == '-') |
806 | num = simple_strtol((vers+1), NULL, 0); | 818 | num = simple_strtol((vers+1), &minorp, 0); |
807 | else | 819 | else |
808 | num = simple_strtol(vers, NULL, 0); | 820 | num = simple_strtol(vers, &minorp, 0); |
821 | if (*minorp == '.') { | ||
822 | if (num < 4) | ||
823 | return -EINVAL; | ||
824 | minor = simple_strtoul(minorp+1, NULL, 0); | ||
825 | if (minor == 0) | ||
826 | return -EINVAL; | ||
827 | if (nfsd_minorversion(minor, sign == '-' ? | ||
828 | NFSD_CLEAR : NFSD_SET) < 0) | ||
829 | return -EINVAL; | ||
830 | goto next; | ||
831 | } | ||
809 | switch(num) { | 832 | switch(num) { |
810 | case 2: | 833 | case 2: |
811 | case 3: | 834 | case 3: |
@@ -815,6 +838,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) | |||
815 | default: | 838 | default: |
816 | return -EINVAL; | 839 | return -EINVAL; |
817 | } | 840 | } |
841 | next: | ||
818 | vers += len + 1; | 842 | vers += len + 1; |
819 | tlen += len; | 843 | tlen += len; |
820 | } while ((len = qword_get(&mesg, vers, size)) > 0); | 844 | } while ((len = qword_get(&mesg, vers, size)) > 0); |
@@ -833,6 +857,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) | |||
833 | num); | 857 | num); |
834 | sep = " "; | 858 | sep = " "; |
835 | } | 859 | } |
860 | if (nfsd_vers(4, NFSD_AVAIL)) | ||
861 | for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) | ||
862 | len += sprintf(buf+len, " %c4.%u", | ||
863 | (nfsd_vers(4, NFSD_TEST) && | ||
864 | nfsd_minorversion(minor, NFSD_TEST)) ? | ||
865 | '+' : '-', | ||
866 | minor); | ||
836 | len += sprintf(buf+len, "\n"); | 867 | len += sprintf(buf+len, "\n"); |
837 | return len; | 868 | return len; |
838 | } | 869 | } |
@@ -1248,6 +1279,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1248 | [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, | 1279 | [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, |
1249 | [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, | 1280 | [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, |
1250 | [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, | 1281 | [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, |
1282 | [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, | ||
1251 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, | 1283 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, |
1252 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, | 1284 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, |
1253 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, | 1285 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6f7f26351227..e298e260b5f1 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -180,6 +180,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, | |||
180 | { | 180 | { |
181 | __be32 nfserr; | 181 | __be32 nfserr; |
182 | int stable = 1; | 182 | int stable = 1; |
183 | unsigned long cnt = argp->len; | ||
183 | 184 | ||
184 | dprintk("nfsd: WRITE %s %d bytes at %d\n", | 185 | dprintk("nfsd: WRITE %s %d bytes at %d\n", |
185 | SVCFH_fmt(&argp->fh), | 186 | SVCFH_fmt(&argp->fh), |
@@ -188,7 +189,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, | |||
188 | nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, | 189 | nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, |
189 | argp->offset, | 190 | argp->offset, |
190 | rqstp->rq_vec, argp->vlen, | 191 | rqstp->rq_vec, argp->vlen, |
191 | argp->len, | 192 | &cnt, |
192 | &stable); | 193 | &stable); |
193 | return nfsd_return_attrs(nfserr, resp); | 194 | return nfsd_return_attrs(nfserr, resp); |
194 | } | 195 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7c09852be713..cbba4a935786 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/fs_struct.h> | 23 | #include <linux/fs_struct.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/swap.h> | ||
25 | 26 | ||
26 | #include <linux/sunrpc/types.h> | 27 | #include <linux/sunrpc/types.h> |
27 | #include <linux/sunrpc/stats.h> | 28 | #include <linux/sunrpc/stats.h> |
@@ -40,9 +41,6 @@ | |||
40 | extern struct svc_program nfsd_program; | 41 | extern struct svc_program nfsd_program; |
41 | static int nfsd(void *vrqstp); | 42 | static int nfsd(void *vrqstp); |
42 | struct timeval nfssvc_boot; | 43 | struct timeval nfssvc_boot; |
43 | static atomic_t nfsd_busy; | ||
44 | static unsigned long nfsd_last_call; | ||
45 | static DEFINE_SPINLOCK(nfsd_call_lock); | ||
46 | 44 | ||
47 | /* | 45 | /* |
48 | * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members | 46 | * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members |
@@ -123,6 +121,8 @@ struct svc_program nfsd_program = { | |||
123 | 121 | ||
124 | }; | 122 | }; |
125 | 123 | ||
124 | u32 nfsd_supported_minorversion; | ||
125 | |||
126 | int nfsd_vers(int vers, enum vers_op change) | 126 | int nfsd_vers(int vers, enum vers_op change) |
127 | { | 127 | { |
128 | if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) | 128 | if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) |
@@ -149,6 +149,28 @@ int nfsd_vers(int vers, enum vers_op change) | |||
149 | } | 149 | } |
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
152 | |||
153 | int nfsd_minorversion(u32 minorversion, enum vers_op change) | ||
154 | { | ||
155 | if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) | ||
156 | return -1; | ||
157 | switch(change) { | ||
158 | case NFSD_SET: | ||
159 | nfsd_supported_minorversion = minorversion; | ||
160 | break; | ||
161 | case NFSD_CLEAR: | ||
162 | if (minorversion == 0) | ||
163 | return -1; | ||
164 | nfsd_supported_minorversion = minorversion - 1; | ||
165 | break; | ||
166 | case NFSD_TEST: | ||
167 | return minorversion <= nfsd_supported_minorversion; | ||
168 | case NFSD_AVAIL: | ||
169 | return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; | ||
170 | } | ||
171 | return 0; | ||
172 | } | ||
173 | |||
152 | /* | 174 | /* |
153 | * Maximum number of nfsd processes | 175 | * Maximum number of nfsd processes |
154 | */ | 176 | */ |
@@ -200,6 +222,28 @@ void nfsd_reset_versions(void) | |||
200 | } | 222 | } |
201 | } | 223 | } |
202 | 224 | ||
225 | /* | ||
226 | * Each session guarantees a negotiated per slot memory cache for replies | ||
227 | * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated | ||
228 | * NFSv4.1 server might want to use more memory for a DRC than a machine | ||
229 | * with multiple services. | ||
230 | * | ||
231 | * Impose a hard limit on the number of pages for the DRC which varies | ||
232 | * according to the machine's free pages. This is of course only a default. | ||
233 | * | ||
234 | * For now this is a #defined shift which could be under admin control | ||
235 | * in the future. | ||
236 | */ | ||
237 | static void set_max_drc(void) | ||
238 | { | ||
239 | /* Right-shift applied to nr_free_buffer_pages to size the V4.1 server DRC */ | ||
240 | #define NFSD_DRC_SIZE_SHIFT 7 | ||
241 | nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() | ||
242 | >> NFSD_DRC_SIZE_SHIFT; | ||
243 | nfsd_serv->sv_drc_pages_used = 0; | ||
244 | dprintk("%s svc_drc_max_pages %u\n", __func__, | ||
245 | nfsd_serv->sv_drc_max_pages); | ||
246 | } | ||
203 | 247 | ||
204 | int nfsd_create_serv(void) | 248 | int nfsd_create_serv(void) |
205 | { | 249 | { |
@@ -227,11 +271,12 @@ int nfsd_create_serv(void) | |||
227 | nfsd_max_blksize /= 2; | 271 | nfsd_max_blksize /= 2; |
228 | } | 272 | } |
229 | 273 | ||
230 | atomic_set(&nfsd_busy, 0); | ||
231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, | 274 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, |
232 | nfsd_last_thread, nfsd, THIS_MODULE); | 275 | nfsd_last_thread, nfsd, THIS_MODULE); |
233 | if (nfsd_serv == NULL) | 276 | if (nfsd_serv == NULL) |
234 | err = -ENOMEM; | 277 | err = -ENOMEM; |
278 | else | ||
279 | set_max_drc(); | ||
235 | 280 | ||
236 | do_gettimeofday(&nfssvc_boot); /* record boot time */ | 281 | do_gettimeofday(&nfssvc_boot); /* record boot time */ |
237 | return err; | 282 | return err; |
@@ -375,26 +420,6 @@ nfsd_svc(unsigned short port, int nrservs) | |||
375 | return error; | 420 | return error; |
376 | } | 421 | } |
377 | 422 | ||
378 | static inline void | ||
379 | update_thread_usage(int busy_threads) | ||
380 | { | ||
381 | unsigned long prev_call; | ||
382 | unsigned long diff; | ||
383 | int decile; | ||
384 | |||
385 | spin_lock(&nfsd_call_lock); | ||
386 | prev_call = nfsd_last_call; | ||
387 | nfsd_last_call = jiffies; | ||
388 | decile = busy_threads*10/nfsdstats.th_cnt; | ||
389 | if (decile>0 && decile <= 10) { | ||
390 | diff = nfsd_last_call - prev_call; | ||
391 | if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) | ||
392 | nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; | ||
393 | if (decile == 10) | ||
394 | nfsdstats.th_fullcnt++; | ||
395 | } | ||
396 | spin_unlock(&nfsd_call_lock); | ||
397 | } | ||
398 | 423 | ||
399 | /* | 424 | /* |
400 | * This is the NFS server kernel thread | 425 | * This is the NFS server kernel thread |
@@ -460,8 +485,6 @@ nfsd(void *vrqstp) | |||
460 | continue; | 485 | continue; |
461 | } | 486 | } |
462 | 487 | ||
463 | update_thread_usage(atomic_read(&nfsd_busy)); | ||
464 | atomic_inc(&nfsd_busy); | ||
465 | 488 | ||
466 | /* Lock the export hash tables for reading. */ | 489 | /* Lock the export hash tables for reading. */ |
467 | exp_readlock(); | 490 | exp_readlock(); |
@@ -470,8 +493,6 @@ nfsd(void *vrqstp) | |||
470 | 493 | ||
471 | /* Unlock export hash tables */ | 494 | /* Unlock export hash tables */ |
472 | exp_readunlock(); | 495 | exp_readunlock(); |
473 | update_thread_usage(atomic_read(&nfsd_busy)); | ||
474 | atomic_dec(&nfsd_busy); | ||
475 | } | 496 | } |
476 | 497 | ||
477 | /* Clear signals before calling svc_exit_thread() */ | 498 | /* Clear signals before calling svc_exit_thread() */ |
@@ -539,6 +560,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
539 | + rqstp->rq_res.head[0].iov_len; | 560 | + rqstp->rq_res.head[0].iov_len; |
540 | rqstp->rq_res.head[0].iov_len += sizeof(__be32); | 561 | rqstp->rq_res.head[0].iov_len += sizeof(__be32); |
541 | 562 | ||
563 | /* NFSv4.1 DRC requires statp */ | ||
564 | if (rqstp->rq_vers == 4) | ||
565 | nfsd4_set_statp(rqstp, statp); | ||
566 | |||
542 | /* Now call the procedure handler, and encode NFS status. */ | 567 | /* Now call the procedure handler, and encode NFS status. */ |
543 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 568 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
544 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); | 569 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); |
@@ -570,3 +595,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
570 | nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); | 595 | nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); |
571 | return 1; | 596 | return 1; |
572 | } | 597 | } |
598 | |||
599 | int nfsd_pool_stats_open(struct inode *inode, struct file *file) | ||
600 | { | ||
601 | if (nfsd_serv == NULL) | ||
602 | return -ENODEV; | ||
603 | return svc_pool_stats_open(nfsd_serv, file); | ||
604 | } | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 78376b6c0236..ab93fcfef254 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -366,8 +366,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
366 | } | 366 | } |
367 | 367 | ||
368 | /* Revoke setuid/setgid on chown */ | 368 | /* Revoke setuid/setgid on chown */ |
369 | if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || | 369 | if (!S_ISDIR(inode->i_mode) && |
370 | ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) { | 370 | (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || |
371 | ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { | ||
371 | iap->ia_valid |= ATTR_KILL_PRIV; | 372 | iap->ia_valid |= ATTR_KILL_PRIV; |
372 | if (iap->ia_valid & ATTR_MODE) { | 373 | if (iap->ia_valid & ATTR_MODE) { |
373 | /* we're setting mode too, just clear the s*id bits */ | 374 | /* we're setting mode too, just clear the s*id bits */ |
@@ -960,7 +961,7 @@ static void kill_suid(struct dentry *dentry) | |||
960 | static __be32 | 961 | static __be32 |
961 | nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 962 | nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
962 | loff_t offset, struct kvec *vec, int vlen, | 963 | loff_t offset, struct kvec *vec, int vlen, |
963 | unsigned long cnt, int *stablep) | 964 | unsigned long *cnt, int *stablep) |
964 | { | 965 | { |
965 | struct svc_export *exp; | 966 | struct svc_export *exp; |
966 | struct dentry *dentry; | 967 | struct dentry *dentry; |
@@ -974,7 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
974 | err = nfserr_perm; | 975 | err = nfserr_perm; |
975 | 976 | ||
976 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && | 977 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && |
977 | (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt))) | 978 | (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt))) |
978 | goto out; | 979 | goto out; |
979 | #endif | 980 | #endif |
980 | 981 | ||
@@ -1009,7 +1010,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1009 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); | 1010 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); |
1010 | set_fs(oldfs); | 1011 | set_fs(oldfs); |
1011 | if (host_err >= 0) { | 1012 | if (host_err >= 0) { |
1012 | nfsdstats.io_write += cnt; | 1013 | nfsdstats.io_write += host_err; |
1013 | fsnotify_modify(file->f_path.dentry); | 1014 | fsnotify_modify(file->f_path.dentry); |
1014 | } | 1015 | } |
1015 | 1016 | ||
@@ -1054,9 +1055,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1054 | } | 1055 | } |
1055 | 1056 | ||
1056 | dprintk("nfsd: write complete host_err=%d\n", host_err); | 1057 | dprintk("nfsd: write complete host_err=%d\n", host_err); |
1057 | if (host_err >= 0) | 1058 | if (host_err >= 0) { |
1058 | err = 0; | 1059 | err = 0; |
1059 | else | 1060 | *cnt = host_err; |
1061 | } else | ||
1060 | err = nfserrno(host_err); | 1062 | err = nfserrno(host_err); |
1061 | out: | 1063 | out: |
1062 | return err; | 1064 | return err; |
@@ -1098,7 +1100,7 @@ out: | |||
1098 | */ | 1100 | */ |
1099 | __be32 | 1101 | __be32 |
1100 | nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 1102 | nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
1101 | loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, | 1103 | loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, |
1102 | int *stablep) | 1104 | int *stablep) |
1103 | { | 1105 | { |
1104 | __be32 err = 0; | 1106 | __be32 err = 0; |
@@ -1179,6 +1181,21 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, | |||
1179 | return 0; | 1181 | return 0; |
1180 | } | 1182 | } |
1181 | 1183 | ||
1184 | /* HPUX client sometimes creates a file in mode 000, and sets size to 0. | ||
1185 | * setting size to 0 may fail for some specific file systems by the permission | ||
1186 | * checking which requires WRITE permission but the mode is 000. | ||
1187 | * We ignore the resize (to 0) on the newly created file, since its size is | ||
1188 | * already 0 after the file is created. | ||
1189 | * | ||
1190 | * call this only after vfs_create() is called. | ||
1191 | * */ | ||
1192 | static void | ||
1193 | nfsd_check_ignore_resizing(struct iattr *iap) | ||
1194 | { | ||
1195 | if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) | ||
1196 | iap->ia_valid &= ~ATTR_SIZE; | ||
1197 | } | ||
1198 | |||
1182 | /* | 1199 | /* |
1183 | * Create a file (regular, directory, device, fifo); UNIX sockets | 1200 | * Create a file (regular, directory, device, fifo); UNIX sockets |
1184 | * not yet implemented. | 1201 | * not yet implemented. |
@@ -1274,6 +1291,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1274 | switch (type) { | 1291 | switch (type) { |
1275 | case S_IFREG: | 1292 | case S_IFREG: |
1276 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); | 1293 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); |
1294 | if (!host_err) | ||
1295 | nfsd_check_ignore_resizing(iap); | ||
1277 | break; | 1296 | break; |
1278 | case S_IFDIR: | 1297 | case S_IFDIR: |
1279 | host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); | 1298 | host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); |
@@ -1427,6 +1446,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1427 | /* setattr will sync the child (or not) */ | 1446 | /* setattr will sync the child (or not) */ |
1428 | } | 1447 | } |
1429 | 1448 | ||
1449 | nfsd_check_ignore_resizing(iap); | ||
1450 | |||
1430 | if (createmode == NFS3_CREATE_EXCLUSIVE) { | 1451 | if (createmode == NFS3_CREATE_EXCLUSIVE) { |
1431 | /* Cram the verifier into atime/mtime */ | 1452 | /* Cram the verifier into atime/mtime */ |
1432 | iap->ia_valid = ATTR_MTIME|ATTR_ATIME | 1453 | iap->ia_valid = ATTR_MTIME|ATTR_ATIME |
diff --git a/fs/romfs/Kconfig b/fs/romfs/Kconfig index 1a17020f9faf..ce2d6bcc6266 100644 --- a/fs/romfs/Kconfig +++ b/fs/romfs/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config ROMFS_FS | 1 | config ROMFS_FS |
2 | tristate "ROM file system support" | 2 | tristate "ROM file system support" |
3 | depends on BLOCK | 3 | depends on BLOCK || MTD |
4 | ---help--- | 4 | ---help--- |
5 | This is a very small read-only file system mainly intended for | 5 | This is a very small read-only file system mainly intended for |
6 | initial ram disks of installation disks, but it could be used for | 6 | initial ram disks of installation disks, but it could be used for |
@@ -14,3 +14,49 @@ config ROMFS_FS | |||
14 | 14 | ||
15 | If you don't know whether you need it, then you don't need it: | 15 | If you don't know whether you need it, then you don't need it: |
16 | answer N. | 16 | answer N. |
17 | |||
18 | # | ||
19 | # Select the backing stores to be supported | ||
20 | # | ||
21 | choice | ||
22 | prompt "RomFS backing stores" | ||
23 | depends on ROMFS_FS | ||
24 | default ROMFS_BACKED_BY_BLOCK | ||
25 | help | ||
26 | Select the backing stores to be supported. | ||
27 | |||
28 | config ROMFS_BACKED_BY_BLOCK | ||
29 | bool "Block device-backed ROM file system support" | ||
30 | depends on BLOCK | ||
31 | help | ||
32 | This permits ROMFS to use block devices buffered through the page | ||
33 | cache as the medium from which to retrieve data. It does not allow | ||
34 | direct mapping of the medium. | ||
35 | |||
36 | If unsure, answer Y. | ||
37 | |||
38 | config ROMFS_BACKED_BY_MTD | ||
39 | bool "MTD-backed ROM file system support" | ||
40 | depends on MTD=y || (ROMFS_FS=m && MTD) | ||
41 | help | ||
42 | This permits ROMFS to use MTD based devices directly, without the | ||
43 | intercession of the block layer (which may have been disabled). It | ||
44 | also allows direct mapping of MTD devices through romfs files under | ||
45 | NOMMU conditions if the underlying device is directly addressable by | ||
46 | the CPU. | ||
47 | |||
48 | If unsure, answer Y. | ||
49 | |||
50 | config ROMFS_BACKED_BY_BOTH | ||
51 | bool "Both the above" | ||
52 | depends on BLOCK && (MTD=y || (ROMFS_FS=m && MTD)) | ||
53 | endchoice | ||
54 | |||
55 | |||
56 | config ROMFS_ON_BLOCK | ||
57 | bool | ||
58 | default y if ROMFS_BACKED_BY_BLOCK || ROMFS_BACKED_BY_BOTH | ||
59 | |||
60 | config ROMFS_ON_MTD | ||
61 | bool | ||
62 | default y if ROMFS_BACKED_BY_MTD || ROMFS_BACKED_BY_BOTH | ||
diff --git a/fs/romfs/Makefile b/fs/romfs/Makefile index c95b21cf49a3..420beb7d495c 100644 --- a/fs/romfs/Makefile +++ b/fs/romfs/Makefile | |||
@@ -1,7 +1,12 @@ | |||
1 | # | 1 | # |
2 | # Makefile for the linux romfs filesystem routines. | 2 | # Makefile for the linux RomFS filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_ROMFS_FS) += romfs.o | 5 | obj-$(CONFIG_ROMFS_FS) += romfs.o |
6 | 6 | ||
7 | romfs-objs := inode.o | 7 | romfs-y := storage.o super.o |
8 | |||
9 | ifneq ($(CONFIG_MMU),y) | ||
10 | romfs-$(CONFIG_ROMFS_ON_MTD) += mmap-nommu.o | ||
11 | endif | ||
12 | |||
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c deleted file mode 100644 index 98a232f7196b..000000000000 --- a/fs/romfs/inode.c +++ /dev/null | |||
@@ -1,665 +0,0 @@ | |||
1 | /* | ||
2 | * ROMFS file system, Linux implementation | ||
3 | * | ||
4 | * Copyright (C) 1997-1999 Janos Farkas <chexum@shadow.banki.hu> | ||
5 | * | ||
6 | * Using parts of the minix filesystem | ||
7 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
8 | * | ||
9 | * and parts of the affs filesystem additionally | ||
10 | * Copyright (C) 1993 Ray Burr | ||
11 | * Copyright (C) 1996 Hans-Joachim Widmaier | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version | ||
16 | * 2 of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * Changes | ||
19 | * Changed for 2.1.19 modules | ||
20 | * Jan 1997 Initial release | ||
21 | * Jun 1997 2.1.43+ changes | ||
22 | * Proper page locking in readpage | ||
23 | * Changed to work with 2.1.45+ fs | ||
24 | * Jul 1997 Fixed follow_link | ||
25 | * 2.1.47 | ||
26 | * lookup shouldn't return -ENOENT | ||
27 | * from Horst von Brand: | ||
28 | * fail on wrong checksum | ||
29 | * double unlock_super was possible | ||
30 | * correct namelen for statfs | ||
31 | * spotted by Bill Hawes: | ||
32 | * readlink shouldn't iput() | ||
33 | * Jun 1998 2.1.106 from Avery Pennarun: glibc scandir() | ||
34 | * exposed a problem in readdir | ||
35 | * 2.1.107 code-freeze spellchecker run | ||
36 | * Aug 1998 2.1.118+ VFS changes | ||
37 | * Sep 1998 2.1.122 another VFS change (follow_link) | ||
38 | * Apr 1999 2.2.7 no more EBADF checking in | ||
39 | * lookup/readdir, use ERR_PTR | ||
40 | * Jun 1999 2.3.6 d_alloc_root use changed | ||
41 | * 2.3.9 clean up usage of ENOENT/negative | ||
42 | * dentries in lookup | ||
43 | * clean up page flags setting | ||
44 | * (error, uptodate, locking) in | ||
45 | * in readpage | ||
46 | * use init_special_inode for | ||
47 | * fifos/sockets (and streamline) in | ||
48 | * read_inode, fix _ops table order | ||
49 | * Aug 1999 2.3.16 __initfunc() => __init change | ||
50 | * Oct 1999 2.3.24 page->owner hack obsoleted | ||
51 | * Nov 1999 2.3.27 2.3.25+ page->offset => index change | ||
52 | */ | ||
53 | |||
54 | /* todo: | ||
55 | * - see Documentation/filesystems/romfs.txt | ||
56 | * - use allocated, not stack memory for file names? | ||
57 | * - considering write access... | ||
58 | * - network (tftp) files? | ||
59 | * - merge back some _op tables | ||
60 | */ | ||
61 | |||
62 | /* | ||
63 | * Sorry about some optimizations and for some goto's. I just wanted | ||
64 | * to squeeze some more bytes out of this code.. :) | ||
65 | */ | ||
66 | |||
67 | #include <linux/module.h> | ||
68 | #include <linux/types.h> | ||
69 | #include <linux/errno.h> | ||
70 | #include <linux/slab.h> | ||
71 | #include <linux/romfs_fs.h> | ||
72 | #include <linux/fs.h> | ||
73 | #include <linux/init.h> | ||
74 | #include <linux/pagemap.h> | ||
75 | #include <linux/smp_lock.h> | ||
76 | #include <linux/buffer_head.h> | ||
77 | #include <linux/vfs.h> | ||
78 | |||
79 | #include <asm/uaccess.h> | ||
80 | |||
81 | struct romfs_inode_info { | ||
82 | unsigned long i_metasize; /* size of non-data area */ | ||
83 | unsigned long i_dataoffset; /* from the start of fs */ | ||
84 | struct inode vfs_inode; | ||
85 | }; | ||
86 | |||
87 | static struct inode *romfs_iget(struct super_block *, unsigned long); | ||
88 | |||
89 | /* instead of private superblock data */ | ||
90 | static inline unsigned long romfs_maxsize(struct super_block *sb) | ||
91 | { | ||
92 | return (unsigned long)sb->s_fs_info; | ||
93 | } | ||
94 | |||
95 | static inline struct romfs_inode_info *ROMFS_I(struct inode *inode) | ||
96 | { | ||
97 | return container_of(inode, struct romfs_inode_info, vfs_inode); | ||
98 | } | ||
99 | |||
100 | static __u32 | ||
101 | romfs_checksum(void *data, int size) | ||
102 | { | ||
103 | __u32 sum; | ||
104 | __be32 *ptr; | ||
105 | |||
106 | sum = 0; ptr = data; | ||
107 | size>>=2; | ||
108 | while (size>0) { | ||
109 | sum += be32_to_cpu(*ptr++); | ||
110 | size--; | ||
111 | } | ||
112 | return sum; | ||
113 | } | ||
114 | |||
115 | static const struct super_operations romfs_ops; | ||
116 | |||
117 | static int romfs_fill_super(struct super_block *s, void *data, int silent) | ||
118 | { | ||
119 | struct buffer_head *bh; | ||
120 | struct romfs_super_block *rsb; | ||
121 | struct inode *root; | ||
122 | int sz, ret = -EINVAL; | ||
123 | |||
124 | /* I would parse the options here, but there are none.. :) */ | ||
125 | |||
126 | sb_set_blocksize(s, ROMBSIZE); | ||
127 | s->s_maxbytes = 0xFFFFFFFF; | ||
128 | |||
129 | bh = sb_bread(s, 0); | ||
130 | if (!bh) { | ||
131 | /* XXX merge with other printk? */ | ||
132 | printk ("romfs: unable to read superblock\n"); | ||
133 | goto outnobh; | ||
134 | } | ||
135 | |||
136 | rsb = (struct romfs_super_block *)bh->b_data; | ||
137 | sz = be32_to_cpu(rsb->size); | ||
138 | if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 | ||
139 | || sz < ROMFH_SIZE) { | ||
140 | if (!silent) | ||
141 | printk ("VFS: Can't find a romfs filesystem on dev " | ||
142 | "%s.\n", s->s_id); | ||
143 | goto out; | ||
144 | } | ||
145 | if (romfs_checksum(rsb, min_t(int, sz, 512))) { | ||
146 | printk ("romfs: bad initial checksum on dev " | ||
147 | "%s.\n", s->s_id); | ||
148 | goto out; | ||
149 | } | ||
150 | |||
151 | s->s_magic = ROMFS_MAGIC; | ||
152 | s->s_fs_info = (void *)(long)sz; | ||
153 | |||
154 | s->s_flags |= MS_RDONLY; | ||
155 | |||
156 | /* Find the start of the fs */ | ||
157 | sz = (ROMFH_SIZE + | ||
158 | strnlen(rsb->name, ROMFS_MAXFN) + 1 + ROMFH_PAD) | ||
159 | & ROMFH_MASK; | ||
160 | |||
161 | s->s_op = &romfs_ops; | ||
162 | root = romfs_iget(s, sz); | ||
163 | if (IS_ERR(root)) { | ||
164 | ret = PTR_ERR(root); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | ret = -ENOMEM; | ||
169 | s->s_root = d_alloc_root(root); | ||
170 | if (!s->s_root) | ||
171 | goto outiput; | ||
172 | |||
173 | brelse(bh); | ||
174 | return 0; | ||
175 | |||
176 | outiput: | ||
177 | iput(root); | ||
178 | out: | ||
179 | brelse(bh); | ||
180 | outnobh: | ||
181 | return ret; | ||
182 | } | ||
183 | |||
184 | /* That's simple too. */ | ||
185 | |||
186 | static int | ||
187 | romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
188 | { | ||
189 | buf->f_type = ROMFS_MAGIC; | ||
190 | buf->f_bsize = ROMBSIZE; | ||
191 | buf->f_bfree = buf->f_bavail = buf->f_ffree; | ||
192 | buf->f_blocks = (romfs_maxsize(dentry->d_sb)+ROMBSIZE-1)>>ROMBSBITS; | ||
193 | buf->f_namelen = ROMFS_MAXFN; | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | /* some helper routines */ | ||
198 | |||
199 | static int | ||
200 | romfs_strnlen(struct inode *i, unsigned long offset, unsigned long count) | ||
201 | { | ||
202 | struct buffer_head *bh; | ||
203 | unsigned long avail, maxsize, res; | ||
204 | |||
205 | maxsize = romfs_maxsize(i->i_sb); | ||
206 | if (offset >= maxsize) | ||
207 | return -1; | ||
208 | |||
209 | /* strnlen is almost always valid */ | ||
210 | if (count > maxsize || offset+count > maxsize) | ||
211 | count = maxsize-offset; | ||
212 | |||
213 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
214 | if (!bh) | ||
215 | return -1; /* error */ | ||
216 | |||
217 | avail = ROMBSIZE - (offset & ROMBMASK); | ||
218 | maxsize = min_t(unsigned long, count, avail); | ||
219 | res = strnlen(((char *)bh->b_data)+(offset&ROMBMASK), maxsize); | ||
220 | brelse(bh); | ||
221 | |||
222 | if (res < maxsize) | ||
223 | return res; /* found all of it */ | ||
224 | |||
225 | while (res < count) { | ||
226 | offset += maxsize; | ||
227 | |||
228 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
229 | if (!bh) | ||
230 | return -1; | ||
231 | maxsize = min_t(unsigned long, count - res, ROMBSIZE); | ||
232 | avail = strnlen(bh->b_data, maxsize); | ||
233 | res += avail; | ||
234 | brelse(bh); | ||
235 | if (avail < maxsize) | ||
236 | return res; | ||
237 | } | ||
238 | return res; | ||
239 | } | ||
240 | |||
241 | static int | ||
242 | romfs_copyfrom(struct inode *i, void *dest, unsigned long offset, unsigned long count) | ||
243 | { | ||
244 | struct buffer_head *bh; | ||
245 | unsigned long avail, maxsize, res; | ||
246 | |||
247 | maxsize = romfs_maxsize(i->i_sb); | ||
248 | if (offset >= maxsize || count > maxsize || offset+count>maxsize) | ||
249 | return -1; | ||
250 | |||
251 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
252 | if (!bh) | ||
253 | return -1; /* error */ | ||
254 | |||
255 | avail = ROMBSIZE - (offset & ROMBMASK); | ||
256 | maxsize = min_t(unsigned long, count, avail); | ||
257 | memcpy(dest, ((char *)bh->b_data) + (offset & ROMBMASK), maxsize); | ||
258 | brelse(bh); | ||
259 | |||
260 | res = maxsize; /* all of it */ | ||
261 | |||
262 | while (res < count) { | ||
263 | offset += maxsize; | ||
264 | dest += maxsize; | ||
265 | |||
266 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
267 | if (!bh) | ||
268 | return -1; | ||
269 | maxsize = min_t(unsigned long, count - res, ROMBSIZE); | ||
270 | memcpy(dest, bh->b_data, maxsize); | ||
271 | brelse(bh); | ||
272 | res += maxsize; | ||
273 | } | ||
274 | return res; | ||
275 | } | ||
276 | |||
277 | static unsigned char romfs_dtype_table[] = { | ||
278 | DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_SOCK, DT_FIFO | ||
279 | }; | ||
280 | |||
281 | static int | ||
282 | romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
283 | { | ||
284 | struct inode *i = filp->f_path.dentry->d_inode; | ||
285 | struct romfs_inode ri; | ||
286 | unsigned long offset, maxoff; | ||
287 | int j, ino, nextfh; | ||
288 | int stored = 0; | ||
289 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | ||
290 | |||
291 | lock_kernel(); | ||
292 | |||
293 | maxoff = romfs_maxsize(i->i_sb); | ||
294 | |||
295 | offset = filp->f_pos; | ||
296 | if (!offset) { | ||
297 | offset = i->i_ino & ROMFH_MASK; | ||
298 | if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0) | ||
299 | goto out; | ||
300 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
301 | } | ||
302 | |||
303 | /* Not really failsafe, but we are read-only... */ | ||
304 | for(;;) { | ||
305 | if (!offset || offset >= maxoff) { | ||
306 | offset = maxoff; | ||
307 | filp->f_pos = offset; | ||
308 | goto out; | ||
309 | } | ||
310 | filp->f_pos = offset; | ||
311 | |||
312 | /* Fetch inode info */ | ||
313 | if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0) | ||
314 | goto out; | ||
315 | |||
316 | j = romfs_strnlen(i, offset+ROMFH_SIZE, sizeof(fsname)-1); | ||
317 | if (j < 0) | ||
318 | goto out; | ||
319 | |||
320 | fsname[j]=0; | ||
321 | romfs_copyfrom(i, fsname, offset+ROMFH_SIZE, j); | ||
322 | |||
323 | ino = offset; | ||
324 | nextfh = be32_to_cpu(ri.next); | ||
325 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) | ||
326 | ino = be32_to_cpu(ri.spec); | ||
327 | if (filldir(dirent, fsname, j, offset, ino, | ||
328 | romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0) { | ||
329 | goto out; | ||
330 | } | ||
331 | stored++; | ||
332 | offset = nextfh & ROMFH_MASK; | ||
333 | } | ||
334 | out: | ||
335 | unlock_kernel(); | ||
336 | return stored; | ||
337 | } | ||
338 | |||
339 | static struct dentry * | ||
340 | romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
341 | { | ||
342 | unsigned long offset, maxoff; | ||
343 | long res; | ||
344 | int fslen; | ||
345 | struct inode *inode = NULL; | ||
346 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | ||
347 | struct romfs_inode ri; | ||
348 | const char *name; /* got from dentry */ | ||
349 | int len; | ||
350 | |||
351 | res = -EACCES; /* placeholder for "no data here" */ | ||
352 | offset = dir->i_ino & ROMFH_MASK; | ||
353 | lock_kernel(); | ||
354 | if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) | ||
355 | goto error; | ||
356 | |||
357 | maxoff = romfs_maxsize(dir->i_sb); | ||
358 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
359 | |||
360 | /* OK, now find the file whose name is in "dentry" in the | ||
361 | * directory specified by "dir". */ | ||
362 | |||
363 | name = dentry->d_name.name; | ||
364 | len = dentry->d_name.len; | ||
365 | |||
366 | for(;;) { | ||
367 | if (!offset || offset >= maxoff) | ||
368 | goto success; /* negative success */ | ||
369 | if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) | ||
370 | goto error; | ||
371 | |||
372 | /* try to match the first 16 bytes of name */ | ||
373 | fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, ROMFH_SIZE); | ||
374 | if (len < ROMFH_SIZE) { | ||
375 | if (len == fslen) { | ||
376 | /* both are shorter, and same size */ | ||
377 | romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1); | ||
378 | if (strncmp (name, fsname, len) == 0) | ||
379 | break; | ||
380 | } | ||
381 | } else if (fslen >= ROMFH_SIZE) { | ||
382 | /* both are longer; XXX optimize max size */ | ||
383 | fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, sizeof(fsname)-1); | ||
384 | if (len == fslen) { | ||
385 | romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1); | ||
386 | if (strncmp(name, fsname, len) == 0) | ||
387 | break; | ||
388 | } | ||
389 | } | ||
390 | /* next entry */ | ||
391 | offset = be32_to_cpu(ri.next) & ROMFH_MASK; | ||
392 | } | ||
393 | |||
394 | /* Hard link handling */ | ||
395 | if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) | ||
396 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
397 | |||
398 | inode = romfs_iget(dir->i_sb, offset); | ||
399 | if (IS_ERR(inode)) { | ||
400 | res = PTR_ERR(inode); | ||
401 | goto error; | ||
402 | } | ||
403 | |||
404 | success: | ||
405 | d_add(dentry, inode); | ||
406 | res = 0; | ||
407 | error: | ||
408 | unlock_kernel(); | ||
409 | return ERR_PTR(res); | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * Ok, we do readpage, to be able to execute programs. Unfortunately, | ||
414 | * we can't use bmap, since we may have looser alignments. | ||
415 | */ | ||
416 | |||
417 | static int | ||
418 | romfs_readpage(struct file *file, struct page * page) | ||
419 | { | ||
420 | struct inode *inode = page->mapping->host; | ||
421 | loff_t offset, size; | ||
422 | unsigned long filled; | ||
423 | void *buf; | ||
424 | int result = -EIO; | ||
425 | |||
426 | page_cache_get(page); | ||
427 | lock_kernel(); | ||
428 | buf = kmap(page); | ||
429 | if (!buf) | ||
430 | goto err_out; | ||
431 | |||
432 | /* 32 bit warning -- but not for us :) */ | ||
433 | offset = page_offset(page); | ||
434 | size = i_size_read(inode); | ||
435 | filled = 0; | ||
436 | result = 0; | ||
437 | if (offset < size) { | ||
438 | unsigned long readlen; | ||
439 | |||
440 | size -= offset; | ||
441 | readlen = size > PAGE_SIZE ? PAGE_SIZE : size; | ||
442 | |||
443 | filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen); | ||
444 | |||
445 | if (filled != readlen) { | ||
446 | SetPageError(page); | ||
447 | filled = 0; | ||
448 | result = -EIO; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | if (filled < PAGE_SIZE) | ||
453 | memset(buf + filled, 0, PAGE_SIZE-filled); | ||
454 | |||
455 | if (!result) | ||
456 | SetPageUptodate(page); | ||
457 | flush_dcache_page(page); | ||
458 | |||
459 | unlock_page(page); | ||
460 | |||
461 | kunmap(page); | ||
462 | err_out: | ||
463 | page_cache_release(page); | ||
464 | unlock_kernel(); | ||
465 | |||
466 | return result; | ||
467 | } | ||
468 | |||
469 | /* Mapping from our types to the kernel */ | ||
470 | |||
471 | static const struct address_space_operations romfs_aops = { | ||
472 | .readpage = romfs_readpage | ||
473 | }; | ||
474 | |||
475 | static const struct file_operations romfs_dir_operations = { | ||
476 | .read = generic_read_dir, | ||
477 | .readdir = romfs_readdir, | ||
478 | }; | ||
479 | |||
480 | static const struct inode_operations romfs_dir_inode_operations = { | ||
481 | .lookup = romfs_lookup, | ||
482 | }; | ||
483 | |||
484 | static mode_t romfs_modemap[] = | ||
485 | { | ||
486 | 0, S_IFDIR+0644, S_IFREG+0644, S_IFLNK+0777, | ||
487 | S_IFBLK+0600, S_IFCHR+0600, S_IFSOCK+0644, S_IFIFO+0644 | ||
488 | }; | ||
489 | |||
490 | static struct inode * | ||
491 | romfs_iget(struct super_block *sb, unsigned long ino) | ||
492 | { | ||
493 | int nextfh, ret; | ||
494 | struct romfs_inode ri; | ||
495 | struct inode *i; | ||
496 | |||
497 | ino &= ROMFH_MASK; | ||
498 | i = iget_locked(sb, ino); | ||
499 | if (!i) | ||
500 | return ERR_PTR(-ENOMEM); | ||
501 | if (!(i->i_state & I_NEW)) | ||
502 | return i; | ||
503 | |||
504 | i->i_mode = 0; | ||
505 | |||
506 | /* Loop for finding the real hard link */ | ||
507 | for(;;) { | ||
508 | if (romfs_copyfrom(i, &ri, ino, ROMFH_SIZE) <= 0) { | ||
509 | printk(KERN_ERR "romfs: read error for inode 0x%lx\n", | ||
510 | ino); | ||
511 | iget_failed(i); | ||
512 | return ERR_PTR(-EIO); | ||
513 | } | ||
514 | /* XXX: do romfs_checksum here too (with name) */ | ||
515 | |||
516 | nextfh = be32_to_cpu(ri.next); | ||
517 | if ((nextfh & ROMFH_TYPE) != ROMFH_HRD) | ||
518 | break; | ||
519 | |||
520 | ino = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
521 | } | ||
522 | |||
523 | i->i_nlink = 1; /* Hard to decide.. */ | ||
524 | i->i_size = be32_to_cpu(ri.size); | ||
525 | i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; | ||
526 | i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; | ||
527 | |||
528 | /* Precalculate the data offset */ | ||
529 | ret = romfs_strnlen(i, ino + ROMFH_SIZE, ROMFS_MAXFN); | ||
530 | if (ret >= 0) | ||
531 | ino = (ROMFH_SIZE + ret + 1 + ROMFH_PAD) & ROMFH_MASK; | ||
532 | else | ||
533 | ino = 0; | ||
534 | |||
535 | ROMFS_I(i)->i_metasize = ino; | ||
536 | ROMFS_I(i)->i_dataoffset = ino+(i->i_ino&ROMFH_MASK); | ||
537 | |||
538 | /* Compute permissions */ | ||
539 | ino = romfs_modemap[nextfh & ROMFH_TYPE]; | ||
540 | /* only "normal" files have ops */ | ||
541 | switch (nextfh & ROMFH_TYPE) { | ||
542 | case 1: | ||
543 | i->i_size = ROMFS_I(i)->i_metasize; | ||
544 | i->i_op = &romfs_dir_inode_operations; | ||
545 | i->i_fop = &romfs_dir_operations; | ||
546 | if (nextfh & ROMFH_EXEC) | ||
547 | ino |= S_IXUGO; | ||
548 | i->i_mode = ino; | ||
549 | break; | ||
550 | case 2: | ||
551 | i->i_fop = &generic_ro_fops; | ||
552 | i->i_data.a_ops = &romfs_aops; | ||
553 | if (nextfh & ROMFH_EXEC) | ||
554 | ino |= S_IXUGO; | ||
555 | i->i_mode = ino; | ||
556 | break; | ||
557 | case 3: | ||
558 | i->i_op = &page_symlink_inode_operations; | ||
559 | i->i_data.a_ops = &romfs_aops; | ||
560 | i->i_mode = ino | S_IRWXUGO; | ||
561 | break; | ||
562 | default: | ||
563 | /* depending on MBZ for sock/fifos */ | ||
564 | nextfh = be32_to_cpu(ri.spec); | ||
565 | init_special_inode(i, ino, | ||
566 | MKDEV(nextfh>>16,nextfh&0xffff)); | ||
567 | } | ||
568 | unlock_new_inode(i); | ||
569 | return i; | ||
570 | } | ||
571 | |||
572 | static struct kmem_cache * romfs_inode_cachep; | ||
573 | |||
574 | static struct inode *romfs_alloc_inode(struct super_block *sb) | ||
575 | { | ||
576 | struct romfs_inode_info *ei; | ||
577 | ei = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); | ||
578 | if (!ei) | ||
579 | return NULL; | ||
580 | return &ei->vfs_inode; | ||
581 | } | ||
582 | |||
583 | static void romfs_destroy_inode(struct inode *inode) | ||
584 | { | ||
585 | kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); | ||
586 | } | ||
587 | |||
588 | static void init_once(void *foo) | ||
589 | { | ||
590 | struct romfs_inode_info *ei = foo; | ||
591 | |||
592 | inode_init_once(&ei->vfs_inode); | ||
593 | } | ||
594 | |||
595 | static int init_inodecache(void) | ||
596 | { | ||
597 | romfs_inode_cachep = kmem_cache_create("romfs_inode_cache", | ||
598 | sizeof(struct romfs_inode_info), | ||
599 | 0, (SLAB_RECLAIM_ACCOUNT| | ||
600 | SLAB_MEM_SPREAD), | ||
601 | init_once); | ||
602 | if (romfs_inode_cachep == NULL) | ||
603 | return -ENOMEM; | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | static void destroy_inodecache(void) | ||
608 | { | ||
609 | kmem_cache_destroy(romfs_inode_cachep); | ||
610 | } | ||
611 | |||
612 | static int romfs_remount(struct super_block *sb, int *flags, char *data) | ||
613 | { | ||
614 | *flags |= MS_RDONLY; | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static const struct super_operations romfs_ops = { | ||
619 | .alloc_inode = romfs_alloc_inode, | ||
620 | .destroy_inode = romfs_destroy_inode, | ||
621 | .statfs = romfs_statfs, | ||
622 | .remount_fs = romfs_remount, | ||
623 | }; | ||
624 | |||
625 | static int romfs_get_sb(struct file_system_type *fs_type, | ||
626 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
627 | { | ||
628 | return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super, | ||
629 | mnt); | ||
630 | } | ||
631 | |||
632 | static struct file_system_type romfs_fs_type = { | ||
633 | .owner = THIS_MODULE, | ||
634 | .name = "romfs", | ||
635 | .get_sb = romfs_get_sb, | ||
636 | .kill_sb = kill_block_super, | ||
637 | .fs_flags = FS_REQUIRES_DEV, | ||
638 | }; | ||
639 | |||
640 | static int __init init_romfs_fs(void) | ||
641 | { | ||
642 | int err = init_inodecache(); | ||
643 | if (err) | ||
644 | goto out1; | ||
645 | err = register_filesystem(&romfs_fs_type); | ||
646 | if (err) | ||
647 | goto out; | ||
648 | return 0; | ||
649 | out: | ||
650 | destroy_inodecache(); | ||
651 | out1: | ||
652 | return err; | ||
653 | } | ||
654 | |||
655 | static void __exit exit_romfs_fs(void) | ||
656 | { | ||
657 | unregister_filesystem(&romfs_fs_type); | ||
658 | destroy_inodecache(); | ||
659 | } | ||
660 | |||
661 | /* Yes, works even as a module... :) */ | ||
662 | |||
663 | module_init(init_romfs_fs) | ||
664 | module_exit(exit_romfs_fs) | ||
665 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h new file mode 100644 index 000000000000..06044a9dc62d --- /dev/null +++ b/fs/romfs/internal.h | |||
@@ -0,0 +1,47 @@ | |||
1 | /* RomFS internal definitions | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/romfs_fs.h> | ||
13 | |||
14 | struct romfs_inode_info { | ||
15 | struct inode vfs_inode; | ||
16 | unsigned long i_metasize; /* size of non-data area */ | ||
17 | unsigned long i_dataoffset; /* from the start of fs */ | ||
18 | }; | ||
19 | |||
20 | static inline size_t romfs_maxsize(struct super_block *sb) | ||
21 | { | ||
22 | return (size_t) (unsigned long) sb->s_fs_info; | ||
23 | } | ||
24 | |||
25 | static inline struct romfs_inode_info *ROMFS_I(struct inode *inode) | ||
26 | { | ||
27 | return container_of(inode, struct romfs_inode_info, vfs_inode); | ||
28 | } | ||
29 | |||
30 | /* | ||
31 | * mmap-nommu.c | ||
32 | */ | ||
33 | #if !defined(CONFIG_MMU) && defined(CONFIG_ROMFS_ON_MTD) | ||
34 | extern const struct file_operations romfs_ro_fops; | ||
35 | #else | ||
36 | #define romfs_ro_fops generic_ro_fops | ||
37 | #endif | ||
38 | |||
39 | /* | ||
40 | * storage.c | ||
41 | */ | ||
42 | extern int romfs_dev_read(struct super_block *sb, unsigned long pos, | ||
43 | void *buf, size_t buflen); | ||
44 | extern ssize_t romfs_dev_strnlen(struct super_block *sb, | ||
45 | unsigned long pos, size_t maxlen); | ||
46 | extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | ||
47 | const char *str, size_t size); | ||
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c new file mode 100644 index 000000000000..f0511e816967 --- /dev/null +++ b/fs/romfs/mmap-nommu.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* NOMMU mmap support for RomFS on MTD devices | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mm.h> | ||
13 | #include <linux/mtd/super.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | /* | ||
17 | * try to determine where a shared mapping can be made | ||
18 | * - only supported for NOMMU at the moment (MMU can't doesn't copy private | ||
19 | * mappings) | ||
20 | * - attempts to map through to the underlying MTD device | ||
21 | */ | ||
22 | static unsigned long romfs_get_unmapped_area(struct file *file, | ||
23 | unsigned long addr, | ||
24 | unsigned long len, | ||
25 | unsigned long pgoff, | ||
26 | unsigned long flags) | ||
27 | { | ||
28 | struct inode *inode = file->f_mapping->host; | ||
29 | struct mtd_info *mtd = inode->i_sb->s_mtd; | ||
30 | unsigned long isize, offset; | ||
31 | |||
32 | if (!mtd) | ||
33 | goto cant_map_directly; | ||
34 | |||
35 | isize = i_size_read(inode); | ||
36 | offset = pgoff << PAGE_SHIFT; | ||
37 | if (offset > isize || len > isize || offset > isize - len) | ||
38 | return (unsigned long) -EINVAL; | ||
39 | |||
40 | /* we need to call down to the MTD layer to do the actual mapping */ | ||
41 | if (mtd->get_unmapped_area) { | ||
42 | if (addr != 0) | ||
43 | return (unsigned long) -EINVAL; | ||
44 | |||
45 | if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT)) | ||
46 | return (unsigned long) -EINVAL; | ||
47 | |||
48 | offset += ROMFS_I(inode)->i_dataoffset; | ||
49 | if (offset > mtd->size - len) | ||
50 | return (unsigned long) -EINVAL; | ||
51 | |||
52 | return mtd->get_unmapped_area(mtd, len, offset, flags); | ||
53 | } | ||
54 | |||
55 | cant_map_directly: | ||
56 | return (unsigned long) -ENOSYS; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * permit a R/O mapping to be made directly through onto an MTD device if | ||
61 | * possible | ||
62 | */ | ||
63 | static int romfs_mmap(struct file *file, struct vm_area_struct *vma) | ||
64 | { | ||
65 | return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS; | ||
66 | } | ||
67 | |||
68 | const struct file_operations romfs_ro_fops = { | ||
69 | .llseek = generic_file_llseek, | ||
70 | .read = do_sync_read, | ||
71 | .aio_read = generic_file_aio_read, | ||
72 | .splice_read = generic_file_splice_read, | ||
73 | .mmap = romfs_mmap, | ||
74 | .get_unmapped_area = romfs_get_unmapped_area, | ||
75 | }; | ||
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c new file mode 100644 index 000000000000..7e3e1e12a081 --- /dev/null +++ b/fs/romfs/storage.c | |||
@@ -0,0 +1,261 @@ | |||
1 | /* RomFS storage access routines | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/mtd/super.h> | ||
14 | #include <linux/buffer_head.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | #if !defined(CONFIG_ROMFS_ON_MTD) && !defined(CONFIG_ROMFS_ON_BLOCK) | ||
18 | #error no ROMFS backing store interface configured | ||
19 | #endif | ||
20 | |||
21 | #ifdef CONFIG_ROMFS_ON_MTD | ||
22 | #define ROMFS_MTD_READ(sb, ...) ((sb)->s_mtd->read((sb)->s_mtd, ##__VA_ARGS__)) | ||
23 | |||
24 | /* | ||
25 | * read data from an romfs image on an MTD device | ||
26 | */ | ||
27 | static int romfs_mtd_read(struct super_block *sb, unsigned long pos, | ||
28 | void *buf, size_t buflen) | ||
29 | { | ||
30 | size_t rlen; | ||
31 | int ret; | ||
32 | |||
33 | ret = ROMFS_MTD_READ(sb, pos, buflen, &rlen, buf); | ||
34 | return (ret < 0 || rlen != buflen) ? -EIO : 0; | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * determine the length of a string in a romfs image on an MTD device | ||
39 | */ | ||
40 | static ssize_t romfs_mtd_strnlen(struct super_block *sb, | ||
41 | unsigned long pos, size_t maxlen) | ||
42 | { | ||
43 | ssize_t n = 0; | ||
44 | size_t segment; | ||
45 | u_char buf[16], *p; | ||
46 | size_t len; | ||
47 | int ret; | ||
48 | |||
49 | /* scan the string up to 16 bytes at a time */ | ||
50 | while (maxlen > 0) { | ||
51 | segment = min_t(size_t, maxlen, 16); | ||
52 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); | ||
53 | if (ret < 0) | ||
54 | return ret; | ||
55 | p = memchr(buf, 0, len); | ||
56 | if (p) | ||
57 | return n + (p - buf); | ||
58 | maxlen -= len; | ||
59 | pos += len; | ||
60 | n += len; | ||
61 | } | ||
62 | |||
63 | return n; | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * compare a string to one in a romfs image on MTD | ||
68 | * - return 1 if matched, 0 if differ, -ve if error | ||
69 | */ | ||
70 | static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, | ||
71 | const char *str, size_t size) | ||
72 | { | ||
73 | u_char buf[16]; | ||
74 | size_t len, segment; | ||
75 | int ret; | ||
76 | |||
77 | /* scan the string up to 16 bytes at a time */ | ||
78 | while (size > 0) { | ||
79 | segment = min_t(size_t, size, 16); | ||
80 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); | ||
81 | if (ret < 0) | ||
82 | return ret; | ||
83 | if (memcmp(buf, str, len) != 0) | ||
84 | return 0; | ||
85 | size -= len; | ||
86 | pos += len; | ||
87 | str += len; | ||
88 | } | ||
89 | |||
90 | return 1; | ||
91 | } | ||
92 | #endif /* CONFIG_ROMFS_ON_MTD */ | ||
93 | |||
94 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
95 | /* | ||
96 | * read data from an romfs image on a block device | ||
97 | */ | ||
98 | static int romfs_blk_read(struct super_block *sb, unsigned long pos, | ||
99 | void *buf, size_t buflen) | ||
100 | { | ||
101 | struct buffer_head *bh; | ||
102 | unsigned long offset; | ||
103 | size_t segment; | ||
104 | |||
105 | /* copy the string up to blocksize bytes at a time */ | ||
106 | while (buflen > 0) { | ||
107 | offset = pos & (ROMBSIZE - 1); | ||
108 | segment = min_t(size_t, buflen, ROMBSIZE - offset); | ||
109 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
110 | if (!bh) | ||
111 | return -EIO; | ||
112 | memcpy(buf, bh->b_data + offset, segment); | ||
113 | brelse(bh); | ||
114 | buflen -= segment; | ||
115 | pos += segment; | ||
116 | } | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * determine the length of a string in romfs on a block device | ||
123 | */ | ||
124 | static ssize_t romfs_blk_strnlen(struct super_block *sb, | ||
125 | unsigned long pos, size_t limit) | ||
126 | { | ||
127 | struct buffer_head *bh; | ||
128 | unsigned long offset; | ||
129 | ssize_t n = 0; | ||
130 | size_t segment; | ||
131 | u_char *buf, *p; | ||
132 | |||
133 | /* scan the string up to blocksize bytes at a time */ | ||
134 | while (limit > 0) { | ||
135 | offset = pos & (ROMBSIZE - 1); | ||
136 | segment = min_t(size_t, limit, ROMBSIZE - offset); | ||
137 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
138 | if (!bh) | ||
139 | return -EIO; | ||
140 | buf = bh->b_data + offset; | ||
141 | p = memchr(buf, 0, segment); | ||
142 | brelse(bh); | ||
143 | if (p) | ||
144 | return n + (p - buf); | ||
145 | limit -= segment; | ||
146 | pos += segment; | ||
147 | n += segment; | ||
148 | } | ||
149 | |||
150 | return n; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * compare a string to one in a romfs image on a block device | ||
155 | * - return 1 if matched, 0 if differ, -ve if error | ||
156 | */ | ||
157 | static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, | ||
158 | const char *str, size_t size) | ||
159 | { | ||
160 | struct buffer_head *bh; | ||
161 | unsigned long offset; | ||
162 | size_t segment; | ||
163 | bool x; | ||
164 | |||
165 | /* scan the string up to 16 bytes at a time */ | ||
166 | while (size > 0) { | ||
167 | offset = pos & (ROMBSIZE - 1); | ||
168 | segment = min_t(size_t, size, ROMBSIZE - offset); | ||
169 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
170 | if (!bh) | ||
171 | return -EIO; | ||
172 | x = (memcmp(bh->b_data + offset, str, segment) != 0); | ||
173 | brelse(bh); | ||
174 | if (x) | ||
175 | return 0; | ||
176 | size -= segment; | ||
177 | pos += segment; | ||
178 | str += segment; | ||
179 | } | ||
180 | |||
181 | return 1; | ||
182 | } | ||
183 | #endif /* CONFIG_ROMFS_ON_BLOCK */ | ||
184 | |||
185 | /* | ||
186 | * read data from the romfs image | ||
187 | */ | ||
188 | int romfs_dev_read(struct super_block *sb, unsigned long pos, | ||
189 | void *buf, size_t buflen) | ||
190 | { | ||
191 | size_t limit; | ||
192 | |||
193 | limit = romfs_maxsize(sb); | ||
194 | if (pos >= limit) | ||
195 | return -EIO; | ||
196 | if (buflen > limit - pos) | ||
197 | buflen = limit - pos; | ||
198 | |||
199 | #ifdef CONFIG_ROMFS_ON_MTD | ||
200 | if (sb->s_mtd) | ||
201 | return romfs_mtd_read(sb, pos, buf, buflen); | ||
202 | #endif | ||
203 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
204 | if (sb->s_bdev) | ||
205 | return romfs_blk_read(sb, pos, buf, buflen); | ||
206 | #endif | ||
207 | return -EIO; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * determine the length of a string in romfs | ||
212 | */ | ||
213 | ssize_t romfs_dev_strnlen(struct super_block *sb, | ||
214 | unsigned long pos, size_t maxlen) | ||
215 | { | ||
216 | size_t limit; | ||
217 | |||
218 | limit = romfs_maxsize(sb); | ||
219 | if (pos >= limit) | ||
220 | return -EIO; | ||
221 | if (maxlen > limit - pos) | ||
222 | maxlen = limit - pos; | ||
223 | |||
224 | #ifdef CONFIG_ROMFS_ON_MTD | ||
225 | if (sb->s_mtd) | ||
226 | return romfs_mtd_strnlen(sb, pos, limit); | ||
227 | #endif | ||
228 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
229 | if (sb->s_bdev) | ||
230 | return romfs_blk_strnlen(sb, pos, limit); | ||
231 | #endif | ||
232 | return -EIO; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * compare a string to one in romfs | ||
237 | * - return 1 if matched, 0 if differ, -ve if error | ||
238 | */ | ||
239 | int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | ||
240 | const char *str, size_t size) | ||
241 | { | ||
242 | size_t limit; | ||
243 | |||
244 | limit = romfs_maxsize(sb); | ||
245 | if (pos >= limit) | ||
246 | return -EIO; | ||
247 | if (size > ROMFS_MAXFN) | ||
248 | return -ENAMETOOLONG; | ||
249 | if (size > limit - pos) | ||
250 | return -EIO; | ||
251 | |||
252 | #ifdef CONFIG_ROMFS_ON_MTD | ||
253 | if (sb->s_mtd) | ||
254 | return romfs_mtd_strncmp(sb, pos, str, size); | ||
255 | #endif | ||
256 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
257 | if (sb->s_bdev) | ||
258 | return romfs_blk_strncmp(sb, pos, str, size); | ||
259 | #endif | ||
260 | return -EIO; | ||
261 | } | ||
diff --git a/fs/romfs/super.c b/fs/romfs/super.c new file mode 100644 index 000000000000..1e548a4975ba --- /dev/null +++ b/fs/romfs/super.c | |||
@@ -0,0 +1,648 @@ | |||
1 | /* Block- or MTD-based romfs | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * Derived from: ROMFS file system, Linux implementation | ||
7 | * | ||
8 | * Copyright © 1997-1999 Janos Farkas <chexum@shadow.banki.hu> | ||
9 | * | ||
10 | * Using parts of the minix filesystem | ||
11 | * Copyright © 1991, 1992 Linus Torvalds | ||
12 | * | ||
13 | * and parts of the affs filesystem additionally | ||
14 | * Copyright © 1993 Ray Burr | ||
15 | * Copyright © 1996 Hans-Joachim Widmaier | ||
16 | * | ||
17 | * Changes | ||
18 | * Changed for 2.1.19 modules | ||
19 | * Jan 1997 Initial release | ||
20 | * Jun 1997 2.1.43+ changes | ||
21 | * Proper page locking in readpage | ||
22 | * Changed to work with 2.1.45+ fs | ||
23 | * Jul 1997 Fixed follow_link | ||
24 | * 2.1.47 | ||
25 | * lookup shouldn't return -ENOENT | ||
26 | * from Horst von Brand: | ||
27 | * fail on wrong checksum | ||
28 | * double unlock_super was possible | ||
29 | * correct namelen for statfs | ||
30 | * spotted by Bill Hawes: | ||
31 | * readlink shouldn't iput() | ||
32 | * Jun 1998 2.1.106 from Avery Pennarun: glibc scandir() | ||
33 | * exposed a problem in readdir | ||
34 | * 2.1.107 code-freeze spellchecker run | ||
35 | * Aug 1998 2.1.118+ VFS changes | ||
36 | * Sep 1998 2.1.122 another VFS change (follow_link) | ||
37 | * Apr 1999 2.2.7 no more EBADF checking in | ||
38 | * lookup/readdir, use ERR_PTR | ||
39 | * Jun 1999 2.3.6 d_alloc_root use changed | ||
40 | * 2.3.9 clean up usage of ENOENT/negative | ||
41 | * dentries in lookup | ||
42 | * clean up page flags setting | ||
43 | * (error, uptodate, locking) in | ||
44 | * readpage | ||
45 | * use init_special_inode for | ||
46 | * fifos/sockets (and streamline) in | ||
47 | * read_inode, fix _ops table order | ||
48 | * Aug 1999 2.3.16 __initfunc() => __init change | ||
49 | * Oct 1999 2.3.24 page->owner hack obsoleted | ||
50 | * Nov 1999 2.3.27 2.3.25+ page->offset => index change | ||
51 | * | ||
52 | * | ||
53 | * This program is free software; you can redistribute it and/or | ||
54 | * modify it under the terms of the GNU General Public Licence | ||
55 | * as published by the Free Software Foundation; either version | ||
56 | * 2 of the Licence, or (at your option) any later version. | ||
57 | */ | ||
58 | |||
59 | #include <linux/module.h> | ||
60 | #include <linux/string.h> | ||
61 | #include <linux/fs.h> | ||
62 | #include <linux/time.h> | ||
63 | #include <linux/slab.h> | ||
64 | #include <linux/init.h> | ||
65 | #include <linux/blkdev.h> | ||
66 | #include <linux/parser.h> | ||
67 | #include <linux/mount.h> | ||
68 | #include <linux/namei.h> | ||
69 | #include <linux/statfs.h> | ||
70 | #include <linux/mtd/super.h> | ||
71 | #include <linux/ctype.h> | ||
72 | #include <linux/highmem.h> | ||
73 | #include <linux/pagemap.h> | ||
74 | #include <linux/uaccess.h> | ||
75 | #include "internal.h" | ||
76 | |||
77 | static struct kmem_cache *romfs_inode_cachep; | ||
78 | |||
79 | static const umode_t romfs_modemap[8] = { | ||
80 | 0, /* hard link */ | ||
81 | S_IFDIR | 0644, /* directory */ | ||
82 | S_IFREG | 0644, /* regular file */ | ||
83 | S_IFLNK | 0777, /* symlink */ | ||
84 | S_IFBLK | 0600, /* blockdev */ | ||
85 | S_IFCHR | 0600, /* chardev */ | ||
86 | S_IFSOCK | 0644, /* socket */ | ||
87 | S_IFIFO | 0644 /* FIFO */ | ||
88 | }; | ||
89 | |||
90 | static const unsigned char romfs_dtype_table[] = { | ||
91 | DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_SOCK, DT_FIFO | ||
92 | }; | ||
93 | |||
94 | static struct inode *romfs_iget(struct super_block *sb, unsigned long pos); | ||
95 | |||
96 | /* | ||
97 | * read a page worth of data from the image | ||
98 | */ | ||
99 | static int romfs_readpage(struct file *file, struct page *page) | ||
100 | { | ||
101 | struct inode *inode = page->mapping->host; | ||
102 | loff_t offset, size; | ||
103 | unsigned long fillsize, pos; | ||
104 | void *buf; | ||
105 | int ret; | ||
106 | |||
107 | buf = kmap(page); | ||
108 | if (!buf) | ||
109 | return -ENOMEM; | ||
110 | |||
111 | /* 32 bit warning -- but not for us :) */ | ||
112 | offset = page_offset(page); | ||
113 | size = i_size_read(inode); | ||
114 | fillsize = 0; | ||
115 | ret = 0; | ||
116 | if (offset < size) { | ||
117 | size -= offset; | ||
118 | fillsize = size > PAGE_SIZE ? PAGE_SIZE : size; | ||
119 | |||
120 | pos = ROMFS_I(inode)->i_dataoffset + offset; | ||
121 | |||
122 | ret = romfs_dev_read(inode->i_sb, pos, buf, fillsize); | ||
123 | if (ret < 0) { | ||
124 | SetPageError(page); | ||
125 | fillsize = 0; | ||
126 | ret = -EIO; | ||
127 | } | ||
128 | } | ||
129 | |||
130 | if (fillsize < PAGE_SIZE) | ||
131 | memset(buf + fillsize, 0, PAGE_SIZE - fillsize); | ||
132 | if (ret == 0) | ||
133 | SetPageUptodate(page); | ||
134 | |||
135 | flush_dcache_page(page); | ||
136 | kunmap(page); | ||
137 | unlock_page(page); | ||
138 | return ret; | ||
139 | } | ||
140 | |||
141 | static const struct address_space_operations romfs_aops = { | ||
142 | .readpage = romfs_readpage | ||
143 | }; | ||
144 | |||
145 | /* | ||
146 | * read the entries from a directory | ||
147 | */ | ||
148 | static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
149 | { | ||
150 | struct inode *i = filp->f_dentry->d_inode; | ||
151 | struct romfs_inode ri; | ||
152 | unsigned long offset, maxoff; | ||
153 | int j, ino, nextfh; | ||
154 | int stored = 0; | ||
155 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | ||
156 | int ret; | ||
157 | |||
158 | maxoff = romfs_maxsize(i->i_sb); | ||
159 | |||
160 | offset = filp->f_pos; | ||
161 | if (!offset) { | ||
162 | offset = i->i_ino & ROMFH_MASK; | ||
163 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); | ||
164 | if (ret < 0) | ||
165 | goto out; | ||
166 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
167 | } | ||
168 | |||
169 | /* Not really failsafe, but we are read-only... */ | ||
170 | for (;;) { | ||
171 | if (!offset || offset >= maxoff) { | ||
172 | offset = maxoff; | ||
173 | filp->f_pos = offset; | ||
174 | goto out; | ||
175 | } | ||
176 | filp->f_pos = offset; | ||
177 | |||
178 | /* Fetch inode info */ | ||
179 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); | ||
180 | if (ret < 0) | ||
181 | goto out; | ||
182 | |||
183 | j = romfs_dev_strnlen(i->i_sb, offset + ROMFH_SIZE, | ||
184 | sizeof(fsname) - 1); | ||
185 | if (j < 0) | ||
186 | goto out; | ||
187 | |||
188 | ret = romfs_dev_read(i->i_sb, offset + ROMFH_SIZE, fsname, j); | ||
189 | if (ret < 0) | ||
190 | goto out; | ||
191 | fsname[j] = '\0'; | ||
192 | |||
193 | ino = offset; | ||
194 | nextfh = be32_to_cpu(ri.next); | ||
195 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) | ||
196 | ino = be32_to_cpu(ri.spec); | ||
197 | if (filldir(dirent, fsname, j, offset, ino, | ||
198 | romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0) | ||
199 | goto out; | ||
200 | |||
201 | stored++; | ||
202 | offset = nextfh & ROMFH_MASK; | ||
203 | } | ||
204 | |||
205 | out: | ||
206 | return stored; | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * look up an entry in a directory | ||
211 | */ | ||
212 | static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, | ||
213 | struct nameidata *nd) | ||
214 | { | ||
215 | unsigned long offset, maxoff; | ||
216 | struct inode *inode; | ||
217 | struct romfs_inode ri; | ||
218 | const char *name; /* got from dentry */ | ||
219 | int len, ret; | ||
220 | |||
221 | offset = dir->i_ino & ROMFH_MASK; | ||
222 | ret = romfs_dev_read(dir->i_sb, offset, &ri, ROMFH_SIZE); | ||
223 | if (ret < 0) | ||
224 | goto error; | ||
225 | |||
226 | /* search all the file entries in the list starting from the one | ||
227 | * pointed to by the directory's special data */ | ||
228 | maxoff = romfs_maxsize(dir->i_sb); | ||
229 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
230 | |||
231 | name = dentry->d_name.name; | ||
232 | len = dentry->d_name.len; | ||
233 | |||
234 | for (;;) { | ||
235 | if (!offset || offset >= maxoff) | ||
236 | goto out0; | ||
237 | |||
238 | ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri)); | ||
239 | if (ret < 0) | ||
240 | goto error; | ||
241 | |||
242 | /* try to match the first 16 bytes of name */ | ||
243 | ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name, | ||
244 | len); | ||
245 | if (ret < 0) | ||
246 | goto error; | ||
247 | if (ret == 1) | ||
248 | break; | ||
249 | |||
250 | /* next entry */ | ||
251 | offset = be32_to_cpu(ri.next) & ROMFH_MASK; | ||
252 | } | ||
253 | |||
254 | /* Hard link handling */ | ||
255 | if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) | ||
256 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
257 | |||
258 | inode = romfs_iget(dir->i_sb, offset); | ||
259 | if (IS_ERR(inode)) { | ||
260 | ret = PTR_ERR(inode); | ||
261 | goto error; | ||
262 | } | ||
263 | goto outi; | ||
264 | |||
265 | /* | ||
266 | * it's a bit funky, _lookup needs to return an error code | ||
267 | * (negative) or a NULL, both as a dentry. ENOENT should not | ||
268 | * be returned, instead we need to create a negative dentry by | ||
269 | * d_add(dentry, NULL); and return 0 as no error. | ||
270 | * (Although as I see, it only matters on writable file | ||
271 | * systems). | ||
272 | */ | ||
273 | out0: | ||
274 | inode = NULL; | ||
275 | outi: | ||
276 | d_add(dentry, inode); | ||
277 | ret = 0; | ||
278 | error: | ||
279 | return ERR_PTR(ret); | ||
280 | } | ||
281 | |||
282 | static const struct file_operations romfs_dir_operations = { | ||
283 | .read = generic_read_dir, | ||
284 | .readdir = romfs_readdir, | ||
285 | }; | ||
286 | |||
287 | static struct inode_operations romfs_dir_inode_operations = { | ||
288 | .lookup = romfs_lookup, | ||
289 | }; | ||
290 | |||
291 | /* | ||
292 | * get a romfs inode based on its position in the image (which doubles as the | ||
293 | * inode number) | ||
294 | */ | ||
295 | static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) | ||
296 | { | ||
297 | struct romfs_inode_info *inode; | ||
298 | struct romfs_inode ri; | ||
299 | struct inode *i; | ||
300 | unsigned long nlen; | ||
301 | unsigned nextfh, ret; | ||
302 | umode_t mode; | ||
303 | |||
304 | /* we might have to traverse a chain of "hard link" file entries to get | ||
305 | * to the actual file */ | ||
306 | for (;;) { | ||
307 | ret = romfs_dev_read(sb, pos, &ri, sizeof(ri)); | ||
308 | if (ret < 0) | ||
309 | goto error; | ||
310 | |||
311 | /* XXX: do romfs_checksum here too (with name) */ | ||
312 | |||
313 | nextfh = be32_to_cpu(ri.next); | ||
314 | if ((nextfh & ROMFH_TYPE) != ROMFH_HRD) | ||
315 | break; | ||
316 | |||
317 | pos = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
318 | } | ||
319 | |||
320 | /* determine the length of the filename */ | ||
321 | nlen = romfs_dev_strnlen(sb, pos + ROMFH_SIZE, ROMFS_MAXFN); | ||
322 | if (IS_ERR_VALUE(nlen)) | ||
323 | goto eio; | ||
324 | |||
325 | /* get an inode for this image position */ | ||
326 | i = iget_locked(sb, pos); | ||
327 | if (!i) | ||
328 | return ERR_PTR(-ENOMEM); | ||
329 | |||
330 | if (!(i->i_state & I_NEW)) | ||
331 | return i; | ||
332 | |||
333 | /* precalculate the data offset */ | ||
334 | inode = ROMFS_I(i); | ||
335 | inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK; | ||
336 | inode->i_dataoffset = pos + inode->i_metasize; | ||
337 | |||
338 | i->i_nlink = 1; /* Hard to decide.. */ | ||
339 | i->i_size = be32_to_cpu(ri.size); | ||
340 | i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; | ||
341 | i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; | ||
342 | |||
343 | /* set up mode and ops */ | ||
344 | mode = romfs_modemap[nextfh & ROMFH_TYPE]; | ||
345 | |||
346 | switch (nextfh & ROMFH_TYPE) { | ||
347 | case ROMFH_DIR: | ||
348 | i->i_size = ROMFS_I(i)->i_metasize; | ||
349 | i->i_op = &romfs_dir_inode_operations; | ||
350 | i->i_fop = &romfs_dir_operations; | ||
351 | if (nextfh & ROMFH_EXEC) | ||
352 | mode |= S_IXUGO; | ||
353 | break; | ||
354 | case ROMFH_REG: | ||
355 | i->i_fop = &romfs_ro_fops; | ||
356 | i->i_data.a_ops = &romfs_aops; | ||
357 | if (i->i_sb->s_mtd) | ||
358 | i->i_data.backing_dev_info = | ||
359 | i->i_sb->s_mtd->backing_dev_info; | ||
360 | if (nextfh & ROMFH_EXEC) | ||
361 | mode |= S_IXUGO; | ||
362 | break; | ||
363 | case ROMFH_SYM: | ||
364 | i->i_op = &page_symlink_inode_operations; | ||
365 | i->i_data.a_ops = &romfs_aops; | ||
366 | mode |= S_IRWXUGO; | ||
367 | break; | ||
368 | default: | ||
369 | /* depending on MBZ for sock/fifos */ | ||
370 | nextfh = be32_to_cpu(ri.spec); | ||
371 | init_special_inode(i, mode, MKDEV(nextfh >> 16, | ||
372 | nextfh & 0xffff)); | ||
373 | break; | ||
374 | } | ||
375 | |||
376 | i->i_mode = mode; | ||
377 | |||
378 | unlock_new_inode(i); | ||
379 | return i; | ||
380 | |||
381 | eio: | ||
382 | ret = -EIO; | ||
383 | error: | ||
384 | printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos); | ||
385 | return ERR_PTR(ret); | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * allocate a new inode | ||
390 | */ | ||
391 | static struct inode *romfs_alloc_inode(struct super_block *sb) | ||
392 | { | ||
393 | struct romfs_inode_info *inode; | ||
394 | inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); | ||
395 | return inode ? &inode->vfs_inode : NULL; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * return a spent inode to the slab cache | ||
400 | */ | ||
401 | static void romfs_destroy_inode(struct inode *inode) | ||
402 | { | ||
403 | kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * get filesystem statistics | ||
408 | */ | ||
409 | static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
410 | { | ||
411 | buf->f_type = ROMFS_MAGIC; | ||
412 | buf->f_namelen = ROMFS_MAXFN; | ||
413 | buf->f_bsize = ROMBSIZE; | ||
414 | buf->f_bfree = buf->f_bavail = buf->f_ffree; | ||
415 | buf->f_blocks = | ||
416 | (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | /* | ||
421 | * remounting must involve read-only | ||
422 | */ | ||
423 | static int romfs_remount(struct super_block *sb, int *flags, char *data) | ||
424 | { | ||
425 | *flags |= MS_RDONLY; | ||
426 | return 0; | ||
427 | } | ||
428 | |||
429 | static const struct super_operations romfs_super_ops = { | ||
430 | .alloc_inode = romfs_alloc_inode, | ||
431 | .destroy_inode = romfs_destroy_inode, | ||
432 | .statfs = romfs_statfs, | ||
433 | .remount_fs = romfs_remount, | ||
434 | }; | ||
435 | |||
436 | /* | ||
437 | * checksum check on part of a romfs filesystem | ||
438 | */ | ||
439 | static __u32 romfs_checksum(const void *data, int size) | ||
440 | { | ||
441 | const __be32 *ptr = data; | ||
442 | __u32 sum; | ||
443 | |||
444 | sum = 0; | ||
445 | size >>= 2; | ||
446 | while (size > 0) { | ||
447 | sum += be32_to_cpu(*ptr++); | ||
448 | size--; | ||
449 | } | ||
450 | return sum; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * fill in the superblock | ||
455 | */ | ||
456 | static int romfs_fill_super(struct super_block *sb, void *data, int silent) | ||
457 | { | ||
458 | struct romfs_super_block *rsb; | ||
459 | struct inode *root; | ||
460 | unsigned long pos, img_size; | ||
461 | const char *storage; | ||
462 | size_t len; | ||
463 | int ret; | ||
464 | |||
465 | #ifdef CONFIG_BLOCK | ||
466 | if (!sb->s_mtd) { | ||
467 | sb_set_blocksize(sb, ROMBSIZE); | ||
468 | } else { | ||
469 | sb->s_blocksize = ROMBSIZE; | ||
470 | sb->s_blocksize_bits = blksize_bits(ROMBSIZE); | ||
471 | } | ||
472 | #endif | ||
473 | |||
474 | sb->s_maxbytes = 0xFFFFFFFF; | ||
475 | sb->s_magic = ROMFS_MAGIC; | ||
476 | sb->s_flags |= MS_RDONLY | MS_NOATIME; | ||
477 | sb->s_op = &romfs_super_ops; | ||
478 | |||
479 | /* read the image superblock and check it */ | ||
480 | rsb = kmalloc(512, GFP_KERNEL); | ||
481 | if (!rsb) | ||
482 | return -ENOMEM; | ||
483 | |||
484 | sb->s_fs_info = (void *) 512; | ||
485 | ret = romfs_dev_read(sb, 0, rsb, 512); | ||
486 | if (ret < 0) | ||
487 | goto error_rsb; | ||
488 | |||
489 | img_size = be32_to_cpu(rsb->size); | ||
490 | |||
491 | if (sb->s_mtd && img_size > sb->s_mtd->size) | ||
492 | goto error_rsb_inval; | ||
493 | |||
494 | sb->s_fs_info = (void *) img_size; | ||
495 | |||
496 | if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 || | ||
497 | img_size < ROMFH_SIZE) { | ||
498 | if (!silent) | ||
499 | printk(KERN_WARNING "VFS:" | ||
500 | " Can't find a romfs filesystem on dev %s.\n", | ||
501 | sb->s_id); | ||
502 | goto error_rsb_inval; | ||
503 | } | ||
504 | |||
505 | if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) { | ||
506 | printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n", | ||
507 | sb->s_id); | ||
508 | goto error_rsb_inval; | ||
509 | } | ||
510 | |||
511 | storage = sb->s_mtd ? "MTD" : "the block layer"; | ||
512 | |||
513 | len = strnlen(rsb->name, ROMFS_MAXFN); | ||
514 | if (!silent) | ||
515 | printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n", | ||
516 | (unsigned) len, (unsigned) len, rsb->name, storage); | ||
517 | |||
518 | kfree(rsb); | ||
519 | rsb = NULL; | ||
520 | |||
521 | /* find the root directory */ | ||
522 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; | ||
523 | |||
524 | root = romfs_iget(sb, pos); | ||
525 | if (!root) | ||
526 | goto error; | ||
527 | |||
528 | sb->s_root = d_alloc_root(root); | ||
529 | if (!sb->s_root) | ||
530 | goto error_i; | ||
531 | |||
532 | return 0; | ||
533 | |||
534 | error_i: | ||
535 | iput(root); | ||
536 | error: | ||
537 | return -EINVAL; | ||
538 | error_rsb_inval: | ||
539 | ret = -EINVAL; | ||
540 | error_rsb: | ||
541 | return ret; | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * get a superblock for mounting | ||
546 | */ | ||
547 | static int romfs_get_sb(struct file_system_type *fs_type, | ||
548 | int flags, const char *dev_name, | ||
549 | void *data, struct vfsmount *mnt) | ||
550 | { | ||
551 | int ret = -EINVAL; | ||
552 | |||
553 | #ifdef CONFIG_ROMFS_ON_MTD | ||
554 | ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super, | ||
555 | mnt); | ||
556 | #endif | ||
557 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
558 | if (ret == -EINVAL) | ||
559 | ret = get_sb_bdev(fs_type, flags, dev_name, data, | ||
560 | romfs_fill_super, mnt); | ||
561 | #endif | ||
562 | return ret; | ||
563 | } | ||
564 | |||
/*
 * tear down a romfs superblock using whichever backend it was mounted on
 * - an MTD-backed superblock takes precedence over a block-backed one
 */
static void romfs_kill_sb(struct super_block *sb)
{
#ifdef CONFIG_ROMFS_ON_MTD
	/* mounted directly on an MTD device */
	if (sb->s_mtd) {
		kill_mtd_super(sb);
		return;
	}
#endif

#ifdef CONFIG_ROMFS_ON_BLOCK
	/* mounted through the block layer */
	if (sb->s_bdev) {
		kill_block_super(sb);
		return;
	}
#endif
}
583 | |||
584 | static struct file_system_type romfs_fs_type = { | ||
585 | .owner = THIS_MODULE, | ||
586 | .name = "romfs", | ||
587 | .get_sb = romfs_get_sb, | ||
588 | .kill_sb = romfs_kill_sb, | ||
589 | .fs_flags = FS_REQUIRES_DEV, | ||
590 | }; | ||
591 | |||
592 | /* | ||
593 | * inode storage initialiser | ||
594 | */ | ||
595 | static void romfs_i_init_once(void *_inode) | ||
596 | { | ||
597 | struct romfs_inode_info *inode = _inode; | ||
598 | |||
599 | inode_init_once(&inode->vfs_inode); | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * romfs module initialisation | ||
604 | */ | ||
605 | static int __init init_romfs_fs(void) | ||
606 | { | ||
607 | int ret; | ||
608 | |||
609 | printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n"); | ||
610 | |||
611 | romfs_inode_cachep = | ||
612 | kmem_cache_create("romfs_i", | ||
613 | sizeof(struct romfs_inode_info), 0, | ||
614 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
615 | romfs_i_init_once); | ||
616 | |||
617 | if (!romfs_inode_cachep) { | ||
618 | printk(KERN_ERR | ||
619 | "ROMFS error: Failed to initialise inode cache\n"); | ||
620 | return -ENOMEM; | ||
621 | } | ||
622 | ret = register_filesystem(&romfs_fs_type); | ||
623 | if (ret) { | ||
624 | printk(KERN_ERR "ROMFS error: Failed to register filesystem\n"); | ||
625 | goto error_register; | ||
626 | } | ||
627 | return 0; | ||
628 | |||
629 | error_register: | ||
630 | kmem_cache_destroy(romfs_inode_cachep); | ||
631 | return ret; | ||
632 | } | ||
633 | |||
634 | /* | ||
635 | * romfs module removal | ||
636 | */ | ||
637 | static void __exit exit_romfs_fs(void) | ||
638 | { | ||
639 | unregister_filesystem(&romfs_fs_type); | ||
640 | kmem_cache_destroy(romfs_inode_cachep); | ||
641 | } | ||
642 | |||
643 | module_init(init_romfs_fs); | ||
644 | module_exit(exit_romfs_fs); | ||
645 | |||
646 | MODULE_DESCRIPTION("Direct-MTD Capable RomFS"); | ||
647 | MODULE_AUTHOR("Red Hat, Inc."); | ||
648 | MODULE_LICENSE("GPL"); /* Actually dual-licensed, but it doesn't matter for */ | ||
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 69e971d5ddc1..2b1b8fe5e037 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/dcache.h> | 40 | #include <linux/dcache.h> |
41 | #include <linux/exportfs.h> | 41 | #include <linux/exportfs.h> |
42 | #include <linux/zlib.h> | 42 | #include <linux/zlib.h> |
43 | #include <linux/slab.h> | ||
43 | 44 | ||
44 | #include "squashfs_fs.h" | 45 | #include "squashfs_fs.h" |
45 | #include "squashfs_fs_sb.h" | 46 | #include "squashfs_fs_sb.h" |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index f393620890ee..af1914462f02 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | /** | 196 | /** |
197 | * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. | 197 | * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. |
198 | * @c: UBIFS file-system description object | 198 | * @c: UBIFS file-system description object |
199 | * | 199 | * |
200 | * This function calculates and returns the number of eraseblocks which should | 200 | * This function calculates and returns the number of LEBs which should be kept |
201 | * be kept for index usage. | 201 | * for index usage. |
202 | */ | 202 | */ |
203 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | 203 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) |
204 | { | 204 | { |
205 | int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; | 205 | int idx_lebs; |
206 | long long idx_size; | 206 | long long idx_size; |
207 | 207 | ||
208 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | 208 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; |
209 | |||
210 | /* And make sure we have thrice the index size of space reserved */ | 209 | /* And make sure we have thrice the index size of space reserved */ |
211 | idx_size = idx_size + (idx_size << 1); | 210 | idx_size += idx_size << 1; |
212 | |||
213 | /* | 211 | /* |
214 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' | 212 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' |
215 | * pair, nor similarly the two variables for the new index size, so we | 213 | * pair, nor similarly the two variables for the new index size, so we |
216 | * have to do this costly 64-bit division on fast-path. | 214 | * have to do this costly 64-bit division on fast-path. |
217 | */ | 215 | */ |
218 | idx_size += eff_leb_size - 1; | 216 | idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); |
219 | idx_lebs = div_u64(idx_size, eff_leb_size); | ||
220 | /* | 217 | /* |
221 | * The index head is not available for the in-the-gaps method, so add an | 218 | * The index head is not available for the in-the-gaps method, so add an |
222 | * extra LEB to compensate. | 219 | * extra LEB to compensate. |
@@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c) | |||
310 | * do_budget_space - reserve flash space for index and data growth. | 307 | * do_budget_space - reserve flash space for index and data growth. |
311 | * @c: UBIFS file-system description object | 308 | * @c: UBIFS file-system description object |
312 | * | 309 | * |
313 | * This function makes sure UBIFS has enough free eraseblocks for index growth | 310 | * This function makes sure UBIFS has enough free LEBs for index growth and |
314 | * and data. | 311 | * data. |
315 | * | 312 | * |
316 | * When budgeting index space, UBIFS reserves thrice as many LEBs as the index | 313 | * When budgeting index space, UBIFS reserves thrice as many LEBs as the index |
317 | * would take if it was consolidated and written to the flash. This guarantees | 314 | * would take if it was consolidated and written to the flash. This guarantees |
318 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always | 315 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always |
319 | * be able to commit dirty index. So this function basically adds amount of | 316 | * be able to commit dirty index. So this function basically adds amount of |
320 | * budgeted index space to the size of the current index, multiplies this by 3, | 317 | * budgeted index space to the size of the current index, multiplies this by 3, |
321 | * and makes sure this does not exceed the amount of free eraseblocks. | 318 | * and makes sure this does not exceed the amount of free LEBs. |
322 | * | 319 | * |
323 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: | 320 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: |
324 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might | 321 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might |
325 | * be large, because UBIFS does not do any index consolidation as long as | 322 | * be large, because UBIFS does not do any index consolidation as long as |
326 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs | 323 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs |
327 | * will contain a lot of dirt. | 324 | * will contain a lot of dirt. |
328 | * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be | 325 | * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, |
329 | * consolidated to take up to @c->min_idx_lebs LEBs. | 326 | * the index may be consolidated to take up to @c->min_idx_lebs LEBs. |
330 | * | 327 | * |
331 | * This function returns zero in case of success, and %-ENOSPC in case of | 328 | * This function returns zero in case of success, and %-ENOSPC in case of |
332 | * failure. | 329 | * failure. |
@@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) | |||
695 | * This function calculates amount of free space to report to user-space. | 692 | * This function calculates amount of free space to report to user-space. |
696 | * | 693 | * |
697 | * Because UBIFS may introduce substantial overhead (the index, node headers, | 694 | * Because UBIFS may introduce substantial overhead (the index, node headers, |
698 | * alignment, wastage at the end of eraseblocks, etc), it cannot report real | 695 | * alignment, wastage at the end of LEBs, etc), it cannot report real amount of |
699 | * amount of free flash space it has (well, because not all dirty space is | 696 | * free flash space it has (well, because not all dirty space is reclaimable, |
700 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, | 697 | * UBIFS does not actually know the real amount). If UBIFS did so, it would |
701 | * it would bread user expectations about what free space is. Users seem to | 698 | * bread user expectations about what free space is. Users seem to accustomed |
702 | * accustomed to assume that if the file-system reports N bytes of free space, | 699 | * to assume that if the file-system reports N bytes of free space, they would |
703 | * they would be able to fit a file of N bytes to the FS. This almost works for | 700 | * be able to fit a file of N bytes to the FS. This almost works for |
704 | * traditional file-systems, because they have way less overhead than UBIFS. | 701 | * traditional file-systems, because they have way less overhead than UBIFS. |
705 | * So, to keep users happy, UBIFS tries to take the overhead into account. | 702 | * So, to keep users happy, UBIFS tries to take the overhead into account. |
706 | */ | 703 | */ |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index e975bd82f38b..ce2cd8343618 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
479 | "bad or corrupted node)"); | 479 | "bad or corrupted node)"); |
480 | else { | 480 | else { |
481 | for (i = 0; i < nlen && dent->name[i]; i++) | 481 | for (i = 0; i < nlen && dent->name[i]; i++) |
482 | printk("%c", dent->name[i]); | 482 | printk(KERN_CONT "%c", dent->name[i]); |
483 | } | 483 | } |
484 | printk("\n"); | 484 | printk(KERN_CONT "\n"); |
485 | 485 | ||
486 | break; | 486 | break; |
487 | } | 487 | } |
@@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) | |||
1214 | 1214 | ||
1215 | /* | 1215 | /* |
1216 | * Make sure the last key in our znode is less or | 1216 | * Make sure the last key in our znode is less or |
1217 | * equivalent than the the key in zbranch which goes | 1217 | * equivalent than the key in the zbranch which goes |
1218 | * after our pointing zbranch. | 1218 | * after our pointing zbranch. |
1219 | */ | 1219 | */ |
1220 | cmp = keys_cmp(c, max, | 1220 | cmp = keys_cmp(c, max, |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 0ff89fe71e51..6d34dc7e33e1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
430 | struct ubifs_inode *ui = ubifs_inode(inode); | 430 | struct ubifs_inode *ui = ubifs_inode(inode); |
431 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 431 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
432 | int uninitialized_var(err), appending = !!(pos + len > inode->i_size); | 432 | int uninitialized_var(err), appending = !!(pos + len > inode->i_size); |
433 | int skipped_read = 0; | ||
433 | struct page *page; | 434 | struct page *page; |
434 | 435 | ||
435 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); | 436 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); |
@@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
444 | 445 | ||
445 | if (!PageUptodate(page)) { | 446 | if (!PageUptodate(page)) { |
446 | /* The page is not loaded from the flash */ | 447 | /* The page is not loaded from the flash */ |
447 | if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) | 448 | if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { |
448 | /* | 449 | /* |
449 | * We change whole page so no need to load it. But we | 450 | * We change whole page so no need to load it. But we |
450 | * have to set the @PG_checked flag to make the further | 451 | * have to set the @PG_checked flag to make the further |
@@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
453 | * the media. | 454 | * the media. |
454 | */ | 455 | */ |
455 | SetPageChecked(page); | 456 | SetPageChecked(page); |
456 | else { | 457 | skipped_read = 1; |
458 | } else { | ||
457 | err = do_readpage(page); | 459 | err = do_readpage(page); |
458 | if (err) { | 460 | if (err) { |
459 | unlock_page(page); | 461 | unlock_page(page); |
@@ -470,6 +472,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
470 | if (unlikely(err)) { | 472 | if (unlikely(err)) { |
471 | ubifs_assert(err == -ENOSPC); | 473 | ubifs_assert(err == -ENOSPC); |
472 | /* | 474 | /* |
475 | * If we skipped reading the page because we were going to | ||
476 | * write all of it, then it is not up to date. | ||
477 | */ | ||
478 | if (skipped_read) { | ||
479 | ClearPageChecked(page); | ||
480 | ClearPageUptodate(page); | ||
481 | } | ||
482 | /* | ||
473 | * Budgeting failed which means it would have to force | 483 | * Budgeting failed which means it would have to force |
474 | * write-back but didn't, because we set the @fast flag in the | 484 | * write-back but didn't, because we set the @fast flag in the |
475 | * request. Write-back cannot be done now, while we have the | 485 | * request. Write-back cannot be done now, while we have the |
@@ -949,7 +959,7 @@ static int do_writepage(struct page *page, int len) | |||
949 | * whole index and correct all inode sizes, which is long and unacceptable. | 959 | * whole index and correct all inode sizes, which is long and unacceptable. |
950 | * | 960 | * |
951 | * To prevent situations like this, UBIFS writes pages back only if they are | 961 | * To prevent situations like this, UBIFS writes pages back only if they are |
952 | * within last synchronized inode size, i.e. the the size which has been | 962 | * within the last synchronized inode size, i.e. the size which has been |
953 | * written to the flash media last time. Otherwise, UBIFS forces inode | 963 | * written to the flash media last time. Otherwise, UBIFS forces inode |
954 | * write-back, thus making sure the on-flash inode contains current inode size, | 964 | * write-back, thus making sure the on-flash inode contains current inode size, |
955 | * and then keeps writing pages back. | 965 | * and then keeps writing pages back. |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 717d79c97c5e..1d54383d1269 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, | |||
478 | * ubifs_find_free_space - find a data LEB with free space. | 478 | * ubifs_find_free_space - find a data LEB with free space. |
479 | * @c: the UBIFS file-system description object | 479 | * @c: the UBIFS file-system description object |
480 | * @min_space: minimum amount of required free space | 480 | * @min_space: minimum amount of required free space |
481 | * @free: contains amount of free space in the LEB on exit | 481 | * @offs: contains offset of where free space starts on exit |
482 | * @squeeze: whether to try to find space in a non-empty LEB first | 482 | * @squeeze: whether to try to find space in a non-empty LEB first |
483 | * | 483 | * |
484 | * This function looks for an LEB with at least @min_space bytes of free space. | 484 | * This function looks for an LEB with at least @min_space bytes of free space. |
@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, | |||
490 | * failed to find a LEB with @min_space bytes of free space and other a negative | 490 | * failed to find a LEB with @min_space bytes of free space and other a negative |
491 | * error codes in case of failure. | 491 | * error codes in case of failure. |
492 | */ | 492 | */ |
493 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | 493 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, |
494 | int squeeze) | 494 | int squeeze) |
495 | { | 495 | { |
496 | const struct ubifs_lprops *lprops; | 496 | const struct ubifs_lprops *lprops; |
@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
558 | spin_unlock(&c->space_lock); | 558 | spin_unlock(&c->space_lock); |
559 | } | 559 | } |
560 | 560 | ||
561 | *free = lprops->free; | 561 | *offs = c->leb_size - lprops->free; |
562 | ubifs_release_lprops(c); | 562 | ubifs_release_lprops(c); |
563 | 563 | ||
564 | if (*free == c->leb_size) { | 564 | if (*offs == 0) { |
565 | /* | 565 | /* |
566 | * Ensure that empty LEBs have been unmapped. They may not have | 566 | * Ensure that empty LEBs have been unmapped. They may not have |
567 | * been, for example, because of an unclean unmount. Also | 567 | * been, for example, because of an unclean unmount. Also |
@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
573 | return err; | 573 | return err; |
574 | } | 574 | } |
575 | 575 | ||
576 | dbg_find("found LEB %d, free %d", lnum, *free); | 576 | dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); |
577 | ubifs_assert(*free >= min_space); | 577 | ubifs_assert(*offs <= c->leb_size - min_space); |
578 | return lnum; | 578 | return lnum; |
579 | 579 | ||
580 | out: | 580 | out: |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index a711d33b3d3e..f0f5f15d384e 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -47,7 +47,7 @@ | |||
47 | * have to waste large pieces of free space at the end of LEB B, because nodes | 47 | * have to waste large pieces of free space at the end of LEB B, because nodes |
48 | * from LEB A would not fit. And the worst situation is when all nodes are of | 48 | * from LEB A would not fit. And the worst situation is when all nodes are of |
49 | * maximum size. So dark watermark is the amount of free + dirty space in LEB | 49 | * maximum size. So dark watermark is the amount of free + dirty space in LEB |
50 | * which are guaranteed to be reclaimable. If LEB has less space, the GC migh | 50 | * which are guaranteed to be reclaimable. If LEB has less space, the GC might |
51 | * be unable to reclaim it. So, LEBs with free + dirty greater than dark | 51 | * be unable to reclaim it. So, LEBs with free + dirty greater than dark |
52 | * watermark are "good" LEBs from GC's point of view. The other LEBs are not so | 52 | * watermark are "good" LEBs from GC's point of view. The other LEBs are not so |
53 | * good, and GC takes extra care when moving them. | 53 | * good, and GC takes extra care when moving them. |
@@ -57,14 +57,6 @@ | |||
57 | #include "ubifs.h" | 57 | #include "ubifs.h" |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * GC tries to optimize the way it fit nodes to available space, and it sorts | ||
61 | * nodes a little. The below constants are watermarks which define "large", | ||
62 | * "medium", and "small" nodes. | ||
63 | */ | ||
64 | #define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) | ||
65 | #define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ | ||
66 | |||
67 | /* | ||
68 | * GC may need to move more than one LEB to make progress. The below constants | 60 | * GC may need to move more than one LEB to make progress. The below constants |
69 | * define "soft" and "hard" limits on the number of LEBs the garbage collector | 61 | * define "soft" and "hard" limits on the number of LEBs the garbage collector |
70 | * may move. | 62 | * may move. |
@@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c) | |||
116 | } | 108 | } |
117 | 109 | ||
118 | /** | 110 | /** |
119 | * joinup - bring data nodes for an inode together. | 111 | * list_sort - sort a list. |
120 | * @c: UBIFS file-system description object | 112 | * @priv: private data, passed to @cmp |
121 | * @sleb: describes scanned LEB | 113 | * @head: the list to sort |
122 | * @inum: inode number | 114 | * @cmp: the elements comparison function |
123 | * @blk: block number | ||
124 | * @data: list to which to add data nodes | ||
125 | * | 115 | * |
126 | * This function looks at the first few nodes in the scanned LEB @sleb and adds | 116 | * This function has been implemented by Mark J Roberts <mjr@znex.org>. It |
127 | * them to @data if they are data nodes from @inum and have a larger block | 117 | * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted |
128 | * number than @blk. This function returns %0 on success and a negative error | 118 | * in ascending order. |
129 | * code on failure. | 119 | * |
120 | * The comparison function @cmp is supposed to return a negative value if @a is | ||
121 | * than @b, and a positive value if @a is greater than @b. If @a and @b are | ||
122 | * equivalent, then it does not matter what this function returns. | ||
130 | */ | 123 | */ |
131 | static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, | 124 | static void list_sort(void *priv, struct list_head *head, |
132 | unsigned int blk, struct list_head *data) | 125 | int (*cmp)(void *priv, struct list_head *a, |
126 | struct list_head *b)) | ||
133 | { | 127 | { |
134 | int err, cnt = 6, lnum = sleb->lnum, offs; | 128 | struct list_head *p, *q, *e, *list, *tail, *oldhead; |
135 | struct ubifs_scan_node *snod, *tmp; | 129 | int insize, nmerges, psize, qsize, i; |
136 | union ubifs_key *key; | 130 | |
131 | if (list_empty(head)) | ||
132 | return; | ||
133 | |||
134 | list = head->next; | ||
135 | list_del(head); | ||
136 | insize = 1; | ||
137 | for (;;) { | ||
138 | p = oldhead = list; | ||
139 | list = tail = NULL; | ||
140 | nmerges = 0; | ||
141 | |||
142 | while (p) { | ||
143 | nmerges++; | ||
144 | q = p; | ||
145 | psize = 0; | ||
146 | for (i = 0; i < insize; i++) { | ||
147 | psize++; | ||
148 | q = q->next == oldhead ? NULL : q->next; | ||
149 | if (!q) | ||
150 | break; | ||
151 | } | ||
137 | 152 | ||
138 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | 153 | qsize = insize; |
139 | key = &snod->key; | 154 | while (psize > 0 || (qsize > 0 && q)) { |
140 | if (key_inum(c, key) == inum && | 155 | if (!psize) { |
141 | key_type(c, key) == UBIFS_DATA_KEY && | 156 | e = q; |
142 | key_block(c, key) > blk) { | 157 | q = q->next; |
143 | offs = snod->offs; | 158 | qsize--; |
144 | err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); | 159 | if (q == oldhead) |
145 | if (err < 0) | 160 | q = NULL; |
146 | return err; | 161 | } else if (!qsize || !q) { |
147 | list_del(&snod->list); | 162 | e = p; |
148 | if (err) { | 163 | p = p->next; |
149 | list_add_tail(&snod->list, data); | 164 | psize--; |
150 | blk = key_block(c, key); | 165 | if (p == oldhead) |
151 | } else | 166 | p = NULL; |
152 | kfree(snod); | 167 | } else if (cmp(priv, p, q) <= 0) { |
153 | cnt = 6; | 168 | e = p; |
154 | } else if (--cnt == 0) | 169 | p = p->next; |
170 | psize--; | ||
171 | if (p == oldhead) | ||
172 | p = NULL; | ||
173 | } else { | ||
174 | e = q; | ||
175 | q = q->next; | ||
176 | qsize--; | ||
177 | if (q == oldhead) | ||
178 | q = NULL; | ||
179 | } | ||
180 | if (tail) | ||
181 | tail->next = e; | ||
182 | else | ||
183 | list = e; | ||
184 | e->prev = tail; | ||
185 | tail = e; | ||
186 | } | ||
187 | p = q; | ||
188 | } | ||
189 | |||
190 | tail->next = list; | ||
191 | list->prev = tail; | ||
192 | |||
193 | if (nmerges <= 1) | ||
155 | break; | 194 | break; |
195 | |||
196 | insize *= 2; | ||
156 | } | 197 | } |
157 | return 0; | 198 | |
199 | head->next = list; | ||
200 | head->prev = list->prev; | ||
201 | list->prev->next = head; | ||
202 | list->prev = head; | ||
158 | } | 203 | } |
159 | 204 | ||
160 | /** | 205 | /** |
161 | * move_nodes - move nodes. | 206 | * data_nodes_cmp - compare 2 data nodes. |
207 | * @priv: UBIFS file-system description object | ||
208 | * @a: first data node | ||
209 | * @b: second data node | ||
210 | * | ||
211 | * This function compares data nodes @a and @b. Returns %1 if @a has greater | ||
212 | * inode or block number, and %-1 otherwise. | ||
213 | */ | ||
214 | int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
215 | { | ||
216 | ino_t inuma, inumb; | ||
217 | struct ubifs_info *c = priv; | ||
218 | struct ubifs_scan_node *sa, *sb; | ||
219 | |||
220 | cond_resched(); | ||
221 | sa = list_entry(a, struct ubifs_scan_node, list); | ||
222 | sb = list_entry(b, struct ubifs_scan_node, list); | ||
223 | ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); | ||
224 | ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); | ||
225 | |||
226 | inuma = key_inum(c, &sa->key); | ||
227 | inumb = key_inum(c, &sb->key); | ||
228 | |||
229 | if (inuma == inumb) { | ||
230 | unsigned int blka = key_block(c, &sa->key); | ||
231 | unsigned int blkb = key_block(c, &sb->key); | ||
232 | |||
233 | if (blka <= blkb) | ||
234 | return -1; | ||
235 | } else if (inuma <= inumb) | ||
236 | return -1; | ||
237 | |||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * nondata_nodes_cmp - compare 2 non-data nodes. | ||
243 | * @priv: UBIFS file-system description object | ||
244 | * @a: first node | ||
245 | * @b: second node | ||
246 | * | ||
247 | * This function compares nodes @a and @b. It makes sure that inode nodes go | ||
248 | * first and sorted by length in descending order. Directory entry nodes go | ||
249 | * after inode nodes and are sorted in ascending hash value order. | ||
250 | */ | ||
251 | int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
252 | { | ||
253 | int typea, typeb; | ||
254 | ino_t inuma, inumb; | ||
255 | struct ubifs_info *c = priv; | ||
256 | struct ubifs_scan_node *sa, *sb; | ||
257 | |||
258 | cond_resched(); | ||
259 | sa = list_entry(a, struct ubifs_scan_node, list); | ||
260 | sb = list_entry(b, struct ubifs_scan_node, list); | ||
261 | typea = key_type(c, &sa->key); | ||
262 | typeb = key_type(c, &sb->key); | ||
263 | ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); | ||
264 | |||
265 | /* Inodes go before directory entries */ | ||
266 | if (typea == UBIFS_INO_KEY) { | ||
267 | if (typeb == UBIFS_INO_KEY) | ||
268 | return sb->len - sa->len; | ||
269 | return -1; | ||
270 | } | ||
271 | if (typeb == UBIFS_INO_KEY) | ||
272 | return 1; | ||
273 | |||
274 | ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); | ||
275 | inuma = key_inum(c, &sa->key); | ||
276 | inumb = key_inum(c, &sb->key); | ||
277 | |||
278 | if (inuma == inumb) { | ||
279 | uint32_t hasha = key_hash(c, &sa->key); | ||
280 | uint32_t hashb = key_hash(c, &sb->key); | ||
281 | |||
282 | if (hasha <= hashb) | ||
283 | return -1; | ||
284 | } else if (inuma <= inumb) | ||
285 | return -1; | ||
286 | |||
287 | return 1; | ||
288 | } | ||
289 | |||
290 | /** | ||
291 | * sort_nodes - sort nodes for GC. | ||
162 | * @c: UBIFS file-system description object | 292 | * @c: UBIFS file-system description object |
163 | * @sleb: describes nodes to move | 293 | * @sleb: describes nodes to sort and contains the result on exit |
294 | * @nondata: contains non-data nodes on exit | ||
295 | * @min: minimum node size is returned here | ||
164 | * | 296 | * |
165 | * This function moves valid nodes from data LEB described by @sleb to the GC | 297 | * This function sorts the list of inodes to garbage collect. First of all, it |
166 | * journal head. The obsolete nodes are dropped. | 298 | * kills obsolete nodes and separates data and non-data nodes to the |
299 | * @sleb->nodes and @nondata lists correspondingly. | ||
300 | * | ||
301 | * Data nodes are then sorted in block number order - this is important for | ||
302 | * bulk-read; data nodes with lower inode number go before data nodes with | ||
303 | * higher inode number, and data nodes with lower block number go before data | ||
304 | * nodes with higher block number; | ||
167 | * | 305 | * |
168 | * When moving nodes we have to deal with classical bin-packing problem: the | 306 | * Non-data nodes are sorted as follows. |
169 | * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", | 307 | * o First go inode nodes - they are sorted in descending length order. |
170 | * where the nodes in the @sleb->nodes list are the elements which should be | 308 | * o Then go directory entry nodes - they are sorted in hash order, which |
171 | * fit optimally to the bins. This function uses the "first fit decreasing" | 309 | * should supposedly optimize 'readdir()'. Direntry nodes with lower parent |
172 | * strategy, although it does not really sort the nodes but just split them on | 310 | * inode number go before direntry nodes with higher parent inode number, |
173 | * 3 classes - large, medium, and small, so they are roughly sorted. | 311 | * and direntry nodes with lower name hash values go before direntry nodes |
312 | * with higher name hash values. | ||
174 | * | 313 | * |
175 | * This function returns zero in case of success, %-EAGAIN if commit is | 314 | * This function returns zero in case of success and a negative error code in |
176 | * required, and other negative error codes in case of other failures. | 315 | * case of failure. |
177 | */ | 316 | */ |
178 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | 317 | static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, |
318 | struct list_head *nondata, int *min) | ||
179 | { | 319 | { |
180 | struct ubifs_scan_node *snod, *tmp; | 320 | struct ubifs_scan_node *snod, *tmp; |
181 | struct list_head data, large, medium, small; | ||
182 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | ||
183 | int avail, err, min = INT_MAX; | ||
184 | unsigned int blk = 0; | ||
185 | ino_t inum = 0; | ||
186 | 321 | ||
187 | INIT_LIST_HEAD(&data); | 322 | *min = INT_MAX; |
188 | INIT_LIST_HEAD(&large); | ||
189 | INIT_LIST_HEAD(&medium); | ||
190 | INIT_LIST_HEAD(&small); | ||
191 | 323 | ||
192 | while (!list_empty(&sleb->nodes)) { | 324 | /* Separate data nodes and non-data nodes */ |
193 | struct list_head *lst = sleb->nodes.next; | 325 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { |
194 | 326 | int err; | |
195 | snod = list_entry(lst, struct ubifs_scan_node, list); | ||
196 | 327 | ||
197 | ubifs_assert(snod->type != UBIFS_IDX_NODE); | 328 | ubifs_assert(snod->type != UBIFS_IDX_NODE); |
198 | ubifs_assert(snod->type != UBIFS_REF_NODE); | 329 | ubifs_assert(snod->type != UBIFS_REF_NODE); |
@@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
201 | err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, | 332 | err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, |
202 | snod->offs, 0); | 333 | snod->offs, 0); |
203 | if (err < 0) | 334 | if (err < 0) |
204 | goto out; | 335 | return err; |
205 | 336 | ||
206 | list_del(lst); | ||
207 | if (!err) { | 337 | if (!err) { |
208 | /* The node is obsolete, remove it from the list */ | 338 | /* The node is obsolete, remove it from the list */ |
339 | list_del(&snod->list); | ||
209 | kfree(snod); | 340 | kfree(snod); |
210 | continue; | 341 | continue; |
211 | } | 342 | } |
212 | 343 | ||
213 | /* | 344 | if (snod->len < *min) |
214 | * Sort the list of nodes so that data nodes go first, large | 345 | *min = snod->len; |
215 | * nodes go second, and small nodes go last. | 346 | |
216 | */ | 347 | if (key_type(c, &snod->key) != UBIFS_DATA_KEY) |
217 | if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { | 348 | list_move_tail(&snod->list, nondata); |
218 | if (inum != key_inum(c, &snod->key)) { | ||
219 | if (inum) { | ||
220 | /* | ||
221 | * Try to move data nodes from the same | ||
222 | * inode together. | ||
223 | */ | ||
224 | err = joinup(c, sleb, inum, blk, &data); | ||
225 | if (err) | ||
226 | goto out; | ||
227 | } | ||
228 | inum = key_inum(c, &snod->key); | ||
229 | blk = key_block(c, &snod->key); | ||
230 | } | ||
231 | list_add_tail(lst, &data); | ||
232 | } else if (snod->len > MEDIUM_NODE_WM) | ||
233 | list_add_tail(lst, &large); | ||
234 | else if (snod->len > SMALL_NODE_WM) | ||
235 | list_add_tail(lst, &medium); | ||
236 | else | ||
237 | list_add_tail(lst, &small); | ||
238 | |||
239 | /* And find the smallest node */ | ||
240 | if (snod->len < min) | ||
241 | min = snod->len; | ||
242 | } | 349 | } |
243 | 350 | ||
244 | /* | 351 | /* Sort data and non-data nodes */ |
245 | * Join the tree lists so that we'd have one roughly sorted list | 352 | list_sort(c, &sleb->nodes, &data_nodes_cmp); |
246 | * ('large' will be the head of the joined list). | 353 | list_sort(c, nondata, &nondata_nodes_cmp); |
247 | */ | 354 | return 0; |
248 | list_splice(&data, &large); | 355 | } |
249 | list_splice(&medium, large.prev); | 356 | |
250 | list_splice(&small, large.prev); | 357 | /** |
358 | * move_node - move a node. | ||
359 | * @c: UBIFS file-system description object | ||
360 | * @sleb: describes the LEB to move nodes from | ||
361 | * @snod: the node to move | ||
362 | * @wbuf: write-buffer to move node to | ||
363 | * | ||
364 | * This function moves node @snod to @wbuf, changes TNC correspondingly, and | ||
365 | * destroys @snod. Returns zero in case of success and a negative error code in | ||
366 | * case of failure. | ||
367 | */ | ||
368 | static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | ||
369 | struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) | ||
370 | { | ||
371 | int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; | ||
372 | |||
373 | cond_resched(); | ||
374 | err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); | ||
375 | if (err) | ||
376 | return err; | ||
377 | |||
378 | err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, | ||
379 | snod->offs, new_lnum, new_offs, | ||
380 | snod->len); | ||
381 | list_del(&snod->list); | ||
382 | kfree(snod); | ||
383 | return err; | ||
384 | } | ||
385 | |||
386 | /** | ||
387 | * move_nodes - move nodes. | ||
388 | * @c: UBIFS file-system description object | ||
389 | * @sleb: describes the LEB to move nodes from | ||
390 | * | ||
391 | * This function moves valid nodes from data LEB described by @sleb to the GC | ||
392 | * journal head. This function returns zero in case of success, %-EAGAIN if | ||
393 | * commit is required, and other negative error codes in case of other | ||
394 | * failures. | ||
395 | */ | ||
396 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | ||
397 | { | ||
398 | int err, min; | ||
399 | LIST_HEAD(nondata); | ||
400 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | ||
251 | 401 | ||
252 | if (wbuf->lnum == -1) { | 402 | if (wbuf->lnum == -1) { |
253 | /* | 403 | /* |
@@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
256 | */ | 406 | */ |
257 | err = switch_gc_head(c); | 407 | err = switch_gc_head(c); |
258 | if (err) | 408 | if (err) |
259 | goto out; | 409 | return err; |
260 | } | 410 | } |
261 | 411 | ||
412 | err = sort_nodes(c, sleb, &nondata, &min); | ||
413 | if (err) | ||
414 | goto out; | ||
415 | |||
262 | /* Write nodes to their new location. Use the first-fit strategy */ | 416 | /* Write nodes to their new location. Use the first-fit strategy */ |
263 | while (1) { | 417 | while (1) { |
264 | avail = c->leb_size - wbuf->offs - wbuf->used; | 418 | int avail; |
265 | list_for_each_entry_safe(snod, tmp, &large, list) { | 419 | struct ubifs_scan_node *snod, *tmp; |
266 | int new_lnum, new_offs; | 420 | |
421 | /* Move data nodes */ | ||
422 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | ||
423 | avail = c->leb_size - wbuf->offs - wbuf->used; | ||
424 | if (snod->len > avail) | ||
425 | /* | ||
426 | * Do not skip data nodes in order to optimize | ||
427 | * bulk-read. | ||
428 | */ | ||
429 | break; | ||
430 | |||
431 | err = move_node(c, sleb, snod, wbuf); | ||
432 | if (err) | ||
433 | goto out; | ||
434 | } | ||
267 | 435 | ||
436 | /* Move non-data nodes */ | ||
437 | list_for_each_entry_safe(snod, tmp, &nondata, list) { | ||
438 | avail = c->leb_size - wbuf->offs - wbuf->used; | ||
268 | if (avail < min) | 439 | if (avail < min) |
269 | break; | 440 | break; |
270 | 441 | ||
271 | if (snod->len > avail) | 442 | if (snod->len > avail) { |
272 | /* This node does not fit */ | 443 | /* |
444 | * Keep going only if this is an inode with | ||
445 | * some data. Otherwise stop and switch the GC | ||
446 | * head. IOW, we assume that data-less inode | ||
447 | * nodes and direntry nodes are roughly of the | ||
448 | * same size. | ||
449 | */ | ||
450 | if (key_type(c, &snod->key) == UBIFS_DENT_KEY || | ||
451 | snod->len == UBIFS_INO_NODE_SZ) | ||
452 | break; | ||
273 | continue; | 453 | continue; |
454 | } | ||
274 | 455 | ||
275 | cond_resched(); | 456 | err = move_node(c, sleb, snod, wbuf); |
276 | |||
277 | new_lnum = wbuf->lnum; | ||
278 | new_offs = wbuf->offs + wbuf->used; | ||
279 | err = ubifs_wbuf_write_nolock(wbuf, snod->node, | ||
280 | snod->len); | ||
281 | if (err) | 457 | if (err) |
282 | goto out; | 458 | goto out; |
283 | err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, | ||
284 | snod->offs, new_lnum, new_offs, | ||
285 | snod->len); | ||
286 | if (err) | ||
287 | goto out; | ||
288 | |||
289 | avail = c->leb_size - wbuf->offs - wbuf->used; | ||
290 | list_del(&snod->list); | ||
291 | kfree(snod); | ||
292 | } | 459 | } |
293 | 460 | ||
294 | if (list_empty(&large)) | 461 | if (list_empty(&sleb->nodes) && list_empty(&nondata)) |
295 | break; | 462 | break; |
296 | 463 | ||
297 | /* | 464 | /* |
@@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
306 | return 0; | 473 | return 0; |
307 | 474 | ||
308 | out: | 475 | out: |
309 | list_for_each_entry_safe(snod, tmp, &large, list) { | 476 | list_splice_tail(&nondata, &sleb->nodes); |
310 | list_del(&snod->list); | ||
311 | kfree(snod); | ||
312 | } | ||
313 | return err; | 477 | return err; |
314 | } | 478 | } |
315 | 479 | ||
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index a11ca0958a23..64b5f3a309f5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) | |||
114 | */ | 114 | */ |
115 | static int reserve_space(struct ubifs_info *c, int jhead, int len) | 115 | static int reserve_space(struct ubifs_info *c, int jhead, int len) |
116 | { | 116 | { |
117 | int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; | 117 | int err = 0, err1, retries = 0, avail, lnum, offs, squeeze; |
118 | struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; | 118 | struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; |
119 | 119 | ||
120 | /* | 120 | /* |
@@ -139,10 +139,9 @@ again: | |||
139 | * Write buffer wasn't seek'ed or there is no enough space - look for an | 139 | * Write buffer wasn't seek'ed or there is no enough space - look for an |
140 | * LEB with some empty space. | 140 | * LEB with some empty space. |
141 | */ | 141 | */ |
142 | lnum = ubifs_find_free_space(c, len, &free, squeeze); | 142 | lnum = ubifs_find_free_space(c, len, &offs, squeeze); |
143 | if (lnum >= 0) { | 143 | if (lnum >= 0) { |
144 | /* Found an LEB, add it to the journal head */ | 144 | /* Found an LEB, add it to the journal head */ |
145 | offs = c->leb_size - free; | ||
146 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); | 145 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); |
147 | if (err) | 146 | if (err) |
148 | goto out_return; | 147 | goto out_return; |
@@ -1366,7 +1365,7 @@ out_ro: | |||
1366 | * @host: host inode | 1365 | * @host: host inode |
1367 | * | 1366 | * |
1368 | * This function writes the updated version of an extended attribute inode and | 1367 | * This function writes the updated version of an extended attribute inode and |
1369 | * the host inode tho the journal (to the base head). The host inode is written | 1368 | * the host inode to the journal (to the base head). The host inode is written |
1370 | * after the extended attribute inode in order to guarantee that the extended | 1369 | * after the extended attribute inode in order to guarantee that the extended |
1371 | * attribute will be flushed when the inode is synchronized by 'fsync()' and | 1370 | * attribute will be flushed when the inode is synchronized by 'fsync()' and |
1372 | * consequently, the write-buffer is synchronized. This function returns zero | 1371 | * consequently, the write-buffer is synchronized. This function returns zero |
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index efb3430a2581..5fa27ea031ba 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h | |||
@@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) | |||
381 | * @c: UBIFS file-system description object | 381 | * @c: UBIFS file-system description object |
382 | * @key: the key to get hash from | 382 | * @key: the key to get hash from |
383 | */ | 383 | */ |
384 | static inline int key_hash(const struct ubifs_info *c, | 384 | static inline uint32_t key_hash(const struct ubifs_info *c, |
385 | const union ubifs_key *key) | 385 | const union ubifs_key *key) |
386 | { | 386 | { |
387 | return key->u32[1] & UBIFS_S_KEY_HASH_MASK; | 387 | return key->u32[1] & UBIFS_S_KEY_HASH_MASK; |
388 | } | 388 | } |
@@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c, | |||
392 | * @c: UBIFS file-system description object | 392 | * @c: UBIFS file-system description object |
393 | * @k: the key to get hash from | 393 | * @k: the key to get hash from |
394 | */ | 394 | */ |
395 | static inline int key_hash_flash(const struct ubifs_info *c, const void *k) | 395 | static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) |
396 | { | 396 | { |
397 | const union ubifs_key *key = k; | 397 | const union ubifs_key *key = k; |
398 | 398 | ||
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 3e0aa7367556..56e33772a1ee 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
@@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) | |||
239 | } | 239 | } |
240 | 240 | ||
241 | /* | 241 | /* |
242 | * Make sure the the amount of space in buds will not exceed | 242 | * Make sure the amount of space in buds will not exceed the |
243 | * 'c->max_bud_bytes' limit, because we want to guarantee mount time | 243 | * 'c->max_bud_bytes' limit, because we want to guarantee mount time |
244 | * limits. | 244 | * limits. |
245 | * | 245 | * |
@@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c) | |||
367 | bud->jhead, c->leb_size - bud->start, | 367 | bud->jhead, c->leb_size - bud->start, |
368 | c->cmt_bud_bytes); | 368 | c->cmt_bud_bytes); |
369 | rb_erase(p1, &c->buds); | 369 | rb_erase(p1, &c->buds); |
370 | list_del(&bud->list); | ||
371 | /* | 370 | /* |
372 | * If the commit does not finish, the recovery will need | 371 | * If the commit does not finish, the recovery will need |
373 | * to replay the journal, in which case the old buds | 372 | * to replay the journal, in which case the old buds |
@@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c) | |||
375 | * commit i.e. do not allow them to be garbage | 374 | * commit i.e. do not allow them to be garbage |
376 | * collected. | 375 | * collected. |
377 | */ | 376 | */ |
378 | list_add(&bud->list, &c->old_buds); | 377 | list_move(&bud->list, &c->old_buds); |
379 | } | 378 | } |
380 | } | 379 | } |
381 | spin_unlock(&c->buds_lock); | 380 | spin_unlock(&c->buds_lock); |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 3216a1f277f8..8cbfb8248025 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
@@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
229 | while (offs + len > c->leb_size) { | 229 | while (offs + len > c->leb_size) { |
230 | alen = ALIGN(offs, c->min_io_size); | 230 | alen = ALIGN(offs, c->min_io_size); |
231 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 231 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
232 | dbg_chk_lpt_sz(c, 2, alen - offs); | 232 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
233 | err = alloc_lpt_leb(c, &lnum); | 233 | err = alloc_lpt_leb(c, &lnum); |
234 | if (err) | 234 | if (err) |
235 | goto no_space; | 235 | goto no_space; |
@@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
272 | if (offs + c->lsave_sz > c->leb_size) { | 272 | if (offs + c->lsave_sz > c->leb_size) { |
273 | alen = ALIGN(offs, c->min_io_size); | 273 | alen = ALIGN(offs, c->min_io_size); |
274 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 274 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
275 | dbg_chk_lpt_sz(c, 2, alen - offs); | 275 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
276 | err = alloc_lpt_leb(c, &lnum); | 276 | err = alloc_lpt_leb(c, &lnum); |
277 | if (err) | 277 | if (err) |
278 | goto no_space; | 278 | goto no_space; |
@@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
292 | if (offs + c->ltab_sz > c->leb_size) { | 292 | if (offs + c->ltab_sz > c->leb_size) { |
293 | alen = ALIGN(offs, c->min_io_size); | 293 | alen = ALIGN(offs, c->min_io_size); |
294 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 294 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
295 | dbg_chk_lpt_sz(c, 2, alen - offs); | 295 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
296 | err = alloc_lpt_leb(c, &lnum); | 296 | err = alloc_lpt_leb(c, &lnum); |
297 | if (err) | 297 | if (err) |
298 | goto no_space; | 298 | goto no_space; |
@@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c) | |||
416 | alen, UBI_SHORTTERM); | 416 | alen, UBI_SHORTTERM); |
417 | if (err) | 417 | if (err) |
418 | return err; | 418 | return err; |
419 | dbg_chk_lpt_sz(c, 4, alen - wlen); | ||
420 | } | 419 | } |
421 | dbg_chk_lpt_sz(c, 2, 0); | 420 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
422 | err = realloc_lpt_leb(c, &lnum); | 421 | err = realloc_lpt_leb(c, &lnum); |
423 | if (err) | 422 | if (err) |
424 | goto no_space; | 423 | goto no_space; |
425 | offs = 0; | 424 | offs = from = 0; |
426 | from = 0; | ||
427 | ubifs_assert(lnum >= c->lpt_first && | 425 | ubifs_assert(lnum >= c->lpt_first && |
428 | lnum <= c->lpt_last); | 426 | lnum <= c->lpt_last); |
429 | err = ubifs_leb_unmap(c, lnum); | 427 | err = ubifs_leb_unmap(c, lnum); |
@@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c) | |||
477 | UBI_SHORTTERM); | 475 | UBI_SHORTTERM); |
478 | if (err) | 476 | if (err) |
479 | return err; | 477 | return err; |
480 | dbg_chk_lpt_sz(c, 2, alen - wlen); | 478 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
481 | err = realloc_lpt_leb(c, &lnum); | 479 | err = realloc_lpt_leb(c, &lnum); |
482 | if (err) | 480 | if (err) |
483 | goto no_space; | 481 | goto no_space; |
484 | offs = 0; | 482 | offs = from = 0; |
485 | ubifs_assert(lnum >= c->lpt_first && | 483 | ubifs_assert(lnum >= c->lpt_first && |
486 | lnum <= c->lpt_last); | 484 | lnum <= c->lpt_last); |
487 | err = ubifs_leb_unmap(c, lnum); | 485 | err = ubifs_leb_unmap(c, lnum); |
@@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c) | |||
504 | UBI_SHORTTERM); | 502 | UBI_SHORTTERM); |
505 | if (err) | 503 | if (err) |
506 | return err; | 504 | return err; |
507 | dbg_chk_lpt_sz(c, 2, alen - wlen); | 505 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
508 | err = realloc_lpt_leb(c, &lnum); | 506 | err = realloc_lpt_leb(c, &lnum); |
509 | if (err) | 507 | if (err) |
510 | goto no_space; | 508 | goto no_space; |
511 | offs = 0; | 509 | offs = from = 0; |
512 | ubifs_assert(lnum >= c->lpt_first && | 510 | ubifs_assert(lnum >= c->lpt_first && |
513 | lnum <= c->lpt_last); | 511 | lnum <= c->lpt_last); |
514 | err = ubifs_leb_unmap(c, lnum); | 512 | err = ubifs_leb_unmap(c, lnum); |
@@ -1756,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) | |||
1756 | /** | 1754 | /** |
1757 | * dbg_chk_lpt_sz - check LPT does not write more than LPT size. | 1755 | * dbg_chk_lpt_sz - check LPT does not write more than LPT size. |
1758 | * @c: the UBIFS file-system description object | 1756 | * @c: the UBIFS file-system description object |
1759 | * @action: action | 1757 | * @action: what to do |
1760 | * @len: length written | 1758 | * @len: length written |
1761 | * | 1759 | * |
1762 | * This function returns %0 on success and a negative error code on failure. | 1760 | * This function returns %0 on success and a negative error code on failure. |
1761 | * The @action argument may be one of: | ||
1762 | * o %0 - LPT debugging checking starts, initialize debugging variables; | ||
1763 | * o %1 - wrote an LPT node, increase LPT size by @len bytes; | ||
1764 | * o %2 - switched to a different LEB and wasted @len bytes; | ||
1765 | * o %3 - check that we've written the right number of bytes; | ||
1766 | * o %4 - wasted @len bytes. | ||
1763 | */ | 1767 | */ |
1764 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) | 1768 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) |
1765 | { | 1769 | { |
@@ -1917,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1917 | lnum, offs); | 1921 | lnum, offs); |
1918 | err = ubifs_unpack_nnode(c, buf, &nnode); | 1922 | err = ubifs_unpack_nnode(c, buf, &nnode); |
1919 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | 1923 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { |
1920 | printk("%d:%d", nnode.nbranch[i].lnum, | 1924 | printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, |
1921 | nnode.nbranch[i].offs); | 1925 | nnode.nbranch[i].offs); |
1922 | if (i != UBIFS_LPT_FANOUT - 1) | 1926 | if (i != UBIFS_LPT_FANOUT - 1) |
1923 | printk(", "); | 1927 | printk(KERN_CONT ", "); |
1924 | } | 1928 | } |
1925 | printk("\n"); | 1929 | printk(KERN_CONT "\n"); |
1926 | break; | 1930 | break; |
1927 | } | 1931 | } |
1928 | case UBIFS_LPT_LTAB: | 1932 | case UBIFS_LPT_LTAB: |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 90acac603e63..10662975d2ef 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, | |||
425 | * @lnum: LEB number of the LEB from which @buf was read | 425 | * @lnum: LEB number of the LEB from which @buf was read |
426 | * @offs: offset from which @buf was read | 426 | * @offs: offset from which @buf was read |
427 | * | 427 | * |
428 | * This function scans @buf for more nodes and returns %0 is a node is found and | 428 | * This function ensures that the corrupted node at @offs is the last thing |
429 | * %1 if no more nodes are found. | 429 | * written to a LEB. This function returns %1 if more data is not found and |
430 | * %0 if more data is found. | ||
430 | */ | 431 | */ |
431 | static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, | 432 | static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, |
432 | int lnum, int offs) | 433 | int lnum, int offs) |
433 | { | 434 | { |
434 | int skip, next_offs = 0; | 435 | struct ubifs_ch *ch = buf; |
436 | int skip, dlen = le32_to_cpu(ch->len); | ||
435 | 437 | ||
436 | if (len > UBIFS_DATA_NODE_SZ) { | 438 | /* Check for empty space after the corrupt node's common header */ |
437 | struct ubifs_ch *ch = buf; | 439 | skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; |
438 | int dlen = le32_to_cpu(ch->len); | 440 | if (is_empty(buf + skip, len - skip)) |
439 | 441 | return 1; | |
440 | if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && | 442 | /* |
441 | dlen <= UBIFS_MAX_DATA_NODE_SZ) | 443 | * The area after the common header size is not empty, so the common |
442 | /* The corrupt node looks like a data node */ | 444 | * header must be intact. Check it. |
443 | next_offs = ALIGN(offs + dlen, 8); | 445 | */ |
444 | } | 446 | if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { |
445 | 447 | dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); | |
446 | if (c->min_io_size == 1) | 448 | return 0; |
447 | skip = 8; | ||
448 | else | ||
449 | skip = ALIGN(offs + 1, c->min_io_size) - offs; | ||
450 | |||
451 | offs += skip; | ||
452 | buf += skip; | ||
453 | len -= skip; | ||
454 | while (len > 8) { | ||
455 | struct ubifs_ch *ch = buf; | ||
456 | uint32_t magic = le32_to_cpu(ch->magic); | ||
457 | int ret; | ||
458 | |||
459 | if (magic == UBIFS_NODE_MAGIC) { | ||
460 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); | ||
461 | if (ret == SCANNED_A_NODE || ret > 0) { | ||
462 | /* | ||
463 | * There is a small chance this is just data in | ||
464 | * a data node, so check that possibility. e.g. | ||
465 | * this is part of a file that itself contains | ||
466 | * a UBIFS image. | ||
467 | */ | ||
468 | if (next_offs && offs + le32_to_cpu(ch->len) <= | ||
469 | next_offs) | ||
470 | continue; | ||
471 | dbg_rcvry("unexpected node at %d:%d", lnum, | ||
472 | offs); | ||
473 | return 0; | ||
474 | } | ||
475 | } | ||
476 | offs += 8; | ||
477 | buf += 8; | ||
478 | len -= 8; | ||
479 | } | 449 | } |
480 | return 1; | 450 | /* Now we know the corrupt node's length we can skip over it */ |
451 | skip = ALIGN(offs + dlen, c->min_io_size) - offs; | ||
452 | /* After which there should be empty space */ | ||
453 | if (is_empty(buf + skip, len - skip)) | ||
454 | return 1; | ||
455 | dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip); | ||
456 | return 0; | ||
481 | } | 457 | } |
482 | 458 | ||
483 | /** | 459 | /** |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ce42a7b0ca5a..11cc80125a49 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | |||
143 | dirty -= c->leb_size - lp->free; | 143 | dirty -= c->leb_size - lp->free; |
144 | /* | 144 | /* |
145 | * If the replay order was perfect the dirty space would now be | 145 | * If the replay order was perfect the dirty space would now be |
146 | * zero. The order is not perfect because the the journal heads | 146 | * zero. The order is not perfect because the journal heads |
147 | * race with each other. This is not a problem but it does mean | 147 | * race with each other. This is not a problem but it does mean |
148 | * that the dirty space may temporarily exceed c->leb_size | 148 | * that the dirty space may temporarily exceed c->leb_size |
149 | * during the replay. | 149 | * during the replay. |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index e070c643d1bb..57085e43320f 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
@@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c) | |||
193 | if (tmp64 > DEFAULT_MAX_RP_SIZE) | 193 | if (tmp64 > DEFAULT_MAX_RP_SIZE) |
194 | tmp64 = DEFAULT_MAX_RP_SIZE; | 194 | tmp64 = DEFAULT_MAX_RP_SIZE; |
195 | sup->rp_size = cpu_to_le64(tmp64); | 195 | sup->rp_size = cpu_to_le64(tmp64); |
196 | sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); | ||
196 | 197 | ||
197 | err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); | 198 | err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); |
198 | kfree(sup); | 199 | kfree(sup); |
@@ -532,17 +533,39 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
532 | if (IS_ERR(sup)) | 533 | if (IS_ERR(sup)) |
533 | return PTR_ERR(sup); | 534 | return PTR_ERR(sup); |
534 | 535 | ||
536 | c->fmt_version = le32_to_cpu(sup->fmt_version); | ||
537 | c->ro_compat_version = le32_to_cpu(sup->ro_compat_version); | ||
538 | |||
535 | /* | 539 | /* |
536 | * The software supports all previous versions but not future versions, | 540 | * The software supports all previous versions but not future versions, |
537 | * due to the unavailability of time-travelling equipment. | 541 | * due to the unavailability of time-travelling equipment. |
538 | */ | 542 | */ |
539 | c->fmt_version = le32_to_cpu(sup->fmt_version); | ||
540 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { | 543 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { |
541 | ubifs_err("on-flash format version is %d, but software only " | 544 | struct super_block *sb = c->vfs_sb; |
542 | "supports up to version %d", c->fmt_version, | 545 | int mounting_ro = sb->s_flags & MS_RDONLY; |
543 | UBIFS_FORMAT_VERSION); | 546 | |
544 | err = -EINVAL; | 547 | ubifs_assert(!c->ro_media || mounting_ro); |
545 | goto out; | 548 | if (!mounting_ro || |
549 | c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { | ||
550 | ubifs_err("on-flash format version is w%d/r%d, but " | ||
551 | "software only supports up to version " | ||
552 | "w%d/r%d", c->fmt_version, | ||
553 | c->ro_compat_version, UBIFS_FORMAT_VERSION, | ||
554 | UBIFS_RO_COMPAT_VERSION); | ||
555 | if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { | ||
556 | ubifs_msg("only R/O mounting is possible"); | ||
557 | err = -EROFS; | ||
558 | } else | ||
559 | err = -EINVAL; | ||
560 | goto out; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * The FS is mounted R/O, and the media format is | ||
565 | * R/O-compatible with the UBIFS implementation, so we can | ||
566 | * mount. | ||
567 | */ | ||
568 | c->rw_incompat = 1; | ||
546 | } | 569 | } |
547 | 570 | ||
548 | if (c->fmt_version < 3) { | 571 | if (c->fmt_version < 3) { |
@@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
623 | c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; | 646 | c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; |
624 | c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; | 647 | c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; |
625 | c->main_first = c->leb_cnt - c->main_lebs; | 648 | c->main_first = c->leb_cnt - c->main_lebs; |
626 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | ||
627 | 649 | ||
628 | err = validate_sb(c, sup); | 650 | err = validate_sb(c, sup); |
629 | out: | 651 | out: |
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index e7bab52a1410..02feb59cefca 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention) | |||
206 | * Move this one to the end of the list to provide some | 206 | * Move this one to the end of the list to provide some |
207 | * fairness. | 207 | * fairness. |
208 | */ | 208 | */ |
209 | list_del(&c->infos_list); | 209 | list_move_tail(&c->infos_list, &ubifs_infos); |
210 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
211 | mutex_unlock(&c->umount_mutex); | 210 | mutex_unlock(&c->umount_mutex); |
212 | if (freed >= nr) | 211 | if (freed >= nr) |
213 | break; | 212 | break; |
@@ -263,8 +262,7 @@ static int kick_a_thread(void) | |||
263 | } | 262 | } |
264 | 263 | ||
265 | if (i == 1) { | 264 | if (i == 1) { |
266 | list_del(&c->infos_list); | 265 | list_move_tail(&c->infos_list, &ubifs_infos); |
267 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
268 | spin_unlock(&ubifs_infos_lock); | 266 | spin_unlock(&ubifs_infos_lock); |
269 | 267 | ||
270 | ubifs_request_bg_commit(c); | 268 | ubifs_request_bg_commit(c); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c5c98355459a..faa44f90608a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -421,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
421 | seq_printf(s, ",no_chk_data_crc"); | 421 | seq_printf(s, ",no_chk_data_crc"); |
422 | 422 | ||
423 | if (c->mount_opts.override_compr) { | 423 | if (c->mount_opts.override_compr) { |
424 | seq_printf(s, ",compr="); | 424 | seq_printf(s, ",compr=%s", |
425 | seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); | 425 | ubifs_compr_name(c->mount_opts.compr_type)); |
426 | } | 426 | } |
427 | 427 | ||
428 | return 0; | 428 | return 0; |
@@ -700,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c) | |||
700 | if (err) | 700 | if (err) |
701 | return err; | 701 | return err; |
702 | 702 | ||
703 | /* Initialize effective LEB size used in budgeting calculations */ | ||
704 | c->idx_leb_size = c->leb_size - c->max_idx_node_sz; | ||
703 | return 0; | 705 | return 0; |
704 | } | 706 | } |
705 | 707 | ||
@@ -716,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c) | |||
716 | long long tmp64; | 718 | long long tmp64; |
717 | 719 | ||
718 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 720 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
721 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | ||
719 | 722 | ||
720 | /* | 723 | /* |
721 | * Calculate total amount of FS blocks. This number is not used | 724 | * Calculate total amount of FS blocks. This number is not used |
@@ -1201,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1201 | goto out_cbuf; | 1204 | goto out_cbuf; |
1202 | 1205 | ||
1203 | /* Create background thread */ | 1206 | /* Create background thread */ |
1204 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1207 | c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); |
1205 | if (IS_ERR(c->bgt)) { | 1208 | if (IS_ERR(c->bgt)) { |
1206 | err = PTR_ERR(c->bgt); | 1209 | err = PTR_ERR(c->bgt); |
1207 | c->bgt = NULL; | 1210 | c->bgt = NULL; |
@@ -1318,11 +1321,15 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1318 | else { | 1321 | else { |
1319 | c->need_recovery = 0; | 1322 | c->need_recovery = 0; |
1320 | ubifs_msg("recovery completed"); | 1323 | ubifs_msg("recovery completed"); |
1321 | /* GC LEB has to be empty and taken at this point */ | 1324 | /* |
1322 | ubifs_assert(c->lst.taken_empty_lebs == 1); | 1325 | * GC LEB has to be empty and taken at this point. But |
1326 | * the journal head LEBs may also be accounted as | ||
1327 | * "empty taken" if they are empty. | ||
1328 | */ | ||
1329 | ubifs_assert(c->lst.taken_empty_lebs > 0); | ||
1323 | } | 1330 | } |
1324 | } else | 1331 | } else |
1325 | ubifs_assert(c->lst.taken_empty_lebs == 1); | 1332 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1326 | 1333 | ||
1327 | err = dbg_check_filesystem(c); | 1334 | err = dbg_check_filesystem(c); |
1328 | if (err) | 1335 | if (err) |
@@ -1344,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1344 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1351 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
1345 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " | 1352 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " |
1346 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); | 1353 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); |
1347 | ubifs_msg("media format: %d (latest is %d)", | 1354 | ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", |
1348 | c->fmt_version, UBIFS_FORMAT_VERSION); | 1355 | c->fmt_version, c->ro_compat_version, |
1356 | UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); | ||
1349 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | 1357 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); |
1350 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", | 1358 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", |
1351 | c->report_rp_size, c->report_rp_size >> 10); | 1359 | c->report_rp_size, c->report_rp_size >> 10); |
@@ -1485,6 +1493,15 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1485 | { | 1493 | { |
1486 | int err, lnum; | 1494 | int err, lnum; |
1487 | 1495 | ||
1496 | if (c->rw_incompat) { | ||
1497 | ubifs_err("the file-system is not R/W-compatible"); | ||
1498 | ubifs_msg("on-flash format version is w%d/r%d, but software " | ||
1499 | "only supports up to version w%d/r%d", c->fmt_version, | ||
1500 | c->ro_compat_version, UBIFS_FORMAT_VERSION, | ||
1501 | UBIFS_RO_COMPAT_VERSION); | ||
1502 | return -EROFS; | ||
1503 | } | ||
1504 | |||
1488 | mutex_lock(&c->umount_mutex); | 1505 | mutex_lock(&c->umount_mutex); |
1489 | dbg_save_space_info(c); | 1506 | dbg_save_space_info(c); |
1490 | c->remounting_rw = 1; | 1507 | c->remounting_rw = 1; |
@@ -1554,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1554 | ubifs_create_buds_lists(c); | 1571 | ubifs_create_buds_lists(c); |
1555 | 1572 | ||
1556 | /* Create background thread */ | 1573 | /* Create background thread */ |
1557 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1574 | c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); |
1558 | if (IS_ERR(c->bgt)) { | 1575 | if (IS_ERR(c->bgt)) { |
1559 | err = PTR_ERR(c->bgt); | 1576 | err = PTR_ERR(c->bgt); |
1560 | c->bgt = NULL; | 1577 | c->bgt = NULL; |
@@ -1775,7 +1792,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1775 | c->bu.buf = NULL; | 1792 | c->bu.buf = NULL; |
1776 | } | 1793 | } |
1777 | 1794 | ||
1778 | ubifs_assert(c->lst.taken_empty_lebs == 1); | 1795 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1779 | return 0; | 1796 | return 0; |
1780 | } | 1797 | } |
1781 | 1798 | ||
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index fa28a84c6a1b..f249f7b0d656 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -1252,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | |||
1252 | * splitting in the middle of the colliding sequence. Also, when | 1252 | * splitting in the middle of the colliding sequence. Also, when |
1253 | * removing the leftmost key, we would have to correct the key of the | 1253 | * removing the leftmost key, we would have to correct the key of the |
1254 | * parent node, which would introduce additional complications. Namely, | 1254 | * parent node, which would introduce additional complications. Namely, |
1255 | * if we changed the the leftmost key of the parent znode, the garbage | 1255 | * if we changed the leftmost key of the parent znode, the garbage |
1256 | * collector would be unable to find it (GC is doing this when GC'ing | 1256 | * collector would be unable to find it (GC is doing this when GC'ing |
1257 | * indexing LEBs). Although we already have an additional RB-tree where | 1257 | * indexing LEBs). Although we already have an additional RB-tree where |
1258 | * we save such changed znodes (see 'ins_clr_old_idx_znode()') until | 1258 | * we save such changed znodes (see 'ins_clr_old_idx_znode()') until |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index b25fc36cf72f..3eee07e0c495 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -36,9 +36,31 @@ | |||
36 | /* UBIFS node magic number (must not have the padding byte first or last) */ | 36 | /* UBIFS node magic number (must not have the padding byte first or last) */ |
37 | #define UBIFS_NODE_MAGIC 0x06101831 | 37 | #define UBIFS_NODE_MAGIC 0x06101831 |
38 | 38 | ||
39 | /* UBIFS on-flash format version */ | 39 | /* |
40 | * UBIFS on-flash format version. This version is increased when the on-flash | ||
41 | * format is changing. If this happens, UBIFS will support older versions as | ||
42 | * well. But older UBIFS code will not support newer formats. Format changes | ||
43 | * will be rare and only when absolutely necessary, e.g. to fix a bug or to add | ||
44 | * a new feature. | ||
45 | * | ||
46 | * UBIFS went into mainline kernel with format version 4. The older formats | ||
47 | * were development formats. | ||
48 | */ | ||
40 | #define UBIFS_FORMAT_VERSION 4 | 49 | #define UBIFS_FORMAT_VERSION 4 |
41 | 50 | ||
51 | /* | ||
52 | * Read-only compatibility version. If the UBIFS format is changed, older UBIFS | ||
53 | * implementations will not be able to mount newer formats in read-write mode. | ||
54 | * However, depending on the change, it may be possible to mount newer formats | ||
55 | * in R/O mode. This is indicated by the R/O compatibility version which is | ||
56 | * stored in the super-block. | ||
57 | * | ||
58 | * This is needed to support boot-loaders which only need R/O mounting. With | ||
59 | * this flag it is possible to do UBIFS format changes without a need to update | ||
60 | * boot-loaders. | ||
61 | */ | ||
62 | #define UBIFS_RO_COMPAT_VERSION 0 | ||
63 | |||
42 | /* Minimum logical eraseblock size in bytes */ | 64 | /* Minimum logical eraseblock size in bytes */ |
43 | #define UBIFS_MIN_LEB_SZ (15*1024) | 65 | #define UBIFS_MIN_LEB_SZ (15*1024) |
44 | 66 | ||
@@ -53,7 +75,7 @@ | |||
53 | 75 | ||
54 | /* | 76 | /* |
55 | * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes | 77 | * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes |
56 | * shorter than uncompressed data length, UBIFS preferes to leave this data | 78 | * shorter than uncompressed data length, UBIFS prefers to leave this data |
57 | * node uncompress, because it'll be read faster. | 79 | * node uncompress, because it'll be read faster. |
58 | */ | 80 | */ |
59 | #define UBIFS_MIN_COMPRESS_DIFF 64 | 81 | #define UBIFS_MIN_COMPRESS_DIFF 64 |
@@ -586,6 +608,7 @@ struct ubifs_pad_node { | |||
586 | * @padding2: reserved for future, zeroes | 608 | * @padding2: reserved for future, zeroes |
587 | * @time_gran: time granularity in nanoseconds | 609 | * @time_gran: time granularity in nanoseconds |
588 | * @uuid: UUID generated when the file system image was created | 610 | * @uuid: UUID generated when the file system image was created |
611 | * @ro_compat_version: UBIFS R/O compatibility version | ||
589 | */ | 612 | */ |
590 | struct ubifs_sb_node { | 613 | struct ubifs_sb_node { |
591 | struct ubifs_ch ch; | 614 | struct ubifs_ch ch; |
@@ -612,7 +635,8 @@ struct ubifs_sb_node { | |||
612 | __le64 rp_size; | 635 | __le64 rp_size; |
613 | __le32 time_gran; | 636 | __le32 time_gran; |
614 | __u8 uuid[16]; | 637 | __u8 uuid[16]; |
615 | __u8 padding2[3972]; | 638 | __le32 ro_compat_version; |
639 | __u8 padding2[3968]; | ||
616 | } __attribute__ ((packed)); | 640 | } __attribute__ ((packed)); |
617 | 641 | ||
618 | /** | 642 | /** |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 039a68bee29a..0a8341e14088 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -934,6 +934,7 @@ struct ubifs_debug_info; | |||
934 | * by @commit_sem | 934 | * by @commit_sem |
935 | * @cnt_lock: protects @highest_inum and @max_sqnum counters | 935 | * @cnt_lock: protects @highest_inum and @max_sqnum counters |
936 | * @fmt_version: UBIFS on-flash format version | 936 | * @fmt_version: UBIFS on-flash format version |
937 | * @ro_compat_version: R/O compatibility version | ||
937 | * @uuid: UUID from super block | 938 | * @uuid: UUID from super block |
938 | * | 939 | * |
939 | * @lhead_lnum: log head logical eraseblock number | 940 | * @lhead_lnum: log head logical eraseblock number |
@@ -966,6 +967,7 @@ struct ubifs_debug_info; | |||
966 | * recovery) | 967 | * recovery) |
967 | * @bulk_read: enable bulk-reads | 968 | * @bulk_read: enable bulk-reads |
968 | * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) | 969 | * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) |
970 | * @rw_incompat: the media is not R/W compatible | ||
969 | * | 971 | * |
970 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and | 972 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and |
971 | * @calc_idx_sz | 973 | * @calc_idx_sz |
@@ -1015,6 +1017,8 @@ struct ubifs_debug_info; | |||
1015 | * @min_io_shift: number of bits in @min_io_size minus one | 1017 | * @min_io_shift: number of bits in @min_io_size minus one |
1016 | * @leb_size: logical eraseblock size in bytes | 1018 | * @leb_size: logical eraseblock size in bytes |
1017 | * @half_leb_size: half LEB size | 1019 | * @half_leb_size: half LEB size |
1020 | * @idx_leb_size: how many bytes of an LEB are effectively available when it is | ||
1021 | * used to store indexing nodes (@leb_size - @max_idx_node_sz) | ||
1018 | * @leb_cnt: count of logical eraseblocks | 1022 | * @leb_cnt: count of logical eraseblocks |
1019 | * @max_leb_cnt: maximum count of logical eraseblocks | 1023 | * @max_leb_cnt: maximum count of logical eraseblocks |
1020 | * @old_leb_cnt: count of logical eraseblocks before re-size | 1024 | * @old_leb_cnt: count of logical eraseblocks before re-size |
@@ -1132,8 +1136,8 @@ struct ubifs_debug_info; | |||
1132 | * previous commit start | 1136 | * previous commit start |
1133 | * @uncat_list: list of un-categorized LEBs | 1137 | * @uncat_list: list of un-categorized LEBs |
1134 | * @empty_list: list of empty LEBs | 1138 | * @empty_list: list of empty LEBs |
1135 | * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) | 1139 | * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) |
1136 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) | 1140 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) |
1137 | * @freeable_cnt: number of freeable LEBs in @freeable_list | 1141 | * @freeable_cnt: number of freeable LEBs in @freeable_list |
1138 | * | 1142 | * |
1139 | * @ltab_lnum: LEB number of LPT's own lprops table | 1143 | * @ltab_lnum: LEB number of LPT's own lprops table |
@@ -1177,6 +1181,7 @@ struct ubifs_info { | |||
1177 | unsigned long long cmt_no; | 1181 | unsigned long long cmt_no; |
1178 | spinlock_t cnt_lock; | 1182 | spinlock_t cnt_lock; |
1179 | int fmt_version; | 1183 | int fmt_version; |
1184 | int ro_compat_version; | ||
1180 | unsigned char uuid[16]; | 1185 | unsigned char uuid[16]; |
1181 | 1186 | ||
1182 | int lhead_lnum; | 1187 | int lhead_lnum; |
@@ -1205,6 +1210,7 @@ struct ubifs_info { | |||
1205 | unsigned int no_chk_data_crc:1; | 1210 | unsigned int no_chk_data_crc:1; |
1206 | unsigned int bulk_read:1; | 1211 | unsigned int bulk_read:1; |
1207 | unsigned int default_compr:2; | 1212 | unsigned int default_compr:2; |
1213 | unsigned int rw_incompat:1; | ||
1208 | 1214 | ||
1209 | struct mutex tnc_mutex; | 1215 | struct mutex tnc_mutex; |
1210 | struct ubifs_zbranch zroot; | 1216 | struct ubifs_zbranch zroot; |
@@ -1253,6 +1259,7 @@ struct ubifs_info { | |||
1253 | int min_io_shift; | 1259 | int min_io_shift; |
1254 | int leb_size; | 1260 | int leb_size; |
1255 | int half_leb_size; | 1261 | int half_leb_size; |
1262 | int idx_leb_size; | ||
1256 | int leb_cnt; | 1263 | int leb_cnt; |
1257 | int max_leb_cnt; | 1264 | int max_leb_cnt; |
1258 | int old_leb_cnt; | 1265 | int old_leb_cnt; |
@@ -1500,7 +1507,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free); | |||
1500 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1507 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1501 | 1508 | ||
1502 | /* find.c */ | 1509 | /* find.c */ |
1503 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | 1510 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, |
1504 | int squeeze); | 1511 | int squeeze); |
1505 | int ubifs_find_free_leb_for_idx(struct ubifs_info *c); | 1512 | int ubifs_find_free_leb_for_idx(struct ubifs_info *c); |
1506 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 1513 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |