diff options
Diffstat (limited to 'fs')
105 files changed, 23102 insertions, 1364 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 86b203fc3c56..9f7270f36b2a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -175,9 +175,34 @@ source "fs/qnx4/Kconfig" | |||
175 | source "fs/romfs/Kconfig" | 175 | source "fs/romfs/Kconfig" |
176 | source "fs/sysv/Kconfig" | 176 | source "fs/sysv/Kconfig" |
177 | source "fs/ufs/Kconfig" | 177 | source "fs/ufs/Kconfig" |
178 | |||
179 | source "fs/exofs/Kconfig" | 178 | source "fs/exofs/Kconfig" |
180 | 179 | ||
180 | config NILFS2_FS | ||
181 | tristate "NILFS2 file system support (EXPERIMENTAL)" | ||
182 | depends on BLOCK && EXPERIMENTAL | ||
183 | select CRC32 | ||
184 | help | ||
185 | NILFS2 is a log-structured file system (LFS) supporting continuous | ||
186 | snapshotting. In addition to versioning capability of the entire | ||
187 | file system, users can even restore files mistakenly overwritten or | ||
188 | destroyed just a few seconds ago. Since this file system can keep | ||
189 | consistency like conventional LFS, it achieves quick recovery after | ||
190 | system crashes. | ||
191 | |||
192 | NILFS2 creates a number of checkpoints every few seconds or per | ||
193 | synchronous write basis (unless there is no change). Users can | ||
194 | select significant versions among continuously created checkpoints, | ||
195 | and can change them into snapshots which will be preserved for long | ||
196 | periods until they are changed back to checkpoints. Each | ||
197 | snapshot is mountable as a read-only file system concurrently with | ||
198 | its writable mount, and this feature is convenient for online backup. | ||
199 | |||
200 | Some features including atime, extended attributes, and POSIX ACLs, | ||
201 | are not supported yet. | ||
202 | |||
203 | To compile this file system support as a module, choose M here: the | ||
204 | module will be called nilfs2. If unsure, say N. | ||
205 | |||
181 | endif # MISC_FILESYSTEMS | 206 | endif # MISC_FILESYSTEMS |
182 | 207 | ||
183 | menuconfig NETWORK_FILESYSTEMS | 208 | menuconfig NETWORK_FILESYSTEMS |
diff --git a/fs/Makefile b/fs/Makefile index 70b2aed87133..af6d04700d9c 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -114,6 +114,7 @@ obj-$(CONFIG_JFS_FS) += jfs/ | |||
114 | obj-$(CONFIG_XFS_FS) += xfs/ | 114 | obj-$(CONFIG_XFS_FS) += xfs/ |
115 | obj-$(CONFIG_9P_FS) += 9p/ | 115 | obj-$(CONFIG_9P_FS) += 9p/ |
116 | obj-$(CONFIG_AFS_FS) += afs/ | 116 | obj-$(CONFIG_AFS_FS) += afs/ |
117 | obj-$(CONFIG_NILFS2_FS) += nilfs2/ | ||
117 | obj-$(CONFIG_BEFS_FS) += befs/ | 118 | obj-$(CONFIG_BEFS_FS) += befs/ |
118 | obj-$(CONFIG_HOSTFS) += hostfs/ | 119 | obj-$(CONFIG_HOSTFS) += hostfs/ |
119 | obj-$(CONFIG_HPPFS) += hppfs/ | 120 | obj-$(CONFIG_HPPFS) += hppfs/ |
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c index 49f189423063..7ad36506c256 100644 --- a/fs/afs/netdevices.c +++ b/fs/afs/netdevices.c | |||
@@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen) | |||
20 | struct net_device *dev; | 20 | struct net_device *dev; |
21 | int ret = -ENODEV; | 21 | int ret = -ENODEV; |
22 | 22 | ||
23 | if (maclen != ETH_ALEN) | 23 | BUG_ON(maclen != ETH_ALEN); |
24 | BUG(); | ||
25 | 24 | ||
26 | rtnl_lock(); | 25 | rtnl_lock(); |
27 | dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); | 26 | dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); |
diff --git a/fs/befs/debug.c b/fs/befs/debug.c index b8e304a0661e..622e73775c83 100644 --- a/fs/befs/debug.c +++ b/fs/befs/debug.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/slab.h> | ||
20 | 21 | ||
21 | #endif /* __KERNEL__ */ | 22 | #endif /* __KERNEL__ */ |
22 | 23 | ||
diff --git a/fs/befs/super.c b/fs/befs/super.c index 41f2b4d0093e..ca40f828f64d 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c | |||
@@ -8,6 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <asm/page.h> /* for PAGE_SIZE */ | ||
11 | 12 | ||
12 | #include "befs.h" | 13 | #include "befs.h" |
13 | #include "super.h" | 14 | #include "super.h" |
diff --git a/fs/buffer.c b/fs/buffer.c index 5d55a896ff78..13edf7ad3ff1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -737,7 +737,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
737 | { | 737 | { |
738 | struct buffer_head *bh; | 738 | struct buffer_head *bh; |
739 | struct list_head tmp; | 739 | struct list_head tmp; |
740 | struct address_space *mapping; | 740 | struct address_space *mapping, *prev_mapping = NULL; |
741 | int err = 0, err2; | 741 | int err = 0, err2; |
742 | 742 | ||
743 | INIT_LIST_HEAD(&tmp); | 743 | INIT_LIST_HEAD(&tmp); |
@@ -762,7 +762,18 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) | |||
762 | * contents - it is a noop if I/O is still in | 762 | * contents - it is a noop if I/O is still in |
763 | * flight on potentially older contents. | 763 | * flight on potentially older contents. |
764 | */ | 764 | */ |
765 | ll_rw_block(SWRITE_SYNC, 1, &bh); | 765 | ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); |
766 | |||
767 | /* | ||
768 | * Kick off IO for the previous mapping. Note | ||
769 | * that we will not run the very last mapping, | ||
770 | * wait_on_buffer() will do that for us | ||
771 | * through sync_buffer(). | ||
772 | */ | ||
773 | if (prev_mapping && prev_mapping != mapping) | ||
774 | blk_run_address_space(prev_mapping); | ||
775 | prev_mapping = mapping; | ||
776 | |||
766 | brelse(bh); | 777 | brelse(bh); |
767 | spin_lock(lock); | 778 | spin_lock(lock); |
768 | } | 779 | } |
@@ -1585,6 +1596,16 @@ EXPORT_SYMBOL(unmap_underlying_metadata); | |||
1585 | * locked buffer. This only can happen if someone has written the buffer | 1596 | * locked buffer. This only can happen if someone has written the buffer |
1586 | * directly, with submit_bh(). At the address_space level PageWriteback | 1597 | * directly, with submit_bh(). At the address_space level PageWriteback |
1587 | * prevents this contention from occurring. | 1598 | * prevents this contention from occurring. |
1599 | * | ||
1600 | * If block_write_full_page() is called with wbc->sync_mode == | ||
1601 | * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this | ||
1602 | * causes the writes to be flagged as synchronous writes, but the | ||
1603 | * block device queue will NOT be unplugged, since usually many pages | ||
1604 | * will be pushed to the out before the higher-level caller actually | ||
1605 | * waits for the writes to be completed. The various wait functions, | ||
1606 | * such as wait_on_writeback_range() will ultimately call sync_page() | ||
1607 | * which will ultimately call blk_run_backing_dev(), which will end up | ||
1608 | * unplugging the device queue. | ||
1588 | */ | 1609 | */ |
1589 | static int __block_write_full_page(struct inode *inode, struct page *page, | 1610 | static int __block_write_full_page(struct inode *inode, struct page *page, |
1590 | get_block_t *get_block, struct writeback_control *wbc) | 1611 | get_block_t *get_block, struct writeback_control *wbc) |
@@ -1595,7 +1616,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1595 | struct buffer_head *bh, *head; | 1616 | struct buffer_head *bh, *head; |
1596 | const unsigned blocksize = 1 << inode->i_blkbits; | 1617 | const unsigned blocksize = 1 << inode->i_blkbits; |
1597 | int nr_underway = 0; | 1618 | int nr_underway = 0; |
1598 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | 1619 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? |
1620 | WRITE_SYNC_PLUG : WRITE); | ||
1599 | 1621 | ||
1600 | BUG_ON(!PageLocked(page)); | 1622 | BUG_ON(!PageLocked(page)); |
1601 | 1623 | ||
@@ -2957,12 +2979,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) | |||
2957 | for (i = 0; i < nr; i++) { | 2979 | for (i = 0; i < nr; i++) { |
2958 | struct buffer_head *bh = bhs[i]; | 2980 | struct buffer_head *bh = bhs[i]; |
2959 | 2981 | ||
2960 | if (rw == SWRITE || rw == SWRITE_SYNC) | 2982 | if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) |
2961 | lock_buffer(bh); | 2983 | lock_buffer(bh); |
2962 | else if (!trylock_buffer(bh)) | 2984 | else if (!trylock_buffer(bh)) |
2963 | continue; | 2985 | continue; |
2964 | 2986 | ||
2965 | if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { | 2987 | if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || |
2988 | rw == SWRITE_SYNC_PLUG) { | ||
2966 | if (test_clear_buffer_dirty(bh)) { | 2989 | if (test_clear_buffer_dirty(bh)) { |
2967 | bh->b_end_io = end_buffer_write_sync; | 2990 | bh->b_end_io = end_buffer_write_sync; |
2968 | get_bh(bh); | 2991 | get_bh(bh); |
@@ -2998,7 +3021,7 @@ int sync_dirty_buffer(struct buffer_head *bh) | |||
2998 | if (test_clear_buffer_dirty(bh)) { | 3021 | if (test_clear_buffer_dirty(bh)) { |
2999 | get_bh(bh); | 3022 | get_bh(bh); |
3000 | bh->b_end_io = end_buffer_write_sync; | 3023 | bh->b_end_io = end_buffer_write_sync; |
3001 | ret = submit_bh(WRITE, bh); | 3024 | ret = submit_bh(WRITE_SYNC, bh); |
3002 | wait_on_buffer(bh); | 3025 | wait_on_buffer(bh); |
3003 | if (buffer_eopnotsupp(bh)) { | 3026 | if (buffer_eopnotsupp(bh)) { |
3004 | clear_buffer_eopnotsupp(bh); | 3027 | clear_buffer_eopnotsupp(bh); |
diff --git a/fs/direct-io.c b/fs/direct-io.c index b6d43908ff7a..da258e7249cc 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1126,7 +1126,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1126 | int acquire_i_mutex = 0; | 1126 | int acquire_i_mutex = 0; |
1127 | 1127 | ||
1128 | if (rw & WRITE) | 1128 | if (rw & WRITE) |
1129 | rw = WRITE_SYNC; | 1129 | rw = WRITE_ODIRECT; |
1130 | 1130 | ||
1131 | if (bdev) | 1131 | if (bdev) |
1132 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); | 1132 | bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index 8e0cfe44b0fc..fb3c1a21b135 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -28,6 +28,25 @@ config EXT3_FS | |||
28 | To compile this file system support as a module, choose M here: the | 28 | To compile this file system support as a module, choose M here: the |
29 | module will be called ext3. | 29 | module will be called ext3. |
30 | 30 | ||
31 | config EXT3_DEFAULTS_TO_ORDERED | ||
32 | bool "Default to 'data=ordered' in ext3 (legacy option)" | ||
33 | depends on EXT3_FS | ||
34 | help | ||
35 | If a filesystem does not explicitly specify a data ordering | ||
36 | mode, and the journal capability allowed it, ext3 used to | ||
37 | historically default to 'data=ordered'. | ||
38 | |||
39 | That was a rather unfortunate choice, because it leads to all | ||
40 | kinds of latency problems, and the 'data=writeback' mode is more | ||
41 | appropriate these days. | ||
42 | |||
43 | You should probably always answer 'n' here, and if you really | ||
44 | want to use 'data=ordered' mode, set it in the filesystem itself | ||
45 | with 'tune2fs -o journal_data_ordered'. | ||
46 | |||
47 | But if you really want to enable the legacy default, you can do | ||
48 | so by answering 'y' to this question. | ||
49 | |||
31 | config EXT3_FS_XATTR | 50 | config EXT3_FS_XATTR |
32 | bool "Ext3 extended attributes" | 51 | bool "Ext3 extended attributes" |
33 | depends on EXT3_FS | 52 | depends on EXT3_FS |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 466a332e0bd1..fcfa24361856 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1521,12 +1521,16 @@ static int ext3_ordered_writepage(struct page *page, | |||
1521 | if (!page_has_buffers(page)) { | 1521 | if (!page_has_buffers(page)) { |
1522 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 1522 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
1523 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1523 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
1524 | } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | 1524 | page_bufs = page_buffers(page); |
1525 | /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ | 1525 | } else { |
1526 | return block_write_full_page(page, NULL, wbc); | 1526 | page_bufs = page_buffers(page); |
1527 | if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE, | ||
1528 | NULL, buffer_unmapped)) { | ||
1529 | /* Provide NULL get_block() to catch bugs if buffers | ||
1530 | * weren't really mapped */ | ||
1531 | return block_write_full_page(page, NULL, wbc); | ||
1532 | } | ||
1527 | } | 1533 | } |
1528 | page_bufs = page_buffers(page); | ||
1529 | |||
1530 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1534 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1531 | 1535 | ||
1532 | if (IS_ERR(handle)) { | 1536 | if (IS_ERR(handle)) { |
@@ -1581,6 +1585,15 @@ static int ext3_writeback_writepage(struct page *page, | |||
1581 | if (ext3_journal_current_handle()) | 1585 | if (ext3_journal_current_handle()) |
1582 | goto out_fail; | 1586 | goto out_fail; |
1583 | 1587 | ||
1588 | if (page_has_buffers(page)) { | ||
1589 | if (!walk_page_buffers(NULL, page_buffers(page), 0, | ||
1590 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | ||
1591 | /* Provide NULL get_block() to catch bugs if buffers | ||
1592 | * weren't really mapped */ | ||
1593 | return block_write_full_page(page, NULL, wbc); | ||
1594 | } | ||
1595 | } | ||
1596 | |||
1584 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1597 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1585 | if (IS_ERR(handle)) { | 1598 | if (IS_ERR(handle)) { |
1586 | ret = PTR_ERR(handle); | 1599 | ret = PTR_ERR(handle); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 9e5b8e387e1e..599dbfe504c3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -44,6 +44,12 @@ | |||
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "namei.h" | 45 | #include "namei.h" |
46 | 46 | ||
47 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED | ||
48 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA | ||
49 | #else | ||
50 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA | ||
51 | #endif | ||
52 | |||
47 | static int ext3_load_journal(struct super_block *, struct ext3_super_block *, | 53 | static int ext3_load_journal(struct super_block *, struct ext3_super_block *, |
48 | unsigned long journal_devnum); | 54 | unsigned long journal_devnum); |
49 | static int ext3_create_journal(struct super_block *, struct ext3_super_block *, | 55 | static int ext3_create_journal(struct super_block *, struct ext3_super_block *, |
@@ -1919,7 +1925,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1919 | cope, else JOURNAL_DATA */ | 1925 | cope, else JOURNAL_DATA */ |
1920 | if (journal_check_available_features | 1926 | if (journal_check_available_features |
1921 | (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) | 1927 | (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) |
1922 | set_opt(sbi->s_mount_opt, ORDERED_DATA); | 1928 | set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); |
1923 | else | 1929 | else |
1924 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); | 1930 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); |
1925 | break; | 1931 | break; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ac77d8b8251d..6132353dcf62 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -342,7 +342,7 @@ static int ext4_valid_extent_idx(struct inode *inode, | |||
342 | ext4_fsblk_t block = idx_pblock(ext_idx); | 342 | ext4_fsblk_t block = idx_pblock(ext_idx); |
343 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 343 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
344 | if (unlikely(block < le32_to_cpu(es->s_first_data_block) || | 344 | if (unlikely(block < le32_to_cpu(es->s_first_data_block) || |
345 | (block > ext4_blocks_count(es)))) | 345 | (block >= ext4_blocks_count(es)))) |
346 | return 0; | 346 | return 0; |
347 | else | 347 | else |
348 | return 1; | 348 | return 1; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a2e7952bc5f9..c6bd6ced3bb7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -372,16 +372,16 @@ static int ext4_block_to_path(struct inode *inode, | |||
372 | } | 372 | } |
373 | 373 | ||
374 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 374 | static int __ext4_check_blockref(const char *function, struct inode *inode, |
375 | unsigned int *p, unsigned int max) { | 375 | __le32 *p, unsigned int max) { |
376 | 376 | ||
377 | unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); | 377 | unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); |
378 | unsigned int *bref = p; | 378 | __le32 *bref = p; |
379 | while (bref < p+max) { | 379 | while (bref < p+max) { |
380 | if (unlikely(*bref >= maxblocks)) { | 380 | if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { |
381 | ext4_error(inode->i_sb, function, | 381 | ext4_error(inode->i_sb, function, |
382 | "block reference %u >= max (%u) " | 382 | "block reference %u >= max (%u) " |
383 | "in inode #%lu, offset=%d", | 383 | "in inode #%lu, offset=%d", |
384 | *bref, maxblocks, | 384 | le32_to_cpu(*bref), maxblocks, |
385 | inode->i_ino, (int)(bref-p)); | 385 | inode->i_ino, (int)(bref-p)); |
386 | return -EIO; | 386 | return -EIO; |
387 | } | 387 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9987bba99db3..2958f4e6f222 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2508,6 +2508,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2508 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) | 2508 | if (EXT4_BLOCKS_PER_GROUP(sb) == 0) |
2509 | goto cantfind_ext4; | 2509 | goto cantfind_ext4; |
2510 | 2510 | ||
2511 | /* check blocks count against device size */ | ||
2512 | blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; | ||
2513 | if (blocks_count && ext4_blocks_count(es) > blocks_count) { | ||
2514 | printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " | ||
2515 | "exceeds size of device (%llu blocks)\n", | ||
2516 | ext4_blocks_count(es), blocks_count); | ||
2517 | goto failed_mount; | ||
2518 | } | ||
2519 | |||
2511 | /* | 2520 | /* |
2512 | * It makes no sense for the first data block to be beyond the end | 2521 | * It makes no sense for the first data block to be beyond the end |
2513 | * of the filesystem. | 2522 | * of the filesystem. |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index f8077b9c8981..a8e8513a78a9 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -351,8 +351,13 @@ void journal_commit_transaction(journal_t *journal) | |||
351 | spin_lock(&journal->j_state_lock); | 351 | spin_lock(&journal->j_state_lock); |
352 | commit_transaction->t_state = T_LOCKED; | 352 | commit_transaction->t_state = T_LOCKED; |
353 | 353 | ||
354 | /* | ||
355 | * Use plugged writes here, since we want to submit several before | ||
356 | * we unplug the device. We don't do explicit unplugging in here, | ||
357 | * instead we rely on sync_buffer() doing the unplug for us. | ||
358 | */ | ||
354 | if (commit_transaction->t_synchronous_commit) | 359 | if (commit_transaction->t_synchronous_commit) |
355 | write_op = WRITE_SYNC; | 360 | write_op = WRITE_SYNC_PLUG; |
356 | spin_lock(&commit_transaction->t_handle_lock); | 361 | spin_lock(&commit_transaction->t_handle_lock); |
357 | while (commit_transaction->t_updates) { | 362 | while (commit_transaction->t_updates) { |
358 | DEFINE_WAIT(wait); | 363 | DEFINE_WAIT(wait); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4ea72377c7a2..073c8c3df7cd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -138,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
138 | set_buffer_ordered(bh); | 138 | set_buffer_ordered(bh); |
139 | barrier_done = 1; | 139 | barrier_done = 1; |
140 | } | 140 | } |
141 | ret = submit_bh(WRITE_SYNC, bh); | 141 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
142 | if (barrier_done) | 142 | if (barrier_done) |
143 | clear_buffer_ordered(bh); | 143 | clear_buffer_ordered(bh); |
144 | 144 | ||
@@ -159,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
159 | lock_buffer(bh); | 159 | lock_buffer(bh); |
160 | set_buffer_uptodate(bh); | 160 | set_buffer_uptodate(bh); |
161 | clear_buffer_dirty(bh); | 161 | clear_buffer_dirty(bh); |
162 | ret = submit_bh(WRITE_SYNC, bh); | 162 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
163 | } | 163 | } |
164 | *cbh = bh; | 164 | *cbh = bh; |
165 | return ret; | 165 | return ret; |
@@ -190,7 +190,7 @@ retry: | |||
190 | set_buffer_uptodate(bh); | 190 | set_buffer_uptodate(bh); |
191 | bh->b_end_io = journal_end_buffer_io_sync; | 191 | bh->b_end_io = journal_end_buffer_io_sync; |
192 | 192 | ||
193 | ret = submit_bh(WRITE_SYNC, bh); | 193 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
194 | if (ret) { | 194 | if (ret) { |
195 | unlock_buffer(bh); | 195 | unlock_buffer(bh); |
196 | return ret; | 196 | return ret; |
@@ -402,8 +402,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
402 | spin_lock(&journal->j_state_lock); | 402 | spin_lock(&journal->j_state_lock); |
403 | commit_transaction->t_state = T_LOCKED; | 403 | commit_transaction->t_state = T_LOCKED; |
404 | 404 | ||
405 | /* | ||
406 | * Use plugged writes here, since we want to submit several before | ||
407 | * we unplug the device. We don't do explicit unplugging in here, | ||
408 | * instead we rely on sync_buffer() doing the unplug for us. | ||
409 | */ | ||
405 | if (commit_transaction->t_synchronous_commit) | 410 | if (commit_transaction->t_synchronous_commit) |
406 | write_op = WRITE_SYNC; | 411 | write_op = WRITE_SYNC_PLUG; |
407 | stats.u.run.rs_wait = commit_transaction->t_max_wait; | 412 | stats.u.run.rs_wait = commit_transaction->t_max_wait; |
408 | stats.u.run.rs_locked = jiffies; | 413 | stats.u.run.rs_locked = jiffies; |
409 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, | 414 | stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start, |
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 77ccf8cb0823..043740dde20c 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -38,12 +38,12 @@ static int jffs2_acl_count(size_t size) | |||
38 | size_t s; | 38 | size_t s; |
39 | 39 | ||
40 | size -= sizeof(struct jffs2_acl_header); | 40 | size -= sizeof(struct jffs2_acl_header); |
41 | s = size - 4 * sizeof(struct jffs2_acl_entry_short); | 41 | if (size < 4 * sizeof(struct jffs2_acl_entry_short)) { |
42 | if (s < 0) { | ||
43 | if (size % sizeof(struct jffs2_acl_entry_short)) | 42 | if (size % sizeof(struct jffs2_acl_entry_short)) |
44 | return -1; | 43 | return -1; |
45 | return size / sizeof(struct jffs2_acl_entry_short); | 44 | return size / sizeof(struct jffs2_acl_entry_short); |
46 | } else { | 45 | } else { |
46 | s = size - 4 * sizeof(struct jffs2_acl_entry_short); | ||
47 | if (s % sizeof(struct jffs2_acl_entry)) | 47 | if (s % sizeof(struct jffs2_acl_entry)) |
48 | return -1; | 48 | return -1; |
49 | return s / sizeof(struct jffs2_acl_entry) + 4; | 49 | return s / sizeof(struct jffs2_acl_entry) + 4; |
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index f9211252b5f1..9eff2bdae8a7 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c | |||
@@ -284,10 +284,9 @@ void jffs2_free_inode_cache(struct jffs2_inode_cache *x) | |||
284 | struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) | 284 | struct jffs2_xattr_datum *jffs2_alloc_xattr_datum(void) |
285 | { | 285 | { |
286 | struct jffs2_xattr_datum *xd; | 286 | struct jffs2_xattr_datum *xd; |
287 | xd = kmem_cache_alloc(xattr_datum_cache, GFP_KERNEL); | 287 | xd = kmem_cache_zalloc(xattr_datum_cache, GFP_KERNEL); |
288 | dbg_memalloc("%p\n", xd); | 288 | dbg_memalloc("%p\n", xd); |
289 | 289 | ||
290 | memset(xd, 0, sizeof(struct jffs2_xattr_datum)); | ||
291 | xd->class = RAWNODE_CLASS_XATTR_DATUM; | 290 | xd->class = RAWNODE_CLASS_XATTR_DATUM; |
292 | xd->node = (void *)xd; | 291 | xd->node = (void *)xd; |
293 | INIT_LIST_HEAD(&xd->xindex); | 292 | INIT_LIST_HEAD(&xd->xindex); |
@@ -303,10 +302,9 @@ void jffs2_free_xattr_datum(struct jffs2_xattr_datum *xd) | |||
303 | struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) | 302 | struct jffs2_xattr_ref *jffs2_alloc_xattr_ref(void) |
304 | { | 303 | { |
305 | struct jffs2_xattr_ref *ref; | 304 | struct jffs2_xattr_ref *ref; |
306 | ref = kmem_cache_alloc(xattr_ref_cache, GFP_KERNEL); | 305 | ref = kmem_cache_zalloc(xattr_ref_cache, GFP_KERNEL); |
307 | dbg_memalloc("%p\n", ref); | 306 | dbg_memalloc("%p\n", ref); |
308 | 307 | ||
309 | memset(ref, 0, sizeof(struct jffs2_xattr_ref)); | ||
310 | ref->class = RAWNODE_CLASS_XATTR_REF; | 308 | ref->class = RAWNODE_CLASS_XATTR_REF; |
311 | ref->node = (void *)ref; | 309 | ref->node = (void *)ref; |
312 | return ref; | 310 | return ref; |
diff --git a/fs/libfs.c b/fs/libfs.c index 4910a36f516e..cd223190c4e9 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -575,6 +575,21 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, | |||
575 | * possibly a read which collects the result - which is stored in a | 575 | * possibly a read which collects the result - which is stored in a |
576 | * file-local buffer. | 576 | * file-local buffer. |
577 | */ | 577 | */ |
578 | |||
579 | void simple_transaction_set(struct file *file, size_t n) | ||
580 | { | ||
581 | struct simple_transaction_argresp *ar = file->private_data; | ||
582 | |||
583 | BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); | ||
584 | |||
585 | /* | ||
586 | * The barrier ensures that ar->size will really remain zero until | ||
587 | * ar->data is ready for reading. | ||
588 | */ | ||
589 | smp_mb(); | ||
590 | ar->size = n; | ||
591 | } | ||
592 | |||
578 | char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) | 593 | char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) |
579 | { | 594 | { |
580 | struct simple_transaction_argresp *ar; | 595 | struct simple_transaction_argresp *ar; |
@@ -820,6 +835,7 @@ EXPORT_SYMBOL(simple_sync_file); | |||
820 | EXPORT_SYMBOL(simple_unlink); | 835 | EXPORT_SYMBOL(simple_unlink); |
821 | EXPORT_SYMBOL(simple_read_from_buffer); | 836 | EXPORT_SYMBOL(simple_read_from_buffer); |
822 | EXPORT_SYMBOL(memory_read_from_buffer); | 837 | EXPORT_SYMBOL(memory_read_from_buffer); |
838 | EXPORT_SYMBOL(simple_transaction_set); | ||
823 | EXPORT_SYMBOL(simple_transaction_get); | 839 | EXPORT_SYMBOL(simple_transaction_get); |
824 | EXPORT_SYMBOL(simple_transaction_read); | 840 | EXPORT_SYMBOL(simple_transaction_read); |
825 | EXPORT_SYMBOL(simple_transaction_release); | 841 | EXPORT_SYMBOL(simple_transaction_release); |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 763b78a6e9de..83ee34203bd7 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -426,8 +426,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
426 | ret = nlm_granted; | 426 | ret = nlm_granted; |
427 | goto out; | 427 | goto out; |
428 | case -EAGAIN: | 428 | case -EAGAIN: |
429 | /* | ||
430 | * If this is a blocking request for an | ||
431 | * already pending lock request then we need | ||
432 | * to put it back on lockd's block list | ||
433 | */ | ||
434 | if (wait) | ||
435 | break; | ||
429 | ret = nlm_lck_denied; | 436 | ret = nlm_lck_denied; |
430 | break; | 437 | goto out; |
431 | case FILE_LOCK_DEFERRED: | 438 | case FILE_LOCK_DEFERRED: |
432 | if (wait) | 439 | if (wait) |
433 | break; | 440 | break; |
@@ -443,10 +450,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
443 | goto out; | 450 | goto out; |
444 | } | 451 | } |
445 | 452 | ||
446 | ret = nlm_lck_denied; | ||
447 | if (!wait) | ||
448 | goto out; | ||
449 | |||
450 | ret = nlm_lck_blocked; | 453 | ret = nlm_lck_blocked; |
451 | 454 | ||
452 | /* Append to list of blocked */ | 455 | /* Append to list of blocked */ |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3523b895eb4b..5a97bcfe03e5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -516,8 +516,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
516 | goto out_unlock; | 516 | goto out_unlock; |
517 | 517 | ||
518 | ret = nfs_updatepage(filp, page, 0, pagelen); | 518 | ret = nfs_updatepage(filp, page, 0, pagelen); |
519 | if (ret == 0) | ||
520 | ret = pagelen; | ||
521 | out_unlock: | 519 | out_unlock: |
522 | unlock_page(page); | 520 | unlock_page(page); |
523 | if (ret) | 521 | if (ret) |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 82eaadbff408..6717200923fe 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1228,7 +1228,6 @@ static int nfs_parse_mount_options(char *raw, | |||
1228 | goto out_nomem; | 1228 | goto out_nomem; |
1229 | token = match_token(string, | 1229 | token = match_token(string, |
1230 | nfs_xprt_protocol_tokens, args); | 1230 | nfs_xprt_protocol_tokens, args); |
1231 | kfree(string); | ||
1232 | 1231 | ||
1233 | switch (token) { | 1232 | switch (token) { |
1234 | case Opt_xprt_udp: | 1233 | case Opt_xprt_udp: |
@@ -1258,6 +1257,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1258 | goto out_nomem; | 1257 | goto out_nomem; |
1259 | token = match_token(string, | 1258 | token = match_token(string, |
1260 | nfs_xprt_protocol_tokens, args); | 1259 | nfs_xprt_protocol_tokens, args); |
1260 | kfree(string); | ||
1261 | 1261 | ||
1262 | switch (token) { | 1262 | switch (token) { |
1263 | case Opt_xprt_udp: | 1263 | case Opt_xprt_udp: |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 44d7d04dab95..503b9da159a3 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config NFSD | 1 | config NFSD |
2 | tristate "NFS server support" | 2 | tristate "NFS server support" |
3 | depends on INET | 3 | depends on INET |
4 | depends on FILE_LOCKING | ||
4 | select LOCKD | 5 | select LOCKD |
5 | select SUNRPC | 6 | select SUNRPC |
6 | select EXPORTFS | 7 | select EXPORTFS |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 9dbd2eb91281..7c9fe838f038 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/unistd.h> | 18 | #include <linux/unistd.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/major.h> | 20 | #include <linux/major.h> |
21 | #include <linux/magic.h> | ||
21 | 22 | ||
22 | #include <linux/sunrpc/svc.h> | 23 | #include <linux/sunrpc/svc.h> |
23 | #include <linux/nfsd/nfsd.h> | 24 | #include <linux/nfsd/nfsd.h> |
@@ -202,6 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, | |||
202 | struct nfsd3_writeres *resp) | 203 | struct nfsd3_writeres *resp) |
203 | { | 204 | { |
204 | __be32 nfserr; | 205 | __be32 nfserr; |
206 | unsigned long cnt = argp->len; | ||
205 | 207 | ||
206 | dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", | 208 | dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", |
207 | SVCFH_fmt(&argp->fh), | 209 | SVCFH_fmt(&argp->fh), |
@@ -214,9 +216,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, | |||
214 | nfserr = nfsd_write(rqstp, &resp->fh, NULL, | 216 | nfserr = nfsd_write(rqstp, &resp->fh, NULL, |
215 | argp->offset, | 217 | argp->offset, |
216 | rqstp->rq_vec, argp->vlen, | 218 | rqstp->rq_vec, argp->vlen, |
217 | argp->len, | 219 | &cnt, |
218 | &resp->committed); | 220 | &resp->committed); |
219 | resp->count = argp->count; | 221 | resp->count = cnt; |
220 | RETURN_STATUS(nfserr); | 222 | RETURN_STATUS(nfserr); |
221 | } | 223 | } |
222 | 224 | ||
@@ -569,7 +571,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
569 | struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; | 571 | struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; |
570 | 572 | ||
571 | /* Note that we don't care for remote fs's here */ | 573 | /* Note that we don't care for remote fs's here */ |
572 | if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) { | 574 | if (sb->s_magic == MSDOS_SUPER_MAGIC) { |
573 | resp->f_properties = NFS3_FSF_BILLYBOY; | 575 | resp->f_properties = NFS3_FSF_BILLYBOY; |
574 | } | 576 | } |
575 | resp->f_maxfilesize = sb->s_maxbytes; | 577 | resp->f_maxfilesize = sb->s_maxbytes; |
@@ -610,7 +612,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
610 | resp->p_link_max = EXT2_LINK_MAX; | 612 | resp->p_link_max = EXT2_LINK_MAX; |
611 | resp->p_name_max = EXT2_NAME_LEN; | 613 | resp->p_name_max = EXT2_NAME_LEN; |
612 | break; | 614 | break; |
613 | case 0x4d44: /* MSDOS_SUPER_MAGIC */ | 615 | case MSDOS_SUPER_MAGIC: |
614 | resp->p_case_insensitive = 1; | 616 | resp->p_case_insensitive = 1; |
615 | resp->p_case_preserving = 0; | 617 | resp->p_case_preserving = 0; |
616 | break; | 618 | break; |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c464181b5994..290289bd44f7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -218,7 +218,7 @@ static int | |||
218 | encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | 218 | encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) |
219 | { | 219 | { |
220 | __be32 *p; | 220 | __be32 *p; |
221 | int len = cb_rec->cbr_fhlen; | 221 | int len = cb_rec->cbr_fh.fh_size; |
222 | 222 | ||
223 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); | 223 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); |
224 | WRITE32(OP_CB_RECALL); | 224 | WRITE32(OP_CB_RECALL); |
@@ -226,7 +226,7 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | |||
226 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); | 226 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); |
227 | WRITE32(cb_rec->cbr_trunc); | 227 | WRITE32(cb_rec->cbr_trunc); |
228 | WRITE32(len); | 228 | WRITE32(len); |
229 | WRITEMEM(cb_rec->cbr_fhval, len); | 229 | WRITEMEM(&cb_rec->cbr_fh.fh_base, len); |
230 | return 0; | 230 | return 0; |
231 | } | 231 | } |
232 | 232 | ||
@@ -361,9 +361,8 @@ static struct rpc_program cb_program = { | |||
361 | /* Reference counting, callback cleanup, etc., all look racy as heck. | 361 | /* Reference counting, callback cleanup, etc., all look racy as heck. |
362 | * And why is cb_set an atomic? */ | 362 | * And why is cb_set an atomic? */ |
363 | 363 | ||
364 | static int do_probe_callback(void *data) | 364 | static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) |
365 | { | 365 | { |
366 | struct nfs4_client *clp = data; | ||
367 | struct sockaddr_in addr; | 366 | struct sockaddr_in addr; |
368 | struct nfs4_callback *cb = &clp->cl_callback; | 367 | struct nfs4_callback *cb = &clp->cl_callback; |
369 | struct rpc_timeout timeparms = { | 368 | struct rpc_timeout timeparms = { |
@@ -384,17 +383,10 @@ static int do_probe_callback(void *data) | |||
384 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), | 383 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
385 | .client_name = clp->cl_principal, | 384 | .client_name = clp->cl_principal, |
386 | }; | 385 | }; |
387 | struct rpc_message msg = { | ||
388 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
389 | .rpc_argp = clp, | ||
390 | }; | ||
391 | struct rpc_clnt *client; | 386 | struct rpc_clnt *client; |
392 | int status; | ||
393 | 387 | ||
394 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) { | 388 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) |
395 | status = nfserr_cb_path_down; | 389 | return ERR_PTR(-EINVAL); |
396 | goto out_err; | ||
397 | } | ||
398 | 390 | ||
399 | /* Initialize address */ | 391 | /* Initialize address */ |
400 | memset(&addr, 0, sizeof(addr)); | 392 | memset(&addr, 0, sizeof(addr)); |
@@ -404,9 +396,29 @@ static int do_probe_callback(void *data) | |||
404 | 396 | ||
405 | /* Create RPC client */ | 397 | /* Create RPC client */ |
406 | client = rpc_create(&args); | 398 | client = rpc_create(&args); |
399 | if (IS_ERR(client)) | ||
400 | dprintk("NFSD: couldn't create callback client: %ld\n", | ||
401 | PTR_ERR(client)); | ||
402 | return client; | ||
403 | |||
404 | } | ||
405 | |||
406 | static int do_probe_callback(void *data) | ||
407 | { | ||
408 | struct nfs4_client *clp = data; | ||
409 | struct nfs4_callback *cb = &clp->cl_callback; | ||
410 | struct rpc_message msg = { | ||
411 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
412 | .rpc_argp = clp, | ||
413 | }; | ||
414 | struct rpc_clnt *client; | ||
415 | int status; | ||
416 | |||
417 | client = setup_callback_client(clp); | ||
407 | if (IS_ERR(client)) { | 418 | if (IS_ERR(client)) { |
408 | dprintk("NFSD: couldn't create callback client\n"); | ||
409 | status = PTR_ERR(client); | 419 | status = PTR_ERR(client); |
420 | dprintk("NFSD: couldn't create callback client: %d\n", | ||
421 | status); | ||
410 | goto out_err; | 422 | goto out_err; |
411 | } | 423 | } |
412 | 424 | ||
@@ -422,10 +434,10 @@ static int do_probe_callback(void *data) | |||
422 | out_release_client: | 434 | out_release_client: |
423 | rpc_shutdown_client(client); | 435 | rpc_shutdown_client(client); |
424 | out_err: | 436 | out_err: |
425 | dprintk("NFSD: warning: no callback path to client %.*s\n", | 437 | dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", |
426 | (int)clp->cl_name.len, clp->cl_name.data); | 438 | (int)clp->cl_name.len, clp->cl_name.data, status); |
427 | put_nfs4_client(clp); | 439 | put_nfs4_client(clp); |
428 | return status; | 440 | return 0; |
429 | } | 441 | } |
430 | 442 | ||
431 | /* | 443 | /* |
@@ -451,7 +463,6 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
451 | 463 | ||
452 | /* | 464 | /* |
453 | * called with dp->dl_count inc'ed. | 465 | * called with dp->dl_count inc'ed. |
454 | * nfs4_lock_state() may or may not have been called. | ||
455 | */ | 466 | */ |
456 | void | 467 | void |
457 | nfsd4_cb_recall(struct nfs4_delegation *dp) | 468 | nfsd4_cb_recall(struct nfs4_delegation *dp) |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9fa60a3ad48c..b2883e9c6381 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -93,6 +93,21 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
93 | open->op_truncate = 0; | 93 | open->op_truncate = 0; |
94 | 94 | ||
95 | if (open->op_create) { | 95 | if (open->op_create) { |
96 | /* FIXME: check session persistence and pnfs flags. | ||
97 | * The nfsv4.1 spec requires the following semantics: | ||
98 | * | ||
99 | * Persistent | pNFS | Server REQUIRED | Client Allowed | ||
100 | * Reply Cache | server | | | ||
101 | * -------------+--------+-----------------+-------------------- | ||
102 | * no | no | EXCLUSIVE4_1 | EXCLUSIVE4_1 | ||
103 | * | | | (SHOULD) | ||
104 | * | | and EXCLUSIVE4 | or EXCLUSIVE4 | ||
105 | * | | | (SHOULD NOT) | ||
106 | * no | yes | EXCLUSIVE4_1 | EXCLUSIVE4_1 | ||
107 | * yes | no | GUARDED4 | GUARDED4 | ||
108 | * yes | yes | GUARDED4 | GUARDED4 | ||
109 | */ | ||
110 | |||
96 | /* | 111 | /* |
97 | * Note: create modes (UNCHECKED,GUARDED...) are the same | 112 | * Note: create modes (UNCHECKED,GUARDED...) are the same |
98 | * in NFSv4 as in v3. | 113 | * in NFSv4 as in v3. |
@@ -103,11 +118,13 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
103 | (u32 *)open->op_verf.data, | 118 | (u32 *)open->op_verf.data, |
104 | &open->op_truncate, &created); | 119 | &open->op_truncate, &created); |
105 | 120 | ||
106 | /* If we ever decide to use different attrs to store the | 121 | /* |
107 | * verifier in nfsd_create_v3, then we'll need to change this | 122 | * Following rfc 3530 14.2.16, use the returned bitmask |
123 | * to indicate which attributes we used to store the | ||
124 | * verifier: | ||
108 | */ | 125 | */ |
109 | if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) | 126 | if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) |
110 | open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | | 127 | open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | |
111 | FATTR4_WORD1_TIME_MODIFY); | 128 | FATTR4_WORD1_TIME_MODIFY); |
112 | } else { | 129 | } else { |
113 | status = nfsd_lookup(rqstp, current_fh, | 130 | status = nfsd_lookup(rqstp, current_fh, |
@@ -118,13 +135,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
118 | goto out; | 135 | goto out; |
119 | 136 | ||
120 | set_change_info(&open->op_cinfo, current_fh); | 137 | set_change_info(&open->op_cinfo, current_fh); |
121 | |||
122 | /* set reply cache */ | ||
123 | fh_dup2(current_fh, &resfh); | 138 | fh_dup2(current_fh, &resfh); |
124 | open->op_stateowner->so_replay.rp_openfh_len = resfh.fh_handle.fh_size; | ||
125 | memcpy(open->op_stateowner->so_replay.rp_openfh, | ||
126 | &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); | ||
127 | 139 | ||
140 | /* set reply cache */ | ||
141 | fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, | ||
142 | &resfh.fh_handle); | ||
128 | if (!created) | 143 | if (!created) |
129 | status = do_open_permission(rqstp, current_fh, open, | 144 | status = do_open_permission(rqstp, current_fh, open, |
130 | NFSD_MAY_NOP); | 145 | NFSD_MAY_NOP); |
@@ -150,10 +165,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ | |||
150 | memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); | 165 | memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); |
151 | 166 | ||
152 | /* set replay cache */ | 167 | /* set replay cache */ |
153 | open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; | 168 | fh_copy_shallow(&open->op_stateowner->so_replay.rp_openfh, |
154 | memcpy(open->op_stateowner->so_replay.rp_openfh, | 169 | ¤t_fh->fh_handle); |
155 | ¤t_fh->fh_handle.fh_base, | ||
156 | current_fh->fh_handle.fh_size); | ||
157 | 170 | ||
158 | open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && | 171 | open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && |
159 | (open->op_iattr.ia_size == 0); | 172 | (open->op_iattr.ia_size == 0); |
@@ -164,12 +177,23 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ | |||
164 | return status; | 177 | return status; |
165 | } | 178 | } |
166 | 179 | ||
180 | static void | ||
181 | copy_clientid(clientid_t *clid, struct nfsd4_session *session) | ||
182 | { | ||
183 | struct nfsd4_sessionid *sid = | ||
184 | (struct nfsd4_sessionid *)session->se_sessionid.data; | ||
185 | |||
186 | clid->cl_boot = sid->clientid.cl_boot; | ||
187 | clid->cl_id = sid->clientid.cl_id; | ||
188 | } | ||
167 | 189 | ||
168 | static __be32 | 190 | static __be32 |
169 | nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 191 | nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
170 | struct nfsd4_open *open) | 192 | struct nfsd4_open *open) |
171 | { | 193 | { |
172 | __be32 status; | 194 | __be32 status; |
195 | struct nfsd4_compoundres *resp; | ||
196 | |||
173 | dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", | 197 | dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", |
174 | (int)open->op_fname.len, open->op_fname.data, | 198 | (int)open->op_fname.len, open->op_fname.data, |
175 | open->op_stateowner); | 199 | open->op_stateowner); |
@@ -178,16 +202,19 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
178 | if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) | 202 | if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) |
179 | return nfserr_inval; | 203 | return nfserr_inval; |
180 | 204 | ||
205 | if (nfsd4_has_session(cstate)) | ||
206 | copy_clientid(&open->op_clientid, cstate->session); | ||
207 | |||
181 | nfs4_lock_state(); | 208 | nfs4_lock_state(); |
182 | 209 | ||
183 | /* check seqid for replay. set nfs4_owner */ | 210 | /* check seqid for replay. set nfs4_owner */ |
184 | status = nfsd4_process_open1(open); | 211 | resp = rqstp->rq_resp; |
212 | status = nfsd4_process_open1(&resp->cstate, open); | ||
185 | if (status == nfserr_replay_me) { | 213 | if (status == nfserr_replay_me) { |
186 | struct nfs4_replay *rp = &open->op_stateowner->so_replay; | 214 | struct nfs4_replay *rp = &open->op_stateowner->so_replay; |
187 | fh_put(&cstate->current_fh); | 215 | fh_put(&cstate->current_fh); |
188 | cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; | 216 | fh_copy_shallow(&cstate->current_fh.fh_handle, |
189 | memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, | 217 | &rp->rp_openfh); |
190 | rp->rp_openfh_len); | ||
191 | status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); | 218 | status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); |
192 | if (status) | 219 | if (status) |
193 | dprintk("nfsd4_open: replay failed" | 220 | dprintk("nfsd4_open: replay failed" |
@@ -209,10 +236,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
209 | 236 | ||
210 | switch (open->op_claim_type) { | 237 | switch (open->op_claim_type) { |
211 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: | 238 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: |
212 | status = nfserr_inval; | ||
213 | if (open->op_create) | ||
214 | goto out; | ||
215 | /* fall through */ | ||
216 | case NFS4_OPEN_CLAIM_NULL: | 239 | case NFS4_OPEN_CLAIM_NULL: |
217 | /* | 240 | /* |
218 | * (1) set CURRENT_FH to the file being opened, | 241 | * (1) set CURRENT_FH to the file being opened, |
@@ -455,8 +478,9 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
455 | if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) | 478 | if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) |
456 | return nfserr_inval; | 479 | return nfserr_inval; |
457 | 480 | ||
458 | getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; | 481 | getattr->ga_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); |
459 | getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; | 482 | getattr->ga_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); |
483 | getattr->ga_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); | ||
460 | 484 | ||
461 | getattr->ga_fhp = &cstate->current_fh; | 485 | getattr->ga_fhp = &cstate->current_fh; |
462 | return nfs_ok; | 486 | return nfs_ok; |
@@ -520,9 +544,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
520 | 544 | ||
521 | nfs4_lock_state(); | 545 | nfs4_lock_state(); |
522 | /* check stateid */ | 546 | /* check stateid */ |
523 | if ((status = nfs4_preprocess_stateid_op(&cstate->current_fh, | 547 | if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid, |
524 | &read->rd_stateid, | 548 | RD_STATE, &read->rd_filp))) { |
525 | CHECK_FH | RD_STATE, &read->rd_filp))) { | ||
526 | dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); | 549 | dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); |
527 | goto out; | 550 | goto out; |
528 | } | 551 | } |
@@ -548,8 +571,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
548 | if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) | 571 | if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) |
549 | return nfserr_inval; | 572 | return nfserr_inval; |
550 | 573 | ||
551 | readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; | 574 | readdir->rd_bmval[0] &= nfsd_suppattrs0(cstate->minorversion); |
552 | readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; | 575 | readdir->rd_bmval[1] &= nfsd_suppattrs1(cstate->minorversion); |
576 | readdir->rd_bmval[2] &= nfsd_suppattrs2(cstate->minorversion); | ||
553 | 577 | ||
554 | if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || | 578 | if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || |
555 | (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) | 579 | (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) |
@@ -653,8 +677,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
653 | 677 | ||
654 | if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { | 678 | if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { |
655 | nfs4_lock_state(); | 679 | nfs4_lock_state(); |
656 | status = nfs4_preprocess_stateid_op(&cstate->current_fh, | 680 | status = nfs4_preprocess_stateid_op(cstate, |
657 | &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL); | 681 | &setattr->sa_stateid, WR_STATE, NULL); |
658 | nfs4_unlock_state(); | 682 | nfs4_unlock_state(); |
659 | if (status) { | 683 | if (status) { |
660 | dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); | 684 | dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); |
@@ -685,6 +709,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
685 | struct file *filp = NULL; | 709 | struct file *filp = NULL; |
686 | u32 *p; | 710 | u32 *p; |
687 | __be32 status = nfs_ok; | 711 | __be32 status = nfs_ok; |
712 | unsigned long cnt; | ||
688 | 713 | ||
689 | /* no need to check permission - this will be done in nfsd_write() */ | 714 | /* no need to check permission - this will be done in nfsd_write() */ |
690 | 715 | ||
@@ -692,8 +717,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
692 | return nfserr_inval; | 717 | return nfserr_inval; |
693 | 718 | ||
694 | nfs4_lock_state(); | 719 | nfs4_lock_state(); |
695 | status = nfs4_preprocess_stateid_op(&cstate->current_fh, stateid, | 720 | status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp); |
696 | CHECK_FH | WR_STATE, &filp); | ||
697 | if (filp) | 721 | if (filp) |
698 | get_file(filp); | 722 | get_file(filp); |
699 | nfs4_unlock_state(); | 723 | nfs4_unlock_state(); |
@@ -703,7 +727,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
703 | return status; | 727 | return status; |
704 | } | 728 | } |
705 | 729 | ||
706 | write->wr_bytes_written = write->wr_buflen; | 730 | cnt = write->wr_buflen; |
707 | write->wr_how_written = write->wr_stable_how; | 731 | write->wr_how_written = write->wr_stable_how; |
708 | p = (u32 *)write->wr_verifier.data; | 732 | p = (u32 *)write->wr_verifier.data; |
709 | *p++ = nfssvc_boot.tv_sec; | 733 | *p++ = nfssvc_boot.tv_sec; |
@@ -711,10 +735,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
711 | 735 | ||
712 | status = nfsd_write(rqstp, &cstate->current_fh, filp, | 736 | status = nfsd_write(rqstp, &cstate->current_fh, filp, |
713 | write->wr_offset, rqstp->rq_vec, write->wr_vlen, | 737 | write->wr_offset, rqstp->rq_vec, write->wr_vlen, |
714 | write->wr_buflen, &write->wr_how_written); | 738 | &cnt, &write->wr_how_written); |
715 | if (filp) | 739 | if (filp) |
716 | fput(filp); | 740 | fput(filp); |
717 | 741 | ||
742 | write->wr_bytes_written = cnt; | ||
743 | |||
718 | if (status == nfserr_symlink) | 744 | if (status == nfserr_symlink) |
719 | status = nfserr_inval; | 745 | status = nfserr_inval; |
720 | return status; | 746 | return status; |
@@ -737,8 +763,9 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
737 | if (status) | 763 | if (status) |
738 | return status; | 764 | return status; |
739 | 765 | ||
740 | if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) | 766 | if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) |
741 | || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) | 767 | || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) |
768 | || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) | ||
742 | return nfserr_attrnotsupp; | 769 | return nfserr_attrnotsupp; |
743 | if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) | 770 | if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) |
744 | || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) | 771 | || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) |
@@ -766,7 +793,8 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
766 | if (status) | 793 | if (status) |
767 | goto out_kfree; | 794 | goto out_kfree; |
768 | 795 | ||
769 | p = buf + 3; | 796 | /* skip bitmap */ |
797 | p = buf + 1 + ntohl(buf[0]); | ||
770 | status = nfserr_not_same; | 798 | status = nfserr_not_same; |
771 | if (ntohl(*p++) != verify->ve_attrlen) | 799 | if (ntohl(*p++) != verify->ve_attrlen) |
772 | goto out_kfree; | 800 | goto out_kfree; |
@@ -813,39 +841,17 @@ static inline void nfsd4_increment_op_stats(u32 opnum) | |||
813 | nfsdstats.nfs4_opcount[opnum]++; | 841 | nfsdstats.nfs4_opcount[opnum]++; |
814 | } | 842 | } |
815 | 843 | ||
816 | static void cstate_free(struct nfsd4_compound_state *cstate) | ||
817 | { | ||
818 | if (cstate == NULL) | ||
819 | return; | ||
820 | fh_put(&cstate->current_fh); | ||
821 | fh_put(&cstate->save_fh); | ||
822 | BUG_ON(cstate->replay_owner); | ||
823 | kfree(cstate); | ||
824 | } | ||
825 | |||
826 | static struct nfsd4_compound_state *cstate_alloc(void) | ||
827 | { | ||
828 | struct nfsd4_compound_state *cstate; | ||
829 | |||
830 | cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL); | ||
831 | if (cstate == NULL) | ||
832 | return NULL; | ||
833 | fh_init(&cstate->current_fh, NFS4_FHSIZE); | ||
834 | fh_init(&cstate->save_fh, NFS4_FHSIZE); | ||
835 | cstate->replay_owner = NULL; | ||
836 | return cstate; | ||
837 | } | ||
838 | |||
839 | typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, | 844 | typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, |
840 | void *); | 845 | void *); |
846 | enum nfsd4_op_flags { | ||
847 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ | ||
848 | ALLOWED_ON_ABSENT_FS = 2 << 0, /* ops processed on absent fs */ | ||
849 | ALLOWED_AS_FIRST_OP = 3 << 0, /* ops reqired first in compound */ | ||
850 | }; | ||
841 | 851 | ||
842 | struct nfsd4_operation { | 852 | struct nfsd4_operation { |
843 | nfsd4op_func op_func; | 853 | nfsd4op_func op_func; |
844 | u32 op_flags; | 854 | u32 op_flags; |
845 | /* Most ops require a valid current filehandle; a few don't: */ | ||
846 | #define ALLOWED_WITHOUT_FH 1 | ||
847 | /* GETATTR and ops not listed as returning NFS4ERR_MOVED: */ | ||
848 | #define ALLOWED_ON_ABSENT_FS 2 | ||
849 | char *op_name; | 855 | char *op_name; |
850 | }; | 856 | }; |
851 | 857 | ||
@@ -854,6 +860,51 @@ static struct nfsd4_operation nfsd4_ops[]; | |||
854 | static const char *nfsd4_op_name(unsigned opnum); | 860 | static const char *nfsd4_op_name(unsigned opnum); |
855 | 861 | ||
856 | /* | 862 | /* |
863 | * This is a replay of a compound for which no cache entry pages | ||
864 | * were used. Encode the sequence operation, and if cachethis is FALSE | ||
865 | * encode the uncache rep error on the next operation. | ||
866 | */ | ||
867 | static __be32 | ||
868 | nfsd4_enc_uncached_replay(struct nfsd4_compoundargs *args, | ||
869 | struct nfsd4_compoundres *resp) | ||
870 | { | ||
871 | struct nfsd4_op *op; | ||
872 | |||
873 | dprintk("--> %s resp->opcnt %d ce_cachethis %u \n", __func__, | ||
874 | resp->opcnt, resp->cstate.slot->sl_cache_entry.ce_cachethis); | ||
875 | |||
876 | /* Encode the replayed sequence operation */ | ||
877 | BUG_ON(resp->opcnt != 1); | ||
878 | op = &args->ops[resp->opcnt - 1]; | ||
879 | nfsd4_encode_operation(resp, op); | ||
880 | |||
881 | /*return nfserr_retry_uncached_rep in next operation. */ | ||
882 | if (resp->cstate.slot->sl_cache_entry.ce_cachethis == 0) { | ||
883 | op = &args->ops[resp->opcnt++]; | ||
884 | op->status = nfserr_retry_uncached_rep; | ||
885 | nfsd4_encode_operation(resp, op); | ||
886 | } | ||
887 | return op->status; | ||
888 | } | ||
889 | |||
890 | /* | ||
891 | * Enforce NFSv4.1 COMPOUND ordering rules. | ||
892 | * | ||
893 | * TODO: | ||
894 | * - enforce NFS4ERR_NOT_ONLY_OP, | ||
895 | * - DESTROY_SESSION MUST be the final operation in the COMPOUND request. | ||
896 | */ | ||
897 | static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args) | ||
898 | { | ||
899 | if (args->minorversion && args->opcnt > 0) { | ||
900 | struct nfsd4_op *op = &args->ops[0]; | ||
901 | return (op->status == nfserr_op_illegal) || | ||
902 | (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP); | ||
903 | } | ||
904 | return true; | ||
905 | } | ||
906 | |||
907 | /* | ||
857 | * COMPOUND call. | 908 | * COMPOUND call. |
858 | */ | 909 | */ |
859 | static __be32 | 910 | static __be32 |
@@ -863,12 +914,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
863 | { | 914 | { |
864 | struct nfsd4_op *op; | 915 | struct nfsd4_op *op; |
865 | struct nfsd4_operation *opdesc; | 916 | struct nfsd4_operation *opdesc; |
866 | struct nfsd4_compound_state *cstate = NULL; | 917 | struct nfsd4_compound_state *cstate = &resp->cstate; |
867 | int slack_bytes; | 918 | int slack_bytes; |
868 | __be32 status; | 919 | __be32 status; |
869 | 920 | ||
870 | resp->xbuf = &rqstp->rq_res; | 921 | resp->xbuf = &rqstp->rq_res; |
871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 922 | resp->p = rqstp->rq_res.head[0].iov_base + |
923 | rqstp->rq_res.head[0].iov_len; | ||
872 | resp->tagp = resp->p; | 924 | resp->tagp = resp->p; |
873 | /* reserve space for: taglen, tag, and opcnt */ | 925 | /* reserve space for: taglen, tag, and opcnt */ |
874 | resp->p += 2 + XDR_QUADLEN(args->taglen); | 926 | resp->p += 2 + XDR_QUADLEN(args->taglen); |
@@ -877,18 +929,25 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
877 | resp->tag = args->tag; | 929 | resp->tag = args->tag; |
878 | resp->opcnt = 0; | 930 | resp->opcnt = 0; |
879 | resp->rqstp = rqstp; | 931 | resp->rqstp = rqstp; |
932 | resp->cstate.minorversion = args->minorversion; | ||
933 | resp->cstate.replay_owner = NULL; | ||
934 | fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); | ||
935 | fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); | ||
936 | /* Use the deferral mechanism only for NFSv4.0 compounds */ | ||
937 | rqstp->rq_usedeferral = (args->minorversion == 0); | ||
880 | 938 | ||
881 | /* | 939 | /* |
882 | * According to RFC3010, this takes precedence over all other errors. | 940 | * According to RFC3010, this takes precedence over all other errors. |
883 | */ | 941 | */ |
884 | status = nfserr_minor_vers_mismatch; | 942 | status = nfserr_minor_vers_mismatch; |
885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 943 | if (args->minorversion > nfsd_supported_minorversion) |
886 | goto out; | 944 | goto out; |
887 | 945 | ||
888 | status = nfserr_resource; | 946 | if (!nfs41_op_ordering_ok(args)) { |
889 | cstate = cstate_alloc(); | 947 | op = &args->ops[0]; |
890 | if (cstate == NULL) | 948 | op->status = nfserr_sequence_pos; |
891 | goto out; | 949 | goto encode_op; |
950 | } | ||
892 | 951 | ||
893 | status = nfs_ok; | 952 | status = nfs_ok; |
894 | while (!status && resp->opcnt < args->opcnt) { | 953 | while (!status && resp->opcnt < args->opcnt) { |
@@ -897,7 +956,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
897 | dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", | 956 | dprintk("nfsv4 compound op #%d/%d: %d (%s)\n", |
898 | resp->opcnt, args->opcnt, op->opnum, | 957 | resp->opcnt, args->opcnt, op->opnum, |
899 | nfsd4_op_name(op->opnum)); | 958 | nfsd4_op_name(op->opnum)); |
900 | |||
901 | /* | 959 | /* |
902 | * The XDR decode routines may have pre-set op->status; | 960 | * The XDR decode routines may have pre-set op->status; |
903 | * for example, if there is a miscellaneous XDR error | 961 | * for example, if there is a miscellaneous XDR error |
@@ -938,6 +996,15 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
938 | BUG_ON(op->status == nfs_ok); | 996 | BUG_ON(op->status == nfs_ok); |
939 | 997 | ||
940 | encode_op: | 998 | encode_op: |
999 | /* Only from SEQUENCE or CREATE_SESSION */ | ||
1000 | if (resp->cstate.status == nfserr_replay_cache) { | ||
1001 | dprintk("%s NFS4.1 replay from cache\n", __func__); | ||
1002 | if (nfsd4_not_cached(resp)) | ||
1003 | status = nfsd4_enc_uncached_replay(args, resp); | ||
1004 | else | ||
1005 | status = op->status; | ||
1006 | goto out; | ||
1007 | } | ||
941 | if (op->status == nfserr_replay_me) { | 1008 | if (op->status == nfserr_replay_me) { |
942 | op->replay = &cstate->replay_owner->so_replay; | 1009 | op->replay = &cstate->replay_owner->so_replay; |
943 | nfsd4_encode_replay(resp, op); | 1010 | nfsd4_encode_replay(resp, op); |
@@ -961,15 +1028,24 @@ encode_op: | |||
961 | 1028 | ||
962 | nfsd4_increment_op_stats(op->opnum); | 1029 | nfsd4_increment_op_stats(op->opnum); |
963 | } | 1030 | } |
1031 | if (!rqstp->rq_usedeferral && status == nfserr_dropit) { | ||
1032 | dprintk("%s Dropit - send NFS4ERR_DELAY\n", __func__); | ||
1033 | status = nfserr_jukebox; | ||
1034 | } | ||
964 | 1035 | ||
965 | cstate_free(cstate); | 1036 | resp->cstate.status = status; |
1037 | fh_put(&resp->cstate.current_fh); | ||
1038 | fh_put(&resp->cstate.save_fh); | ||
1039 | BUG_ON(resp->cstate.replay_owner); | ||
966 | out: | 1040 | out: |
967 | nfsd4_release_compoundargs(args); | 1041 | nfsd4_release_compoundargs(args); |
1042 | /* Reset deferral mechanism for RPC deferrals */ | ||
1043 | rqstp->rq_usedeferral = 1; | ||
968 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 1044 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
969 | return status; | 1045 | return status; |
970 | } | 1046 | } |
971 | 1047 | ||
972 | static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | 1048 | static struct nfsd4_operation nfsd4_ops[] = { |
973 | [OP_ACCESS] = { | 1049 | [OP_ACCESS] = { |
974 | .op_func = (nfsd4op_func)nfsd4_access, | 1050 | .op_func = (nfsd4op_func)nfsd4_access, |
975 | .op_name = "OP_ACCESS", | 1051 | .op_name = "OP_ACCESS", |
@@ -1045,7 +1121,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | |||
1045 | .op_name = "OP_PUTFH", | 1121 | .op_name = "OP_PUTFH", |
1046 | }, | 1122 | }, |
1047 | [OP_PUTPUBFH] = { | 1123 | [OP_PUTPUBFH] = { |
1048 | /* unsupported, just for future reference: */ | 1124 | .op_func = (nfsd4op_func)nfsd4_putrootfh, |
1049 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1125 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, |
1050 | .op_name = "OP_PUTPUBFH", | 1126 | .op_name = "OP_PUTPUBFH", |
1051 | }, | 1127 | }, |
@@ -1119,6 +1195,28 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { | |||
1119 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1195 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, |
1120 | .op_name = "OP_RELEASE_LOCKOWNER", | 1196 | .op_name = "OP_RELEASE_LOCKOWNER", |
1121 | }, | 1197 | }, |
1198 | |||
1199 | /* NFSv4.1 operations */ | ||
1200 | [OP_EXCHANGE_ID] = { | ||
1201 | .op_func = (nfsd4op_func)nfsd4_exchange_id, | ||
1202 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1203 | .op_name = "OP_EXCHANGE_ID", | ||
1204 | }, | ||
1205 | [OP_CREATE_SESSION] = { | ||
1206 | .op_func = (nfsd4op_func)nfsd4_create_session, | ||
1207 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1208 | .op_name = "OP_CREATE_SESSION", | ||
1209 | }, | ||
1210 | [OP_DESTROY_SESSION] = { | ||
1211 | .op_func = (nfsd4op_func)nfsd4_destroy_session, | ||
1212 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1213 | .op_name = "OP_DESTROY_SESSION", | ||
1214 | }, | ||
1215 | [OP_SEQUENCE] = { | ||
1216 | .op_func = (nfsd4op_func)nfsd4_sequence, | ||
1217 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, | ||
1218 | .op_name = "OP_SEQUENCE", | ||
1219 | }, | ||
1122 | }; | 1220 | }; |
1123 | 1221 | ||
1124 | static const char *nfsd4_op_name(unsigned opnum) | 1222 | static const char *nfsd4_op_name(unsigned opnum) |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 74f7b67567fd..3444c0052a87 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -182,36 +182,26 @@ out_unlock: | |||
182 | 182 | ||
183 | typedef int (recdir_func)(struct dentry *, struct dentry *); | 183 | typedef int (recdir_func)(struct dentry *, struct dentry *); |
184 | 184 | ||
185 | struct dentry_list { | 185 | struct name_list { |
186 | struct dentry *dentry; | 186 | char name[HEXDIR_LEN]; |
187 | struct list_head list; | 187 | struct list_head list; |
188 | }; | 188 | }; |
189 | 189 | ||
190 | struct dentry_list_arg { | ||
191 | struct list_head dentries; | ||
192 | struct dentry *parent; | ||
193 | }; | ||
194 | |||
195 | static int | 190 | static int |
196 | nfsd4_build_dentrylist(void *arg, const char *name, int namlen, | 191 | nfsd4_build_namelist(void *arg, const char *name, int namlen, |
197 | loff_t offset, u64 ino, unsigned int d_type) | 192 | loff_t offset, u64 ino, unsigned int d_type) |
198 | { | 193 | { |
199 | struct dentry_list_arg *dla = arg; | 194 | struct list_head *names = arg; |
200 | struct list_head *dentries = &dla->dentries; | 195 | struct name_list *entry; |
201 | struct dentry *parent = dla->parent; | ||
202 | struct dentry *dentry; | ||
203 | struct dentry_list *child; | ||
204 | 196 | ||
205 | if (name && isdotent(name, namlen)) | 197 | if (namlen != HEXDIR_LEN - 1) |
206 | return 0; | 198 | return 0; |
207 | dentry = lookup_one_len(name, parent, namlen); | 199 | entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); |
208 | if (IS_ERR(dentry)) | 200 | if (entry == NULL) |
209 | return PTR_ERR(dentry); | ||
210 | child = kmalloc(sizeof(*child), GFP_KERNEL); | ||
211 | if (child == NULL) | ||
212 | return -ENOMEM; | 201 | return -ENOMEM; |
213 | child->dentry = dentry; | 202 | memcpy(entry->name, name, HEXDIR_LEN - 1); |
214 | list_add(&child->list, dentries); | 203 | entry->name[HEXDIR_LEN - 1] = '\0'; |
204 | list_add(&entry->list, names); | ||
215 | return 0; | 205 | return 0; |
216 | } | 206 | } |
217 | 207 | ||
@@ -220,11 +210,9 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
220 | { | 210 | { |
221 | const struct cred *original_cred; | 211 | const struct cred *original_cred; |
222 | struct file *filp; | 212 | struct file *filp; |
223 | struct dentry_list_arg dla = { | 213 | LIST_HEAD(names); |
224 | .parent = dir, | 214 | struct name_list *entry; |
225 | }; | 215 | struct dentry *dentry; |
226 | struct list_head *dentries = &dla.dentries; | ||
227 | struct dentry_list *child; | ||
228 | int status; | 216 | int status; |
229 | 217 | ||
230 | if (!rec_dir_init) | 218 | if (!rec_dir_init) |
@@ -233,31 +221,34 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
233 | status = nfs4_save_creds(&original_cred); | 221 | status = nfs4_save_creds(&original_cred); |
234 | if (status < 0) | 222 | if (status < 0) |
235 | return status; | 223 | return status; |
236 | INIT_LIST_HEAD(dentries); | ||
237 | 224 | ||
238 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, | 225 | filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, |
239 | current_cred()); | 226 | current_cred()); |
240 | status = PTR_ERR(filp); | 227 | status = PTR_ERR(filp); |
241 | if (IS_ERR(filp)) | 228 | if (IS_ERR(filp)) |
242 | goto out; | 229 | goto out; |
243 | INIT_LIST_HEAD(dentries); | 230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); |
244 | status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); | ||
245 | fput(filp); | 231 | fput(filp); |
246 | while (!list_empty(dentries)) { | 232 | while (!list_empty(&names)) { |
247 | child = list_entry(dentries->next, struct dentry_list, list); | 233 | entry = list_entry(names.next, struct name_list, list); |
248 | status = f(dir, child->dentry); | 234 | |
235 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | ||
236 | if (IS_ERR(dentry)) { | ||
237 | status = PTR_ERR(dentry); | ||
238 | goto out; | ||
239 | } | ||
240 | status = f(dir, dentry); | ||
241 | dput(dentry); | ||
249 | if (status) | 242 | if (status) |
250 | goto out; | 243 | goto out; |
251 | list_del(&child->list); | 244 | list_del(&entry->list); |
252 | dput(child->dentry); | 245 | kfree(entry); |
253 | kfree(child); | ||
254 | } | 246 | } |
255 | out: | 247 | out: |
256 | while (!list_empty(dentries)) { | 248 | while (!list_empty(&names)) { |
257 | child = list_entry(dentries->next, struct dentry_list, list); | 249 | entry = list_entry(names.next, struct name_list, list); |
258 | list_del(&child->list); | 250 | list_del(&entry->list); |
259 | dput(child->dentry); | 251 | kfree(entry); |
260 | kfree(child); | ||
261 | } | 252 | } |
262 | nfs4_reset_creds(original_cred); | 253 | nfs4_reset_creds(original_cred); |
263 | return status; | 254 | return status; |
@@ -353,7 +344,8 @@ purge_old(struct dentry *parent, struct dentry *child) | |||
353 | { | 344 | { |
354 | int status; | 345 | int status; |
355 | 346 | ||
356 | if (nfs4_has_reclaimed_state(child->d_name.name)) | 347 | /* note: we currently use this path only for minorversion 0 */ |
348 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) | ||
357 | return 0; | 349 | return 0; |
358 | 350 | ||
359 | status = nfsd4_clear_clid_dir(parent, child); | 351 | status = nfsd4_clear_clid_dir(parent, child); |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b6f60f48e94b..c65a27b76a9d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -68,6 +68,7 @@ static u32 current_delegid = 1; | |||
68 | static u32 nfs4_init; | 68 | static u32 nfs4_init; |
69 | static stateid_t zerostateid; /* bits all 0 */ | 69 | static stateid_t zerostateid; /* bits all 0 */ |
70 | static stateid_t onestateid; /* bits all 1 */ | 70 | static stateid_t onestateid; /* bits all 1 */ |
71 | static u64 current_sessionid = 1; | ||
71 | 72 | ||
72 | #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) | 73 | #define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t))) |
73 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) | 74 | #define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t))) |
@@ -75,18 +76,21 @@ static stateid_t onestateid; /* bits all 1 */ | |||
75 | /* forward declarations */ | 76 | /* forward declarations */ |
76 | static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); | 77 | static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); |
77 | static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); | 78 | static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); |
78 | static void release_stateid_lockowners(struct nfs4_stateid *open_stp); | ||
79 | static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; | 79 | static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; |
80 | static void nfs4_set_recdir(char *recdir); | 80 | static void nfs4_set_recdir(char *recdir); |
81 | 81 | ||
82 | /* Locking: | 82 | /* Locking: */ |
83 | * | 83 | |
84 | * client_mutex: | 84 | /* Currently used for almost all code touching nfsv4 state: */ |
85 | * protects clientid_hashtbl[], clientstr_hashtbl[], | ||
86 | * unconfstr_hashtbl[], uncofid_hashtbl[]. | ||
87 | */ | ||
88 | static DEFINE_MUTEX(client_mutex); | 85 | static DEFINE_MUTEX(client_mutex); |
89 | 86 | ||
87 | /* | ||
88 | * Currently used for the del_recall_lru and file hash table. In an | ||
89 | * effort to decrease the scope of the client_mutex, this spinlock may | ||
90 | * eventually cover more: | ||
91 | */ | ||
92 | static DEFINE_SPINLOCK(recall_lock); | ||
93 | |||
90 | static struct kmem_cache *stateowner_slab = NULL; | 94 | static struct kmem_cache *stateowner_slab = NULL; |
91 | static struct kmem_cache *file_slab = NULL; | 95 | static struct kmem_cache *file_slab = NULL; |
92 | static struct kmem_cache *stateid_slab = NULL; | 96 | static struct kmem_cache *stateid_slab = NULL; |
@@ -117,37 +121,23 @@ opaque_hashval(const void *ptr, int nbytes) | |||
117 | return x; | 121 | return x; |
118 | } | 122 | } |
119 | 123 | ||
120 | /* forward declarations */ | ||
121 | static void release_stateowner(struct nfs4_stateowner *sop); | ||
122 | static void release_stateid(struct nfs4_stateid *stp, int flags); | ||
123 | |||
124 | /* | ||
125 | * Delegation state | ||
126 | */ | ||
127 | |||
128 | /* recall_lock protects the del_recall_lru */ | ||
129 | static DEFINE_SPINLOCK(recall_lock); | ||
130 | static struct list_head del_recall_lru; | 124 | static struct list_head del_recall_lru; |
131 | 125 | ||
132 | static void | ||
133 | free_nfs4_file(struct kref *kref) | ||
134 | { | ||
135 | struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref); | ||
136 | list_del(&fp->fi_hash); | ||
137 | iput(fp->fi_inode); | ||
138 | kmem_cache_free(file_slab, fp); | ||
139 | } | ||
140 | |||
141 | static inline void | 126 | static inline void |
142 | put_nfs4_file(struct nfs4_file *fi) | 127 | put_nfs4_file(struct nfs4_file *fi) |
143 | { | 128 | { |
144 | kref_put(&fi->fi_ref, free_nfs4_file); | 129 | if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) { |
130 | list_del(&fi->fi_hash); | ||
131 | spin_unlock(&recall_lock); | ||
132 | iput(fi->fi_inode); | ||
133 | kmem_cache_free(file_slab, fi); | ||
134 | } | ||
145 | } | 135 | } |
146 | 136 | ||
147 | static inline void | 137 | static inline void |
148 | get_nfs4_file(struct nfs4_file *fi) | 138 | get_nfs4_file(struct nfs4_file *fi) |
149 | { | 139 | { |
150 | kref_get(&fi->fi_ref); | 140 | atomic_inc(&fi->fi_ref); |
151 | } | 141 | } |
152 | 142 | ||
153 | static int num_delegations; | 143 | static int num_delegations; |
@@ -220,9 +210,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
220 | dp->dl_stateid.si_stateownerid = current_delegid++; | 210 | dp->dl_stateid.si_stateownerid = current_delegid++; |
221 | dp->dl_stateid.si_fileid = 0; | 211 | dp->dl_stateid.si_fileid = 0; |
222 | dp->dl_stateid.si_generation = 0; | 212 | dp->dl_stateid.si_generation = 0; |
223 | dp->dl_fhlen = current_fh->fh_handle.fh_size; | 213 | fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); |
224 | memcpy(dp->dl_fhval, ¤t_fh->fh_handle.fh_base, | ||
225 | current_fh->fh_handle.fh_size); | ||
226 | dp->dl_time = 0; | 214 | dp->dl_time = 0; |
227 | atomic_set(&dp->dl_count, 1); | 215 | atomic_set(&dp->dl_count, 1); |
228 | list_add(&dp->dl_perfile, &fp->fi_delegations); | 216 | list_add(&dp->dl_perfile, &fp->fi_delegations); |
@@ -311,6 +299,291 @@ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; | |||
311 | static struct list_head client_lru; | 299 | static struct list_head client_lru; |
312 | static struct list_head close_lru; | 300 | static struct list_head close_lru; |
313 | 301 | ||
302 | static void unhash_generic_stateid(struct nfs4_stateid *stp) | ||
303 | { | ||
304 | list_del(&stp->st_hash); | ||
305 | list_del(&stp->st_perfile); | ||
306 | list_del(&stp->st_perstateowner); | ||
307 | } | ||
308 | |||
309 | static void free_generic_stateid(struct nfs4_stateid *stp) | ||
310 | { | ||
311 | put_nfs4_file(stp->st_file); | ||
312 | kmem_cache_free(stateid_slab, stp); | ||
313 | } | ||
314 | |||
315 | static void release_lock_stateid(struct nfs4_stateid *stp) | ||
316 | { | ||
317 | unhash_generic_stateid(stp); | ||
318 | locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner); | ||
319 | free_generic_stateid(stp); | ||
320 | } | ||
321 | |||
322 | static void unhash_lockowner(struct nfs4_stateowner *sop) | ||
323 | { | ||
324 | struct nfs4_stateid *stp; | ||
325 | |||
326 | list_del(&sop->so_idhash); | ||
327 | list_del(&sop->so_strhash); | ||
328 | list_del(&sop->so_perstateid); | ||
329 | while (!list_empty(&sop->so_stateids)) { | ||
330 | stp = list_first_entry(&sop->so_stateids, | ||
331 | struct nfs4_stateid, st_perstateowner); | ||
332 | release_lock_stateid(stp); | ||
333 | } | ||
334 | } | ||
335 | |||
336 | static void release_lockowner(struct nfs4_stateowner *sop) | ||
337 | { | ||
338 | unhash_lockowner(sop); | ||
339 | nfs4_put_stateowner(sop); | ||
340 | } | ||
341 | |||
342 | static void | ||
343 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | ||
344 | { | ||
345 | struct nfs4_stateowner *lock_sop; | ||
346 | |||
347 | while (!list_empty(&open_stp->st_lockowners)) { | ||
348 | lock_sop = list_entry(open_stp->st_lockowners.next, | ||
349 | struct nfs4_stateowner, so_perstateid); | ||
350 | /* list_del(&open_stp->st_lockowners); */ | ||
351 | BUG_ON(lock_sop->so_is_open_owner); | ||
352 | release_lockowner(lock_sop); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | static void release_open_stateid(struct nfs4_stateid *stp) | ||
357 | { | ||
358 | unhash_generic_stateid(stp); | ||
359 | release_stateid_lockowners(stp); | ||
360 | nfsd_close(stp->st_vfs_file); | ||
361 | free_generic_stateid(stp); | ||
362 | } | ||
363 | |||
364 | static void unhash_openowner(struct nfs4_stateowner *sop) | ||
365 | { | ||
366 | struct nfs4_stateid *stp; | ||
367 | |||
368 | list_del(&sop->so_idhash); | ||
369 | list_del(&sop->so_strhash); | ||
370 | list_del(&sop->so_perclient); | ||
371 | list_del(&sop->so_perstateid); /* XXX: necessary? */ | ||
372 | while (!list_empty(&sop->so_stateids)) { | ||
373 | stp = list_first_entry(&sop->so_stateids, | ||
374 | struct nfs4_stateid, st_perstateowner); | ||
375 | release_open_stateid(stp); | ||
376 | } | ||
377 | } | ||
378 | |||
379 | static void release_openowner(struct nfs4_stateowner *sop) | ||
380 | { | ||
381 | unhash_openowner(sop); | ||
382 | list_del(&sop->so_close_lru); | ||
383 | nfs4_put_stateowner(sop); | ||
384 | } | ||
385 | |||
386 | static DEFINE_SPINLOCK(sessionid_lock); | ||
387 | #define SESSION_HASH_SIZE 512 | ||
388 | static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; | ||
389 | |||
390 | static inline int | ||
391 | hash_sessionid(struct nfs4_sessionid *sessionid) | ||
392 | { | ||
393 | struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid; | ||
394 | |||
395 | return sid->sequence % SESSION_HASH_SIZE; | ||
396 | } | ||
397 | |||
398 | static inline void | ||
399 | dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) | ||
400 | { | ||
401 | u32 *ptr = (u32 *)(&sessionid->data[0]); | ||
402 | dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); | ||
403 | } | ||
404 | |||
405 | static void | ||
406 | gen_sessionid(struct nfsd4_session *ses) | ||
407 | { | ||
408 | struct nfs4_client *clp = ses->se_client; | ||
409 | struct nfsd4_sessionid *sid; | ||
410 | |||
411 | sid = (struct nfsd4_sessionid *)ses->se_sessionid.data; | ||
412 | sid->clientid = clp->cl_clientid; | ||
413 | sid->sequence = current_sessionid++; | ||
414 | sid->reserved = 0; | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Give the client the number of slots it requests bound by | ||
419 | * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. | ||
420 | * | ||
421 | * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we | ||
422 | * should (up to a point) re-negotiate active sessions and reduce their | ||
423 | * slot usage to make rooom for new connections. For now we just fail the | ||
424 | * create session. | ||
425 | */ | ||
426 | static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) | ||
427 | { | ||
428 | int status = 0, np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; | ||
429 | |||
430 | spin_lock(&nfsd_serv->sv_lock); | ||
431 | if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) | ||
432 | np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; | ||
433 | nfsd_serv->sv_drc_pages_used += np; | ||
434 | spin_unlock(&nfsd_serv->sv_lock); | ||
435 | |||
436 | if (np <= 0) { | ||
437 | status = nfserr_resource; | ||
438 | fchan->maxreqs = 0; | ||
439 | } else | ||
440 | fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; | ||
441 | |||
442 | return status; | ||
443 | } | ||
444 | |||
445 | /* | ||
446 | * fchan holds the client values on input, and the server values on output | ||
447 | */ | ||
448 | static int init_forechannel_attrs(struct svc_rqst *rqstp, | ||
449 | struct nfsd4_session *session, | ||
450 | struct nfsd4_channel_attrs *fchan) | ||
451 | { | ||
452 | int status = 0; | ||
453 | __u32 maxcount = svc_max_payload(rqstp); | ||
454 | |||
455 | /* headerpadsz set to zero in encode routine */ | ||
456 | |||
457 | /* Use the client's max request and max response size if possible */ | ||
458 | if (fchan->maxreq_sz > maxcount) | ||
459 | fchan->maxreq_sz = maxcount; | ||
460 | session->se_fmaxreq_sz = fchan->maxreq_sz; | ||
461 | |||
462 | if (fchan->maxresp_sz > maxcount) | ||
463 | fchan->maxresp_sz = maxcount; | ||
464 | session->se_fmaxresp_sz = fchan->maxresp_sz; | ||
465 | |||
466 | /* Set the max response cached size our default which is | ||
467 | * a multiple of PAGE_SIZE and small */ | ||
468 | session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; | ||
469 | fchan->maxresp_cached = session->se_fmaxresp_cached; | ||
470 | |||
471 | /* Use the client's maxops if possible */ | ||
472 | if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) | ||
473 | fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; | ||
474 | session->se_fmaxops = fchan->maxops; | ||
475 | |||
476 | /* try to use the client requested number of slots */ | ||
477 | if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) | ||
478 | fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; | ||
479 | |||
480 | /* FIXME: Error means no more DRC pages so the server should | ||
481 | * recover pages from existing sessions. For now fail session | ||
482 | * creation. | ||
483 | */ | ||
484 | status = set_forechannel_maxreqs(fchan); | ||
485 | |||
486 | session->se_fnumslots = fchan->maxreqs; | ||
487 | return status; | ||
488 | } | ||
489 | |||
490 | static int | ||
491 | alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | ||
492 | struct nfsd4_create_session *cses) | ||
493 | { | ||
494 | struct nfsd4_session *new, tmp; | ||
495 | int idx, status = nfserr_resource, slotsize; | ||
496 | |||
497 | memset(&tmp, 0, sizeof(tmp)); | ||
498 | |||
499 | /* FIXME: For now, we just accept the client back channel attributes. */ | ||
500 | status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel); | ||
501 | if (status) | ||
502 | goto out; | ||
503 | |||
504 | /* allocate struct nfsd4_session and slot table in one piece */ | ||
505 | slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot); | ||
506 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); | ||
507 | if (!new) | ||
508 | goto out; | ||
509 | |||
510 | memcpy(new, &tmp, sizeof(*new)); | ||
511 | |||
512 | new->se_client = clp; | ||
513 | gen_sessionid(new); | ||
514 | idx = hash_sessionid(&new->se_sessionid); | ||
515 | memcpy(clp->cl_sessionid.data, new->se_sessionid.data, | ||
516 | NFS4_MAX_SESSIONID_LEN); | ||
517 | |||
518 | new->se_flags = cses->flags; | ||
519 | kref_init(&new->se_ref); | ||
520 | spin_lock(&sessionid_lock); | ||
521 | list_add(&new->se_hash, &sessionid_hashtbl[idx]); | ||
522 | list_add(&new->se_perclnt, &clp->cl_sessions); | ||
523 | spin_unlock(&sessionid_lock); | ||
524 | |||
525 | status = nfs_ok; | ||
526 | out: | ||
527 | return status; | ||
528 | } | ||
529 | |||
530 | /* caller must hold sessionid_lock */ | ||
531 | static struct nfsd4_session * | ||
532 | find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) | ||
533 | { | ||
534 | struct nfsd4_session *elem; | ||
535 | int idx; | ||
536 | |||
537 | dump_sessionid(__func__, sessionid); | ||
538 | idx = hash_sessionid(sessionid); | ||
539 | dprintk("%s: idx is %d\n", __func__, idx); | ||
540 | /* Search in the appropriate list */ | ||
541 | list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { | ||
542 | dump_sessionid("list traversal", &elem->se_sessionid); | ||
543 | if (!memcmp(elem->se_sessionid.data, sessionid->data, | ||
544 | NFS4_MAX_SESSIONID_LEN)) { | ||
545 | return elem; | ||
546 | } | ||
547 | } | ||
548 | |||
549 | dprintk("%s: session not found\n", __func__); | ||
550 | return NULL; | ||
551 | } | ||
552 | |||
553 | /* caller must hold sessionid_lock */ | ||
554 | static void | ||
555 | unhash_session(struct nfsd4_session *ses) | ||
556 | { | ||
557 | list_del(&ses->se_hash); | ||
558 | list_del(&ses->se_perclnt); | ||
559 | } | ||
560 | |||
561 | static void | ||
562 | release_session(struct nfsd4_session *ses) | ||
563 | { | ||
564 | spin_lock(&sessionid_lock); | ||
565 | unhash_session(ses); | ||
566 | spin_unlock(&sessionid_lock); | ||
567 | nfsd4_put_session(ses); | ||
568 | } | ||
569 | |||
570 | static void nfsd4_release_respages(struct page **respages, short resused); | ||
571 | |||
572 | void | ||
573 | free_session(struct kref *kref) | ||
574 | { | ||
575 | struct nfsd4_session *ses; | ||
576 | int i; | ||
577 | |||
578 | ses = container_of(kref, struct nfsd4_session, se_ref); | ||
579 | for (i = 0; i < ses->se_fnumslots; i++) { | ||
580 | struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; | ||
581 | nfsd4_release_respages(e->ce_respages, e->ce_resused); | ||
582 | } | ||
583 | kfree(ses->se_slots); | ||
584 | kfree(ses); | ||
585 | } | ||
586 | |||
314 | static inline void | 587 | static inline void |
315 | renew_client(struct nfs4_client *clp) | 588 | renew_client(struct nfs4_client *clp) |
316 | { | 589 | { |
@@ -330,8 +603,8 @@ STALE_CLIENTID(clientid_t *clid) | |||
330 | { | 603 | { |
331 | if (clid->cl_boot == boot_time) | 604 | if (clid->cl_boot == boot_time) |
332 | return 0; | 605 | return 0; |
333 | dprintk("NFSD stale clientid (%08x/%08x)\n", | 606 | dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", |
334 | clid->cl_boot, clid->cl_id); | 607 | clid->cl_boot, clid->cl_id, boot_time); |
335 | return 1; | 608 | return 1; |
336 | } | 609 | } |
337 | 610 | ||
@@ -376,6 +649,8 @@ static inline void | |||
376 | free_client(struct nfs4_client *clp) | 649 | free_client(struct nfs4_client *clp) |
377 | { | 650 | { |
378 | shutdown_callback_client(clp); | 651 | shutdown_callback_client(clp); |
652 | nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, | ||
653 | clp->cl_slot.sl_cache_entry.ce_resused); | ||
379 | if (clp->cl_cred.cr_group_info) | 654 | if (clp->cl_cred.cr_group_info) |
380 | put_group_info(clp->cl_cred.cr_group_info); | 655 | put_group_info(clp->cl_cred.cr_group_info); |
381 | kfree(clp->cl_principal); | 656 | kfree(clp->cl_principal); |
@@ -420,7 +695,13 @@ expire_client(struct nfs4_client *clp) | |||
420 | list_del(&clp->cl_lru); | 695 | list_del(&clp->cl_lru); |
421 | while (!list_empty(&clp->cl_openowners)) { | 696 | while (!list_empty(&clp->cl_openowners)) { |
422 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); | 697 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); |
423 | release_stateowner(sop); | 698 | release_openowner(sop); |
699 | } | ||
700 | while (!list_empty(&clp->cl_sessions)) { | ||
701 | struct nfsd4_session *ses; | ||
702 | ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, | ||
703 | se_perclnt); | ||
704 | release_session(ses); | ||
424 | } | 705 | } |
425 | put_nfs4_client(clp); | 706 | put_nfs4_client(clp); |
426 | } | 707 | } |
@@ -439,6 +720,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) | |||
439 | INIT_LIST_HEAD(&clp->cl_strhash); | 720 | INIT_LIST_HEAD(&clp->cl_strhash); |
440 | INIT_LIST_HEAD(&clp->cl_openowners); | 721 | INIT_LIST_HEAD(&clp->cl_openowners); |
441 | INIT_LIST_HEAD(&clp->cl_delegations); | 722 | INIT_LIST_HEAD(&clp->cl_delegations); |
723 | INIT_LIST_HEAD(&clp->cl_sessions); | ||
442 | INIT_LIST_HEAD(&clp->cl_lru); | 724 | INIT_LIST_HEAD(&clp->cl_lru); |
443 | return clp; | 725 | return clp; |
444 | } | 726 | } |
@@ -568,25 +850,45 @@ find_unconfirmed_client(clientid_t *clid) | |||
568 | return NULL; | 850 | return NULL; |
569 | } | 851 | } |
570 | 852 | ||
853 | /* | ||
854 | * Return 1 iff clp's clientid establishment method matches the use_exchange_id | ||
855 | * parameter. Matching is based on the fact the at least one of the | ||
856 | * EXCHGID4_FLAG_USE_{NON_PNFS,PNFS_MDS,PNFS_DS} flags must be set for v4.1 | ||
857 | * | ||
858 | * FIXME: we need to unify the clientid namespaces for nfsv4.x | ||
859 | * and correctly deal with client upgrade/downgrade in EXCHANGE_ID | ||
860 | * and SET_CLIENTID{,_CONFIRM} | ||
861 | */ | ||
862 | static inline int | ||
863 | match_clientid_establishment(struct nfs4_client *clp, bool use_exchange_id) | ||
864 | { | ||
865 | bool has_exchange_flags = (clp->cl_exchange_flags != 0); | ||
866 | return use_exchange_id == has_exchange_flags; | ||
867 | } | ||
868 | |||
571 | static struct nfs4_client * | 869 | static struct nfs4_client * |
572 | find_confirmed_client_by_str(const char *dname, unsigned int hashval) | 870 | find_confirmed_client_by_str(const char *dname, unsigned int hashval, |
871 | bool use_exchange_id) | ||
573 | { | 872 | { |
574 | struct nfs4_client *clp; | 873 | struct nfs4_client *clp; |
575 | 874 | ||
576 | list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { | 875 | list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) { |
577 | if (same_name(clp->cl_recdir, dname)) | 876 | if (same_name(clp->cl_recdir, dname) && |
877 | match_clientid_establishment(clp, use_exchange_id)) | ||
578 | return clp; | 878 | return clp; |
579 | } | 879 | } |
580 | return NULL; | 880 | return NULL; |
581 | } | 881 | } |
582 | 882 | ||
583 | static struct nfs4_client * | 883 | static struct nfs4_client * |
584 | find_unconfirmed_client_by_str(const char *dname, unsigned int hashval) | 884 | find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, |
885 | bool use_exchange_id) | ||
585 | { | 886 | { |
586 | struct nfs4_client *clp; | 887 | struct nfs4_client *clp; |
587 | 888 | ||
588 | list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { | 889 | list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) { |
589 | if (same_name(clp->cl_recdir, dname)) | 890 | if (same_name(clp->cl_recdir, dname) && |
891 | match_clientid_establishment(clp, use_exchange_id)) | ||
590 | return clp; | 892 | return clp; |
591 | } | 893 | } |
592 | return NULL; | 894 | return NULL; |
@@ -685,6 +987,534 @@ out_err: | |||
685 | return; | 987 | return; |
686 | } | 988 | } |
687 | 989 | ||
990 | void | ||
991 | nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) | ||
992 | { | ||
993 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
994 | |||
995 | resp->cstate.statp = statp; | ||
996 | } | ||
997 | |||
998 | /* | ||
999 | * Dereference the result pages. | ||
1000 | */ | ||
1001 | static void | ||
1002 | nfsd4_release_respages(struct page **respages, short resused) | ||
1003 | { | ||
1004 | int i; | ||
1005 | |||
1006 | dprintk("--> %s\n", __func__); | ||
1007 | for (i = 0; i < resused; i++) { | ||
1008 | if (!respages[i]) | ||
1009 | continue; | ||
1010 | put_page(respages[i]); | ||
1011 | respages[i] = NULL; | ||
1012 | } | ||
1013 | } | ||
1014 | |||
1015 | static void | ||
1016 | nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) | ||
1017 | { | ||
1018 | int i; | ||
1019 | |||
1020 | for (i = 0; i < count; i++) { | ||
1021 | topages[i] = frompages[i]; | ||
1022 | if (!topages[i]) | ||
1023 | continue; | ||
1024 | get_page(topages[i]); | ||
1025 | } | ||
1026 | } | ||
1027 | |||
1028 | /* | ||
1029 | * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous | ||
1030 | * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total | ||
1031 | * length of the XDR response is less than se_fmaxresp_cached | ||
1032 | * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a | ||
1033 | * of the reply (e.g. readdir). | ||
1034 | * | ||
1035 | * Store the base and length of the rq_req.head[0] page | ||
1036 | * of the NFSv4.1 data, just past the rpc header. | ||
1037 | */ | ||
1038 | void | ||
1039 | nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) | ||
1040 | { | ||
1041 | struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; | ||
1042 | struct svc_rqst *rqstp = resp->rqstp; | ||
1043 | struct nfsd4_compoundargs *args = rqstp->rq_argp; | ||
1044 | struct nfsd4_op *op = &args->ops[resp->opcnt]; | ||
1045 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
1046 | |||
1047 | dprintk("--> %s entry %p\n", __func__, entry); | ||
1048 | |||
1049 | /* Don't cache a failed OP_SEQUENCE. */ | ||
1050 | if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) | ||
1051 | return; | ||
1052 | |||
1053 | nfsd4_release_respages(entry->ce_respages, entry->ce_resused); | ||
1054 | entry->ce_opcnt = resp->opcnt; | ||
1055 | entry->ce_status = resp->cstate.status; | ||
1056 | |||
1057 | /* | ||
1058 | * Don't need a page to cache just the sequence operation - the slot | ||
1059 | * does this for us! | ||
1060 | */ | ||
1061 | |||
1062 | if (nfsd4_not_cached(resp)) { | ||
1063 | entry->ce_resused = 0; | ||
1064 | entry->ce_rpchdrlen = 0; | ||
1065 | dprintk("%s Just cache SEQUENCE. ce_cachethis %d\n", __func__, | ||
1066 | resp->cstate.slot->sl_cache_entry.ce_cachethis); | ||
1067 | return; | ||
1068 | } | ||
1069 | entry->ce_resused = rqstp->rq_resused; | ||
1070 | if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) | ||
1071 | entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; | ||
1072 | nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, | ||
1073 | entry->ce_resused); | ||
1074 | entry->ce_datav.iov_base = resp->cstate.statp; | ||
1075 | entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - | ||
1076 | (char *)page_address(rqstp->rq_respages[0])); | ||
1077 | /* Current request rpc header length*/ | ||
1078 | entry->ce_rpchdrlen = (char *)resp->cstate.statp - | ||
1079 | (char *)page_address(rqstp->rq_respages[0]); | ||
1080 | } | ||
1081 | |||
1082 | /* | ||
1083 | * We keep the rpc header, but take the nfs reply from the replycache. | ||
1084 | */ | ||
1085 | static int | ||
1086 | nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, | ||
1087 | struct nfsd4_cache_entry *entry) | ||
1088 | { | ||
1089 | struct svc_rqst *rqstp = resp->rqstp; | ||
1090 | struct kvec *resv = &resp->rqstp->rq_res.head[0]; | ||
1091 | int len; | ||
1092 | |||
1093 | /* Current request rpc header length*/ | ||
1094 | len = (char *)resp->cstate.statp - | ||
1095 | (char *)page_address(rqstp->rq_respages[0]); | ||
1096 | if (entry->ce_datav.iov_len + len > PAGE_SIZE) { | ||
1097 | dprintk("%s v41 cached reply too large (%Zd).\n", __func__, | ||
1098 | entry->ce_datav.iov_len); | ||
1099 | return 0; | ||
1100 | } | ||
1101 | /* copy the cached reply nfsd data past the current rpc header */ | ||
1102 | memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, | ||
1103 | entry->ce_datav.iov_len); | ||
1104 | resv->iov_len = len + entry->ce_datav.iov_len; | ||
1105 | return 1; | ||
1106 | } | ||
1107 | |||
1108 | /* | ||
1109 | * Keep the first page of the replay. Copy the NFSv4.1 data from the first | ||
1110 | * cached page. Replace any futher replay pages from the cache. | ||
1111 | */ | ||
1112 | __be32 | ||
1113 | nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, | ||
1114 | struct nfsd4_sequence *seq) | ||
1115 | { | ||
1116 | struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; | ||
1117 | __be32 status; | ||
1118 | |||
1119 | dprintk("--> %s entry %p\n", __func__, entry); | ||
1120 | |||
1121 | /* | ||
1122 | * If this is just the sequence operation, we did not keep | ||
1123 | * a page in the cache entry because we can just use the | ||
1124 | * slot info stored in struct nfsd4_sequence that was checked | ||
1125 | * against the slot in nfsd4_sequence(). | ||
1126 | * | ||
1127 | * This occurs when seq->cachethis is FALSE, or when the client | ||
1128 | * session inactivity timer fires and a solo sequence operation | ||
1129 | * is sent (lease renewal). | ||
1130 | */ | ||
1131 | if (seq && nfsd4_not_cached(resp)) { | ||
1132 | seq->maxslots = resp->cstate.session->se_fnumslots; | ||
1133 | return nfs_ok; | ||
1134 | } | ||
1135 | |||
1136 | if (!nfsd41_copy_replay_data(resp, entry)) { | ||
1137 | /* | ||
1138 | * Not enough room to use the replay rpc header, send the | ||
1139 | * cached header. Release all the allocated result pages. | ||
1140 | */ | ||
1141 | svc_free_res_pages(resp->rqstp); | ||
1142 | nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, | ||
1143 | entry->ce_resused); | ||
1144 | } else { | ||
1145 | /* Release all but the first allocated result page */ | ||
1146 | |||
1147 | resp->rqstp->rq_resused--; | ||
1148 | svc_free_res_pages(resp->rqstp); | ||
1149 | |||
1150 | nfsd4_copy_pages(&resp->rqstp->rq_respages[1], | ||
1151 | &entry->ce_respages[1], | ||
1152 | entry->ce_resused - 1); | ||
1153 | } | ||
1154 | |||
1155 | resp->rqstp->rq_resused = entry->ce_resused; | ||
1156 | resp->opcnt = entry->ce_opcnt; | ||
1157 | resp->cstate.iovlen = entry->ce_datav.iov_len + entry->ce_rpchdrlen; | ||
1158 | status = entry->ce_status; | ||
1159 | |||
1160 | return status; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * Set the exchange_id flags returned by the server. | ||
1165 | */ | ||
1166 | static void | ||
1167 | nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) | ||
1168 | { | ||
1169 | /* pNFS is not supported */ | ||
1170 | new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; | ||
1171 | |||
1172 | /* Referrals are supported, Migration is not. */ | ||
1173 | new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; | ||
1174 | |||
1175 | /* set the wire flags to return to client. */ | ||
1176 | clid->flags = new->cl_exchange_flags; | ||
1177 | } | ||
1178 | |||
1179 | __be32 | ||
1180 | nfsd4_exchange_id(struct svc_rqst *rqstp, | ||
1181 | struct nfsd4_compound_state *cstate, | ||
1182 | struct nfsd4_exchange_id *exid) | ||
1183 | { | ||
1184 | struct nfs4_client *unconf, *conf, *new; | ||
1185 | int status; | ||
1186 | unsigned int strhashval; | ||
1187 | char dname[HEXDIR_LEN]; | ||
1188 | nfs4_verifier verf = exid->verifier; | ||
1189 | u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; | ||
1190 | |||
1191 | dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " | ||
1192 | " ip_addr=%u flags %x, spa_how %d\n", | ||
1193 | __func__, rqstp, exid, exid->clname.len, exid->clname.data, | ||
1194 | ip_addr, exid->flags, exid->spa_how); | ||
1195 | |||
1196 | if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) | ||
1197 | return nfserr_inval; | ||
1198 | |||
1199 | /* Currently only support SP4_NONE */ | ||
1200 | switch (exid->spa_how) { | ||
1201 | case SP4_NONE: | ||
1202 | break; | ||
1203 | case SP4_SSV: | ||
1204 | return nfserr_encr_alg_unsupp; | ||
1205 | default: | ||
1206 | BUG(); /* checked by xdr code */ | ||
1207 | case SP4_MACH_CRED: | ||
1208 | return nfserr_serverfault; /* no excuse :-/ */ | ||
1209 | } | ||
1210 | |||
1211 | status = nfs4_make_rec_clidname(dname, &exid->clname); | ||
1212 | |||
1213 | if (status) | ||
1214 | goto error; | ||
1215 | |||
1216 | strhashval = clientstr_hashval(dname); | ||
1217 | |||
1218 | nfs4_lock_state(); | ||
1219 | status = nfs_ok; | ||
1220 | |||
1221 | conf = find_confirmed_client_by_str(dname, strhashval, true); | ||
1222 | if (conf) { | ||
1223 | if (!same_verf(&verf, &conf->cl_verifier)) { | ||
1224 | /* 18.35.4 case 8 */ | ||
1225 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1226 | status = nfserr_not_same; | ||
1227 | goto out; | ||
1228 | } | ||
1229 | /* Client reboot: destroy old state */ | ||
1230 | expire_client(conf); | ||
1231 | goto out_new; | ||
1232 | } | ||
1233 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { | ||
1234 | /* 18.35.4 case 9 */ | ||
1235 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1236 | status = nfserr_perm; | ||
1237 | goto out; | ||
1238 | } | ||
1239 | expire_client(conf); | ||
1240 | goto out_new; | ||
1241 | } | ||
1242 | if (ip_addr != conf->cl_addr && | ||
1243 | !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) { | ||
1244 | /* Client collision. 18.35.4 case 3 */ | ||
1245 | status = nfserr_clid_inuse; | ||
1246 | goto out; | ||
1247 | } | ||
1248 | /* | ||
1249 | * Set bit when the owner id and verifier map to an already | ||
1250 | * confirmed client id (18.35.3). | ||
1251 | */ | ||
1252 | exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; | ||
1253 | |||
1254 | /* | ||
1255 | * Falling into 18.35.4 case 2, possible router replay. | ||
1256 | * Leave confirmed record intact and return same result. | ||
1257 | */ | ||
1258 | copy_verf(conf, &verf); | ||
1259 | new = conf; | ||
1260 | goto out_copy; | ||
1261 | } else { | ||
1262 | /* 18.35.4 case 7 */ | ||
1263 | if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { | ||
1264 | status = nfserr_noent; | ||
1265 | goto out; | ||
1266 | } | ||
1267 | } | ||
1268 | |||
1269 | unconf = find_unconfirmed_client_by_str(dname, strhashval, true); | ||
1270 | if (unconf) { | ||
1271 | /* | ||
1272 | * Possible retry or client restart. Per 18.35.4 case 4, | ||
1273 | * a new unconfirmed record should be generated regardless | ||
1274 | * of whether any properties have changed. | ||
1275 | */ | ||
1276 | expire_client(unconf); | ||
1277 | } | ||
1278 | |||
1279 | out_new: | ||
1280 | /* Normal case */ | ||
1281 | new = create_client(exid->clname, dname); | ||
1282 | if (new == NULL) { | ||
1283 | status = nfserr_resource; | ||
1284 | goto out; | ||
1285 | } | ||
1286 | |||
1287 | copy_verf(new, &verf); | ||
1288 | copy_cred(&new->cl_cred, &rqstp->rq_cred); | ||
1289 | new->cl_addr = ip_addr; | ||
1290 | gen_clid(new); | ||
1291 | gen_confirm(new); | ||
1292 | add_to_unconfirmed(new, strhashval); | ||
1293 | out_copy: | ||
1294 | exid->clientid.cl_boot = new->cl_clientid.cl_boot; | ||
1295 | exid->clientid.cl_id = new->cl_clientid.cl_id; | ||
1296 | |||
1297 | new->cl_slot.sl_seqid = 0; | ||
1298 | exid->seqid = 1; | ||
1299 | nfsd4_set_ex_flags(new, exid); | ||
1300 | |||
1301 | dprintk("nfsd4_exchange_id seqid %d flags %x\n", | ||
1302 | new->cl_slot.sl_seqid, new->cl_exchange_flags); | ||
1303 | status = nfs_ok; | ||
1304 | |||
1305 | out: | ||
1306 | nfs4_unlock_state(); | ||
1307 | error: | ||
1308 | dprintk("nfsd4_exchange_id returns %d\n", ntohl(status)); | ||
1309 | return status; | ||
1310 | } | ||
1311 | |||
1312 | static int | ||
1313 | check_slot_seqid(u32 seqid, struct nfsd4_slot *slot) | ||
1314 | { | ||
1315 | dprintk("%s enter. seqid %d slot->sl_seqid %d\n", __func__, seqid, | ||
1316 | slot->sl_seqid); | ||
1317 | |||
1318 | /* The slot is in use, and no response has been sent. */ | ||
1319 | if (slot->sl_inuse) { | ||
1320 | if (seqid == slot->sl_seqid) | ||
1321 | return nfserr_jukebox; | ||
1322 | else | ||
1323 | return nfserr_seq_misordered; | ||
1324 | } | ||
1325 | /* Normal */ | ||
1326 | if (likely(seqid == slot->sl_seqid + 1)) | ||
1327 | return nfs_ok; | ||
1328 | /* Replay */ | ||
1329 | if (seqid == slot->sl_seqid) | ||
1330 | return nfserr_replay_cache; | ||
1331 | /* Wraparound */ | ||
1332 | if (seqid == 1 && (slot->sl_seqid + 1) == 0) | ||
1333 | return nfs_ok; | ||
1334 | /* Misordered replay or misordered new request */ | ||
1335 | return nfserr_seq_misordered; | ||
1336 | } | ||
1337 | |||
1338 | __be32 | ||
1339 | nfsd4_create_session(struct svc_rqst *rqstp, | ||
1340 | struct nfsd4_compound_state *cstate, | ||
1341 | struct nfsd4_create_session *cr_ses) | ||
1342 | { | ||
1343 | u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; | ||
1344 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1345 | struct nfs4_client *conf, *unconf; | ||
1346 | struct nfsd4_slot *slot = NULL; | ||
1347 | int status = 0; | ||
1348 | |||
1349 | nfs4_lock_state(); | ||
1350 | unconf = find_unconfirmed_client(&cr_ses->clientid); | ||
1351 | conf = find_confirmed_client(&cr_ses->clientid); | ||
1352 | |||
1353 | if (conf) { | ||
1354 | slot = &conf->cl_slot; | ||
1355 | status = check_slot_seqid(cr_ses->seqid, slot); | ||
1356 | if (status == nfserr_replay_cache) { | ||
1357 | dprintk("Got a create_session replay! seqid= %d\n", | ||
1358 | slot->sl_seqid); | ||
1359 | cstate->slot = slot; | ||
1360 | cstate->status = status; | ||
1361 | /* Return the cached reply status */ | ||
1362 | status = nfsd4_replay_cache_entry(resp, NULL); | ||
1363 | goto out; | ||
1364 | } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { | ||
1365 | status = nfserr_seq_misordered; | ||
1366 | dprintk("Sequence misordered!\n"); | ||
1367 | dprintk("Expected seqid= %d but got seqid= %d\n", | ||
1368 | slot->sl_seqid, cr_ses->seqid); | ||
1369 | goto out; | ||
1370 | } | ||
1371 | conf->cl_slot.sl_seqid++; | ||
1372 | } else if (unconf) { | ||
1373 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || | ||
1374 | (ip_addr != unconf->cl_addr)) { | ||
1375 | status = nfserr_clid_inuse; | ||
1376 | goto out; | ||
1377 | } | ||
1378 | |||
1379 | slot = &unconf->cl_slot; | ||
1380 | status = check_slot_seqid(cr_ses->seqid, slot); | ||
1381 | if (status) { | ||
1382 | /* an unconfirmed replay returns misordered */ | ||
1383 | status = nfserr_seq_misordered; | ||
1384 | goto out; | ||
1385 | } | ||
1386 | |||
1387 | slot->sl_seqid++; /* from 0 to 1 */ | ||
1388 | move_to_confirmed(unconf); | ||
1389 | |||
1390 | /* | ||
1391 | * We do not support RDMA or persistent sessions | ||
1392 | */ | ||
1393 | cr_ses->flags &= ~SESSION4_PERSIST; | ||
1394 | cr_ses->flags &= ~SESSION4_RDMA; | ||
1395 | |||
1396 | conf = unconf; | ||
1397 | } else { | ||
1398 | status = nfserr_stale_clientid; | ||
1399 | goto out; | ||
1400 | } | ||
1401 | |||
1402 | status = alloc_init_session(rqstp, conf, cr_ses); | ||
1403 | if (status) | ||
1404 | goto out; | ||
1405 | |||
1406 | memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, | ||
1407 | NFS4_MAX_SESSIONID_LEN); | ||
1408 | cr_ses->seqid = slot->sl_seqid; | ||
1409 | |||
1410 | slot->sl_inuse = true; | ||
1411 | cstate->slot = slot; | ||
1412 | /* Ensure a page is used for the cache */ | ||
1413 | slot->sl_cache_entry.ce_cachethis = 1; | ||
1414 | out: | ||
1415 | nfs4_unlock_state(); | ||
1416 | dprintk("%s returns %d\n", __func__, ntohl(status)); | ||
1417 | return status; | ||
1418 | } | ||
1419 | |||
1420 | __be32 | ||
1421 | nfsd4_destroy_session(struct svc_rqst *r, | ||
1422 | struct nfsd4_compound_state *cstate, | ||
1423 | struct nfsd4_destroy_session *sessionid) | ||
1424 | { | ||
1425 | struct nfsd4_session *ses; | ||
1426 | u32 status = nfserr_badsession; | ||
1427 | |||
1428 | /* Notes: | ||
1429 | * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid | ||
1430 | * - Should we return nfserr_back_chan_busy if waiting for | ||
1431 | * callbacks on to-be-destroyed session? | ||
1432 | * - Do we need to clear any callback info from previous session? | ||
1433 | */ | ||
1434 | |||
1435 | dump_sessionid(__func__, &sessionid->sessionid); | ||
1436 | spin_lock(&sessionid_lock); | ||
1437 | ses = find_in_sessionid_hashtbl(&sessionid->sessionid); | ||
1438 | if (!ses) { | ||
1439 | spin_unlock(&sessionid_lock); | ||
1440 | goto out; | ||
1441 | } | ||
1442 | |||
1443 | unhash_session(ses); | ||
1444 | spin_unlock(&sessionid_lock); | ||
1445 | |||
1446 | /* wait for callbacks */ | ||
1447 | shutdown_callback_client(ses->se_client); | ||
1448 | nfsd4_put_session(ses); | ||
1449 | status = nfs_ok; | ||
1450 | out: | ||
1451 | dprintk("%s returns %d\n", __func__, ntohl(status)); | ||
1452 | return status; | ||
1453 | } | ||
1454 | |||
1455 | __be32 | ||
1456 | nfsd4_sequence(struct svc_rqst *rqstp, | ||
1457 | struct nfsd4_compound_state *cstate, | ||
1458 | struct nfsd4_sequence *seq) | ||
1459 | { | ||
1460 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1461 | struct nfsd4_session *session; | ||
1462 | struct nfsd4_slot *slot; | ||
1463 | int status; | ||
1464 | |||
1465 | if (resp->opcnt != 1) | ||
1466 | return nfserr_sequence_pos; | ||
1467 | |||
1468 | spin_lock(&sessionid_lock); | ||
1469 | status = nfserr_badsession; | ||
1470 | session = find_in_sessionid_hashtbl(&seq->sessionid); | ||
1471 | if (!session) | ||
1472 | goto out; | ||
1473 | |||
1474 | status = nfserr_badslot; | ||
1475 | if (seq->slotid >= session->se_fnumslots) | ||
1476 | goto out; | ||
1477 | |||
1478 | slot = &session->se_slots[seq->slotid]; | ||
1479 | dprintk("%s: slotid %d\n", __func__, seq->slotid); | ||
1480 | |||
1481 | status = check_slot_seqid(seq->seqid, slot); | ||
1482 | if (status == nfserr_replay_cache) { | ||
1483 | cstate->slot = slot; | ||
1484 | cstate->session = session; | ||
1485 | /* Return the cached reply status and set cstate->status | ||
1486 | * for nfsd4_svc_encode_compoundres processing */ | ||
1487 | status = nfsd4_replay_cache_entry(resp, seq); | ||
1488 | cstate->status = nfserr_replay_cache; | ||
1489 | goto replay_cache; | ||
1490 | } | ||
1491 | if (status) | ||
1492 | goto out; | ||
1493 | |||
1494 | /* Success! bump slot seqid */ | ||
1495 | slot->sl_inuse = true; | ||
1496 | slot->sl_seqid = seq->seqid; | ||
1497 | slot->sl_cache_entry.ce_cachethis = seq->cachethis; | ||
1498 | /* Always set the cache entry cachethis for solo sequence */ | ||
1499 | if (nfsd4_is_solo_sequence(resp)) | ||
1500 | slot->sl_cache_entry.ce_cachethis = 1; | ||
1501 | |||
1502 | cstate->slot = slot; | ||
1503 | cstate->session = session; | ||
1504 | |||
1505 | replay_cache: | ||
1506 | /* Renew the clientid on success and on replay. | ||
1507 | * Hold a session reference until done processing the compound: | ||
1508 | * nfsd4_put_session called only if the cstate slot is set. | ||
1509 | */ | ||
1510 | renew_client(session->se_client); | ||
1511 | nfsd4_get_session(session); | ||
1512 | out: | ||
1513 | spin_unlock(&sessionid_lock); | ||
1514 | dprintk("%s: return %d\n", __func__, ntohl(status)); | ||
1515 | return status; | ||
1516 | } | ||
1517 | |||
688 | __be32 | 1518 | __be32 |
689 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 1519 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
690 | struct nfsd4_setclientid *setclid) | 1520 | struct nfsd4_setclientid *setclid) |
@@ -716,14 +1546,13 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
716 | strhashval = clientstr_hashval(dname); | 1546 | strhashval = clientstr_hashval(dname); |
717 | 1547 | ||
718 | nfs4_lock_state(); | 1548 | nfs4_lock_state(); |
719 | conf = find_confirmed_client_by_str(dname, strhashval); | 1549 | conf = find_confirmed_client_by_str(dname, strhashval, false); |
720 | if (conf) { | 1550 | if (conf) { |
721 | /* RFC 3530 14.2.33 CASE 0: */ | 1551 | /* RFC 3530 14.2.33 CASE 0: */ |
722 | status = nfserr_clid_inuse; | 1552 | status = nfserr_clid_inuse; |
723 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) | 1553 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { |
724 | || conf->cl_addr != sin->sin_addr.s_addr) { | 1554 | dprintk("NFSD: setclientid: string in use by client" |
725 | dprintk("NFSD: setclientid: string in use by clientat %pI4\n", | 1555 | " at %pI4\n", &conf->cl_addr); |
726 | &conf->cl_addr); | ||
727 | goto out; | 1556 | goto out; |
728 | } | 1557 | } |
729 | } | 1558 | } |
@@ -732,7 +1561,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
732 | * has a description of SETCLIENTID request processing consisting | 1561 | * has a description of SETCLIENTID request processing consisting |
733 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | 1562 | * of 5 bullet points, labeled as CASE0 - CASE4 below. |
734 | */ | 1563 | */ |
735 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 1564 | unconf = find_unconfirmed_client_by_str(dname, strhashval, false); |
736 | status = nfserr_resource; | 1565 | status = nfserr_resource; |
737 | if (!conf) { | 1566 | if (!conf) { |
738 | /* | 1567 | /* |
@@ -887,7 +1716,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
887 | unsigned int hash = | 1716 | unsigned int hash = |
888 | clientstr_hashval(unconf->cl_recdir); | 1717 | clientstr_hashval(unconf->cl_recdir); |
889 | conf = find_confirmed_client_by_str(unconf->cl_recdir, | 1718 | conf = find_confirmed_client_by_str(unconf->cl_recdir, |
890 | hash); | 1719 | hash, false); |
891 | if (conf) { | 1720 | if (conf) { |
892 | nfsd4_remove_clid_dir(conf); | 1721 | nfsd4_remove_clid_dir(conf); |
893 | expire_client(conf); | 1722 | expire_client(conf); |
@@ -923,11 +1752,13 @@ alloc_init_file(struct inode *ino) | |||
923 | 1752 | ||
924 | fp = kmem_cache_alloc(file_slab, GFP_KERNEL); | 1753 | fp = kmem_cache_alloc(file_slab, GFP_KERNEL); |
925 | if (fp) { | 1754 | if (fp) { |
926 | kref_init(&fp->fi_ref); | 1755 | atomic_set(&fp->fi_ref, 1); |
927 | INIT_LIST_HEAD(&fp->fi_hash); | 1756 | INIT_LIST_HEAD(&fp->fi_hash); |
928 | INIT_LIST_HEAD(&fp->fi_stateids); | 1757 | INIT_LIST_HEAD(&fp->fi_stateids); |
929 | INIT_LIST_HEAD(&fp->fi_delegations); | 1758 | INIT_LIST_HEAD(&fp->fi_delegations); |
1759 | spin_lock(&recall_lock); | ||
930 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); | 1760 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); |
1761 | spin_unlock(&recall_lock); | ||
931 | fp->fi_inode = igrab(ino); | 1762 | fp->fi_inode = igrab(ino); |
932 | fp->fi_id = current_fileid++; | 1763 | fp->fi_id = current_fileid++; |
933 | fp->fi_had_conflict = false; | 1764 | fp->fi_had_conflict = false; |
@@ -1037,48 +1868,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str | |||
1037 | return sop; | 1868 | return sop; |
1038 | } | 1869 | } |
1039 | 1870 | ||
1040 | static void | ||
1041 | release_stateid_lockowners(struct nfs4_stateid *open_stp) | ||
1042 | { | ||
1043 | struct nfs4_stateowner *lock_sop; | ||
1044 | |||
1045 | while (!list_empty(&open_stp->st_lockowners)) { | ||
1046 | lock_sop = list_entry(open_stp->st_lockowners.next, | ||
1047 | struct nfs4_stateowner, so_perstateid); | ||
1048 | /* list_del(&open_stp->st_lockowners); */ | ||
1049 | BUG_ON(lock_sop->so_is_open_owner); | ||
1050 | release_stateowner(lock_sop); | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | static void | ||
1055 | unhash_stateowner(struct nfs4_stateowner *sop) | ||
1056 | { | ||
1057 | struct nfs4_stateid *stp; | ||
1058 | |||
1059 | list_del(&sop->so_idhash); | ||
1060 | list_del(&sop->so_strhash); | ||
1061 | if (sop->so_is_open_owner) | ||
1062 | list_del(&sop->so_perclient); | ||
1063 | list_del(&sop->so_perstateid); | ||
1064 | while (!list_empty(&sop->so_stateids)) { | ||
1065 | stp = list_entry(sop->so_stateids.next, | ||
1066 | struct nfs4_stateid, st_perstateowner); | ||
1067 | if (sop->so_is_open_owner) | ||
1068 | release_stateid(stp, OPEN_STATE); | ||
1069 | else | ||
1070 | release_stateid(stp, LOCK_STATE); | ||
1071 | } | ||
1072 | } | ||
1073 | |||
1074 | static void | ||
1075 | release_stateowner(struct nfs4_stateowner *sop) | ||
1076 | { | ||
1077 | unhash_stateowner(sop); | ||
1078 | list_del(&sop->so_close_lru); | ||
1079 | nfs4_put_stateowner(sop); | ||
1080 | } | ||
1081 | |||
1082 | static inline void | 1871 | static inline void |
1083 | init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { | 1872 | init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { |
1084 | struct nfs4_stateowner *sop = open->op_stateowner; | 1873 | struct nfs4_stateowner *sop = open->op_stateowner; |
@@ -1100,30 +1889,13 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * | |||
1100 | stp->st_stateid.si_generation = 0; | 1889 | stp->st_stateid.si_generation = 0; |
1101 | stp->st_access_bmap = 0; | 1890 | stp->st_access_bmap = 0; |
1102 | stp->st_deny_bmap = 0; | 1891 | stp->st_deny_bmap = 0; |
1103 | __set_bit(open->op_share_access, &stp->st_access_bmap); | 1892 | __set_bit(open->op_share_access & ~NFS4_SHARE_WANT_MASK, |
1893 | &stp->st_access_bmap); | ||
1104 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 1894 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); |
1105 | stp->st_openstp = NULL; | 1895 | stp->st_openstp = NULL; |
1106 | } | 1896 | } |
1107 | 1897 | ||
1108 | static void | 1898 | static void |
1109 | release_stateid(struct nfs4_stateid *stp, int flags) | ||
1110 | { | ||
1111 | struct file *filp = stp->st_vfs_file; | ||
1112 | |||
1113 | list_del(&stp->st_hash); | ||
1114 | list_del(&stp->st_perfile); | ||
1115 | list_del(&stp->st_perstateowner); | ||
1116 | if (flags & OPEN_STATE) { | ||
1117 | release_stateid_lockowners(stp); | ||
1118 | stp->st_vfs_file = NULL; | ||
1119 | nfsd_close(filp); | ||
1120 | } else if (flags & LOCK_STATE) | ||
1121 | locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); | ||
1122 | put_nfs4_file(stp->st_file); | ||
1123 | kmem_cache_free(stateid_slab, stp); | ||
1124 | } | ||
1125 | |||
1126 | static void | ||
1127 | move_to_close_lru(struct nfs4_stateowner *sop) | 1899 | move_to_close_lru(struct nfs4_stateowner *sop) |
1128 | { | 1900 | { |
1129 | dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); | 1901 | dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); |
@@ -1160,20 +1932,33 @@ find_file(struct inode *ino) | |||
1160 | unsigned int hashval = file_hashval(ino); | 1932 | unsigned int hashval = file_hashval(ino); |
1161 | struct nfs4_file *fp; | 1933 | struct nfs4_file *fp; |
1162 | 1934 | ||
1935 | spin_lock(&recall_lock); | ||
1163 | list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { | 1936 | list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { |
1164 | if (fp->fi_inode == ino) { | 1937 | if (fp->fi_inode == ino) { |
1165 | get_nfs4_file(fp); | 1938 | get_nfs4_file(fp); |
1939 | spin_unlock(&recall_lock); | ||
1166 | return fp; | 1940 | return fp; |
1167 | } | 1941 | } |
1168 | } | 1942 | } |
1943 | spin_unlock(&recall_lock); | ||
1169 | return NULL; | 1944 | return NULL; |
1170 | } | 1945 | } |
1171 | 1946 | ||
1172 | static inline int access_valid(u32 x) | 1947 | static inline int access_valid(u32 x, u32 minorversion) |
1173 | { | 1948 | { |
1174 | if (x < NFS4_SHARE_ACCESS_READ) | 1949 | if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ) |
1175 | return 0; | 1950 | return 0; |
1176 | if (x > NFS4_SHARE_ACCESS_BOTH) | 1951 | if ((x & NFS4_SHARE_ACCESS_MASK) > NFS4_SHARE_ACCESS_BOTH) |
1952 | return 0; | ||
1953 | x &= ~NFS4_SHARE_ACCESS_MASK; | ||
1954 | if (minorversion && x) { | ||
1955 | if ((x & NFS4_SHARE_WANT_MASK) > NFS4_SHARE_WANT_CANCEL) | ||
1956 | return 0; | ||
1957 | if ((x & NFS4_SHARE_WHEN_MASK) > NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED) | ||
1958 | return 0; | ||
1959 | x &= ~(NFS4_SHARE_WANT_MASK | NFS4_SHARE_WHEN_MASK); | ||
1960 | } | ||
1961 | if (x) | ||
1177 | return 0; | 1962 | return 0; |
1178 | return 1; | 1963 | return 1; |
1179 | } | 1964 | } |
@@ -1409,7 +2194,8 @@ static struct lock_manager_operations nfsd_lease_mng_ops = { | |||
1409 | 2194 | ||
1410 | 2195 | ||
1411 | __be32 | 2196 | __be32 |
1412 | nfsd4_process_open1(struct nfsd4_open *open) | 2197 | nfsd4_process_open1(struct nfsd4_compound_state *cstate, |
2198 | struct nfsd4_open *open) | ||
1413 | { | 2199 | { |
1414 | clientid_t *clientid = &open->op_clientid; | 2200 | clientid_t *clientid = &open->op_clientid; |
1415 | struct nfs4_client *clp = NULL; | 2201 | struct nfs4_client *clp = NULL; |
@@ -1432,10 +2218,13 @@ nfsd4_process_open1(struct nfsd4_open *open) | |||
1432 | return nfserr_expired; | 2218 | return nfserr_expired; |
1433 | goto renew; | 2219 | goto renew; |
1434 | } | 2220 | } |
2221 | /* When sessions are used, skip open sequenceid processing */ | ||
2222 | if (nfsd4_has_session(cstate)) | ||
2223 | goto renew; | ||
1435 | if (!sop->so_confirmed) { | 2224 | if (!sop->so_confirmed) { |
1436 | /* Replace unconfirmed owners without checking for replay. */ | 2225 | /* Replace unconfirmed owners without checking for replay. */ |
1437 | clp = sop->so_client; | 2226 | clp = sop->so_client; |
1438 | release_stateowner(sop); | 2227 | release_openowner(sop); |
1439 | open->op_stateowner = NULL; | 2228 | open->op_stateowner = NULL; |
1440 | goto renew; | 2229 | goto renew; |
1441 | } | 2230 | } |
@@ -1709,6 +2498,7 @@ out: | |||
1709 | __be32 | 2498 | __be32 |
1710 | nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) | 2499 | nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) |
1711 | { | 2500 | { |
2501 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1712 | struct nfs4_file *fp = NULL; | 2502 | struct nfs4_file *fp = NULL; |
1713 | struct inode *ino = current_fh->fh_dentry->d_inode; | 2503 | struct inode *ino = current_fh->fh_dentry->d_inode; |
1714 | struct nfs4_stateid *stp = NULL; | 2504 | struct nfs4_stateid *stp = NULL; |
@@ -1716,7 +2506,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
1716 | __be32 status; | 2506 | __be32 status; |
1717 | 2507 | ||
1718 | status = nfserr_inval; | 2508 | status = nfserr_inval; |
1719 | if (!access_valid(open->op_share_access) | 2509 | if (!access_valid(open->op_share_access, resp->cstate.minorversion) |
1720 | || !deny_valid(open->op_share_deny)) | 2510 | || !deny_valid(open->op_share_deny)) |
1721 | goto out; | 2511 | goto out; |
1722 | /* | 2512 | /* |
@@ -1764,12 +2554,17 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
1764 | init_stateid(stp, fp, open); | 2554 | init_stateid(stp, fp, open); |
1765 | status = nfsd4_truncate(rqstp, current_fh, open); | 2555 | status = nfsd4_truncate(rqstp, current_fh, open); |
1766 | if (status) { | 2556 | if (status) { |
1767 | release_stateid(stp, OPEN_STATE); | 2557 | release_open_stateid(stp); |
1768 | goto out; | 2558 | goto out; |
1769 | } | 2559 | } |
2560 | if (nfsd4_has_session(&resp->cstate)) | ||
2561 | update_stateid(&stp->st_stateid); | ||
1770 | } | 2562 | } |
1771 | memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); | 2563 | memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); |
1772 | 2564 | ||
2565 | if (nfsd4_has_session(&resp->cstate)) | ||
2566 | open->op_stateowner->so_confirmed = 1; | ||
2567 | |||
1773 | /* | 2568 | /* |
1774 | * Attempt to hand out a delegation. No error return, because the | 2569 | * Attempt to hand out a delegation. No error return, because the |
1775 | * OPEN succeeds even if we fail. | 2570 | * OPEN succeeds even if we fail. |
@@ -1790,7 +2585,8 @@ out: | |||
1790 | * To finish the open response, we just need to set the rflags. | 2585 | * To finish the open response, we just need to set the rflags. |
1791 | */ | 2586 | */ |
1792 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; | 2587 | open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; |
1793 | if (!open->op_stateowner->so_confirmed) | 2588 | if (!open->op_stateowner->so_confirmed && |
2589 | !nfsd4_has_session(&resp->cstate)) | ||
1794 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; | 2590 | open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; |
1795 | 2591 | ||
1796 | return status; | 2592 | return status; |
@@ -1898,7 +2694,7 @@ nfs4_laundromat(void) | |||
1898 | } | 2694 | } |
1899 | dprintk("NFSD: purging unused open stateowner (so_id %d)\n", | 2695 | dprintk("NFSD: purging unused open stateowner (so_id %d)\n", |
1900 | sop->so_id); | 2696 | sop->so_id); |
1901 | release_stateowner(sop); | 2697 | release_openowner(sop); |
1902 | } | 2698 | } |
1903 | if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) | 2699 | if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) |
1904 | clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; | 2700 | clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; |
@@ -1983,10 +2779,7 @@ out: | |||
1983 | static inline __be32 | 2779 | static inline __be32 |
1984 | check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | 2780 | check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) |
1985 | { | 2781 | { |
1986 | /* Trying to call delegreturn with a special stateid? Yuch: */ | 2782 | if (ONE_STATEID(stateid) && (flags & RD_STATE)) |
1987 | if (!(flags & (RD_STATE | WR_STATE))) | ||
1988 | return nfserr_bad_stateid; | ||
1989 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) | ||
1990 | return nfs_ok; | 2783 | return nfs_ok; |
1991 | else if (locks_in_grace()) { | 2784 | else if (locks_in_grace()) { |
1992 | /* Answer in remaining cases depends on existance of | 2785 | /* Answer in remaining cases depends on existance of |
@@ -2005,14 +2798,20 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
2005 | * that are not able to provide mandatory locking. | 2798 | * that are not able to provide mandatory locking. |
2006 | */ | 2799 | */ |
2007 | static inline int | 2800 | static inline int |
2008 | io_during_grace_disallowed(struct inode *inode, int flags) | 2801 | grace_disallows_io(struct inode *inode) |
2009 | { | 2802 | { |
2010 | return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) | 2803 | return locks_in_grace() && mandatory_lock(inode); |
2011 | && mandatory_lock(inode); | ||
2012 | } | 2804 | } |
2013 | 2805 | ||
2014 | static int check_stateid_generation(stateid_t *in, stateid_t *ref) | 2806 | static int check_stateid_generation(stateid_t *in, stateid_t *ref, int flags) |
2015 | { | 2807 | { |
2808 | /* | ||
2809 | * When sessions are used the stateid generation number is ignored | ||
2810 | * when it is zero. | ||
2811 | */ | ||
2812 | if ((flags & HAS_SESSION) && in->si_generation == 0) | ||
2813 | goto out; | ||
2814 | |||
2016 | /* If the client sends us a stateid from the future, it's buggy: */ | 2815 | /* If the client sends us a stateid from the future, it's buggy: */ |
2017 | if (in->si_generation > ref->si_generation) | 2816 | if (in->si_generation > ref->si_generation) |
2018 | return nfserr_bad_stateid; | 2817 | return nfserr_bad_stateid; |
@@ -2028,74 +2827,77 @@ static int check_stateid_generation(stateid_t *in, stateid_t *ref) | |||
2028 | */ | 2827 | */ |
2029 | if (in->si_generation < ref->si_generation) | 2828 | if (in->si_generation < ref->si_generation) |
2030 | return nfserr_old_stateid; | 2829 | return nfserr_old_stateid; |
2830 | out: | ||
2031 | return nfs_ok; | 2831 | return nfs_ok; |
2032 | } | 2832 | } |
2033 | 2833 | ||
2834 | static int is_delegation_stateid(stateid_t *stateid) | ||
2835 | { | ||
2836 | return stateid->si_fileid == 0; | ||
2837 | } | ||
2838 | |||
2034 | /* | 2839 | /* |
2035 | * Checks for stateid operations | 2840 | * Checks for stateid operations |
2036 | */ | 2841 | */ |
2037 | __be32 | 2842 | __be32 |
2038 | nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) | 2843 | nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, |
2844 | stateid_t *stateid, int flags, struct file **filpp) | ||
2039 | { | 2845 | { |
2040 | struct nfs4_stateid *stp = NULL; | 2846 | struct nfs4_stateid *stp = NULL; |
2041 | struct nfs4_delegation *dp = NULL; | 2847 | struct nfs4_delegation *dp = NULL; |
2042 | stateid_t *stidp; | 2848 | struct svc_fh *current_fh = &cstate->current_fh; |
2043 | struct inode *ino = current_fh->fh_dentry->d_inode; | 2849 | struct inode *ino = current_fh->fh_dentry->d_inode; |
2044 | __be32 status; | 2850 | __be32 status; |
2045 | 2851 | ||
2046 | dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", | ||
2047 | stateid->si_boot, stateid->si_stateownerid, | ||
2048 | stateid->si_fileid, stateid->si_generation); | ||
2049 | if (filpp) | 2852 | if (filpp) |
2050 | *filpp = NULL; | 2853 | *filpp = NULL; |
2051 | 2854 | ||
2052 | if (io_during_grace_disallowed(ino, flags)) | 2855 | if (grace_disallows_io(ino)) |
2053 | return nfserr_grace; | 2856 | return nfserr_grace; |
2054 | 2857 | ||
2858 | if (nfsd4_has_session(cstate)) | ||
2859 | flags |= HAS_SESSION; | ||
2860 | |||
2055 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) | 2861 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) |
2056 | return check_special_stateids(current_fh, stateid, flags); | 2862 | return check_special_stateids(current_fh, stateid, flags); |
2057 | 2863 | ||
2058 | /* STALE STATEID */ | ||
2059 | status = nfserr_stale_stateid; | 2864 | status = nfserr_stale_stateid; |
2060 | if (STALE_STATEID(stateid)) | 2865 | if (STALE_STATEID(stateid)) |
2061 | goto out; | 2866 | goto out; |
2062 | 2867 | ||
2063 | /* BAD STATEID */ | ||
2064 | status = nfserr_bad_stateid; | 2868 | status = nfserr_bad_stateid; |
2065 | if (!stateid->si_fileid) { /* delegation stateid */ | 2869 | if (is_delegation_stateid(stateid)) { |
2066 | if(!(dp = find_delegation_stateid(ino, stateid))) { | 2870 | dp = find_delegation_stateid(ino, stateid); |
2067 | dprintk("NFSD: delegation stateid not found\n"); | 2871 | if (!dp) |
2068 | goto out; | 2872 | goto out; |
2069 | } | 2873 | status = check_stateid_generation(stateid, &dp->dl_stateid, |
2070 | stidp = &dp->dl_stateid; | 2874 | flags); |
2875 | if (status) | ||
2876 | goto out; | ||
2877 | status = nfs4_check_delegmode(dp, flags); | ||
2878 | if (status) | ||
2879 | goto out; | ||
2880 | renew_client(dp->dl_client); | ||
2881 | if (filpp) | ||
2882 | *filpp = dp->dl_vfs_file; | ||
2071 | } else { /* open or lock stateid */ | 2883 | } else { /* open or lock stateid */ |
2072 | if (!(stp = find_stateid(stateid, flags))) { | 2884 | stp = find_stateid(stateid, flags); |
2073 | dprintk("NFSD: open or lock stateid not found\n"); | 2885 | if (!stp) |
2074 | goto out; | 2886 | goto out; |
2075 | } | 2887 | if (nfs4_check_fh(current_fh, stp)) |
2076 | if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) | ||
2077 | goto out; | 2888 | goto out; |
2078 | if (!stp->st_stateowner->so_confirmed) | 2889 | if (!stp->st_stateowner->so_confirmed) |
2079 | goto out; | 2890 | goto out; |
2080 | stidp = &stp->st_stateid; | 2891 | status = check_stateid_generation(stateid, &stp->st_stateid, |
2081 | } | 2892 | flags); |
2082 | status = check_stateid_generation(stateid, stidp); | 2893 | if (status) |
2083 | if (status) | 2894 | goto out; |
2084 | goto out; | 2895 | status = nfs4_check_openmode(stp, flags); |
2085 | if (stp) { | 2896 | if (status) |
2086 | if ((status = nfs4_check_openmode(stp,flags))) | ||
2087 | goto out; | 2897 | goto out; |
2088 | renew_client(stp->st_stateowner->so_client); | 2898 | renew_client(stp->st_stateowner->so_client); |
2089 | if (filpp) | 2899 | if (filpp) |
2090 | *filpp = stp->st_vfs_file; | 2900 | *filpp = stp->st_vfs_file; |
2091 | } else { | ||
2092 | if ((status = nfs4_check_delegmode(dp, flags))) | ||
2093 | goto out; | ||
2094 | renew_client(dp->dl_client); | ||
2095 | if (flags & DELEG_RET) | ||
2096 | unhash_delegation(dp); | ||
2097 | if (filpp) | ||
2098 | *filpp = dp->dl_vfs_file; | ||
2099 | } | 2901 | } |
2100 | status = nfs_ok; | 2902 | status = nfs_ok; |
2101 | out: | 2903 | out: |
@@ -2113,10 +2915,14 @@ setlkflg (int type) | |||
2113 | * Checks for sequence id mutating operations. | 2915 | * Checks for sequence id mutating operations. |
2114 | */ | 2916 | */ |
2115 | static __be32 | 2917 | static __be32 |
2116 | nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) | 2918 | nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, |
2919 | stateid_t *stateid, int flags, | ||
2920 | struct nfs4_stateowner **sopp, | ||
2921 | struct nfs4_stateid **stpp, struct nfsd4_lock *lock) | ||
2117 | { | 2922 | { |
2118 | struct nfs4_stateid *stp; | 2923 | struct nfs4_stateid *stp; |
2119 | struct nfs4_stateowner *sop; | 2924 | struct nfs4_stateowner *sop; |
2925 | struct svc_fh *current_fh = &cstate->current_fh; | ||
2120 | __be32 status; | 2926 | __be32 status; |
2121 | 2927 | ||
2122 | dprintk("NFSD: preprocess_seqid_op: seqid=%d " | 2928 | dprintk("NFSD: preprocess_seqid_op: seqid=%d " |
@@ -2134,6 +2940,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2134 | 2940 | ||
2135 | if (STALE_STATEID(stateid)) | 2941 | if (STALE_STATEID(stateid)) |
2136 | return nfserr_stale_stateid; | 2942 | return nfserr_stale_stateid; |
2943 | |||
2944 | if (nfsd4_has_session(cstate)) | ||
2945 | flags |= HAS_SESSION; | ||
2946 | |||
2137 | /* | 2947 | /* |
2138 | * We return BAD_STATEID if filehandle doesn't match stateid, | 2948 | * We return BAD_STATEID if filehandle doesn't match stateid, |
2139 | * the confirmed flag is incorrecly set, or the generation | 2949 | * the confirmed flag is incorrecly set, or the generation |
@@ -2166,8 +2976,9 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2166 | if (lock->lk_is_new) { | 2976 | if (lock->lk_is_new) { |
2167 | if (!sop->so_is_open_owner) | 2977 | if (!sop->so_is_open_owner) |
2168 | return nfserr_bad_stateid; | 2978 | return nfserr_bad_stateid; |
2169 | if (!same_clid(&clp->cl_clientid, lockclid)) | 2979 | if (!(flags & HAS_SESSION) && |
2170 | return nfserr_bad_stateid; | 2980 | !same_clid(&clp->cl_clientid, lockclid)) |
2981 | return nfserr_bad_stateid; | ||
2171 | /* stp is the open stateid */ | 2982 | /* stp is the open stateid */ |
2172 | status = nfs4_check_openmode(stp, lkflg); | 2983 | status = nfs4_check_openmode(stp, lkflg); |
2173 | if (status) | 2984 | if (status) |
@@ -2190,7 +3001,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2190 | * For the moment, we ignore the possibility of | 3001 | * For the moment, we ignore the possibility of |
2191 | * generation number wraparound. | 3002 | * generation number wraparound. |
2192 | */ | 3003 | */ |
2193 | if (seqid != sop->so_seqid) | 3004 | if (!(flags & HAS_SESSION) && seqid != sop->so_seqid) |
2194 | goto check_replay; | 3005 | goto check_replay; |
2195 | 3006 | ||
2196 | if (sop->so_confirmed && flags & CONFIRM) { | 3007 | if (sop->so_confirmed && flags & CONFIRM) { |
@@ -2203,7 +3014,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2203 | " confirmed yet!\n"); | 3014 | " confirmed yet!\n"); |
2204 | return nfserr_bad_stateid; | 3015 | return nfserr_bad_stateid; |
2205 | } | 3016 | } |
2206 | status = check_stateid_generation(stateid, &stp->st_stateid); | 3017 | status = check_stateid_generation(stateid, &stp->st_stateid, flags); |
2207 | if (status) | 3018 | if (status) |
2208 | return status; | 3019 | return status; |
2209 | renew_client(sop->so_client); | 3020 | renew_client(sop->so_client); |
@@ -2239,7 +3050,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2239 | 3050 | ||
2240 | nfs4_lock_state(); | 3051 | nfs4_lock_state(); |
2241 | 3052 | ||
2242 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3053 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2243 | oc->oc_seqid, &oc->oc_req_stateid, | 3054 | oc->oc_seqid, &oc->oc_req_stateid, |
2244 | CONFIRM | OPEN_STATE, | 3055 | CONFIRM | OPEN_STATE, |
2245 | &oc->oc_stateowner, &stp, NULL))) | 3056 | &oc->oc_stateowner, &stp, NULL))) |
@@ -2304,12 +3115,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, | |||
2304 | (int)cstate->current_fh.fh_dentry->d_name.len, | 3115 | (int)cstate->current_fh.fh_dentry->d_name.len, |
2305 | cstate->current_fh.fh_dentry->d_name.name); | 3116 | cstate->current_fh.fh_dentry->d_name.name); |
2306 | 3117 | ||
2307 | if (!access_valid(od->od_share_access) | 3118 | if (!access_valid(od->od_share_access, cstate->minorversion) |
2308 | || !deny_valid(od->od_share_deny)) | 3119 | || !deny_valid(od->od_share_deny)) |
2309 | return nfserr_inval; | 3120 | return nfserr_inval; |
2310 | 3121 | ||
2311 | nfs4_lock_state(); | 3122 | nfs4_lock_state(); |
2312 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3123 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2313 | od->od_seqid, | 3124 | od->od_seqid, |
2314 | &od->od_stateid, | 3125 | &od->od_stateid, |
2315 | OPEN_STATE, | 3126 | OPEN_STATE, |
@@ -2362,7 +3173,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2362 | 3173 | ||
2363 | nfs4_lock_state(); | 3174 | nfs4_lock_state(); |
2364 | /* check close_lru for replay */ | 3175 | /* check close_lru for replay */ |
2365 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3176 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2366 | close->cl_seqid, | 3177 | close->cl_seqid, |
2367 | &close->cl_stateid, | 3178 | &close->cl_stateid, |
2368 | OPEN_STATE | CLOSE_STATE, | 3179 | OPEN_STATE | CLOSE_STATE, |
@@ -2373,7 +3184,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2373 | memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); | 3184 | memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); |
2374 | 3185 | ||
2375 | /* release_stateid() calls nfsd_close() if needed */ | 3186 | /* release_stateid() calls nfsd_close() if needed */ |
2376 | release_stateid(stp, OPEN_STATE); | 3187 | release_open_stateid(stp); |
2377 | 3188 | ||
2378 | /* place unused nfs4_stateowners on so_close_lru list to be | 3189 | /* place unused nfs4_stateowners on so_close_lru list to be |
2379 | * released by the laundromat service after the lease period | 3190 | * released by the laundromat service after the lease period |
@@ -2394,16 +3205,40 @@ __be32 | |||
2394 | nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 3205 | nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
2395 | struct nfsd4_delegreturn *dr) | 3206 | struct nfsd4_delegreturn *dr) |
2396 | { | 3207 | { |
3208 | struct nfs4_delegation *dp; | ||
3209 | stateid_t *stateid = &dr->dr_stateid; | ||
3210 | struct inode *inode; | ||
2397 | __be32 status; | 3211 | __be32 status; |
3212 | int flags = 0; | ||
2398 | 3213 | ||
2399 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) | 3214 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) |
2400 | goto out; | 3215 | return status; |
3216 | inode = cstate->current_fh.fh_dentry->d_inode; | ||
2401 | 3217 | ||
3218 | if (nfsd4_has_session(cstate)) | ||
3219 | flags |= HAS_SESSION; | ||
2402 | nfs4_lock_state(); | 3220 | nfs4_lock_state(); |
2403 | status = nfs4_preprocess_stateid_op(&cstate->current_fh, | 3221 | status = nfserr_bad_stateid; |
2404 | &dr->dr_stateid, DELEG_RET, NULL); | 3222 | if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) |
2405 | nfs4_unlock_state(); | 3223 | goto out; |
3224 | status = nfserr_stale_stateid; | ||
3225 | if (STALE_STATEID(stateid)) | ||
3226 | goto out; | ||
3227 | status = nfserr_bad_stateid; | ||
3228 | if (!is_delegation_stateid(stateid)) | ||
3229 | goto out; | ||
3230 | dp = find_delegation_stateid(inode, stateid); | ||
3231 | if (!dp) | ||
3232 | goto out; | ||
3233 | status = check_stateid_generation(stateid, &dp->dl_stateid, flags); | ||
3234 | if (status) | ||
3235 | goto out; | ||
3236 | renew_client(dp->dl_client); | ||
3237 | |||
3238 | unhash_delegation(dp); | ||
2406 | out: | 3239 | out: |
3240 | nfs4_unlock_state(); | ||
3241 | |||
2407 | return status; | 3242 | return status; |
2408 | } | 3243 | } |
2409 | 3244 | ||
@@ -2684,11 +3519,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2684 | struct nfs4_file *fp; | 3519 | struct nfs4_file *fp; |
2685 | 3520 | ||
2686 | status = nfserr_stale_clientid; | 3521 | status = nfserr_stale_clientid; |
2687 | if (STALE_CLIENTID(&lock->lk_new_clientid)) | 3522 | if (!nfsd4_has_session(cstate) && |
3523 | STALE_CLIENTID(&lock->lk_new_clientid)) | ||
2688 | goto out; | 3524 | goto out; |
2689 | 3525 | ||
2690 | /* validate and update open stateid and open seqid */ | 3526 | /* validate and update open stateid and open seqid */ |
2691 | status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3527 | status = nfs4_preprocess_seqid_op(cstate, |
2692 | lock->lk_new_open_seqid, | 3528 | lock->lk_new_open_seqid, |
2693 | &lock->lk_new_open_stateid, | 3529 | &lock->lk_new_open_stateid, |
2694 | OPEN_STATE, | 3530 | OPEN_STATE, |
@@ -2715,7 +3551,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2715 | goto out; | 3551 | goto out; |
2716 | } else { | 3552 | } else { |
2717 | /* lock (lock owner + lock stateid) already exists */ | 3553 | /* lock (lock owner + lock stateid) already exists */ |
2718 | status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3554 | status = nfs4_preprocess_seqid_op(cstate, |
2719 | lock->lk_old_lock_seqid, | 3555 | lock->lk_old_lock_seqid, |
2720 | &lock->lk_old_lock_stateid, | 3556 | &lock->lk_old_lock_stateid, |
2721 | LOCK_STATE, | 3557 | LOCK_STATE, |
@@ -2788,7 +3624,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2788 | } | 3624 | } |
2789 | out: | 3625 | out: |
2790 | if (status && lock->lk_is_new && lock_sop) | 3626 | if (status && lock->lk_is_new && lock_sop) |
2791 | release_stateowner(lock_sop); | 3627 | release_lockowner(lock_sop); |
2792 | if (lock->lk_replay_owner) { | 3628 | if (lock->lk_replay_owner) { |
2793 | nfs4_get_stateowner(lock->lk_replay_owner); | 3629 | nfs4_get_stateowner(lock->lk_replay_owner); |
2794 | cstate->replay_owner = lock->lk_replay_owner; | 3630 | cstate->replay_owner = lock->lk_replay_owner; |
@@ -2838,7 +3674,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2838 | nfs4_lock_state(); | 3674 | nfs4_lock_state(); |
2839 | 3675 | ||
2840 | status = nfserr_stale_clientid; | 3676 | status = nfserr_stale_clientid; |
2841 | if (STALE_CLIENTID(&lockt->lt_clientid)) | 3677 | if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid)) |
2842 | goto out; | 3678 | goto out; |
2843 | 3679 | ||
2844 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { | 3680 | if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) { |
@@ -2911,7 +3747,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
2911 | 3747 | ||
2912 | nfs4_lock_state(); | 3748 | nfs4_lock_state(); |
2913 | 3749 | ||
2914 | if ((status = nfs4_preprocess_seqid_op(&cstate->current_fh, | 3750 | if ((status = nfs4_preprocess_seqid_op(cstate, |
2915 | locku->lu_seqid, | 3751 | locku->lu_seqid, |
2916 | &locku->lu_stateid, | 3752 | &locku->lu_stateid, |
2917 | LOCK_STATE, | 3753 | LOCK_STATE, |
@@ -3037,7 +3873,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
3037 | /* unhash_stateowner deletes so_perclient only | 3873 | /* unhash_stateowner deletes so_perclient only |
3038 | * for openowners. */ | 3874 | * for openowners. */ |
3039 | list_del(&sop->so_perclient); | 3875 | list_del(&sop->so_perclient); |
3040 | release_stateowner(sop); | 3876 | release_lockowner(sop); |
3041 | } | 3877 | } |
3042 | out: | 3878 | out: |
3043 | nfs4_unlock_state(); | 3879 | nfs4_unlock_state(); |
@@ -3051,12 +3887,12 @@ alloc_reclaim(void) | |||
3051 | } | 3887 | } |
3052 | 3888 | ||
3053 | int | 3889 | int |
3054 | nfs4_has_reclaimed_state(const char *name) | 3890 | nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) |
3055 | { | 3891 | { |
3056 | unsigned int strhashval = clientstr_hashval(name); | 3892 | unsigned int strhashval = clientstr_hashval(name); |
3057 | struct nfs4_client *clp; | 3893 | struct nfs4_client *clp; |
3058 | 3894 | ||
3059 | clp = find_confirmed_client_by_str(name, strhashval); | 3895 | clp = find_confirmed_client_by_str(name, strhashval, use_exchange_id); |
3060 | return clp ? 1 : 0; | 3896 | return clp ? 1 : 0; |
3061 | } | 3897 | } |
3062 | 3898 | ||
@@ -3153,6 +3989,8 @@ nfs4_state_init(void) | |||
3153 | INIT_LIST_HEAD(&unconf_str_hashtbl[i]); | 3989 | INIT_LIST_HEAD(&unconf_str_hashtbl[i]); |
3154 | INIT_LIST_HEAD(&unconf_id_hashtbl[i]); | 3990 | INIT_LIST_HEAD(&unconf_id_hashtbl[i]); |
3155 | } | 3991 | } |
3992 | for (i = 0; i < SESSION_HASH_SIZE; i++) | ||
3993 | INIT_LIST_HEAD(&sessionid_hashtbl[i]); | ||
3156 | for (i = 0; i < FILE_HASH_SIZE; i++) { | 3994 | for (i = 0; i < FILE_HASH_SIZE; i++) { |
3157 | INIT_LIST_HEAD(&file_hashtbl[i]); | 3995 | INIT_LIST_HEAD(&file_hashtbl[i]); |
3158 | } | 3996 | } |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9250067943d8..b820c311931c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/fs.h> | 45 | #include <linux/fs.h> |
46 | #include <linux/namei.h> | 46 | #include <linux/namei.h> |
47 | #include <linux/vfs.h> | 47 | #include <linux/vfs.h> |
48 | #include <linux/utsname.h> | ||
48 | #include <linux/sunrpc/xdr.h> | 49 | #include <linux/sunrpc/xdr.h> |
49 | #include <linux/sunrpc/svc.h> | 50 | #include <linux/sunrpc/svc.h> |
50 | #include <linux/sunrpc/clnt.h> | 51 | #include <linux/sunrpc/clnt.h> |
@@ -188,6 +189,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) | |||
188 | return p; | 189 | return p; |
189 | } | 190 | } |
190 | 191 | ||
192 | static int zero_clientid(clientid_t *clid) | ||
193 | { | ||
194 | return (clid->cl_boot == 0) && (clid->cl_id == 0); | ||
195 | } | ||
196 | |||
191 | static int | 197 | static int |
192 | defer_free(struct nfsd4_compoundargs *argp, | 198 | defer_free(struct nfsd4_compoundargs *argp, |
193 | void (*release)(const void *), void *p) | 199 | void (*release)(const void *), void *p) |
@@ -230,6 +236,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | |||
230 | 236 | ||
231 | bmval[0] = 0; | 237 | bmval[0] = 0; |
232 | bmval[1] = 0; | 238 | bmval[1] = 0; |
239 | bmval[2] = 0; | ||
233 | 240 | ||
234 | READ_BUF(4); | 241 | READ_BUF(4); |
235 | READ32(bmlen); | 242 | READ32(bmlen); |
@@ -241,13 +248,27 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) | |||
241 | READ32(bmval[0]); | 248 | READ32(bmval[0]); |
242 | if (bmlen > 1) | 249 | if (bmlen > 1) |
243 | READ32(bmval[1]); | 250 | READ32(bmval[1]); |
251 | if (bmlen > 2) | ||
252 | READ32(bmval[2]); | ||
244 | 253 | ||
245 | DECODE_TAIL; | 254 | DECODE_TAIL; |
246 | } | 255 | } |
247 | 256 | ||
257 | static u32 nfsd_attrmask[] = { | ||
258 | NFSD_WRITEABLE_ATTRS_WORD0, | ||
259 | NFSD_WRITEABLE_ATTRS_WORD1, | ||
260 | NFSD_WRITEABLE_ATTRS_WORD2 | ||
261 | }; | ||
262 | |||
263 | static u32 nfsd41_ex_attrmask[] = { | ||
264 | NFSD_SUPPATTR_EXCLCREAT_WORD0, | ||
265 | NFSD_SUPPATTR_EXCLCREAT_WORD1, | ||
266 | NFSD_SUPPATTR_EXCLCREAT_WORD2 | ||
267 | }; | ||
268 | |||
248 | static __be32 | 269 | static __be32 |
249 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, | 270 | nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, |
250 | struct nfs4_acl **acl) | 271 | struct iattr *iattr, struct nfs4_acl **acl) |
251 | { | 272 | { |
252 | int expected_len, len = 0; | 273 | int expected_len, len = 0; |
253 | u32 dummy32; | 274 | u32 dummy32; |
@@ -263,9 +284,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia | |||
263 | * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; | 284 | * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; |
264 | * read-only attributes return ERR_INVAL. | 285 | * read-only attributes return ERR_INVAL. |
265 | */ | 286 | */ |
266 | if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) | 287 | if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) || |
288 | (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) || | ||
289 | (bmval[2] & ~nfsd_suppattrs2(argp->minorversion))) | ||
267 | return nfserr_attrnotsupp; | 290 | return nfserr_attrnotsupp; |
268 | if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) | 291 | if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || |
292 | (bmval[2] & ~writable[2])) | ||
269 | return nfserr_inval; | 293 | return nfserr_inval; |
270 | 294 | ||
271 | READ_BUF(4); | 295 | READ_BUF(4); |
@@ -400,6 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia | |||
400 | goto xdr_error; | 424 | goto xdr_error; |
401 | } | 425 | } |
402 | } | 426 | } |
427 | BUG_ON(bmval[2]); /* no such writeable attr supported yet */ | ||
403 | if (len != expected_len) | 428 | if (len != expected_len) |
404 | goto xdr_error; | 429 | goto xdr_error; |
405 | 430 | ||
@@ -493,7 +518,9 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
493 | if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) | 518 | if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) |
494 | return status; | 519 | return status; |
495 | 520 | ||
496 | if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) | 521 | status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask, |
522 | &create->cr_iattr, &create->cr_acl); | ||
523 | if (status) | ||
497 | goto out; | 524 | goto out; |
498 | 525 | ||
499 | DECODE_TAIL; | 526 | DECODE_TAIL; |
@@ -583,6 +610,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) | |||
583 | READ_BUF(lockt->lt_owner.len); | 610 | READ_BUF(lockt->lt_owner.len); |
584 | READMEM(lockt->lt_owner.data, lockt->lt_owner.len); | 611 | READMEM(lockt->lt_owner.data, lockt->lt_owner.len); |
585 | 612 | ||
613 | if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) | ||
614 | return nfserr_inval; | ||
586 | DECODE_TAIL; | 615 | DECODE_TAIL; |
587 | } | 616 | } |
588 | 617 | ||
@@ -652,13 +681,26 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
652 | switch (open->op_createmode) { | 681 | switch (open->op_createmode) { |
653 | case NFS4_CREATE_UNCHECKED: | 682 | case NFS4_CREATE_UNCHECKED: |
654 | case NFS4_CREATE_GUARDED: | 683 | case NFS4_CREATE_GUARDED: |
655 | if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) | 684 | status = nfsd4_decode_fattr(argp, open->op_bmval, |
685 | nfsd_attrmask, &open->op_iattr, &open->op_acl); | ||
686 | if (status) | ||
656 | goto out; | 687 | goto out; |
657 | break; | 688 | break; |
658 | case NFS4_CREATE_EXCLUSIVE: | 689 | case NFS4_CREATE_EXCLUSIVE: |
659 | READ_BUF(8); | 690 | READ_BUF(8); |
660 | COPYMEM(open->op_verf.data, 8); | 691 | COPYMEM(open->op_verf.data, 8); |
661 | break; | 692 | break; |
693 | case NFS4_CREATE_EXCLUSIVE4_1: | ||
694 | if (argp->minorversion < 1) | ||
695 | goto xdr_error; | ||
696 | READ_BUF(8); | ||
697 | COPYMEM(open->op_verf.data, 8); | ||
698 | status = nfsd4_decode_fattr(argp, open->op_bmval, | ||
699 | nfsd41_ex_attrmask, &open->op_iattr, | ||
700 | &open->op_acl); | ||
701 | if (status) | ||
702 | goto out; | ||
703 | break; | ||
662 | default: | 704 | default: |
663 | goto xdr_error; | 705 | goto xdr_error; |
664 | } | 706 | } |
@@ -851,7 +893,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta | |||
851 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); | 893 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); |
852 | if (status) | 894 | if (status) |
853 | return status; | 895 | return status; |
854 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, | 896 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask, |
855 | &setattr->sa_iattr, &setattr->sa_acl); | 897 | &setattr->sa_iattr, &setattr->sa_acl); |
856 | } | 898 | } |
857 | 899 | ||
@@ -993,6 +1035,241 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel | |||
993 | READ_BUF(rlockowner->rl_owner.len); | 1035 | READ_BUF(rlockowner->rl_owner.len); |
994 | READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); | 1036 | READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); |
995 | 1037 | ||
1038 | if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) | ||
1039 | return nfserr_inval; | ||
1040 | DECODE_TAIL; | ||
1041 | } | ||
1042 | |||
1043 | static __be32 | ||
1044 | nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, | ||
1045 | struct nfsd4_exchange_id *exid) | ||
1046 | { | ||
1047 | int dummy; | ||
1048 | DECODE_HEAD; | ||
1049 | |||
1050 | READ_BUF(NFS4_VERIFIER_SIZE); | ||
1051 | COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); | ||
1052 | |||
1053 | READ_BUF(4); | ||
1054 | READ32(exid->clname.len); | ||
1055 | |||
1056 | READ_BUF(exid->clname.len); | ||
1057 | SAVEMEM(exid->clname.data, exid->clname.len); | ||
1058 | |||
1059 | READ_BUF(4); | ||
1060 | READ32(exid->flags); | ||
1061 | |||
1062 | /* Ignore state_protect4_a */ | ||
1063 | READ_BUF(4); | ||
1064 | READ32(exid->spa_how); | ||
1065 | switch (exid->spa_how) { | ||
1066 | case SP4_NONE: | ||
1067 | break; | ||
1068 | case SP4_MACH_CRED: | ||
1069 | /* spo_must_enforce */ | ||
1070 | READ_BUF(4); | ||
1071 | READ32(dummy); | ||
1072 | READ_BUF(dummy * 4); | ||
1073 | p += dummy; | ||
1074 | |||
1075 | /* spo_must_allow */ | ||
1076 | READ_BUF(4); | ||
1077 | READ32(dummy); | ||
1078 | READ_BUF(dummy * 4); | ||
1079 | p += dummy; | ||
1080 | break; | ||
1081 | case SP4_SSV: | ||
1082 | /* ssp_ops */ | ||
1083 | READ_BUF(4); | ||
1084 | READ32(dummy); | ||
1085 | READ_BUF(dummy * 4); | ||
1086 | p += dummy; | ||
1087 | |||
1088 | READ_BUF(4); | ||
1089 | READ32(dummy); | ||
1090 | READ_BUF(dummy * 4); | ||
1091 | p += dummy; | ||
1092 | |||
1093 | /* ssp_hash_algs<> */ | ||
1094 | READ_BUF(4); | ||
1095 | READ32(dummy); | ||
1096 | READ_BUF(dummy); | ||
1097 | p += XDR_QUADLEN(dummy); | ||
1098 | |||
1099 | /* ssp_encr_algs<> */ | ||
1100 | READ_BUF(4); | ||
1101 | READ32(dummy); | ||
1102 | READ_BUF(dummy); | ||
1103 | p += XDR_QUADLEN(dummy); | ||
1104 | |||
1105 | /* ssp_window and ssp_num_gss_handles */ | ||
1106 | READ_BUF(8); | ||
1107 | READ32(dummy); | ||
1108 | READ32(dummy); | ||
1109 | break; | ||
1110 | default: | ||
1111 | goto xdr_error; | ||
1112 | } | ||
1113 | |||
1114 | /* Ignore Implementation ID */ | ||
1115 | READ_BUF(4); /* nfs_impl_id4 array length */ | ||
1116 | READ32(dummy); | ||
1117 | |||
1118 | if (dummy > 1) | ||
1119 | goto xdr_error; | ||
1120 | |||
1121 | if (dummy == 1) { | ||
1122 | /* nii_domain */ | ||
1123 | READ_BUF(4); | ||
1124 | READ32(dummy); | ||
1125 | READ_BUF(dummy); | ||
1126 | p += XDR_QUADLEN(dummy); | ||
1127 | |||
1128 | /* nii_name */ | ||
1129 | READ_BUF(4); | ||
1130 | READ32(dummy); | ||
1131 | READ_BUF(dummy); | ||
1132 | p += XDR_QUADLEN(dummy); | ||
1133 | |||
1134 | /* nii_date */ | ||
1135 | READ_BUF(12); | ||
1136 | p += 3; | ||
1137 | } | ||
1138 | DECODE_TAIL; | ||
1139 | } | ||
1140 | |||
1141 | static __be32 | ||
1142 | nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, | ||
1143 | struct nfsd4_create_session *sess) | ||
1144 | { | ||
1145 | DECODE_HEAD; | ||
1146 | |||
1147 | u32 dummy; | ||
1148 | char *machine_name; | ||
1149 | int i; | ||
1150 | int nr_secflavs; | ||
1151 | |||
1152 | READ_BUF(16); | ||
1153 | COPYMEM(&sess->clientid, 8); | ||
1154 | READ32(sess->seqid); | ||
1155 | READ32(sess->flags); | ||
1156 | |||
1157 | /* Fore channel attrs */ | ||
1158 | READ_BUF(28); | ||
1159 | READ32(dummy); /* headerpadsz is always 0 */ | ||
1160 | READ32(sess->fore_channel.maxreq_sz); | ||
1161 | READ32(sess->fore_channel.maxresp_sz); | ||
1162 | READ32(sess->fore_channel.maxresp_cached); | ||
1163 | READ32(sess->fore_channel.maxops); | ||
1164 | READ32(sess->fore_channel.maxreqs); | ||
1165 | READ32(sess->fore_channel.nr_rdma_attrs); | ||
1166 | if (sess->fore_channel.nr_rdma_attrs == 1) { | ||
1167 | READ_BUF(4); | ||
1168 | READ32(sess->fore_channel.rdma_attrs); | ||
1169 | } else if (sess->fore_channel.nr_rdma_attrs > 1) { | ||
1170 | dprintk("Too many fore channel attr bitmaps!\n"); | ||
1171 | goto xdr_error; | ||
1172 | } | ||
1173 | |||
1174 | /* Back channel attrs */ | ||
1175 | READ_BUF(28); | ||
1176 | READ32(dummy); /* headerpadsz is always 0 */ | ||
1177 | READ32(sess->back_channel.maxreq_sz); | ||
1178 | READ32(sess->back_channel.maxresp_sz); | ||
1179 | READ32(sess->back_channel.maxresp_cached); | ||
1180 | READ32(sess->back_channel.maxops); | ||
1181 | READ32(sess->back_channel.maxreqs); | ||
1182 | READ32(sess->back_channel.nr_rdma_attrs); | ||
1183 | if (sess->back_channel.nr_rdma_attrs == 1) { | ||
1184 | READ_BUF(4); | ||
1185 | READ32(sess->back_channel.rdma_attrs); | ||
1186 | } else if (sess->back_channel.nr_rdma_attrs > 1) { | ||
1187 | dprintk("Too many back channel attr bitmaps!\n"); | ||
1188 | goto xdr_error; | ||
1189 | } | ||
1190 | |||
1191 | READ_BUF(8); | ||
1192 | READ32(sess->callback_prog); | ||
1193 | |||
1194 | /* callback_sec_params4 */ | ||
1195 | READ32(nr_secflavs); | ||
1196 | for (i = 0; i < nr_secflavs; ++i) { | ||
1197 | READ_BUF(4); | ||
1198 | READ32(dummy); | ||
1199 | switch (dummy) { | ||
1200 | case RPC_AUTH_NULL: | ||
1201 | /* Nothing to read */ | ||
1202 | break; | ||
1203 | case RPC_AUTH_UNIX: | ||
1204 | READ_BUF(8); | ||
1205 | /* stamp */ | ||
1206 | READ32(dummy); | ||
1207 | |||
1208 | /* machine name */ | ||
1209 | READ32(dummy); | ||
1210 | READ_BUF(dummy); | ||
1211 | SAVEMEM(machine_name, dummy); | ||
1212 | |||
1213 | /* uid, gid */ | ||
1214 | READ_BUF(8); | ||
1215 | READ32(sess->uid); | ||
1216 | READ32(sess->gid); | ||
1217 | |||
1218 | /* more gids */ | ||
1219 | READ_BUF(4); | ||
1220 | READ32(dummy); | ||
1221 | READ_BUF(dummy * 4); | ||
1222 | for (i = 0; i < dummy; ++i) | ||
1223 | READ32(dummy); | ||
1224 | break; | ||
1225 | case RPC_AUTH_GSS: | ||
1226 | dprintk("RPC_AUTH_GSS callback secflavor " | ||
1227 | "not supported!\n"); | ||
1228 | READ_BUF(8); | ||
1229 | /* gcbp_service */ | ||
1230 | READ32(dummy); | ||
1231 | /* gcbp_handle_from_server */ | ||
1232 | READ32(dummy); | ||
1233 | READ_BUF(dummy); | ||
1234 | p += XDR_QUADLEN(dummy); | ||
1235 | /* gcbp_handle_from_client */ | ||
1236 | READ_BUF(4); | ||
1237 | READ32(dummy); | ||
1238 | READ_BUF(dummy); | ||
1239 | p += XDR_QUADLEN(dummy); | ||
1240 | break; | ||
1241 | default: | ||
1242 | dprintk("Illegal callback secflavor\n"); | ||
1243 | return nfserr_inval; | ||
1244 | } | ||
1245 | } | ||
1246 | DECODE_TAIL; | ||
1247 | } | ||
1248 | |||
1249 | static __be32 | ||
1250 | nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, | ||
1251 | struct nfsd4_destroy_session *destroy_session) | ||
1252 | { | ||
1253 | DECODE_HEAD; | ||
1254 | READ_BUF(NFS4_MAX_SESSIONID_LEN); | ||
1255 | COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
1256 | |||
1257 | DECODE_TAIL; | ||
1258 | } | ||
1259 | |||
1260 | static __be32 | ||
1261 | nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, | ||
1262 | struct nfsd4_sequence *seq) | ||
1263 | { | ||
1264 | DECODE_HEAD; | ||
1265 | |||
1266 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); | ||
1267 | COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
1268 | READ32(seq->seqid); | ||
1269 | READ32(seq->slotid); | ||
1270 | READ32(seq->maxslots); | ||
1271 | READ32(seq->cachethis); | ||
1272 | |||
996 | DECODE_TAIL; | 1273 | DECODE_TAIL; |
997 | } | 1274 | } |
998 | 1275 | ||
@@ -1005,7 +1282,7 @@ nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) | |||
1005 | static __be32 | 1282 | static __be32 |
1006 | nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) | 1283 | nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) |
1007 | { | 1284 | { |
1008 | return nfserr_opnotsupp; | 1285 | return nfserr_notsupp; |
1009 | } | 1286 | } |
1010 | 1287 | ||
1011 | typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); | 1288 | typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); |
@@ -1031,7 +1308,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1031 | [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, | 1308 | [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, |
1032 | [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, | 1309 | [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, |
1033 | [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, | 1310 | [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, |
1034 | [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, | 1311 | [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, |
1035 | [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, | 1312 | [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, |
1036 | [OP_READ] = (nfsd4_dec)nfsd4_decode_read, | 1313 | [OP_READ] = (nfsd4_dec)nfsd4_decode_read, |
1037 | [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, | 1314 | [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, |
@@ -1050,6 +1327,67 @@ static nfsd4_dec nfsd4_dec_ops[] = { | |||
1050 | [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, | 1327 | [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, |
1051 | }; | 1328 | }; |
1052 | 1329 | ||
1330 | static nfsd4_dec nfsd41_dec_ops[] = { | ||
1331 | [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access, | ||
1332 | [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close, | ||
1333 | [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit, | ||
1334 | [OP_CREATE] (nfsd4_dec)nfsd4_decode_create, | ||
1335 | [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1336 | [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn, | ||
1337 | [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr, | ||
1338 | [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1339 | [OP_LINK] (nfsd4_dec)nfsd4_decode_link, | ||
1340 | [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock, | ||
1341 | [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt, | ||
1342 | [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku, | ||
1343 | [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup, | ||
1344 | [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop, | ||
1345 | [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify, | ||
1346 | [OP_OPEN] (nfsd4_dec)nfsd4_decode_open, | ||
1347 | [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1348 | [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1349 | [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade, | ||
1350 | [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh, | ||
1351 | [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1352 | [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1353 | [OP_READ] (nfsd4_dec)nfsd4_decode_read, | ||
1354 | [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir, | ||
1355 | [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop, | ||
1356 | [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove, | ||
1357 | [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename, | ||
1358 | [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1359 | [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1360 | [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop, | ||
1361 | [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo, | ||
1362 | [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr, | ||
1363 | [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1364 | [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp, | ||
1365 | [OP_VERIFY] (nfsd4_dec)nfsd4_decode_verify, | ||
1366 | [OP_WRITE] (nfsd4_dec)nfsd4_decode_write, | ||
1367 | [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1368 | |||
1369 | /* new operations for NFSv4.1 */ | ||
1370 | [OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1371 | [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp, | ||
1372 | [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id, | ||
1373 | [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session, | ||
1374 | [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session, | ||
1375 | [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1376 | [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1377 | [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1378 | [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1379 | [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1380 | [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1381 | [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1382 | [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1383 | [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence, | ||
1384 | [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1385 | [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1386 | [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1387 | [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1388 | [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp, | ||
1389 | }; | ||
1390 | |||
1053 | struct nfsd4_minorversion_ops { | 1391 | struct nfsd4_minorversion_ops { |
1054 | nfsd4_dec *decoders; | 1392 | nfsd4_dec *decoders; |
1055 | int nops; | 1393 | int nops; |
@@ -1057,6 +1395,7 @@ struct nfsd4_minorversion_ops { | |||
1057 | 1395 | ||
1058 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { | 1396 | static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { |
1059 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, | 1397 | [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, |
1398 | [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, | ||
1060 | }; | 1399 | }; |
1061 | 1400 | ||
1062 | static __be32 | 1401 | static __be32 |
@@ -1412,6 +1751,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1412 | { | 1751 | { |
1413 | u32 bmval0 = bmval[0]; | 1752 | u32 bmval0 = bmval[0]; |
1414 | u32 bmval1 = bmval[1]; | 1753 | u32 bmval1 = bmval[1]; |
1754 | u32 bmval2 = bmval[2]; | ||
1415 | struct kstat stat; | 1755 | struct kstat stat; |
1416 | struct svc_fh tempfh; | 1756 | struct svc_fh tempfh; |
1417 | struct kstatfs statfs; | 1757 | struct kstatfs statfs; |
@@ -1425,12 +1765,16 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1425 | int err; | 1765 | int err; |
1426 | int aclsupport = 0; | 1766 | int aclsupport = 0; |
1427 | struct nfs4_acl *acl = NULL; | 1767 | struct nfs4_acl *acl = NULL; |
1768 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1769 | u32 minorversion = resp->cstate.minorversion; | ||
1428 | 1770 | ||
1429 | BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); | 1771 | BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); |
1430 | BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); | 1772 | BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); |
1431 | BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); | 1773 | BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion)); |
1774 | BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion)); | ||
1432 | 1775 | ||
1433 | if (exp->ex_fslocs.migrated) { | 1776 | if (exp->ex_fslocs.migrated) { |
1777 | BUG_ON(bmval[2]); | ||
1434 | status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); | 1778 | status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err); |
1435 | if (status) | 1779 | if (status) |
1436 | goto out; | 1780 | goto out; |
@@ -1476,22 +1820,42 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1476 | if ((buflen -= 16) < 0) | 1820 | if ((buflen -= 16) < 0) |
1477 | goto out_resource; | 1821 | goto out_resource; |
1478 | 1822 | ||
1479 | WRITE32(2); | 1823 | if (unlikely(bmval2)) { |
1480 | WRITE32(bmval0); | 1824 | WRITE32(3); |
1481 | WRITE32(bmval1); | 1825 | WRITE32(bmval0); |
1826 | WRITE32(bmval1); | ||
1827 | WRITE32(bmval2); | ||
1828 | } else if (likely(bmval1)) { | ||
1829 | WRITE32(2); | ||
1830 | WRITE32(bmval0); | ||
1831 | WRITE32(bmval1); | ||
1832 | } else { | ||
1833 | WRITE32(1); | ||
1834 | WRITE32(bmval0); | ||
1835 | } | ||
1482 | attrlenp = p++; /* to be backfilled later */ | 1836 | attrlenp = p++; /* to be backfilled later */ |
1483 | 1837 | ||
1484 | if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { | 1838 | if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { |
1485 | u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0; | 1839 | u32 word0 = nfsd_suppattrs0(minorversion); |
1840 | u32 word1 = nfsd_suppattrs1(minorversion); | ||
1841 | u32 word2 = nfsd_suppattrs2(minorversion); | ||
1842 | |||
1486 | if ((buflen -= 12) < 0) | 1843 | if ((buflen -= 12) < 0) |
1487 | goto out_resource; | 1844 | goto out_resource; |
1488 | if (!aclsupport) | 1845 | if (!aclsupport) |
1489 | word0 &= ~FATTR4_WORD0_ACL; | 1846 | word0 &= ~FATTR4_WORD0_ACL; |
1490 | if (!exp->ex_fslocs.locations) | 1847 | if (!exp->ex_fslocs.locations) |
1491 | word0 &= ~FATTR4_WORD0_FS_LOCATIONS; | 1848 | word0 &= ~FATTR4_WORD0_FS_LOCATIONS; |
1492 | WRITE32(2); | 1849 | if (!word2) { |
1493 | WRITE32(word0); | 1850 | WRITE32(2); |
1494 | WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); | 1851 | WRITE32(word0); |
1852 | WRITE32(word1); | ||
1853 | } else { | ||
1854 | WRITE32(3); | ||
1855 | WRITE32(word0); | ||
1856 | WRITE32(word1); | ||
1857 | WRITE32(word2); | ||
1858 | } | ||
1495 | } | 1859 | } |
1496 | if (bmval0 & FATTR4_WORD0_TYPE) { | 1860 | if (bmval0 & FATTR4_WORD0_TYPE) { |
1497 | if ((buflen -= 4) < 0) | 1861 | if ((buflen -= 4) < 0) |
@@ -1801,6 +2165,13 @@ out_acl: | |||
1801 | } | 2165 | } |
1802 | WRITE64(stat.ino); | 2166 | WRITE64(stat.ino); |
1803 | } | 2167 | } |
2168 | if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { | ||
2169 | WRITE32(3); | ||
2170 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); | ||
2171 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); | ||
2172 | WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2); | ||
2173 | } | ||
2174 | |||
1804 | *attrlenp = htonl((char *)p - (char *)attrlenp - 4); | 2175 | *attrlenp = htonl((char *)p - (char *)attrlenp - 4); |
1805 | *countp = p - buffer; | 2176 | *countp = p - buffer; |
1806 | status = nfs_ok; | 2177 | status = nfs_ok; |
@@ -2572,6 +2943,143 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w | |||
2572 | } | 2943 | } |
2573 | 2944 | ||
2574 | static __be32 | 2945 | static __be32 |
2946 | nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, | ||
2947 | struct nfsd4_exchange_id *exid) | ||
2948 | { | ||
2949 | ENCODE_HEAD; | ||
2950 | char *major_id; | ||
2951 | char *server_scope; | ||
2952 | int major_id_sz; | ||
2953 | int server_scope_sz; | ||
2954 | uint64_t minor_id = 0; | ||
2955 | |||
2956 | if (nfserr) | ||
2957 | return nfserr; | ||
2958 | |||
2959 | major_id = utsname()->nodename; | ||
2960 | major_id_sz = strlen(major_id); | ||
2961 | server_scope = utsname()->nodename; | ||
2962 | server_scope_sz = strlen(server_scope); | ||
2963 | |||
2964 | RESERVE_SPACE( | ||
2965 | 8 /* eir_clientid */ + | ||
2966 | 4 /* eir_sequenceid */ + | ||
2967 | 4 /* eir_flags */ + | ||
2968 | 4 /* spr_how (SP4_NONE) */ + | ||
2969 | 8 /* so_minor_id */ + | ||
2970 | 4 /* so_major_id.len */ + | ||
2971 | (XDR_QUADLEN(major_id_sz) * 4) + | ||
2972 | 4 /* eir_server_scope.len */ + | ||
2973 | (XDR_QUADLEN(server_scope_sz) * 4) + | ||
2974 | 4 /* eir_server_impl_id.count (0) */); | ||
2975 | |||
2976 | WRITEMEM(&exid->clientid, 8); | ||
2977 | WRITE32(exid->seqid); | ||
2978 | WRITE32(exid->flags); | ||
2979 | |||
2980 | /* state_protect4_r. Currently only support SP4_NONE */ | ||
2981 | BUG_ON(exid->spa_how != SP4_NONE); | ||
2982 | WRITE32(exid->spa_how); | ||
2983 | |||
2984 | /* The server_owner struct */ | ||
2985 | WRITE64(minor_id); /* Minor id */ | ||
2986 | /* major id */ | ||
2987 | WRITE32(major_id_sz); | ||
2988 | WRITEMEM(major_id, major_id_sz); | ||
2989 | |||
2990 | /* Server scope */ | ||
2991 | WRITE32(server_scope_sz); | ||
2992 | WRITEMEM(server_scope, server_scope_sz); | ||
2993 | |||
2994 | /* Implementation id */ | ||
2995 | WRITE32(0); /* zero length nfs_impl_id4 array */ | ||
2996 | ADJUST_ARGS(); | ||
2997 | return 0; | ||
2998 | } | ||
2999 | |||
3000 | static __be32 | ||
3001 | nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, | ||
3002 | struct nfsd4_create_session *sess) | ||
3003 | { | ||
3004 | ENCODE_HEAD; | ||
3005 | |||
3006 | if (nfserr) | ||
3007 | return nfserr; | ||
3008 | |||
3009 | RESERVE_SPACE(24); | ||
3010 | WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
3011 | WRITE32(sess->seqid); | ||
3012 | WRITE32(sess->flags); | ||
3013 | ADJUST_ARGS(); | ||
3014 | |||
3015 | RESERVE_SPACE(28); | ||
3016 | WRITE32(0); /* headerpadsz */ | ||
3017 | WRITE32(sess->fore_channel.maxreq_sz); | ||
3018 | WRITE32(sess->fore_channel.maxresp_sz); | ||
3019 | WRITE32(sess->fore_channel.maxresp_cached); | ||
3020 | WRITE32(sess->fore_channel.maxops); | ||
3021 | WRITE32(sess->fore_channel.maxreqs); | ||
3022 | WRITE32(sess->fore_channel.nr_rdma_attrs); | ||
3023 | ADJUST_ARGS(); | ||
3024 | |||
3025 | if (sess->fore_channel.nr_rdma_attrs) { | ||
3026 | RESERVE_SPACE(4); | ||
3027 | WRITE32(sess->fore_channel.rdma_attrs); | ||
3028 | ADJUST_ARGS(); | ||
3029 | } | ||
3030 | |||
3031 | RESERVE_SPACE(28); | ||
3032 | WRITE32(0); /* headerpadsz */ | ||
3033 | WRITE32(sess->back_channel.maxreq_sz); | ||
3034 | WRITE32(sess->back_channel.maxresp_sz); | ||
3035 | WRITE32(sess->back_channel.maxresp_cached); | ||
3036 | WRITE32(sess->back_channel.maxops); | ||
3037 | WRITE32(sess->back_channel.maxreqs); | ||
3038 | WRITE32(sess->back_channel.nr_rdma_attrs); | ||
3039 | ADJUST_ARGS(); | ||
3040 | |||
3041 | if (sess->back_channel.nr_rdma_attrs) { | ||
3042 | RESERVE_SPACE(4); | ||
3043 | WRITE32(sess->back_channel.rdma_attrs); | ||
3044 | ADJUST_ARGS(); | ||
3045 | } | ||
3046 | return 0; | ||
3047 | } | ||
3048 | |||
3049 | static __be32 | ||
3050 | nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, | ||
3051 | struct nfsd4_destroy_session *destroy_session) | ||
3052 | { | ||
3053 | return nfserr; | ||
3054 | } | ||
3055 | |||
3056 | __be32 | ||
3057 | nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | ||
3058 | struct nfsd4_sequence *seq) | ||
3059 | { | ||
3060 | ENCODE_HEAD; | ||
3061 | |||
3062 | if (nfserr) | ||
3063 | return nfserr; | ||
3064 | |||
3065 | RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20); | ||
3066 | WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); | ||
3067 | WRITE32(seq->seqid); | ||
3068 | WRITE32(seq->slotid); | ||
3069 | WRITE32(seq->maxslots); | ||
3070 | /* | ||
3071 | * FIXME: for now: | ||
3072 | * target_maxslots = maxslots | ||
3073 | * status_flags = 0 | ||
3074 | */ | ||
3075 | WRITE32(seq->maxslots); | ||
3076 | WRITE32(0); | ||
3077 | |||
3078 | ADJUST_ARGS(); | ||
3079 | return 0; | ||
3080 | } | ||
3081 | |||
3082 | static __be32 | ||
2575 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) | 3083 | nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) |
2576 | { | 3084 | { |
2577 | return nfserr; | 3085 | return nfserr; |
@@ -2579,6 +3087,11 @@ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) | |||
2579 | 3087 | ||
2580 | typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); | 3088 | typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); |
2581 | 3089 | ||
3090 | /* | ||
3091 | * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1 | ||
3092 | * since we don't need to filter out obsolete ops as this is | ||
3093 | * done in the decoding phase. | ||
3094 | */ | ||
2582 | static nfsd4_enc nfsd4_enc_ops[] = { | 3095 | static nfsd4_enc nfsd4_enc_ops[] = { |
2583 | [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, | 3096 | [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, |
2584 | [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, | 3097 | [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, |
@@ -2617,8 +3130,77 @@ static nfsd4_enc nfsd4_enc_ops[] = { | |||
2617 | [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, | 3130 | [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop, |
2618 | [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, | 3131 | [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write, |
2619 | [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, | 3132 | [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop, |
3133 | |||
3134 | /* NFSv4.1 operations */ | ||
3135 | [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop, | ||
3136 | [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_noop, | ||
3137 | [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, | ||
3138 | [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, | ||
3139 | [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, | ||
3140 | [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | ||
3141 | [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | ||
3142 | [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, | ||
3143 | [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, | ||
3144 | [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, | ||
3145 | [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, | ||
3146 | [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, | ||
3147 | [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop, | ||
3148 | [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, | ||
3149 | [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, | ||
3150 | [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, | ||
3151 | [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, | ||
3152 | [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, | ||
3153 | [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, | ||
2620 | }; | 3154 | }; |
2621 | 3155 | ||
3156 | /* | ||
3157 | * Calculate the total amount of memory that the compound response has taken | ||
3158 | * after encoding the current operation. | ||
3159 | * | ||
3160 | * pad: add on 8 bytes for the next operation's op_code and status so that | ||
3161 | * there is room to cache a failure on the next operation. | ||
3162 | * | ||
3163 | * Compare this length to the session se_fmaxresp_cached. | ||
3164 | * | ||
3165 | * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so | ||
3166 | * will be at least a page and will therefore hold the xdr_buf head. | ||
3167 | */ | ||
3168 | static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) | ||
3169 | { | ||
3170 | int status = 0; | ||
3171 | struct xdr_buf *xb = &resp->rqstp->rq_res; | ||
3172 | struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; | ||
3173 | struct nfsd4_session *session = NULL; | ||
3174 | struct nfsd4_slot *slot = resp->cstate.slot; | ||
3175 | u32 length, tlen = 0, pad = 8; | ||
3176 | |||
3177 | if (!nfsd4_has_session(&resp->cstate)) | ||
3178 | return status; | ||
3179 | |||
3180 | session = resp->cstate.session; | ||
3181 | if (session == NULL || slot->sl_cache_entry.ce_cachethis == 0) | ||
3182 | return status; | ||
3183 | |||
3184 | if (resp->opcnt >= args->opcnt) | ||
3185 | pad = 0; /* this is the last operation */ | ||
3186 | |||
3187 | if (xb->page_len == 0) { | ||
3188 | length = (char *)resp->p - (char *)xb->head[0].iov_base + pad; | ||
3189 | } else { | ||
3190 | if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0) | ||
3191 | tlen = (char *)resp->p - (char *)xb->tail[0].iov_base; | ||
3192 | |||
3193 | length = xb->head[0].iov_len + xb->page_len + tlen + pad; | ||
3194 | } | ||
3195 | dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, | ||
3196 | length, xb->page_len, tlen, pad); | ||
3197 | |||
3198 | if (length <= session->se_fmaxresp_cached) | ||
3199 | return status; | ||
3200 | else | ||
3201 | return nfserr_rep_too_big_to_cache; | ||
3202 | } | ||
3203 | |||
2622 | void | 3204 | void |
2623 | nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | 3205 | nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) |
2624 | { | 3206 | { |
@@ -2635,6 +3217,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) | |||
2635 | BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || | 3217 | BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || |
2636 | !nfsd4_enc_ops[op->opnum]); | 3218 | !nfsd4_enc_ops[op->opnum]); |
2637 | op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); | 3219 | op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u); |
3220 | /* nfsd4_check_drc_limit guarantees enough room for error status */ | ||
3221 | if (!op->status && nfsd4_check_drc_limit(resp)) | ||
3222 | op->status = nfserr_rep_too_big_to_cache; | ||
2638 | status: | 3223 | status: |
2639 | /* | 3224 | /* |
2640 | * Note: We write the status directly, instead of using WRITE32(), | 3225 | * Note: We write the status directly, instead of using WRITE32(), |
@@ -2735,6 +3320,18 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
2735 | iov = &rqstp->rq_res.head[0]; | 3320 | iov = &rqstp->rq_res.head[0]; |
2736 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; | 3321 | iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; |
2737 | BUG_ON(iov->iov_len > PAGE_SIZE); | 3322 | BUG_ON(iov->iov_len > PAGE_SIZE); |
3323 | if (nfsd4_has_session(&resp->cstate)) { | ||
3324 | if (resp->cstate.status == nfserr_replay_cache && | ||
3325 | !nfsd4_not_cached(resp)) { | ||
3326 | iov->iov_len = resp->cstate.iovlen; | ||
3327 | } else { | ||
3328 | nfsd4_store_cache_entry(resp); | ||
3329 | dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); | ||
3330 | resp->cstate.slot->sl_inuse = 0; | ||
3331 | } | ||
3332 | if (resp->cstate.session) | ||
3333 | nfsd4_put_session(resp->cstate.session); | ||
3334 | } | ||
2738 | return 1; | 3335 | return 1; |
2739 | } | 3336 | } |
2740 | 3337 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index a4ed8644d69c..af16849d243a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -60,6 +60,7 @@ enum { | |||
60 | NFSD_FO_UnlockFS, | 60 | NFSD_FO_UnlockFS, |
61 | NFSD_Threads, | 61 | NFSD_Threads, |
62 | NFSD_Pool_Threads, | 62 | NFSD_Pool_Threads, |
63 | NFSD_Pool_Stats, | ||
63 | NFSD_Versions, | 64 | NFSD_Versions, |
64 | NFSD_Ports, | 65 | NFSD_Ports, |
65 | NFSD_MaxBlkSize, | 66 | NFSD_MaxBlkSize, |
@@ -172,6 +173,16 @@ static const struct file_operations exports_operations = { | |||
172 | .owner = THIS_MODULE, | 173 | .owner = THIS_MODULE, |
173 | }; | 174 | }; |
174 | 175 | ||
176 | extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); | ||
177 | |||
178 | static struct file_operations pool_stats_operations = { | ||
179 | .open = nfsd_pool_stats_open, | ||
180 | .read = seq_read, | ||
181 | .llseek = seq_lseek, | ||
182 | .release = seq_release, | ||
183 | .owner = THIS_MODULE, | ||
184 | }; | ||
185 | |||
175 | /*----------------------------------------------------------------------------*/ | 186 | /*----------------------------------------------------------------------------*/ |
176 | /* | 187 | /* |
177 | * payload - write methods | 188 | * payload - write methods |
@@ -781,8 +792,9 @@ out_free: | |||
781 | static ssize_t __write_versions(struct file *file, char *buf, size_t size) | 792 | static ssize_t __write_versions(struct file *file, char *buf, size_t size) |
782 | { | 793 | { |
783 | char *mesg = buf; | 794 | char *mesg = buf; |
784 | char *vers, sign; | 795 | char *vers, *minorp, sign; |
785 | int len, num; | 796 | int len, num; |
797 | unsigned minor; | ||
786 | ssize_t tlen = 0; | 798 | ssize_t tlen = 0; |
787 | char *sep; | 799 | char *sep; |
788 | 800 | ||
@@ -803,9 +815,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) | |||
803 | do { | 815 | do { |
804 | sign = *vers; | 816 | sign = *vers; |
805 | if (sign == '+' || sign == '-') | 817 | if (sign == '+' || sign == '-') |
806 | num = simple_strtol((vers+1), NULL, 0); | 818 | num = simple_strtol((vers+1), &minorp, 0); |
807 | else | 819 | else |
808 | num = simple_strtol(vers, NULL, 0); | 820 | num = simple_strtol(vers, &minorp, 0); |
821 | if (*minorp == '.') { | ||
822 | if (num < 4) | ||
823 | return -EINVAL; | ||
824 | minor = simple_strtoul(minorp+1, NULL, 0); | ||
825 | if (minor == 0) | ||
826 | return -EINVAL; | ||
827 | if (nfsd_minorversion(minor, sign == '-' ? | ||
828 | NFSD_CLEAR : NFSD_SET) < 0) | ||
829 | return -EINVAL; | ||
830 | goto next; | ||
831 | } | ||
809 | switch(num) { | 832 | switch(num) { |
810 | case 2: | 833 | case 2: |
811 | case 3: | 834 | case 3: |
@@ -815,6 +838,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) | |||
815 | default: | 838 | default: |
816 | return -EINVAL; | 839 | return -EINVAL; |
817 | } | 840 | } |
841 | next: | ||
818 | vers += len + 1; | 842 | vers += len + 1; |
819 | tlen += len; | 843 | tlen += len; |
820 | } while ((len = qword_get(&mesg, vers, size)) > 0); | 844 | } while ((len = qword_get(&mesg, vers, size)) > 0); |
@@ -833,6 +857,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) | |||
833 | num); | 857 | num); |
834 | sep = " "; | 858 | sep = " "; |
835 | } | 859 | } |
860 | if (nfsd_vers(4, NFSD_AVAIL)) | ||
861 | for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) | ||
862 | len += sprintf(buf+len, " %c4.%u", | ||
863 | (nfsd_vers(4, NFSD_TEST) && | ||
864 | nfsd_minorversion(minor, NFSD_TEST)) ? | ||
865 | '+' : '-', | ||
866 | minor); | ||
836 | len += sprintf(buf+len, "\n"); | 867 | len += sprintf(buf+len, "\n"); |
837 | return len; | 868 | return len; |
838 | } | 869 | } |
@@ -1248,6 +1279,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1248 | [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, | 1279 | [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, |
1249 | [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, | 1280 | [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, |
1250 | [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, | 1281 | [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, |
1282 | [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, | ||
1251 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, | 1283 | [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, |
1252 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, | 1284 | [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, |
1253 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, | 1285 | [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6f7f26351227..e298e260b5f1 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -180,6 +180,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, | |||
180 | { | 180 | { |
181 | __be32 nfserr; | 181 | __be32 nfserr; |
182 | int stable = 1; | 182 | int stable = 1; |
183 | unsigned long cnt = argp->len; | ||
183 | 184 | ||
184 | dprintk("nfsd: WRITE %s %d bytes at %d\n", | 185 | dprintk("nfsd: WRITE %s %d bytes at %d\n", |
185 | SVCFH_fmt(&argp->fh), | 186 | SVCFH_fmt(&argp->fh), |
@@ -188,7 +189,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, | |||
188 | nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, | 189 | nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, |
189 | argp->offset, | 190 | argp->offset, |
190 | rqstp->rq_vec, argp->vlen, | 191 | rqstp->rq_vec, argp->vlen, |
191 | argp->len, | 192 | &cnt, |
192 | &stable); | 193 | &stable); |
193 | return nfsd_return_attrs(nfserr, resp); | 194 | return nfsd_return_attrs(nfserr, resp); |
194 | } | 195 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7c09852be713..cbba4a935786 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/fs_struct.h> | 23 | #include <linux/fs_struct.h> |
24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
25 | #include <linux/swap.h> | ||
25 | 26 | ||
26 | #include <linux/sunrpc/types.h> | 27 | #include <linux/sunrpc/types.h> |
27 | #include <linux/sunrpc/stats.h> | 28 | #include <linux/sunrpc/stats.h> |
@@ -40,9 +41,6 @@ | |||
40 | extern struct svc_program nfsd_program; | 41 | extern struct svc_program nfsd_program; |
41 | static int nfsd(void *vrqstp); | 42 | static int nfsd(void *vrqstp); |
42 | struct timeval nfssvc_boot; | 43 | struct timeval nfssvc_boot; |
43 | static atomic_t nfsd_busy; | ||
44 | static unsigned long nfsd_last_call; | ||
45 | static DEFINE_SPINLOCK(nfsd_call_lock); | ||
46 | 44 | ||
47 | /* | 45 | /* |
48 | * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members | 46 | * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members |
@@ -123,6 +121,8 @@ struct svc_program nfsd_program = { | |||
123 | 121 | ||
124 | }; | 122 | }; |
125 | 123 | ||
124 | u32 nfsd_supported_minorversion; | ||
125 | |||
126 | int nfsd_vers(int vers, enum vers_op change) | 126 | int nfsd_vers(int vers, enum vers_op change) |
127 | { | 127 | { |
128 | if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) | 128 | if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) |
@@ -149,6 +149,28 @@ int nfsd_vers(int vers, enum vers_op change) | |||
149 | } | 149 | } |
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
152 | |||
153 | int nfsd_minorversion(u32 minorversion, enum vers_op change) | ||
154 | { | ||
155 | if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) | ||
156 | return -1; | ||
157 | switch(change) { | ||
158 | case NFSD_SET: | ||
159 | nfsd_supported_minorversion = minorversion; | ||
160 | break; | ||
161 | case NFSD_CLEAR: | ||
162 | if (minorversion == 0) | ||
163 | return -1; | ||
164 | nfsd_supported_minorversion = minorversion - 1; | ||
165 | break; | ||
166 | case NFSD_TEST: | ||
167 | return minorversion <= nfsd_supported_minorversion; | ||
168 | case NFSD_AVAIL: | ||
169 | return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; | ||
170 | } | ||
171 | return 0; | ||
172 | } | ||
173 | |||
152 | /* | 174 | /* |
153 | * Maximum number of nfsd processes | 175 | * Maximum number of nfsd processes |
154 | */ | 176 | */ |
@@ -200,6 +222,28 @@ void nfsd_reset_versions(void) | |||
200 | } | 222 | } |
201 | } | 223 | } |
202 | 224 | ||
225 | /* | ||
226 | * Each session guarantees a negotiated per slot memory cache for replies | ||
227 | * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated | ||
228 | * NFSv4.1 server might want to use more memory for a DRC than a machine | ||
229 | * with mutiple services. | ||
230 | * | ||
231 | * Impose a hard limit on the number of pages for the DRC which varies | ||
232 | * according to the machines free pages. This is of course only a default. | ||
233 | * | ||
234 | * For now this is a #defined shift which could be under admin control | ||
235 | * in the future. | ||
236 | */ | ||
237 | static void set_max_drc(void) | ||
238 | { | ||
239 | /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ | ||
240 | #define NFSD_DRC_SIZE_SHIFT 7 | ||
241 | nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() | ||
242 | >> NFSD_DRC_SIZE_SHIFT; | ||
243 | nfsd_serv->sv_drc_pages_used = 0; | ||
244 | dprintk("%s svc_drc_max_pages %u\n", __func__, | ||
245 | nfsd_serv->sv_drc_max_pages); | ||
246 | } | ||
203 | 247 | ||
204 | int nfsd_create_serv(void) | 248 | int nfsd_create_serv(void) |
205 | { | 249 | { |
@@ -227,11 +271,12 @@ int nfsd_create_serv(void) | |||
227 | nfsd_max_blksize /= 2; | 271 | nfsd_max_blksize /= 2; |
228 | } | 272 | } |
229 | 273 | ||
230 | atomic_set(&nfsd_busy, 0); | ||
231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, | 274 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, |
232 | nfsd_last_thread, nfsd, THIS_MODULE); | 275 | nfsd_last_thread, nfsd, THIS_MODULE); |
233 | if (nfsd_serv == NULL) | 276 | if (nfsd_serv == NULL) |
234 | err = -ENOMEM; | 277 | err = -ENOMEM; |
278 | else | ||
279 | set_max_drc(); | ||
235 | 280 | ||
236 | do_gettimeofday(&nfssvc_boot); /* record boot time */ | 281 | do_gettimeofday(&nfssvc_boot); /* record boot time */ |
237 | return err; | 282 | return err; |
@@ -375,26 +420,6 @@ nfsd_svc(unsigned short port, int nrservs) | |||
375 | return error; | 420 | return error; |
376 | } | 421 | } |
377 | 422 | ||
378 | static inline void | ||
379 | update_thread_usage(int busy_threads) | ||
380 | { | ||
381 | unsigned long prev_call; | ||
382 | unsigned long diff; | ||
383 | int decile; | ||
384 | |||
385 | spin_lock(&nfsd_call_lock); | ||
386 | prev_call = nfsd_last_call; | ||
387 | nfsd_last_call = jiffies; | ||
388 | decile = busy_threads*10/nfsdstats.th_cnt; | ||
389 | if (decile>0 && decile <= 10) { | ||
390 | diff = nfsd_last_call - prev_call; | ||
391 | if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) | ||
392 | nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; | ||
393 | if (decile == 10) | ||
394 | nfsdstats.th_fullcnt++; | ||
395 | } | ||
396 | spin_unlock(&nfsd_call_lock); | ||
397 | } | ||
398 | 423 | ||
399 | /* | 424 | /* |
400 | * This is the NFS server kernel thread | 425 | * This is the NFS server kernel thread |
@@ -460,8 +485,6 @@ nfsd(void *vrqstp) | |||
460 | continue; | 485 | continue; |
461 | } | 486 | } |
462 | 487 | ||
463 | update_thread_usage(atomic_read(&nfsd_busy)); | ||
464 | atomic_inc(&nfsd_busy); | ||
465 | 488 | ||
466 | /* Lock the export hash tables for reading. */ | 489 | /* Lock the export hash tables for reading. */ |
467 | exp_readlock(); | 490 | exp_readlock(); |
@@ -470,8 +493,6 @@ nfsd(void *vrqstp) | |||
470 | 493 | ||
471 | /* Unlock export hash tables */ | 494 | /* Unlock export hash tables */ |
472 | exp_readunlock(); | 495 | exp_readunlock(); |
473 | update_thread_usage(atomic_read(&nfsd_busy)); | ||
474 | atomic_dec(&nfsd_busy); | ||
475 | } | 496 | } |
476 | 497 | ||
477 | /* Clear signals before calling svc_exit_thread() */ | 498 | /* Clear signals before calling svc_exit_thread() */ |
@@ -539,6 +560,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
539 | + rqstp->rq_res.head[0].iov_len; | 560 | + rqstp->rq_res.head[0].iov_len; |
540 | rqstp->rq_res.head[0].iov_len += sizeof(__be32); | 561 | rqstp->rq_res.head[0].iov_len += sizeof(__be32); |
541 | 562 | ||
563 | /* NFSv4.1 DRC requires statp */ | ||
564 | if (rqstp->rq_vers == 4) | ||
565 | nfsd4_set_statp(rqstp, statp); | ||
566 | |||
542 | /* Now call the procedure handler, and encode NFS status. */ | 567 | /* Now call the procedure handler, and encode NFS status. */ |
543 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); | 568 | nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); |
544 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); | 569 | nfserr = map_new_errors(rqstp->rq_vers, nfserr); |
@@ -570,3 +595,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) | |||
570 | nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); | 595 | nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); |
571 | return 1; | 596 | return 1; |
572 | } | 597 | } |
598 | |||
599 | int nfsd_pool_stats_open(struct inode *inode, struct file *file) | ||
600 | { | ||
601 | if (nfsd_serv == NULL) | ||
602 | return -ENODEV; | ||
603 | return svc_pool_stats_open(nfsd_serv, file); | ||
604 | } | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 78376b6c0236..ab93fcfef254 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -366,8 +366,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
366 | } | 366 | } |
367 | 367 | ||
368 | /* Revoke setuid/setgid on chown */ | 368 | /* Revoke setuid/setgid on chown */ |
369 | if (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || | 369 | if (!S_ISDIR(inode->i_mode) && |
370 | ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)) { | 370 | (((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) || |
371 | ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid))) { | ||
371 | iap->ia_valid |= ATTR_KILL_PRIV; | 372 | iap->ia_valid |= ATTR_KILL_PRIV; |
372 | if (iap->ia_valid & ATTR_MODE) { | 373 | if (iap->ia_valid & ATTR_MODE) { |
373 | /* we're setting mode too, just clear the s*id bits */ | 374 | /* we're setting mode too, just clear the s*id bits */ |
@@ -960,7 +961,7 @@ static void kill_suid(struct dentry *dentry) | |||
960 | static __be32 | 961 | static __be32 |
961 | nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 962 | nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
962 | loff_t offset, struct kvec *vec, int vlen, | 963 | loff_t offset, struct kvec *vec, int vlen, |
963 | unsigned long cnt, int *stablep) | 964 | unsigned long *cnt, int *stablep) |
964 | { | 965 | { |
965 | struct svc_export *exp; | 966 | struct svc_export *exp; |
966 | struct dentry *dentry; | 967 | struct dentry *dentry; |
@@ -974,7 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
974 | err = nfserr_perm; | 975 | err = nfserr_perm; |
975 | 976 | ||
976 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && | 977 | if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && |
977 | (!lock_may_write(file->f_path.dentry->d_inode, offset, cnt))) | 978 | (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt))) |
978 | goto out; | 979 | goto out; |
979 | #endif | 980 | #endif |
980 | 981 | ||
@@ -1009,7 +1010,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1009 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); | 1010 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); |
1010 | set_fs(oldfs); | 1011 | set_fs(oldfs); |
1011 | if (host_err >= 0) { | 1012 | if (host_err >= 0) { |
1012 | nfsdstats.io_write += cnt; | 1013 | nfsdstats.io_write += host_err; |
1013 | fsnotify_modify(file->f_path.dentry); | 1014 | fsnotify_modify(file->f_path.dentry); |
1014 | } | 1015 | } |
1015 | 1016 | ||
@@ -1054,9 +1055,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1054 | } | 1055 | } |
1055 | 1056 | ||
1056 | dprintk("nfsd: write complete host_err=%d\n", host_err); | 1057 | dprintk("nfsd: write complete host_err=%d\n", host_err); |
1057 | if (host_err >= 0) | 1058 | if (host_err >= 0) { |
1058 | err = 0; | 1059 | err = 0; |
1059 | else | 1060 | *cnt = host_err; |
1061 | } else | ||
1060 | err = nfserrno(host_err); | 1062 | err = nfserrno(host_err); |
1061 | out: | 1063 | out: |
1062 | return err; | 1064 | return err; |
@@ -1098,7 +1100,7 @@ out: | |||
1098 | */ | 1100 | */ |
1099 | __be32 | 1101 | __be32 |
1100 | nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 1102 | nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
1101 | loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, | 1103 | loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, |
1102 | int *stablep) | 1104 | int *stablep) |
1103 | { | 1105 | { |
1104 | __be32 err = 0; | 1106 | __be32 err = 0; |
@@ -1179,6 +1181,21 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, | |||
1179 | return 0; | 1181 | return 0; |
1180 | } | 1182 | } |
1181 | 1183 | ||
1184 | /* HPUX client sometimes creates a file in mode 000, and sets size to 0. | ||
1185 | * setting size to 0 may fail for some specific file systems by the permission | ||
1186 | * checking which requires WRITE permission but the mode is 000. | ||
1187 | * we ignore the resizing(to 0) on the just new created file, since the size is | ||
1188 | * 0 after file created. | ||
1189 | * | ||
1190 | * call this only after vfs_create() is called. | ||
1191 | * */ | ||
1192 | static void | ||
1193 | nfsd_check_ignore_resizing(struct iattr *iap) | ||
1194 | { | ||
1195 | if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) | ||
1196 | iap->ia_valid &= ~ATTR_SIZE; | ||
1197 | } | ||
1198 | |||
1182 | /* | 1199 | /* |
1183 | * Create a file (regular, directory, device, fifo); UNIX sockets | 1200 | * Create a file (regular, directory, device, fifo); UNIX sockets |
1184 | * not yet implemented. | 1201 | * not yet implemented. |
@@ -1274,6 +1291,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1274 | switch (type) { | 1291 | switch (type) { |
1275 | case S_IFREG: | 1292 | case S_IFREG: |
1276 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); | 1293 | host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); |
1294 | if (!host_err) | ||
1295 | nfsd_check_ignore_resizing(iap); | ||
1277 | break; | 1296 | break; |
1278 | case S_IFDIR: | 1297 | case S_IFDIR: |
1279 | host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); | 1298 | host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); |
@@ -1427,6 +1446,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1427 | /* setattr will sync the child (or not) */ | 1446 | /* setattr will sync the child (or not) */ |
1428 | } | 1447 | } |
1429 | 1448 | ||
1449 | nfsd_check_ignore_resizing(iap); | ||
1450 | |||
1430 | if (createmode == NFS3_CREATE_EXCLUSIVE) { | 1451 | if (createmode == NFS3_CREATE_EXCLUSIVE) { |
1431 | /* Cram the verifier into atime/mtime */ | 1452 | /* Cram the verifier into atime/mtime */ |
1432 | iap->ia_valid = ATTR_MTIME|ATTR_ATIME | 1453 | iap->ia_valid = ATTR_MTIME|ATTR_ATIME |
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile new file mode 100644 index 000000000000..df3e62c1ddc5 --- /dev/null +++ b/fs/nilfs2/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | obj-$(CONFIG_NILFS2_FS) += nilfs2.o | ||
2 | nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ | ||
3 | btnode.o bmap.o btree.o direct.o dat.o recovery.o \ | ||
4 | the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ | ||
5 | ifile.o alloc.o gcinode.o ioctl.o gcdat.o | ||
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c new file mode 100644 index 000000000000..d69e6ae59251 --- /dev/null +++ b/fs/nilfs2/alloc.c | |||
@@ -0,0 +1,504 @@ | |||
1 | /* | ||
2 | * alloc.c - NILFS dat/inode allocator | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Original code was written by Koji Sato <koji@osrg.net>. | ||
21 | * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>, | ||
22 | * Amagai Yoshiji <amagai@osrg.net>. | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/bitops.h> | ||
29 | #include "mdt.h" | ||
30 | #include "alloc.h" | ||
31 | |||
32 | |||
33 | static inline unsigned long | ||
34 | nilfs_palloc_groups_per_desc_block(const struct inode *inode) | ||
35 | { | ||
36 | return (1UL << inode->i_blkbits) / | ||
37 | sizeof(struct nilfs_palloc_group_desc); | ||
38 | } | ||
39 | |||
40 | static inline unsigned long | ||
41 | nilfs_palloc_groups_count(const struct inode *inode) | ||
42 | { | ||
43 | return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); | ||
44 | } | ||
45 | |||
46 | int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) | ||
47 | { | ||
48 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
49 | |||
50 | mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS); | ||
51 | if (!mi->mi_bgl) | ||
52 | return -ENOMEM; | ||
53 | |||
54 | bgl_lock_init(mi->mi_bgl); | ||
55 | |||
56 | nilfs_mdt_set_entry_size(inode, entry_size, 0); | ||
57 | |||
58 | mi->mi_blocks_per_group = | ||
59 | DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode), | ||
60 | mi->mi_entries_per_block) + 1; | ||
61 | /* Number of blocks in a group including entry blocks and | ||
62 | a bitmap block */ | ||
63 | mi->mi_blocks_per_desc_block = | ||
64 | nilfs_palloc_groups_per_desc_block(inode) * | ||
65 | mi->mi_blocks_per_group + 1; | ||
66 | /* Number of blocks per descriptor including the | ||
67 | descriptor block */ | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, | ||
72 | unsigned long *offset) | ||
73 | { | ||
74 | __u64 group = nr; | ||
75 | |||
76 | *offset = do_div(group, nilfs_palloc_entries_per_group(inode)); | ||
77 | return group; | ||
78 | } | ||
79 | |||
80 | static unsigned long | ||
81 | nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) | ||
82 | { | ||
83 | unsigned long desc_block = | ||
84 | group / nilfs_palloc_groups_per_desc_block(inode); | ||
85 | return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; | ||
86 | } | ||
87 | |||
88 | static unsigned long | ||
89 | nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) | ||
90 | { | ||
91 | unsigned long desc_offset = | ||
92 | group % nilfs_palloc_groups_per_desc_block(inode); | ||
93 | return nilfs_palloc_desc_blkoff(inode, group) + 1 + | ||
94 | desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; | ||
95 | } | ||
96 | |||
97 | static unsigned long | ||
98 | nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, | ||
99 | const struct nilfs_palloc_group_desc *desc) | ||
100 | { | ||
101 | unsigned long nfree; | ||
102 | |||
103 | spin_lock(nilfs_mdt_bgl_lock(inode, group)); | ||
104 | nfree = le32_to_cpu(desc->pg_nfrees); | ||
105 | spin_unlock(nilfs_mdt_bgl_lock(inode, group)); | ||
106 | return nfree; | ||
107 | } | ||
108 | |||
109 | static void | ||
110 | nilfs_palloc_group_desc_add_entries(struct inode *inode, | ||
111 | unsigned long group, | ||
112 | struct nilfs_palloc_group_desc *desc, | ||
113 | u32 n) | ||
114 | { | ||
115 | spin_lock(nilfs_mdt_bgl_lock(inode, group)); | ||
116 | le32_add_cpu(&desc->pg_nfrees, n); | ||
117 | spin_unlock(nilfs_mdt_bgl_lock(inode, group)); | ||
118 | } | ||
119 | |||
120 | static unsigned long | ||
121 | nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) | ||
122 | { | ||
123 | unsigned long group, group_offset; | ||
124 | |||
125 | group = nilfs_palloc_group(inode, nr, &group_offset); | ||
126 | |||
127 | return nilfs_palloc_bitmap_blkoff(inode, group) + 1 + | ||
128 | group_offset / NILFS_MDT(inode)->mi_entries_per_block; | ||
129 | } | ||
130 | |||
131 | static void nilfs_palloc_desc_block_init(struct inode *inode, | ||
132 | struct buffer_head *bh, void *kaddr) | ||
133 | { | ||
134 | struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh); | ||
135 | unsigned long n = nilfs_palloc_groups_per_desc_block(inode); | ||
136 | __le32 nfrees; | ||
137 | |||
138 | nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode)); | ||
139 | while (n-- > 0) { | ||
140 | desc->pg_nfrees = nfrees; | ||
141 | desc++; | ||
142 | } | ||
143 | } | ||
144 | |||
145 | static int nilfs_palloc_get_desc_block(struct inode *inode, | ||
146 | unsigned long group, | ||
147 | int create, struct buffer_head **bhp) | ||
148 | { | ||
149 | return nilfs_mdt_get_block(inode, | ||
150 | nilfs_palloc_desc_blkoff(inode, group), | ||
151 | create, nilfs_palloc_desc_block_init, bhp); | ||
152 | } | ||
153 | |||
154 | static int nilfs_palloc_get_bitmap_block(struct inode *inode, | ||
155 | unsigned long group, | ||
156 | int create, struct buffer_head **bhp) | ||
157 | { | ||
158 | return nilfs_mdt_get_block(inode, | ||
159 | nilfs_palloc_bitmap_blkoff(inode, group), | ||
160 | create, NULL, bhp); | ||
161 | } | ||
162 | |||
163 | int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, | ||
164 | int create, struct buffer_head **bhp) | ||
165 | { | ||
166 | return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), | ||
167 | create, NULL, bhp); | ||
168 | } | ||
169 | |||
170 | static struct nilfs_palloc_group_desc * | ||
171 | nilfs_palloc_block_get_group_desc(const struct inode *inode, | ||
172 | unsigned long group, | ||
173 | const struct buffer_head *bh, void *kaddr) | ||
174 | { | ||
175 | return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) + | ||
176 | group % nilfs_palloc_groups_per_desc_block(inode); | ||
177 | } | ||
178 | |||
179 | static unsigned char * | ||
180 | nilfs_palloc_block_get_bitmap(const struct inode *inode, | ||
181 | const struct buffer_head *bh, void *kaddr) | ||
182 | { | ||
183 | return (unsigned char *)(kaddr + bh_offset(bh)); | ||
184 | } | ||
185 | |||
186 | void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, | ||
187 | const struct buffer_head *bh, void *kaddr) | ||
188 | { | ||
189 | unsigned long entry_offset, group_offset; | ||
190 | |||
191 | nilfs_palloc_group(inode, nr, &group_offset); | ||
192 | entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block; | ||
193 | |||
194 | return kaddr + bh_offset(bh) + | ||
195 | entry_offset * NILFS_MDT(inode)->mi_entry_size; | ||
196 | } | ||
197 | |||
198 | static int nilfs_palloc_find_available_slot(struct inode *inode, | ||
199 | unsigned long group, | ||
200 | unsigned long target, | ||
201 | unsigned char *bitmap, | ||
202 | int bsize) /* size in bits */ | ||
203 | { | ||
204 | int curr, pos, end, i; | ||
205 | |||
206 | if (target > 0) { | ||
207 | end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); | ||
208 | if (end > bsize) | ||
209 | end = bsize; | ||
210 | pos = nilfs_find_next_zero_bit(bitmap, end, target); | ||
211 | if (pos < end && | ||
212 | !nilfs_set_bit_atomic( | ||
213 | nilfs_mdt_bgl_lock(inode, group), pos, bitmap)) | ||
214 | return pos; | ||
215 | } else | ||
216 | end = 0; | ||
217 | |||
218 | for (i = 0, curr = end; | ||
219 | i < bsize; | ||
220 | i += BITS_PER_LONG, curr += BITS_PER_LONG) { | ||
221 | /* wrap around */ | ||
222 | if (curr >= bsize) | ||
223 | curr = 0; | ||
224 | while (*((unsigned long *)bitmap + curr / BITS_PER_LONG) | ||
225 | != ~0UL) { | ||
226 | end = curr + BITS_PER_LONG; | ||
227 | if (end > bsize) | ||
228 | end = bsize; | ||
229 | pos = nilfs_find_next_zero_bit(bitmap, end, curr); | ||
230 | if ((pos < end) && | ||
231 | !nilfs_set_bit_atomic( | ||
232 | nilfs_mdt_bgl_lock(inode, group), pos, | ||
233 | bitmap)) | ||
234 | return pos; | ||
235 | } | ||
236 | } | ||
237 | return -ENOSPC; | ||
238 | } | ||
239 | |||
240 | static unsigned long | ||
241 | nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, | ||
242 | unsigned long curr, unsigned long max) | ||
243 | { | ||
244 | return min_t(unsigned long, | ||
245 | nilfs_palloc_groups_per_desc_block(inode) - | ||
246 | curr % nilfs_palloc_groups_per_desc_block(inode), | ||
247 | max - curr + 1); | ||
248 | } | ||
249 | |||
250 | int nilfs_palloc_prepare_alloc_entry(struct inode *inode, | ||
251 | struct nilfs_palloc_req *req) | ||
252 | { | ||
253 | struct buffer_head *desc_bh, *bitmap_bh; | ||
254 | struct nilfs_palloc_group_desc *desc; | ||
255 | unsigned char *bitmap; | ||
256 | void *desc_kaddr, *bitmap_kaddr; | ||
257 | unsigned long group, maxgroup, ngroups; | ||
258 | unsigned long group_offset, maxgroup_offset; | ||
259 | unsigned long n, entries_per_group, groups_per_desc_block; | ||
260 | unsigned long i, j; | ||
261 | int pos, ret; | ||
262 | |||
263 | ngroups = nilfs_palloc_groups_count(inode); | ||
264 | maxgroup = ngroups - 1; | ||
265 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
266 | entries_per_group = nilfs_palloc_entries_per_group(inode); | ||
267 | groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode); | ||
268 | |||
269 | for (i = 0; i < ngroups; i += n) { | ||
270 | if (group >= ngroups) { | ||
271 | /* wrap around */ | ||
272 | group = 0; | ||
273 | maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, | ||
274 | &maxgroup_offset) - 1; | ||
275 | } | ||
276 | ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); | ||
277 | if (ret < 0) | ||
278 | return ret; | ||
279 | desc_kaddr = kmap(desc_bh->b_page); | ||
280 | desc = nilfs_palloc_block_get_group_desc( | ||
281 | inode, group, desc_bh, desc_kaddr); | ||
282 | n = nilfs_palloc_rest_groups_in_desc_block(inode, group, | ||
283 | maxgroup); | ||
284 | for (j = 0; j < n; j++, desc++, group++) { | ||
285 | if (nilfs_palloc_group_desc_nfrees(inode, group, desc) | ||
286 | > 0) { | ||
287 | ret = nilfs_palloc_get_bitmap_block( | ||
288 | inode, group, 1, &bitmap_bh); | ||
289 | if (ret < 0) | ||
290 | goto out_desc; | ||
291 | bitmap_kaddr = kmap(bitmap_bh->b_page); | ||
292 | bitmap = nilfs_palloc_block_get_bitmap( | ||
293 | inode, bitmap_bh, bitmap_kaddr); | ||
294 | pos = nilfs_palloc_find_available_slot( | ||
295 | inode, group, group_offset, bitmap, | ||
296 | entries_per_group); | ||
297 | if (pos >= 0) { | ||
298 | /* found a free entry */ | ||
299 | nilfs_palloc_group_desc_add_entries( | ||
300 | inode, group, desc, -1); | ||
301 | req->pr_entry_nr = | ||
302 | entries_per_group * group + pos; | ||
303 | kunmap(desc_bh->b_page); | ||
304 | kunmap(bitmap_bh->b_page); | ||
305 | |||
306 | req->pr_desc_bh = desc_bh; | ||
307 | req->pr_bitmap_bh = bitmap_bh; | ||
308 | return 0; | ||
309 | } | ||
310 | kunmap(bitmap_bh->b_page); | ||
311 | brelse(bitmap_bh); | ||
312 | } | ||
313 | |||
314 | group_offset = 0; | ||
315 | } | ||
316 | |||
317 | kunmap(desc_bh->b_page); | ||
318 | brelse(desc_bh); | ||
319 | } | ||
320 | |||
321 | /* no entries left */ | ||
322 | return -ENOSPC; | ||
323 | |||
324 | out_desc: | ||
325 | kunmap(desc_bh->b_page); | ||
326 | brelse(desc_bh); | ||
327 | return ret; | ||
328 | } | ||
329 | |||
330 | void nilfs_palloc_commit_alloc_entry(struct inode *inode, | ||
331 | struct nilfs_palloc_req *req) | ||
332 | { | ||
333 | nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); | ||
334 | nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); | ||
335 | nilfs_mdt_mark_dirty(inode); | ||
336 | |||
337 | brelse(req->pr_bitmap_bh); | ||
338 | brelse(req->pr_desc_bh); | ||
339 | } | ||
340 | |||
341 | void nilfs_palloc_commit_free_entry(struct inode *inode, | ||
342 | struct nilfs_palloc_req *req) | ||
343 | { | ||
344 | struct nilfs_palloc_group_desc *desc; | ||
345 | unsigned long group, group_offset; | ||
346 | unsigned char *bitmap; | ||
347 | void *desc_kaddr, *bitmap_kaddr; | ||
348 | |||
349 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
350 | desc_kaddr = kmap(req->pr_desc_bh->b_page); | ||
351 | desc = nilfs_palloc_block_get_group_desc(inode, group, | ||
352 | req->pr_desc_bh, desc_kaddr); | ||
353 | bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); | ||
354 | bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, | ||
355 | bitmap_kaddr); | ||
356 | |||
357 | if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), | ||
358 | group_offset, bitmap)) | ||
359 | printk(KERN_WARNING "%s: entry number %llu already freed\n", | ||
360 | __func__, (unsigned long long)req->pr_entry_nr); | ||
361 | |||
362 | nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); | ||
363 | |||
364 | kunmap(req->pr_bitmap_bh->b_page); | ||
365 | kunmap(req->pr_desc_bh->b_page); | ||
366 | |||
367 | nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); | ||
368 | nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); | ||
369 | nilfs_mdt_mark_dirty(inode); | ||
370 | |||
371 | brelse(req->pr_bitmap_bh); | ||
372 | brelse(req->pr_desc_bh); | ||
373 | } | ||
374 | |||
375 | void nilfs_palloc_abort_alloc_entry(struct inode *inode, | ||
376 | struct nilfs_palloc_req *req) | ||
377 | { | ||
378 | struct nilfs_palloc_group_desc *desc; | ||
379 | void *desc_kaddr, *bitmap_kaddr; | ||
380 | unsigned char *bitmap; | ||
381 | unsigned long group, group_offset; | ||
382 | |||
383 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
384 | desc_kaddr = kmap(req->pr_desc_bh->b_page); | ||
385 | desc = nilfs_palloc_block_get_group_desc(inode, group, | ||
386 | req->pr_desc_bh, desc_kaddr); | ||
387 | bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); | ||
388 | bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, | ||
389 | bitmap_kaddr); | ||
390 | if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), | ||
391 | group_offset, bitmap)) | ||
392 | printk(KERN_WARNING "%s: entry numer %llu already freed\n", | ||
393 | __func__, (unsigned long long)req->pr_entry_nr); | ||
394 | |||
395 | nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); | ||
396 | |||
397 | kunmap(req->pr_bitmap_bh->b_page); | ||
398 | kunmap(req->pr_desc_bh->b_page); | ||
399 | |||
400 | brelse(req->pr_bitmap_bh); | ||
401 | brelse(req->pr_desc_bh); | ||
402 | |||
403 | req->pr_entry_nr = 0; | ||
404 | req->pr_bitmap_bh = NULL; | ||
405 | req->pr_desc_bh = NULL; | ||
406 | } | ||
407 | |||
408 | int nilfs_palloc_prepare_free_entry(struct inode *inode, | ||
409 | struct nilfs_palloc_req *req) | ||
410 | { | ||
411 | struct buffer_head *desc_bh, *bitmap_bh; | ||
412 | unsigned long group, group_offset; | ||
413 | int ret; | ||
414 | |||
415 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
416 | ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); | ||
417 | if (ret < 0) | ||
418 | return ret; | ||
419 | ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh); | ||
420 | if (ret < 0) { | ||
421 | brelse(desc_bh); | ||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | req->pr_desc_bh = desc_bh; | ||
426 | req->pr_bitmap_bh = bitmap_bh; | ||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | void nilfs_palloc_abort_free_entry(struct inode *inode, | ||
431 | struct nilfs_palloc_req *req) | ||
432 | { | ||
433 | brelse(req->pr_bitmap_bh); | ||
434 | brelse(req->pr_desc_bh); | ||
435 | |||
436 | req->pr_entry_nr = 0; | ||
437 | req->pr_bitmap_bh = NULL; | ||
438 | req->pr_desc_bh = NULL; | ||
439 | } | ||
440 | |||
441 | static int | ||
442 | nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) | ||
443 | { | ||
444 | __u64 first, last; | ||
445 | |||
446 | first = group * nilfs_palloc_entries_per_group(inode); | ||
447 | last = first + nilfs_palloc_entries_per_group(inode) - 1; | ||
448 | return (nr >= first) && (nr <= last); | ||
449 | } | ||
450 | |||
451 | int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) | ||
452 | { | ||
453 | struct buffer_head *desc_bh, *bitmap_bh; | ||
454 | struct nilfs_palloc_group_desc *desc; | ||
455 | unsigned char *bitmap; | ||
456 | void *desc_kaddr, *bitmap_kaddr; | ||
457 | unsigned long group, group_offset; | ||
458 | int i, j, n, ret; | ||
459 | |||
460 | for (i = 0; i < nitems; i += n) { | ||
461 | group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); | ||
462 | ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); | ||
463 | if (ret < 0) | ||
464 | return ret; | ||
465 | ret = nilfs_palloc_get_bitmap_block(inode, group, 0, | ||
466 | &bitmap_bh); | ||
467 | if (ret < 0) { | ||
468 | brelse(desc_bh); | ||
469 | return ret; | ||
470 | } | ||
471 | desc_kaddr = kmap(desc_bh->b_page); | ||
472 | desc = nilfs_palloc_block_get_group_desc( | ||
473 | inode, group, desc_bh, desc_kaddr); | ||
474 | bitmap_kaddr = kmap(bitmap_bh->b_page); | ||
475 | bitmap = nilfs_palloc_block_get_bitmap( | ||
476 | inode, bitmap_bh, bitmap_kaddr); | ||
477 | for (j = i, n = 0; | ||
478 | (j < nitems) && nilfs_palloc_group_is_in(inode, group, | ||
479 | entry_nrs[j]); | ||
480 | j++, n++) { | ||
481 | nilfs_palloc_group(inode, entry_nrs[j], &group_offset); | ||
482 | if (!nilfs_clear_bit_atomic( | ||
483 | nilfs_mdt_bgl_lock(inode, group), | ||
484 | group_offset, bitmap)) { | ||
485 | printk(KERN_WARNING | ||
486 | "%s: entry number %llu already freed\n", | ||
487 | __func__, | ||
488 | (unsigned long long)entry_nrs[j]); | ||
489 | } | ||
490 | } | ||
491 | nilfs_palloc_group_desc_add_entries(inode, group, desc, n); | ||
492 | |||
493 | kunmap(bitmap_bh->b_page); | ||
494 | kunmap(desc_bh->b_page); | ||
495 | |||
496 | nilfs_mdt_mark_buffer_dirty(desc_bh); | ||
497 | nilfs_mdt_mark_buffer_dirty(bitmap_bh); | ||
498 | nilfs_mdt_mark_dirty(inode); | ||
499 | |||
500 | brelse(bitmap_bh); | ||
501 | brelse(desc_bh); | ||
502 | } | ||
503 | return 0; | ||
504 | } | ||
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h new file mode 100644 index 000000000000..4ace5475c2c7 --- /dev/null +++ b/fs/nilfs2/alloc.h | |||
@@ -0,0 +1,72 @@ | |||
1 | /* | ||
2 | * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Original code was written by Koji Sato <koji@osrg.net>. | ||
21 | * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>, | ||
22 | * Amagai Yoshiji <amagai@osrg.net>. | ||
23 | */ | ||
24 | |||
25 | #ifndef _NILFS_ALLOC_H | ||
26 | #define _NILFS_ALLOC_H | ||
27 | |||
28 | #include <linux/types.h> | ||
29 | #include <linux/buffer_head.h> | ||
30 | #include <linux/fs.h> | ||
31 | |||
32 | static inline unsigned long | ||
33 | nilfs_palloc_entries_per_group(const struct inode *inode) | ||
34 | { | ||
35 | return 1UL << (inode->i_blkbits + 3 /* log2(8 = CHAR_BITS) */); | ||
36 | } | ||
37 | |||
38 | int nilfs_palloc_init_blockgroup(struct inode *, unsigned); | ||
39 | int nilfs_palloc_get_entry_block(struct inode *, __u64, int, | ||
40 | struct buffer_head **); | ||
41 | void *nilfs_palloc_block_get_entry(const struct inode *, __u64, | ||
42 | const struct buffer_head *, void *); | ||
43 | |||
44 | /** | ||
45 | * nilfs_palloc_req - persistent alloctor request and reply | ||
46 | * @pr_entry_nr: entry number (vblocknr or inode number) | ||
47 | * @pr_desc_bh: buffer head of the buffer containing block group descriptors | ||
48 | * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap | ||
49 | * @pr_entry_bh: buffer head of the buffer containing translation entries | ||
50 | */ | ||
51 | struct nilfs_palloc_req { | ||
52 | __u64 pr_entry_nr; | ||
53 | struct buffer_head *pr_desc_bh; | ||
54 | struct buffer_head *pr_bitmap_bh; | ||
55 | struct buffer_head *pr_entry_bh; | ||
56 | }; | ||
57 | |||
58 | int nilfs_palloc_prepare_alloc_entry(struct inode *, | ||
59 | struct nilfs_palloc_req *); | ||
60 | void nilfs_palloc_commit_alloc_entry(struct inode *, | ||
61 | struct nilfs_palloc_req *); | ||
62 | void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *); | ||
63 | void nilfs_palloc_commit_free_entry(struct inode *, struct nilfs_palloc_req *); | ||
64 | int nilfs_palloc_prepare_free_entry(struct inode *, struct nilfs_palloc_req *); | ||
65 | void nilfs_palloc_abort_free_entry(struct inode *, struct nilfs_palloc_req *); | ||
66 | int nilfs_palloc_freev(struct inode *, __u64 *, size_t); | ||
67 | |||
68 | #define nilfs_set_bit_atomic ext2_set_bit_atomic | ||
69 | #define nilfs_clear_bit_atomic ext2_clear_bit_atomic | ||
70 | #define nilfs_find_next_zero_bit ext2_find_next_zero_bit | ||
71 | |||
72 | #endif /* _NILFS_ALLOC_H */ | ||
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c new file mode 100644 index 000000000000..24638e059bf3 --- /dev/null +++ b/fs/nilfs2/bmap.c | |||
@@ -0,0 +1,783 @@ | |||
1 | /* | ||
2 | * bmap.c - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/fs.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include "nilfs.h" | ||
27 | #include "bmap.h" | ||
28 | #include "sb.h" | ||
29 | #include "btnode.h" | ||
30 | #include "mdt.h" | ||
31 | #include "dat.h" | ||
32 | #include "alloc.h" | ||
33 | |||
34 | int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, | ||
35 | __u64 *ptrp) | ||
36 | { | ||
37 | __u64 ptr; | ||
38 | int ret; | ||
39 | |||
40 | down_read(&bmap->b_sem); | ||
41 | ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); | ||
42 | if (ret < 0) | ||
43 | goto out; | ||
44 | if (bmap->b_pops->bpop_translate != NULL) { | ||
45 | ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); | ||
46 | if (ret < 0) | ||
47 | goto out; | ||
48 | *ptrp = ptr; | ||
49 | } | ||
50 | |||
51 | out: | ||
52 | up_read(&bmap->b_sem); | ||
53 | return ret; | ||
54 | } | ||
55 | |||
56 | |||
57 | /** | ||
58 | * nilfs_bmap_lookup - find a record | ||
59 | * @bmap: bmap | ||
60 | * @key: key | ||
61 | * @recp: pointer to record | ||
62 | * | ||
63 | * Description: nilfs_bmap_lookup() finds a record whose key matches @key in | ||
64 | * @bmap. | ||
65 | * | ||
66 | * Return Value: On success, 0 is returned and the record associated with @key | ||
67 | * is stored in the place pointed by @recp. On error, one of the following | ||
68 | * negative error codes is returned. | ||
69 | * | ||
70 | * %-EIO - I/O error. | ||
71 | * | ||
72 | * %-ENOMEM - Insufficient amount of memory available. | ||
73 | * | ||
74 | * %-ENOENT - A record associated with @key does not exist. | ||
75 | */ | ||
76 | int nilfs_bmap_lookup(struct nilfs_bmap *bmap, | ||
77 | unsigned long key, | ||
78 | unsigned long *recp) | ||
79 | { | ||
80 | __u64 ptr; | ||
81 | int ret; | ||
82 | |||
83 | /* XXX: use macro for level 1 */ | ||
84 | ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr); | ||
85 | if (recp != NULL) | ||
86 | *recp = ptr; | ||
87 | return ret; | ||
88 | } | ||
89 | |||
90 | static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | ||
91 | { | ||
92 | __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; | ||
93 | __u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1]; | ||
94 | int ret, n; | ||
95 | |||
96 | if (bmap->b_ops->bop_check_insert != NULL) { | ||
97 | ret = bmap->b_ops->bop_check_insert(bmap, key); | ||
98 | if (ret > 0) { | ||
99 | n = bmap->b_ops->bop_gather_data( | ||
100 | bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1); | ||
101 | if (n < 0) | ||
102 | return n; | ||
103 | ret = nilfs_btree_convert_and_insert( | ||
104 | bmap, key, ptr, keys, ptrs, n, | ||
105 | NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH); | ||
106 | if (ret == 0) | ||
107 | bmap->b_u.u_flags |= NILFS_BMAP_LARGE; | ||
108 | |||
109 | return ret; | ||
110 | } else if (ret < 0) | ||
111 | return ret; | ||
112 | } | ||
113 | |||
114 | return bmap->b_ops->bop_insert(bmap, key, ptr); | ||
115 | } | ||
116 | |||
117 | /** | ||
118 | * nilfs_bmap_insert - insert a new key-record pair into a bmap | ||
119 | * @bmap: bmap | ||
120 | * @key: key | ||
121 | * @rec: record | ||
122 | * | ||
123 | * Description: nilfs_bmap_insert() inserts the new key-record pair specified | ||
124 | * by @key and @rec into @bmap. | ||
125 | * | ||
126 | * Return Value: On success, 0 is returned. On error, one of the following | ||
127 | * negative error codes is returned. | ||
128 | * | ||
129 | * %-EIO - I/O error. | ||
130 | * | ||
131 | * %-ENOMEM - Insufficient amount of memory available. | ||
132 | * | ||
133 | * %-EEXIST - A record associated with @key already exist. | ||
134 | */ | ||
135 | int nilfs_bmap_insert(struct nilfs_bmap *bmap, | ||
136 | unsigned long key, | ||
137 | unsigned long rec) | ||
138 | { | ||
139 | int ret; | ||
140 | |||
141 | down_write(&bmap->b_sem); | ||
142 | ret = nilfs_bmap_do_insert(bmap, key, rec); | ||
143 | up_write(&bmap->b_sem); | ||
144 | return ret; | ||
145 | } | ||
146 | |||
147 | static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key) | ||
148 | { | ||
149 | __u64 keys[NILFS_BMAP_LARGE_LOW + 1]; | ||
150 | __u64 ptrs[NILFS_BMAP_LARGE_LOW + 1]; | ||
151 | int ret, n; | ||
152 | |||
153 | if (bmap->b_ops->bop_check_delete != NULL) { | ||
154 | ret = bmap->b_ops->bop_check_delete(bmap, key); | ||
155 | if (ret > 0) { | ||
156 | n = bmap->b_ops->bop_gather_data( | ||
157 | bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1); | ||
158 | if (n < 0) | ||
159 | return n; | ||
160 | ret = nilfs_direct_delete_and_convert( | ||
161 | bmap, key, keys, ptrs, n, | ||
162 | NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH); | ||
163 | if (ret == 0) | ||
164 | bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE; | ||
165 | |||
166 | return ret; | ||
167 | } else if (ret < 0) | ||
168 | return ret; | ||
169 | } | ||
170 | |||
171 | return bmap->b_ops->bop_delete(bmap, key); | ||
172 | } | ||
173 | |||
174 | int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) | ||
175 | { | ||
176 | __u64 lastkey; | ||
177 | int ret; | ||
178 | |||
179 | down_read(&bmap->b_sem); | ||
180 | ret = bmap->b_ops->bop_last_key(bmap, &lastkey); | ||
181 | if (!ret) | ||
182 | *key = lastkey; | ||
183 | up_read(&bmap->b_sem); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | /** | ||
188 | * nilfs_bmap_delete - delete a key-record pair from a bmap | ||
189 | * @bmap: bmap | ||
190 | * @key: key | ||
191 | * | ||
192 | * Description: nilfs_bmap_delete() deletes the key-record pair specified by | ||
193 | * @key from @bmap. | ||
194 | * | ||
195 | * Return Value: On success, 0 is returned. On error, one of the following | ||
196 | * negative error codes is returned. | ||
197 | * | ||
198 | * %-EIO - I/O error. | ||
199 | * | ||
200 | * %-ENOMEM - Insufficient amount of memory available. | ||
201 | * | ||
202 | * %-ENOENT - A record associated with @key does not exist. | ||
203 | */ | ||
204 | int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) | ||
205 | { | ||
206 | int ret; | ||
207 | |||
208 | down_write(&bmap->b_sem); | ||
209 | ret = nilfs_bmap_do_delete(bmap, key); | ||
210 | up_write(&bmap->b_sem); | ||
211 | return ret; | ||
212 | } | ||
213 | |||
214 | static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) | ||
215 | { | ||
216 | __u64 lastkey; | ||
217 | int ret; | ||
218 | |||
219 | ret = bmap->b_ops->bop_last_key(bmap, &lastkey); | ||
220 | if (ret < 0) { | ||
221 | if (ret == -ENOENT) | ||
222 | ret = 0; | ||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | while (key <= lastkey) { | ||
227 | ret = nilfs_bmap_do_delete(bmap, lastkey); | ||
228 | if (ret < 0) | ||
229 | return ret; | ||
230 | ret = bmap->b_ops->bop_last_key(bmap, &lastkey); | ||
231 | if (ret < 0) { | ||
232 | if (ret == -ENOENT) | ||
233 | ret = 0; | ||
234 | return ret; | ||
235 | } | ||
236 | } | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | /** | ||
241 | * nilfs_bmap_truncate - truncate a bmap to a specified key | ||
242 | * @bmap: bmap | ||
243 | * @key: key | ||
244 | * | ||
245 | * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are | ||
246 | * greater than or equal to @key from @bmap. | ||
247 | * | ||
248 | * Return Value: On success, 0 is returned. On error, one of the following | ||
249 | * negative error codes is returned. | ||
250 | * | ||
251 | * %-EIO - I/O error. | ||
252 | * | ||
253 | * %-ENOMEM - Insufficient amount of memory available. | ||
254 | */ | ||
255 | int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) | ||
256 | { | ||
257 | int ret; | ||
258 | |||
259 | down_write(&bmap->b_sem); | ||
260 | ret = nilfs_bmap_do_truncate(bmap, key); | ||
261 | up_write(&bmap->b_sem); | ||
262 | return ret; | ||
263 | } | ||
264 | |||
265 | /** | ||
266 | * nilfs_bmap_clear - free resources a bmap holds | ||
267 | * @bmap: bmap | ||
268 | * | ||
269 | * Description: nilfs_bmap_clear() frees resources associated with @bmap. | ||
270 | */ | ||
271 | void nilfs_bmap_clear(struct nilfs_bmap *bmap) | ||
272 | { | ||
273 | down_write(&bmap->b_sem); | ||
274 | if (bmap->b_ops->bop_clear != NULL) | ||
275 | bmap->b_ops->bop_clear(bmap); | ||
276 | up_write(&bmap->b_sem); | ||
277 | } | ||
278 | |||
279 | /** | ||
280 | * nilfs_bmap_propagate - propagate dirty state | ||
281 | * @bmap: bmap | ||
282 | * @bh: buffer head | ||
283 | * | ||
284 | * Description: nilfs_bmap_propagate() marks the buffers that directly or | ||
285 | * indirectly refer to the block specified by @bh dirty. | ||
286 | * | ||
287 | * Return Value: On success, 0 is returned. On error, one of the following | ||
288 | * negative error codes is returned. | ||
289 | * | ||
290 | * %-EIO - I/O error. | ||
291 | * | ||
292 | * %-ENOMEM - Insufficient amount of memory available. | ||
293 | */ | ||
294 | int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) | ||
295 | { | ||
296 | int ret; | ||
297 | |||
298 | down_write(&bmap->b_sem); | ||
299 | ret = bmap->b_ops->bop_propagate(bmap, bh); | ||
300 | up_write(&bmap->b_sem); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | /** | ||
305 | * nilfs_bmap_lookup_dirty_buffers - | ||
306 | * @bmap: bmap | ||
307 | * @listp: pointer to buffer head list | ||
308 | */ | ||
309 | void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, | ||
310 | struct list_head *listp) | ||
311 | { | ||
312 | if (bmap->b_ops->bop_lookup_dirty_buffers != NULL) | ||
313 | bmap->b_ops->bop_lookup_dirty_buffers(bmap, listp); | ||
314 | } | ||
315 | |||
316 | /** | ||
317 | * nilfs_bmap_assign - assign a new block number to a block | ||
318 | * @bmap: bmap | ||
319 | * @bhp: pointer to buffer head | ||
320 | * @blocknr: block number | ||
321 | * @binfo: block information | ||
322 | * | ||
323 | * Description: nilfs_bmap_assign() assigns the block number @blocknr to the | ||
324 | * buffer specified by @bh. | ||
325 | * | ||
326 | * Return Value: On success, 0 is returned and the buffer head of a newly | ||
327 | * create buffer and the block information associated with the buffer are | ||
328 | * stored in the place pointed by @bh and @binfo, respectively. On error, one | ||
329 | * of the following negative error codes is returned. | ||
330 | * | ||
331 | * %-EIO - I/O error. | ||
332 | * | ||
333 | * %-ENOMEM - Insufficient amount of memory available. | ||
334 | */ | ||
335 | int nilfs_bmap_assign(struct nilfs_bmap *bmap, | ||
336 | struct buffer_head **bh, | ||
337 | unsigned long blocknr, | ||
338 | union nilfs_binfo *binfo) | ||
339 | { | ||
340 | int ret; | ||
341 | |||
342 | down_write(&bmap->b_sem); | ||
343 | ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); | ||
344 | up_write(&bmap->b_sem); | ||
345 | return ret; | ||
346 | } | ||
347 | |||
348 | /** | ||
349 | * nilfs_bmap_mark - mark block dirty | ||
350 | * @bmap: bmap | ||
351 | * @key: key | ||
352 | * @level: level | ||
353 | * | ||
354 | * Description: nilfs_bmap_mark() marks the block specified by @key and @level | ||
355 | * as dirty. | ||
356 | * | ||
357 | * Return Value: On success, 0 is returned. On error, one of the following | ||
358 | * negative error codes is returned. | ||
359 | * | ||
360 | * %-EIO - I/O error. | ||
361 | * | ||
362 | * %-ENOMEM - Insufficient amount of memory available. | ||
363 | */ | ||
364 | int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) | ||
365 | { | ||
366 | int ret; | ||
367 | |||
368 | if (bmap->b_ops->bop_mark == NULL) | ||
369 | return 0; | ||
370 | |||
371 | down_write(&bmap->b_sem); | ||
372 | ret = bmap->b_ops->bop_mark(bmap, key, level); | ||
373 | up_write(&bmap->b_sem); | ||
374 | return ret; | ||
375 | } | ||
376 | |||
377 | /** | ||
378 | * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state | ||
379 | * @bmap: bmap | ||
380 | * | ||
381 | * Description: nilfs_test_and_clear() is the atomic operation to test and | ||
382 | * clear the dirty state of @bmap. | ||
383 | * | ||
384 | * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. | ||
385 | */ | ||
386 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) | ||
387 | { | ||
388 | int ret; | ||
389 | |||
390 | down_write(&bmap->b_sem); | ||
391 | ret = nilfs_bmap_dirty(bmap); | ||
392 | nilfs_bmap_clear_dirty(bmap); | ||
393 | up_write(&bmap->b_sem); | ||
394 | return ret; | ||
395 | } | ||
396 | |||
397 | |||
398 | /* | ||
399 | * Internal use only | ||
400 | */ | ||
401 | |||
402 | void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) | ||
403 | { | ||
404 | inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); | ||
405 | if (NILFS_MDT(bmap->b_inode)) | ||
406 | nilfs_mdt_mark_dirty(bmap->b_inode); | ||
407 | else | ||
408 | mark_inode_dirty(bmap->b_inode); | ||
409 | } | ||
410 | |||
411 | void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) | ||
412 | { | ||
413 | inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); | ||
414 | if (NILFS_MDT(bmap->b_inode)) | ||
415 | nilfs_mdt_mark_dirty(bmap->b_inode); | ||
416 | else | ||
417 | mark_inode_dirty(bmap->b_inode); | ||
418 | } | ||
419 | |||
420 | int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr, | ||
421 | struct buffer_head **bhp) | ||
422 | { | ||
423 | return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, | ||
424 | ptr, 0, bhp, 0); | ||
425 | } | ||
426 | |||
427 | void nilfs_bmap_put_block(const struct nilfs_bmap *bmap, | ||
428 | struct buffer_head *bh) | ||
429 | { | ||
430 | brelse(bh); | ||
431 | } | ||
432 | |||
433 | int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr, | ||
434 | struct buffer_head **bhp) | ||
435 | { | ||
436 | int ret; | ||
437 | |||
438 | ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache, | ||
439 | ptr, 0, bhp, 1); | ||
440 | if (ret < 0) | ||
441 | return ret; | ||
442 | set_buffer_nilfs_volatile(*bhp); | ||
443 | return 0; | ||
444 | } | ||
445 | |||
446 | void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap, | ||
447 | struct buffer_head *bh) | ||
448 | { | ||
449 | nilfs_btnode_delete(bh); | ||
450 | } | ||
451 | |||
452 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, | ||
453 | const struct buffer_head *bh) | ||
454 | { | ||
455 | struct buffer_head *pbh; | ||
456 | __u64 key; | ||
457 | |||
458 | key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - | ||
459 | bmap->b_inode->i_blkbits); | ||
460 | for (pbh = page_buffers(bh->b_page); pbh != bh; | ||
461 | pbh = pbh->b_this_page, key++); | ||
462 | |||
463 | return key; | ||
464 | } | ||
465 | |||
466 | __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) | ||
467 | { | ||
468 | __s64 diff; | ||
469 | |||
470 | diff = key - bmap->b_last_allocated_key; | ||
471 | if ((nilfs_bmap_keydiff_abs(diff) < NILFS_INODE_BMAP_SIZE) && | ||
472 | (bmap->b_last_allocated_ptr != NILFS_BMAP_INVALID_PTR) && | ||
473 | (bmap->b_last_allocated_ptr + diff > 0)) | ||
474 | return bmap->b_last_allocated_ptr + diff; | ||
475 | else | ||
476 | return NILFS_BMAP_INVALID_PTR; | ||
477 | } | ||
478 | |||
479 | static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) | ||
480 | { | ||
481 | return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); | ||
482 | } | ||
483 | |||
484 | #define NILFS_BMAP_GROUP_DIV 8 | ||
485 | __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap) | ||
486 | { | ||
487 | struct inode *dat = nilfs_bmap_get_dat(bmap); | ||
488 | unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat); | ||
489 | unsigned long group = bmap->b_inode->i_ino / entries_per_group; | ||
490 | |||
491 | return group * entries_per_group + | ||
492 | (bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) * | ||
493 | (entries_per_group / NILFS_BMAP_GROUP_DIV); | ||
494 | } | ||
495 | |||
496 | static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap, | ||
497 | union nilfs_bmap_ptr_req *req) | ||
498 | { | ||
499 | return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
500 | } | ||
501 | |||
502 | static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap, | ||
503 | union nilfs_bmap_ptr_req *req) | ||
504 | { | ||
505 | nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
506 | } | ||
507 | |||
508 | static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap, | ||
509 | union nilfs_bmap_ptr_req *req) | ||
510 | { | ||
511 | nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
512 | } | ||
513 | |||
514 | static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap, | ||
515 | union nilfs_bmap_ptr_req *req) | ||
516 | { | ||
517 | return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
518 | } | ||
519 | |||
520 | static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap, | ||
521 | union nilfs_bmap_ptr_req *req, | ||
522 | sector_t blocknr) | ||
523 | { | ||
524 | nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req, | ||
525 | blocknr); | ||
526 | } | ||
527 | |||
528 | static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap, | ||
529 | union nilfs_bmap_ptr_req *req) | ||
530 | { | ||
531 | nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
532 | } | ||
533 | |||
534 | static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap, | ||
535 | union nilfs_bmap_ptr_req *req) | ||
536 | { | ||
537 | return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
538 | } | ||
539 | |||
540 | static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap, | ||
541 | union nilfs_bmap_ptr_req *req) | ||
542 | { | ||
543 | nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0); | ||
544 | } | ||
545 | |||
546 | static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap, | ||
547 | union nilfs_bmap_ptr_req *req) | ||
548 | { | ||
549 | nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1); | ||
550 | } | ||
551 | |||
552 | static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap, | ||
553 | union nilfs_bmap_ptr_req *req) | ||
554 | { | ||
555 | nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req); | ||
556 | } | ||
557 | |||
558 | int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr, | ||
559 | sector_t blocknr) | ||
560 | { | ||
561 | return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr); | ||
562 | } | ||
563 | |||
564 | int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr) | ||
565 | { | ||
566 | return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr); | ||
567 | } | ||
568 | |||
569 | int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, | ||
570 | union nilfs_bmap_ptr_req *oldreq, | ||
571 | union nilfs_bmap_ptr_req *newreq) | ||
572 | { | ||
573 | int ret; | ||
574 | |||
575 | ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); | ||
576 | if (ret < 0) | ||
577 | return ret; | ||
578 | ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); | ||
579 | if (ret < 0) | ||
580 | bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); | ||
581 | |||
582 | return ret; | ||
583 | } | ||
584 | |||
585 | void nilfs_bmap_commit_update(struct nilfs_bmap *bmap, | ||
586 | union nilfs_bmap_ptr_req *oldreq, | ||
587 | union nilfs_bmap_ptr_req *newreq) | ||
588 | { | ||
589 | bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq); | ||
590 | bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq); | ||
591 | } | ||
592 | |||
593 | void nilfs_bmap_abort_update(struct nilfs_bmap *bmap, | ||
594 | union nilfs_bmap_ptr_req *oldreq, | ||
595 | union nilfs_bmap_ptr_req *newreq) | ||
596 | { | ||
597 | bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); | ||
598 | bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq); | ||
599 | } | ||
600 | |||
601 | static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, | ||
602 | __u64 *ptrp) | ||
603 | { | ||
604 | sector_t blocknr; | ||
605 | int ret; | ||
606 | |||
607 | ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr); | ||
608 | if (ret < 0) | ||
609 | return ret; | ||
610 | if (ptrp != NULL) | ||
611 | *ptrp = blocknr; | ||
612 | return 0; | ||
613 | } | ||
614 | |||
615 | static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap, | ||
616 | union nilfs_bmap_ptr_req *req) | ||
617 | { | ||
618 | /* ignore target ptr */ | ||
619 | req->bpr_ptr = bmap->b_last_allocated_ptr++; | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap, | ||
624 | union nilfs_bmap_ptr_req *req) | ||
625 | { | ||
626 | /* do nothing */ | ||
627 | } | ||
628 | |||
629 | static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap, | ||
630 | union nilfs_bmap_ptr_req *req) | ||
631 | { | ||
632 | bmap->b_last_allocated_ptr--; | ||
633 | } | ||
634 | |||
635 | static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = { | ||
636 | .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, | ||
637 | .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, | ||
638 | .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, | ||
639 | .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, | ||
640 | .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, | ||
641 | .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, | ||
642 | .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, | ||
643 | .bpop_commit_end_ptr = nilfs_bmap_commit_end_v, | ||
644 | .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, | ||
645 | |||
646 | .bpop_translate = nilfs_bmap_translate_v, | ||
647 | }; | ||
648 | |||
649 | static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = { | ||
650 | .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v, | ||
651 | .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v, | ||
652 | .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v, | ||
653 | .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v, | ||
654 | .bpop_commit_start_ptr = nilfs_bmap_commit_start_v, | ||
655 | .bpop_abort_start_ptr = nilfs_bmap_abort_start_v, | ||
656 | .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v, | ||
657 | .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt, | ||
658 | .bpop_abort_end_ptr = nilfs_bmap_abort_end_v, | ||
659 | |||
660 | .bpop_translate = nilfs_bmap_translate_v, | ||
661 | }; | ||
662 | |||
663 | static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = { | ||
664 | .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p, | ||
665 | .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p, | ||
666 | .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p, | ||
667 | .bpop_prepare_start_ptr = NULL, | ||
668 | .bpop_commit_start_ptr = NULL, | ||
669 | .bpop_abort_start_ptr = NULL, | ||
670 | .bpop_prepare_end_ptr = NULL, | ||
671 | .bpop_commit_end_ptr = NULL, | ||
672 | .bpop_abort_end_ptr = NULL, | ||
673 | |||
674 | .bpop_translate = NULL, | ||
675 | }; | ||
676 | |||
677 | static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { | ||
678 | .bpop_prepare_alloc_ptr = NULL, | ||
679 | .bpop_commit_alloc_ptr = NULL, | ||
680 | .bpop_abort_alloc_ptr = NULL, | ||
681 | .bpop_prepare_start_ptr = NULL, | ||
682 | .bpop_commit_start_ptr = NULL, | ||
683 | .bpop_abort_start_ptr = NULL, | ||
684 | .bpop_prepare_end_ptr = NULL, | ||
685 | .bpop_commit_end_ptr = NULL, | ||
686 | .bpop_abort_end_ptr = NULL, | ||
687 | |||
688 | .bpop_translate = NULL, | ||
689 | }; | ||
690 | |||
691 | /** | ||
692 | * nilfs_bmap_read - read a bmap from an inode | ||
693 | * @bmap: bmap | ||
694 | * @raw_inode: on-disk inode | ||
695 | * | ||
696 | * Description: nilfs_bmap_read() initializes the bmap @bmap. | ||
697 | * | ||
698 | * Return Value: On success, 0 is returned. On error, the following negative | ||
699 | * error code is returned. | ||
700 | * | ||
701 | * %-ENOMEM - Insufficient amount of memory available. | ||
702 | */ | ||
703 | int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) | ||
704 | { | ||
705 | if (raw_inode == NULL) | ||
706 | memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE); | ||
707 | else | ||
708 | memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE); | ||
709 | |||
710 | init_rwsem(&bmap->b_sem); | ||
711 | bmap->b_state = 0; | ||
712 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; | ||
713 | switch (bmap->b_inode->i_ino) { | ||
714 | case NILFS_DAT_INO: | ||
715 | bmap->b_pops = &nilfs_bmap_ptr_ops_p; | ||
716 | bmap->b_last_allocated_key = 0; /* XXX: use macro */ | ||
717 | bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; | ||
718 | break; | ||
719 | case NILFS_CPFILE_INO: | ||
720 | case NILFS_SUFILE_INO: | ||
721 | bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt; | ||
722 | bmap->b_last_allocated_key = 0; /* XXX: use macro */ | ||
723 | bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; | ||
724 | break; | ||
725 | default: | ||
726 | bmap->b_pops = &nilfs_bmap_ptr_ops_v; | ||
727 | bmap->b_last_allocated_key = 0; /* XXX: use macro */ | ||
728 | bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; | ||
729 | break; | ||
730 | } | ||
731 | |||
732 | return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ? | ||
733 | nilfs_btree_init(bmap, | ||
734 | NILFS_BMAP_LARGE_LOW, | ||
735 | NILFS_BMAP_LARGE_HIGH) : | ||
736 | nilfs_direct_init(bmap, | ||
737 | NILFS_BMAP_SMALL_LOW, | ||
738 | NILFS_BMAP_SMALL_HIGH); | ||
739 | } | ||
740 | |||
741 | /** | ||
742 | * nilfs_bmap_write - write back a bmap to an inode | ||
743 | * @bmap: bmap | ||
744 | * @raw_inode: on-disk inode | ||
745 | * | ||
746 | * Description: nilfs_bmap_write() stores @bmap in @raw_inode. | ||
747 | */ | ||
748 | void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) | ||
749 | { | ||
750 | down_write(&bmap->b_sem); | ||
751 | memcpy(raw_inode->i_bmap, bmap->b_u.u_data, | ||
752 | NILFS_INODE_BMAP_SIZE * sizeof(__le64)); | ||
753 | if (bmap->b_inode->i_ino == NILFS_DAT_INO) | ||
754 | bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; | ||
755 | |||
756 | up_write(&bmap->b_sem); | ||
757 | } | ||
758 | |||
759 | void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) | ||
760 | { | ||
761 | memset(&bmap->b_u, 0, NILFS_BMAP_SIZE); | ||
762 | init_rwsem(&bmap->b_sem); | ||
763 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; | ||
764 | bmap->b_pops = &nilfs_bmap_ptr_ops_gc; | ||
765 | bmap->b_last_allocated_key = 0; | ||
766 | bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; | ||
767 | bmap->b_state = 0; | ||
768 | nilfs_btree_init_gc(bmap); | ||
769 | } | ||
770 | |||
771 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | ||
772 | { | ||
773 | memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); | ||
774 | init_rwsem(&gcbmap->b_sem); | ||
775 | gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; | ||
776 | } | ||
777 | |||
778 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | ||
779 | { | ||
780 | memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); | ||
781 | init_rwsem(&bmap->b_sem); | ||
782 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; | ||
783 | } | ||
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h new file mode 100644 index 000000000000..4f2708abb1ba --- /dev/null +++ b/fs/nilfs2/bmap.h | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * bmap.h - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BMAP_H | ||
24 | #define _NILFS_BMAP_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "alloc.h" | ||
31 | |||
32 | #define NILFS_BMAP_INVALID_PTR 0 | ||
33 | |||
34 | #define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey) | ||
35 | #define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key) | ||
36 | #define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr) | ||
37 | #define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr) | ||
38 | |||
39 | #define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) | ||
40 | |||
41 | |||
42 | struct nilfs_bmap; | ||
43 | |||
44 | /** | ||
45 | * union nilfs_bmap_ptr_req - request for bmap ptr | ||
46 | * @bpr_ptr: bmap pointer | ||
47 | * @bpr_req: request for persistent allocator | ||
48 | */ | ||
49 | union nilfs_bmap_ptr_req { | ||
50 | __u64 bpr_ptr; | ||
51 | struct nilfs_palloc_req bpr_req; | ||
52 | }; | ||
53 | |||
54 | /** | ||
55 | * struct nilfs_bmap_stats - bmap statistics | ||
56 | * @bs_nblocks: number of blocks created or deleted | ||
57 | */ | ||
58 | struct nilfs_bmap_stats { | ||
59 | unsigned int bs_nblocks; | ||
60 | }; | ||
61 | |||
62 | /** | ||
63 | * struct nilfs_bmap_operations - bmap operation table | ||
64 | */ | ||
65 | struct nilfs_bmap_operations { | ||
66 | int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); | ||
67 | int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); | ||
68 | int (*bop_delete)(struct nilfs_bmap *, __u64); | ||
69 | void (*bop_clear)(struct nilfs_bmap *); | ||
70 | |||
71 | int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); | ||
72 | void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, | ||
73 | struct list_head *); | ||
74 | |||
75 | int (*bop_assign)(struct nilfs_bmap *, | ||
76 | struct buffer_head **, | ||
77 | sector_t, | ||
78 | union nilfs_binfo *); | ||
79 | int (*bop_mark)(struct nilfs_bmap *, __u64, int); | ||
80 | |||
81 | /* The following functions are internal use only. */ | ||
82 | int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); | ||
83 | int (*bop_check_insert)(const struct nilfs_bmap *, __u64); | ||
84 | int (*bop_check_delete)(struct nilfs_bmap *, __u64); | ||
85 | int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); | ||
86 | }; | ||
87 | |||
88 | |||
89 | /** | ||
90 | * struct nilfs_bmap_ptr_operations - bmap ptr operation table | ||
91 | */ | ||
92 | struct nilfs_bmap_ptr_operations { | ||
93 | int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *, | ||
94 | union nilfs_bmap_ptr_req *); | ||
95 | void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *, | ||
96 | union nilfs_bmap_ptr_req *); | ||
97 | void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *, | ||
98 | union nilfs_bmap_ptr_req *); | ||
99 | int (*bpop_prepare_start_ptr)(struct nilfs_bmap *, | ||
100 | union nilfs_bmap_ptr_req *); | ||
101 | void (*bpop_commit_start_ptr)(struct nilfs_bmap *, | ||
102 | union nilfs_bmap_ptr_req *, | ||
103 | sector_t); | ||
104 | void (*bpop_abort_start_ptr)(struct nilfs_bmap *, | ||
105 | union nilfs_bmap_ptr_req *); | ||
106 | int (*bpop_prepare_end_ptr)(struct nilfs_bmap *, | ||
107 | union nilfs_bmap_ptr_req *); | ||
108 | void (*bpop_commit_end_ptr)(struct nilfs_bmap *, | ||
109 | union nilfs_bmap_ptr_req *); | ||
110 | void (*bpop_abort_end_ptr)(struct nilfs_bmap *, | ||
111 | union nilfs_bmap_ptr_req *); | ||
112 | |||
113 | int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *); | ||
114 | }; | ||
115 | |||
116 | |||
117 | #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) | ||
118 | #define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) | ||
119 | #define NILFS_BMAP_NEW_PTR_INIT \ | ||
120 | (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) | ||
121 | |||
122 | static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) | ||
123 | { | ||
124 | return !!(ptr & NILFS_BMAP_NEW_PTR_INIT); | ||
125 | } | ||
126 | |||
127 | |||
128 | /** | ||
129 | * struct nilfs_bmap - bmap structure | ||
130 | * @b_u: raw data | ||
131 | * @b_sem: semaphore | ||
132 | * @b_inode: owner of bmap | ||
133 | * @b_ops: bmap operation table | ||
134 | * @b_pops: bmap ptr operation table | ||
135 | * @b_low: low watermark of conversion | ||
136 | * @b_high: high watermark of conversion | ||
137 | * @b_last_allocated_key: last allocated key for data block | ||
138 | * @b_last_allocated_ptr: last allocated ptr for data block | ||
139 | * @b_state: state | ||
140 | */ | ||
141 | struct nilfs_bmap { | ||
142 | union { | ||
143 | __u8 u_flags; | ||
144 | __le64 u_data[NILFS_BMAP_SIZE / sizeof(__le64)]; | ||
145 | } b_u; | ||
146 | struct rw_semaphore b_sem; | ||
147 | struct inode *b_inode; | ||
148 | const struct nilfs_bmap_operations *b_ops; | ||
149 | const struct nilfs_bmap_ptr_operations *b_pops; | ||
150 | __u64 b_low; | ||
151 | __u64 b_high; | ||
152 | __u64 b_last_allocated_key; | ||
153 | __u64 b_last_allocated_ptr; | ||
154 | int b_state; | ||
155 | }; | ||
156 | |||
157 | /* state */ | ||
158 | #define NILFS_BMAP_DIRTY 0x00000001 | ||
159 | |||
160 | |||
161 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); | ||
162 | int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); | ||
163 | void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); | ||
164 | int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); | ||
165 | int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); | ||
166 | int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); | ||
167 | int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); | ||
168 | int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long); | ||
169 | void nilfs_bmap_clear(struct nilfs_bmap *); | ||
170 | int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *); | ||
171 | void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *); | ||
172 | int nilfs_bmap_assign(struct nilfs_bmap *, struct buffer_head **, | ||
173 | unsigned long, union nilfs_binfo *); | ||
174 | int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); | ||
175 | int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); | ||
176 | |||
177 | void nilfs_bmap_init_gc(struct nilfs_bmap *); | ||
178 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | ||
179 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | ||
180 | |||
181 | |||
182 | /* | ||
183 | * Internal use only | ||
184 | */ | ||
185 | |||
186 | int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); | ||
187 | int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); | ||
188 | |||
189 | |||
190 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, | ||
191 | const struct buffer_head *); | ||
192 | |||
193 | __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); | ||
194 | __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); | ||
195 | |||
196 | int nilfs_bmap_prepare_update(struct nilfs_bmap *, | ||
197 | union nilfs_bmap_ptr_req *, | ||
198 | union nilfs_bmap_ptr_req *); | ||
199 | void nilfs_bmap_commit_update(struct nilfs_bmap *, | ||
200 | union nilfs_bmap_ptr_req *, | ||
201 | union nilfs_bmap_ptr_req *); | ||
202 | void nilfs_bmap_abort_update(struct nilfs_bmap *, | ||
203 | union nilfs_bmap_ptr_req *, | ||
204 | union nilfs_bmap_ptr_req *); | ||
205 | |||
206 | void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); | ||
207 | void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); | ||
208 | |||
209 | |||
210 | int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64, | ||
211 | struct buffer_head **); | ||
212 | void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *); | ||
213 | int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64, | ||
214 | struct buffer_head **); | ||
215 | void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *); | ||
216 | |||
217 | |||
218 | /* Assume that bmap semaphore is locked. */ | ||
219 | static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) | ||
220 | { | ||
221 | return !!(bmap->b_state & NILFS_BMAP_DIRTY); | ||
222 | } | ||
223 | |||
224 | /* Assume that bmap semaphore is locked. */ | ||
225 | static inline void nilfs_bmap_set_dirty(struct nilfs_bmap *bmap) | ||
226 | { | ||
227 | bmap->b_state |= NILFS_BMAP_DIRTY; | ||
228 | } | ||
229 | |||
230 | /* Assume that bmap semaphore is locked. */ | ||
231 | static inline void nilfs_bmap_clear_dirty(struct nilfs_bmap *bmap) | ||
232 | { | ||
233 | bmap->b_state &= ~NILFS_BMAP_DIRTY; | ||
234 | } | ||
235 | |||
236 | |||
237 | #define NILFS_BMAP_LARGE 0x1 | ||
238 | |||
239 | #define NILFS_BMAP_SMALL_LOW NILFS_DIRECT_KEY_MIN | ||
240 | #define NILFS_BMAP_SMALL_HIGH NILFS_DIRECT_KEY_MAX | ||
241 | #define NILFS_BMAP_LARGE_LOW NILFS_BTREE_ROOT_NCHILDREN_MAX | ||
242 | #define NILFS_BMAP_LARGE_HIGH NILFS_BTREE_KEY_MAX | ||
243 | |||
244 | #endif /* _NILFS_BMAP_H */ | ||
diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h new file mode 100644 index 000000000000..d41509bff47b --- /dev/null +++ b/fs/nilfs2/bmap_union.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * bmap_union.h - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BMAP_UNION_H | ||
24 | #define _NILFS_BMAP_UNION_H | ||
25 | |||
26 | #include "bmap.h" | ||
27 | #include "direct.h" | ||
28 | #include "btree.h" | ||
29 | |||
30 | /** | ||
31 | * nilfs_bmap_union - | ||
32 | * @bi_bmap: bmap structure | ||
33 | * @bi_btree: direct map structure | ||
34 | * @bi_direct: B-tree structure | ||
35 | */ | ||
36 | union nilfs_bmap_union { | ||
37 | struct nilfs_bmap bi_bmap; | ||
38 | struct nilfs_direct bi_direct; | ||
39 | struct nilfs_btree bi_btree; | ||
40 | }; | ||
41 | |||
42 | #endif /* _NILFS_BMAP_UNION_H */ | ||
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c new file mode 100644 index 000000000000..4cc07b2c30e0 --- /dev/null +++ b/fs/nilfs2/btnode.c | |||
@@ -0,0 +1,316 @@ | |||
1 | /* | ||
2 | * btnode.c - NILFS B-tree node cache | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * This file was originally written by Seiji Kihara <kihara@osrg.net> | ||
21 | * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for | ||
22 | * stabilization and simplification. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/backing-dev.h> | ||
30 | #include "nilfs.h" | ||
31 | #include "mdt.h" | ||
32 | #include "dat.h" | ||
33 | #include "page.h" | ||
34 | #include "btnode.h" | ||
35 | |||
36 | |||
37 | void nilfs_btnode_cache_init_once(struct address_space *btnc) | ||
38 | { | ||
39 | INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); | ||
40 | spin_lock_init(&btnc->tree_lock); | ||
41 | INIT_LIST_HEAD(&btnc->private_list); | ||
42 | spin_lock_init(&btnc->private_lock); | ||
43 | |||
44 | spin_lock_init(&btnc->i_mmap_lock); | ||
45 | INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); | ||
46 | INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); | ||
47 | } | ||
48 | |||
49 | static struct address_space_operations def_btnode_aops; | ||
50 | |||
51 | void nilfs_btnode_cache_init(struct address_space *btnc) | ||
52 | { | ||
53 | btnc->host = NULL; /* can safely set to host inode ? */ | ||
54 | btnc->flags = 0; | ||
55 | mapping_set_gfp_mask(btnc, GFP_NOFS); | ||
56 | btnc->assoc_mapping = NULL; | ||
57 | btnc->backing_dev_info = &default_backing_dev_info; | ||
58 | btnc->a_ops = &def_btnode_aops; | ||
59 | } | ||
60 | |||
61 | void nilfs_btnode_cache_clear(struct address_space *btnc) | ||
62 | { | ||
63 | invalidate_mapping_pages(btnc, 0, -1); | ||
64 | truncate_inode_pages(btnc, 0); | ||
65 | } | ||
66 | |||
67 | int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | ||
68 | sector_t pblocknr, struct buffer_head **pbh, | ||
69 | int newblk) | ||
70 | { | ||
71 | struct buffer_head *bh; | ||
72 | struct inode *inode = NILFS_BTNC_I(btnc); | ||
73 | int err; | ||
74 | |||
75 | bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); | ||
76 | if (unlikely(!bh)) | ||
77 | return -ENOMEM; | ||
78 | |||
79 | err = -EEXIST; /* internal code */ | ||
80 | if (newblk) { | ||
81 | if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || | ||
82 | buffer_dirty(bh))) { | ||
83 | brelse(bh); | ||
84 | BUG(); | ||
85 | } | ||
86 | bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; | ||
87 | bh->b_blocknr = blocknr; | ||
88 | set_buffer_mapped(bh); | ||
89 | set_buffer_uptodate(bh); | ||
90 | goto found; | ||
91 | } | ||
92 | |||
93 | if (buffer_uptodate(bh) || buffer_dirty(bh)) | ||
94 | goto found; | ||
95 | |||
96 | if (pblocknr == 0) { | ||
97 | pblocknr = blocknr; | ||
98 | if (inode->i_ino != NILFS_DAT_INO) { | ||
99 | struct inode *dat = | ||
100 | nilfs_dat_inode(NILFS_I_NILFS(inode)); | ||
101 | |||
102 | /* blocknr is a virtual block number */ | ||
103 | err = nilfs_dat_translate(dat, blocknr, &pblocknr); | ||
104 | if (unlikely(err)) { | ||
105 | brelse(bh); | ||
106 | goto out_locked; | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | lock_buffer(bh); | ||
111 | if (buffer_uptodate(bh)) { | ||
112 | unlock_buffer(bh); | ||
113 | err = -EEXIST; /* internal code */ | ||
114 | goto found; | ||
115 | } | ||
116 | set_buffer_mapped(bh); | ||
117 | bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; | ||
118 | bh->b_blocknr = pblocknr; /* set block address for read */ | ||
119 | bh->b_end_io = end_buffer_read_sync; | ||
120 | get_bh(bh); | ||
121 | submit_bh(READ, bh); | ||
122 | bh->b_blocknr = blocknr; /* set back to the given block address */ | ||
123 | err = 0; | ||
124 | found: | ||
125 | *pbh = bh; | ||
126 | |||
127 | out_locked: | ||
128 | unlock_page(bh->b_page); | ||
129 | page_cache_release(bh->b_page); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, | ||
134 | sector_t pblocknr, struct buffer_head **pbh, int newblk) | ||
135 | { | ||
136 | struct buffer_head *bh; | ||
137 | int err; | ||
138 | |||
139 | err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); | ||
140 | if (err == -EEXIST) /* internal code (cache hit) */ | ||
141 | return 0; | ||
142 | if (unlikely(err)) | ||
143 | return err; | ||
144 | |||
145 | bh = *pbh; | ||
146 | wait_on_buffer(bh); | ||
147 | if (!buffer_uptodate(bh)) { | ||
148 | brelse(bh); | ||
149 | return -EIO; | ||
150 | } | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * nilfs_btnode_delete - delete B-tree node buffer | ||
156 | * @bh: buffer to be deleted | ||
157 | * | ||
158 | * nilfs_btnode_delete() invalidates the specified buffer and delete the page | ||
159 | * including the buffer if the page gets unbusy. | ||
160 | */ | ||
161 | void nilfs_btnode_delete(struct buffer_head *bh) | ||
162 | { | ||
163 | struct address_space *mapping; | ||
164 | struct page *page = bh->b_page; | ||
165 | pgoff_t index = page_index(page); | ||
166 | int still_dirty; | ||
167 | |||
168 | page_cache_get(page); | ||
169 | lock_page(page); | ||
170 | wait_on_page_writeback(page); | ||
171 | |||
172 | nilfs_forget_buffer(bh); | ||
173 | still_dirty = PageDirty(page); | ||
174 | mapping = page->mapping; | ||
175 | unlock_page(page); | ||
176 | page_cache_release(page); | ||
177 | |||
178 | if (!still_dirty && mapping) | ||
179 | invalidate_inode_pages2_range(mapping, index, index); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * nilfs_btnode_prepare_change_key | ||
184 | * prepare to move contents of the block for old key to one of new key. | ||
185 | * the old buffer will not be removed, but might be reused for new buffer. | ||
186 | * it might return -ENOMEM because of memory allocation errors, | ||
187 | * and might return -EIO because of disk read errors. | ||
188 | */ | ||
189 | int nilfs_btnode_prepare_change_key(struct address_space *btnc, | ||
190 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
191 | { | ||
192 | struct buffer_head *obh, *nbh; | ||
193 | struct inode *inode = NILFS_BTNC_I(btnc); | ||
194 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
195 | int err; | ||
196 | |||
197 | if (oldkey == newkey) | ||
198 | return 0; | ||
199 | |||
200 | obh = ctxt->bh; | ||
201 | ctxt->newbh = NULL; | ||
202 | |||
203 | if (inode->i_blkbits == PAGE_CACHE_SHIFT) { | ||
204 | lock_page(obh->b_page); | ||
205 | /* | ||
206 | * We cannot call radix_tree_preload for the kernels older | ||
207 | * than 2.6.23, because it is not exported for modules. | ||
208 | */ | ||
209 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
210 | if (err) | ||
211 | goto failed_unlock; | ||
212 | /* BUG_ON(oldkey != obh->b_page->index); */ | ||
213 | if (unlikely(oldkey != obh->b_page->index)) | ||
214 | NILFS_PAGE_BUG(obh->b_page, | ||
215 | "invalid oldkey %lld (newkey=%lld)", | ||
216 | (unsigned long long)oldkey, | ||
217 | (unsigned long long)newkey); | ||
218 | |||
219 | retry: | ||
220 | spin_lock_irq(&btnc->tree_lock); | ||
221 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); | ||
222 | spin_unlock_irq(&btnc->tree_lock); | ||
223 | /* | ||
224 | * Note: page->index will not change to newkey until | ||
225 | * nilfs_btnode_commit_change_key() will be called. | ||
226 | * To protect the page in intermediate state, the page lock | ||
227 | * is held. | ||
228 | */ | ||
229 | radix_tree_preload_end(); | ||
230 | if (!err) | ||
231 | return 0; | ||
232 | else if (err != -EEXIST) | ||
233 | goto failed_unlock; | ||
234 | |||
235 | err = invalidate_inode_pages2_range(btnc, newkey, newkey); | ||
236 | if (!err) | ||
237 | goto retry; | ||
238 | /* fallback to copy mode */ | ||
239 | unlock_page(obh->b_page); | ||
240 | } | ||
241 | |||
242 | err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); | ||
243 | if (likely(!err)) { | ||
244 | BUG_ON(nbh == obh); | ||
245 | ctxt->newbh = nbh; | ||
246 | } | ||
247 | return err; | ||
248 | |||
249 | failed_unlock: | ||
250 | unlock_page(obh->b_page); | ||
251 | return err; | ||
252 | } | ||
253 | |||
254 | /** | ||
255 | * nilfs_btnode_commit_change_key | ||
256 | * commit the change_key operation prepared by prepare_change_key(). | ||
257 | */ | ||
258 | void nilfs_btnode_commit_change_key(struct address_space *btnc, | ||
259 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
260 | { | ||
261 | struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; | ||
262 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
263 | struct page *opage; | ||
264 | |||
265 | if (oldkey == newkey) | ||
266 | return; | ||
267 | |||
268 | if (nbh == NULL) { /* blocksize == pagesize */ | ||
269 | opage = obh->b_page; | ||
270 | if (unlikely(oldkey != opage->index)) | ||
271 | NILFS_PAGE_BUG(opage, | ||
272 | "invalid oldkey %lld (newkey=%lld)", | ||
273 | (unsigned long long)oldkey, | ||
274 | (unsigned long long)newkey); | ||
275 | if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage)) | ||
276 | BUG(); | ||
277 | |||
278 | spin_lock_irq(&btnc->tree_lock); | ||
279 | radix_tree_delete(&btnc->page_tree, oldkey); | ||
280 | radix_tree_tag_set(&btnc->page_tree, newkey, | ||
281 | PAGECACHE_TAG_DIRTY); | ||
282 | spin_unlock_irq(&btnc->tree_lock); | ||
283 | |||
284 | opage->index = obh->b_blocknr = newkey; | ||
285 | unlock_page(opage); | ||
286 | } else { | ||
287 | nilfs_copy_buffer(nbh, obh); | ||
288 | nilfs_btnode_mark_dirty(nbh); | ||
289 | |||
290 | nbh->b_blocknr = newkey; | ||
291 | ctxt->bh = nbh; | ||
292 | nilfs_btnode_delete(obh); /* will decrement bh->b_count */ | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * nilfs_btnode_abort_change_key | ||
298 | * abort the change_key operation prepared by prepare_change_key(). | ||
299 | */ | ||
300 | void nilfs_btnode_abort_change_key(struct address_space *btnc, | ||
301 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
302 | { | ||
303 | struct buffer_head *nbh = ctxt->newbh; | ||
304 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
305 | |||
306 | if (oldkey == newkey) | ||
307 | return; | ||
308 | |||
309 | if (nbh == NULL) { /* blocksize == pagesize */ | ||
310 | spin_lock_irq(&btnc->tree_lock); | ||
311 | radix_tree_delete(&btnc->page_tree, newkey); | ||
312 | spin_unlock_irq(&btnc->tree_lock); | ||
313 | unlock_page(ctxt->bh->b_page); | ||
314 | } else | ||
315 | brelse(nbh); | ||
316 | } | ||
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h new file mode 100644 index 000000000000..35faa86444a7 --- /dev/null +++ b/fs/nilfs2/btnode.h | |||
@@ -0,0 +1,58 @@ | |||
1 | /* | ||
2 | * btnode.h - NILFS B-tree node cache | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net> | ||
21 | * Revised by Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_BTNODE_H | ||
25 | #define _NILFS_BTNODE_H | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | |||
32 | |||
33 | struct nilfs_btnode_chkey_ctxt { | ||
34 | __u64 oldkey; | ||
35 | __u64 newkey; | ||
36 | struct buffer_head *bh; | ||
37 | struct buffer_head *newbh; | ||
38 | }; | ||
39 | |||
40 | void nilfs_btnode_cache_init_once(struct address_space *); | ||
41 | void nilfs_btnode_cache_init(struct address_space *); | ||
42 | void nilfs_btnode_cache_clear(struct address_space *); | ||
43 | int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, | ||
44 | struct buffer_head **, int); | ||
45 | int nilfs_btnode_get(struct address_space *, __u64, sector_t, | ||
46 | struct buffer_head **, int); | ||
47 | void nilfs_btnode_delete(struct buffer_head *); | ||
48 | int nilfs_btnode_prepare_change_key(struct address_space *, | ||
49 | struct nilfs_btnode_chkey_ctxt *); | ||
50 | void nilfs_btnode_commit_change_key(struct address_space *, | ||
51 | struct nilfs_btnode_chkey_ctxt *); | ||
52 | void nilfs_btnode_abort_change_key(struct address_space *, | ||
53 | struct nilfs_btnode_chkey_ctxt *); | ||
54 | |||
55 | #define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh) | ||
56 | |||
57 | |||
58 | #endif /* _NILFS_BTNODE_H */ | ||
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c new file mode 100644 index 000000000000..6b37a2767293 --- /dev/null +++ b/fs/nilfs2/btree.c | |||
@@ -0,0 +1,2269 @@ | |||
1 | /* | ||
2 | * btree.c - NILFS B-tree. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/slab.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/pagevec.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "page.h" | ||
29 | #include "btnode.h" | ||
30 | #include "btree.h" | ||
31 | #include "alloc.h" | ||
32 | |||
/**
 * struct nilfs_btree_path - A path on which B-tree operations are executed
 * @bp_bh: buffer head of node block
 * @bp_sib_bh: buffer head of sibling node block
 * @bp_index: index of child node
 * @bp_oldreq: ptr end request for old ptr
 * @bp_newreq: ptr alloc request for new ptr
 * @bp_ctxt: change-key context used when a btree node block is relocated
 * @bp_op: rebalance operation
 */
struct nilfs_btree_path {
	struct buffer_head *bp_bh;
	struct buffer_head *bp_sib_bh;
	int bp_index;
	union nilfs_bmap_ptr_req bp_oldreq;
	union nilfs_bmap_ptr_req bp_newreq;
	struct nilfs_btnode_chkey_ctxt bp_ctxt;
	void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
		      int, __u64 *, __u64 *);
};
52 | |||
53 | /* | ||
54 | * B-tree path operations | ||
55 | */ | ||
56 | |||
57 | static struct kmem_cache *nilfs_btree_path_cache; | ||
58 | |||
59 | int __init nilfs_btree_path_cache_init(void) | ||
60 | { | ||
61 | nilfs_btree_path_cache = | ||
62 | kmem_cache_create("nilfs2_btree_path_cache", | ||
63 | sizeof(struct nilfs_btree_path) * | ||
64 | NILFS_BTREE_LEVEL_MAX, 0, 0, NULL); | ||
65 | return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM; | ||
66 | } | ||
67 | |||
/* Destroy the path slab cache created by nilfs_btree_path_cache_init(). */
void nilfs_btree_path_cache_destroy(void)
{
	kmem_cache_destroy(nilfs_btree_path_cache);
}
72 | |||
73 | static inline struct nilfs_btree_path * | ||
74 | nilfs_btree_alloc_path(const struct nilfs_btree *btree) | ||
75 | { | ||
76 | return (struct nilfs_btree_path *) | ||
77 | kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); | ||
78 | } | ||
79 | |||
/* Return a path array obtained from nilfs_btree_alloc_path() to the cache. */
static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
					 struct nilfs_btree_path *path)
{
	kmem_cache_free(nilfs_btree_path_cache, path);
}
85 | |||
86 | static void nilfs_btree_init_path(const struct nilfs_btree *btree, | ||
87 | struct nilfs_btree_path *path) | ||
88 | { | ||
89 | int level; | ||
90 | |||
91 | for (level = NILFS_BTREE_LEVEL_DATA; | ||
92 | level < NILFS_BTREE_LEVEL_MAX; | ||
93 | level++) { | ||
94 | path[level].bp_bh = NULL; | ||
95 | path[level].bp_sib_bh = NULL; | ||
96 | path[level].bp_index = 0; | ||
97 | path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
98 | path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
99 | path[level].bp_op = NULL; | ||
100 | } | ||
101 | } | ||
102 | |||
103 | static void nilfs_btree_clear_path(const struct nilfs_btree *btree, | ||
104 | struct nilfs_btree_path *path) | ||
105 | { | ||
106 | int level; | ||
107 | |||
108 | for (level = NILFS_BTREE_LEVEL_DATA; | ||
109 | level < NILFS_BTREE_LEVEL_MAX; | ||
110 | level++) { | ||
111 | if (path[level].bp_bh != NULL) { | ||
112 | nilfs_bmap_put_block(&btree->bt_bmap, | ||
113 | path[level].bp_bh); | ||
114 | path[level].bp_bh = NULL; | ||
115 | } | ||
116 | /* sib_bh is released or deleted by prepare or commit | ||
117 | * operations. */ | ||
118 | path[level].bp_sib_bh = NULL; | ||
119 | path[level].bp_index = 0; | ||
120 | path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
121 | path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
122 | path[level].bp_op = NULL; | ||
123 | } | ||
124 | } | ||
125 | |||
126 | |||
127 | /* | ||
128 | * B-tree node operations | ||
129 | */ | ||
130 | |||
/* Read the flags field of a b-tree node header. */
static inline int
nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
			   const struct nilfs_btree_node *node)
{
	return node->bn_flags;
}
137 | |||
/* Overwrite the flags field of a b-tree node header. */
static inline void
nilfs_btree_node_set_flags(struct nilfs_btree *btree,
			   struct nilfs_btree_node *node,
			   int flags)
{
	node->bn_flags = flags;
}
145 | |||
/* Nonzero iff @node is the root node (NILFS_BTREE_NODE_ROOT flag set). */
static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
					const struct nilfs_btree_node *node)
{
	return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
}
151 | |||
/* Read the level field of a b-tree node header. */
static inline int
nilfs_btree_node_get_level(const struct nilfs_btree *btree,
			   const struct nilfs_btree_node *node)
{
	return node->bn_level;
}
158 | |||
/* Overwrite the level field of a b-tree node header. */
static inline void
nilfs_btree_node_set_level(struct nilfs_btree *btree,
			   struct nilfs_btree_node *node,
			   int level)
{
	node->bn_level = level;
}
166 | |||
/* Number of children in @node (converted from on-disk little-endian). */
static inline int
nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
			       const struct nilfs_btree_node *node)
{
	return le16_to_cpu(node->bn_nchildren);
}
173 | |||
/* Store @nchildren into @node in on-disk little-endian form. */
static inline void
nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
			       struct nilfs_btree_node *node,
			       int nchildren)
{
	node->bn_nchildren = cpu_to_le16(nchildren);
}
181 | |||
/* Size in bytes of a non-root node block: one filesystem block. */
static inline int
nilfs_btree_node_size(const struct nilfs_btree *btree)
{
	return 1 << btree->bt_bmap.b_inode->i_blkbits;
}
187 | |||
188 | static inline int | ||
189 | nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree, | ||
190 | const struct nilfs_btree_node *node) | ||
191 | { | ||
192 | return nilfs_btree_node_root(btree, node) ? | ||
193 | NILFS_BTREE_ROOT_NCHILDREN_MIN : | ||
194 | NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); | ||
195 | } | ||
196 | |||
197 | static inline int | ||
198 | nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree, | ||
199 | const struct nilfs_btree_node *node) | ||
200 | { | ||
201 | return nilfs_btree_node_root(btree, node) ? | ||
202 | NILFS_BTREE_ROOT_NCHILDREN_MAX : | ||
203 | NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); | ||
204 | } | ||
205 | |||
206 | static inline __le64 * | ||
207 | nilfs_btree_node_dkeys(const struct nilfs_btree *btree, | ||
208 | const struct nilfs_btree_node *node) | ||
209 | { | ||
210 | return (__le64 *)((char *)(node + 1) + | ||
211 | (nilfs_btree_node_root(btree, node) ? | ||
212 | 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); | ||
213 | } | ||
214 | |||
215 | static inline __le64 * | ||
216 | nilfs_btree_node_dptrs(const struct nilfs_btree *btree, | ||
217 | const struct nilfs_btree_node *node) | ||
218 | { | ||
219 | return (__le64 *)(nilfs_btree_node_dkeys(btree, node) + | ||
220 | nilfs_btree_node_nchildren_max(btree, node)); | ||
221 | } | ||
222 | |||
223 | static inline __u64 | ||
224 | nilfs_btree_node_get_key(const struct nilfs_btree *btree, | ||
225 | const struct nilfs_btree_node *node, int index) | ||
226 | { | ||
227 | return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) + | ||
228 | index)); | ||
229 | } | ||
230 | |||
231 | static inline void | ||
232 | nilfs_btree_node_set_key(struct nilfs_btree *btree, | ||
233 | struct nilfs_btree_node *node, int index, __u64 key) | ||
234 | { | ||
235 | *(nilfs_btree_node_dkeys(btree, node) + index) = | ||
236 | nilfs_bmap_key_to_dkey(key); | ||
237 | } | ||
238 | |||
239 | static inline __u64 | ||
240 | nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, | ||
241 | const struct nilfs_btree_node *node, | ||
242 | int index) | ||
243 | { | ||
244 | return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) + | ||
245 | index)); | ||
246 | } | ||
247 | |||
248 | static inline void | ||
249 | nilfs_btree_node_set_ptr(struct nilfs_btree *btree, | ||
250 | struct nilfs_btree_node *node, | ||
251 | int index, | ||
252 | __u64 ptr) | ||
253 | { | ||
254 | *(nilfs_btree_node_dptrs(btree, node) + index) = | ||
255 | nilfs_bmap_ptr_to_dptr(ptr); | ||
256 | } | ||
257 | |||
/*
 * Initialize @node's header (flags, level, child count) and fill its
 * first @nchildren key/pointer slots from @keys/@ptrs, converting each
 * value to on-disk little-endian form.
 */
static void nilfs_btree_node_init(struct nilfs_btree *btree,
				  struct nilfs_btree_node *node,
				  int flags, int level, int nchildren,
				  const __u64 *keys, const __u64 *ptrs)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int i;

	nilfs_btree_node_set_flags(btree, node, flags);
	nilfs_btree_node_set_level(btree, node, level);
	nilfs_btree_node_set_nchildren(btree, node, nchildren);

	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	for (i = 0; i < nchildren; i++) {
		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
	}
}
278 | |||
/*
 * Move the first @n children of @right to the tail of @left, then close
 * the gap in @right, and update both child counts.
 * Assume the buffer heads corresponding to left and right are locked.
 */
static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
				       struct nilfs_btree_node *left,
				       struct nilfs_btree_node *right,
				       int n)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(btree, left);
	ldptrs = nilfs_btree_node_dptrs(btree, left);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);

	rdkeys = nilfs_btree_node_dkeys(btree, right);
	rdptrs = nilfs_btree_node_dptrs(btree, right);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);

	/* append right's head to left, then shift right's remainder down;
	 * the shift within right may overlap, hence memmove */
	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));

	lnchildren += n;
	rnchildren -= n;
	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
}
307 | |||
/*
 * Move the last @n children of @left to the front of @right (opening a
 * gap in @right first), and update both child counts.
 * Assume that the buffer heads corresponding to left and right are locked.
 */
static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
					struct nilfs_btree_node *left,
					struct nilfs_btree_node *right,
					int n)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(btree, left);
	ldptrs = nilfs_btree_node_dptrs(btree, left);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);

	rdkeys = nilfs_btree_node_dkeys(btree, right);
	rdptrs = nilfs_btree_node_dptrs(btree, right);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);

	/* shift right's contents up by n (may overlap: memmove), then
	 * copy left's tail into the vacated slots */
	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));

	lnchildren -= n;
	rnchildren += n;
	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
}
336 | |||
/*
 * Insert the pair (@key, @ptr) at position @index of @node, shifting any
 * following children one slot to the right.
 * Assume that the buffer head corresponding to node is locked.
 */
static void nilfs_btree_node_insert(struct nilfs_btree *btree,
				    struct nilfs_btree_node *node,
				    __u64 key, __u64 ptr, int index)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	if (index < nchildren) {
		/* open a gap at index (overlapping copy: memmove) */
		memmove(dkeys + index + 1, dkeys + index,
			(nchildren - index) * sizeof(*dkeys));
		memmove(dptrs + index + 1, dptrs + index,
			(nchildren - index) * sizeof(*dptrs));
	}
	dkeys[index] = nilfs_bmap_key_to_dkey(key);
	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
	nchildren++;
	nilfs_btree_node_set_nchildren(btree, node, nchildren);
}
360 | |||
/*
 * Delete the child at position @index of @node, returning the removed
 * key/pointer through @keyp/@ptrp when non-NULL, and close the gap.
 * Assume that the buffer head corresponding to node is locked.
 */
static void nilfs_btree_node_delete(struct nilfs_btree *btree,
				    struct nilfs_btree_node *node,
				    __u64 *keyp, __u64 *ptrp, int index)
{
	__u64 key;
	__u64 ptr;
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	key = nilfs_bmap_dkey_to_key(dkeys[index]);
	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	if (keyp != NULL)
		*keyp = key;
	if (ptrp != NULL)
		*ptrp = ptr;

	if (index < nchildren - 1) {
		/* shift trailing children down over the deleted slot */
		memmove(dkeys + index, dkeys + index + 1,
			(nchildren - index - 1) * sizeof(*dkeys));
		memmove(dptrs + index, dptrs + index + 1,
			(nchildren - index - 1) * sizeof(*dptrs));
	}
	nchildren--;
	nilfs_btree_node_set_nchildren(btree, node, nchildren);
}
391 | |||
/*
 * Binary-search @node for @key.  Stores the resulting slot in *@indexp
 * and returns nonzero iff an exact match was found.
 *
 * On a miss the index is adjusted by where the search ended (s < 0: last
 * probe was below @key; s > 0: above): interior nodes get the child whose
 * subtree covers @key, leaf-level nodes get the insertion position.
 */
static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
				   const struct nilfs_btree_node *node,
				   __u64 key, int *indexp)
{
	__u64 nkey;
	int index, low, high, s;

	/* binary search */
	low = 0;
	high = nilfs_btree_node_get_nchildren(btree, node) - 1;
	index = 0;
	s = 0;
	while (low <= high) {
		index = (low + high) / 2;
		nkey = nilfs_btree_node_get_key(btree, node, index);
		if (nkey == key) {
			s = 0;
			goto out;
		} else if (nkey < key) {
			low = index + 1;
			s = -1;
		} else {
			high = index - 1;
			s = 1;
		}
	}

	/* adjust index */
	if (nilfs_btree_node_get_level(btree, node) >
	    NILFS_BTREE_LEVEL_NODE_MIN) {
		/* interior node: step back to the covering child */
		if ((s > 0) && (index > 0))
			index--;
	} else if (s < 0)
		/* leaf level: point past the smaller key (insert position) */
		index++;

 out:
	*indexp = index;

	return s == 0;
}
432 | |||
/* The root node is embedded in the bmap's inline data area. */
static inline struct nilfs_btree_node *
nilfs_btree_get_root(const struct nilfs_btree *btree)
{
	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
}
438 | |||
/* Node image held in the buffer attached to @path at @level (non-root). */
static inline struct nilfs_btree_node *
nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
			     const struct nilfs_btree_path *path,
			     int level)
{
	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
}
446 | |||
/* Sibling node image held in bp_sib_bh of @path at @level. */
static inline struct nilfs_btree_node *
nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
			 const struct nilfs_btree_path *path,
			 int level)
{
	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
}
454 | |||
/* Tree height: root level plus one (the data level below the leaves). */
static inline int nilfs_btree_height(const struct nilfs_btree *btree)
{
	return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
		+ 1;
}
460 | |||
/*
 * Node at @level on @path: the embedded root when @level is the topmost
 * level, otherwise the node held in the path's buffer.
 */
static inline struct nilfs_btree_node *
nilfs_btree_get_node(const struct nilfs_btree *btree,
		     const struct nilfs_btree_path *path,
		     int level)
{
	if (level == nilfs_btree_height(btree) - 1)
		return nilfs_btree_get_root(btree);
	return nilfs_btree_get_nonroot_node(btree, path, level);
}
470 | |||
/*
 * Walk the tree from the root down to @minlevel recording the route in
 * @path.  On success stores the pointer found for @key through @ptrp.
 * Returns 0, -ENOENT when @key is absent (the path still describes the
 * would-be position, used by insert), or a negative error from block I/O.
 */
static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
				 struct nilfs_btree_path *path,
				 __u64 key, __u64 *ptrp, int minlevel)
{
	struct nilfs_btree_node *node;
	__u64 ptr;
	int level, index, found, ret;

	node = nilfs_btree_get_root(btree);
	level = nilfs_btree_node_get_level(btree, node);
	if ((level < minlevel) ||
	    (nilfs_btree_node_get_nchildren(btree, node) <= 0))
		return -ENOENT;

	found = nilfs_btree_node_lookup(btree, node, key, &index);
	ptr = nilfs_btree_node_get_ptr(btree, node, index);
	path[level].bp_bh = NULL;
	path[level].bp_index = index;

	for (level--; level >= minlevel; level--) {
		ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
					   &path[level].bp_bh);
		if (ret < 0)
			return ret;
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
		if (!found)
			found = nilfs_btree_node_lookup(btree, node, key,
							&index);
		else
			/* once matched, descend through slot 0 */
			index = 0;
		if (index < nilfs_btree_node_nchildren_max(btree, node))
			ptr = nilfs_btree_node_get_ptr(btree, node, index);
		else {
			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
			/* insert */
			ptr = NILFS_BMAP_INVALID_PTR;
		}
		path[level].bp_index = index;
	}
	if (!found)
		return -ENOENT;

	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}
519 | |||
/*
 * Descend along the rightmost edge of the tree, recording the route in
 * @path, and return the last (largest) key and its pointer through
 * @keyp/@ptrp.  Returns 0, -ENOENT for an empty tree, or an I/O error.
 */
static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	__u64 ptr;
	int index, level, ret;

	node = nilfs_btree_get_root(btree);
	index = nilfs_btree_node_get_nchildren(btree, node) - 1;
	if (index < 0)
		return -ENOENT;
	level = nilfs_btree_node_get_level(btree, node);
	ptr = nilfs_btree_node_get_ptr(btree, node, index);
	path[level].bp_bh = NULL;
	path[level].bp_index = index;

	for (level--; level > 0; level--) {
		ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
					   &path[level].bp_bh);
		if (ret < 0)
			return ret;
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
		/* always follow the last child */
		index = nilfs_btree_node_get_nchildren(btree, node) - 1;
		ptr = nilfs_btree_node_get_ptr(btree, node, index);
		path[level].bp_index = index;
	}

	if (keyp != NULL)
		*keyp = nilfs_btree_node_get_key(btree, node, index);
	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}
556 | |||
557 | static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, | ||
558 | __u64 key, int level, __u64 *ptrp) | ||
559 | { | ||
560 | struct nilfs_btree *btree; | ||
561 | struct nilfs_btree_path *path; | ||
562 | __u64 ptr; | ||
563 | int ret; | ||
564 | |||
565 | btree = (struct nilfs_btree *)bmap; | ||
566 | path = nilfs_btree_alloc_path(btree); | ||
567 | if (path == NULL) | ||
568 | return -ENOMEM; | ||
569 | nilfs_btree_init_path(btree, path); | ||
570 | |||
571 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); | ||
572 | |||
573 | if (ptrp != NULL) | ||
574 | *ptrp = ptr; | ||
575 | |||
576 | nilfs_btree_clear_path(btree, path); | ||
577 | nilfs_btree_free_path(btree, path); | ||
578 | |||
579 | return ret; | ||
580 | } | ||
581 | |||
/*
 * Propagate @key upward along @path starting at @level: rewrite the
 * separator key in each ancestor as long as the changed child was the
 * ancestor's first child (bp_index == 0), possibly up to the root.
 */
static void nilfs_btree_promote_key(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 key)
{
	if (level < nilfs_btree_height(btree) - 1) {
		do {
			lock_buffer(path[level].bp_bh);
			nilfs_btree_node_set_key(
				btree,
				nilfs_btree_get_nonroot_node(
					btree, path, level),
				path[level].bp_index, key);
			if (!buffer_dirty(path[level].bp_bh))
				nilfs_btnode_mark_dirty(path[level].bp_bh);
			unlock_buffer(path[level].bp_bh);
		} while ((path[level].bp_index == 0) &&
			 (++level < nilfs_btree_height(btree) - 1));
	}

	/* root */
	if (level == nilfs_btree_height(btree) - 1) {
		nilfs_btree_node_set_key(btree,
					 nilfs_btree_get_root(btree),
					 path[level].bp_index, key);
	}
}
608 | |||
/*
 * Insert (*@keyp, *@ptrp) into the node at @level of @path — the plain
 * case where the node has room.  Inserting at slot 0 of a non-root node
 * changes its smallest key, so that key is promoted to the ancestors.
 */
static void nilfs_btree_do_insert(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;

	if (level < nilfs_btree_height(btree) - 1) {
		lock_buffer(path[level].bp_bh);
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
					path[level].bp_index);
		if (!buffer_dirty(path[level].bp_bh))
			nilfs_btnode_mark_dirty(path[level].bp_bh);
		unlock_buffer(path[level].bp_bh);

		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
						nilfs_btree_node_get_key(
							btree, node, 0));
	} else {
		/* root node: stored inline, no buffer locking/dirtying */
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
					path[level].bp_index);
	}
}
634 | |||
/*
 * Insert by rebalancing with the left sibling: move enough children
 * leftward to even the two nodes out, then do a plain insert.  If the
 * insert point itself migrates (index < n), one fewer child is moved and
 * the path is switched over to the sibling.
 */
static void nilfs_btree_carry_left(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int nchildren, lnchildren, n, move;

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	left = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
	move = 0;

	/* number of children to move so both nodes end up balanced */
	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
	if (n > path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_left(btree, left, node, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* node's smallest key changed; update the separator above */
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, node, 0));

	if (move) {
		/* continue the insert in the left sibling */
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index += lnchildren;
		path[level + 1].bp_index--;
	} else {
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -= n;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}
685 | |||
/*
 * Insert by rebalancing with the right sibling: mirror image of
 * nilfs_btree_carry_left().  Children move rightward; the separator key
 * for the right sibling (at parent index + 1) is promoted, temporarily
 * bumping bp_index so promote_key targets the sibling's parent slot.
 */
static void nilfs_btree_carry_right(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, rnchildren, n, move;

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
	move = 0;

	/* number of children to move so both nodes end up balanced */
	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
	if (n > nchildren - path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_right(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* promote the right sibling's new smallest key into its own
	 * parent slot, then restore bp_index */
	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, right, 0));
	path[level + 1].bp_index--;

	if (move) {
		/* continue the insert in the right sibling */
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -=
			nilfs_btree_node_get_nchildren(btree, node);
		path[level + 1].bp_index++;
	} else {
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}
738 | |||
739 | static void nilfs_btree_split(struct nilfs_btree *btree, | ||
740 | struct nilfs_btree_path *path, | ||
741 | int level, __u64 *keyp, __u64 *ptrp) | ||
742 | { | ||
743 | struct nilfs_btree_node *node, *right; | ||
744 | __u64 newkey; | ||
745 | __u64 newptr; | ||
746 | int nchildren, n, move; | ||
747 | |||
748 | lock_buffer(path[level].bp_bh); | ||
749 | lock_buffer(path[level].bp_sib_bh); | ||
750 | |||
751 | node = nilfs_btree_get_nonroot_node(btree, path, level); | ||
752 | right = nilfs_btree_get_sib_node(btree, path, level); | ||
753 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | ||
754 | move = 0; | ||
755 | |||
756 | n = (nchildren + 1) / 2; | ||
757 | if (n > nchildren - path[level].bp_index) { | ||
758 | n--; | ||
759 | move = 1; | ||
760 | } | ||
761 | |||
762 | nilfs_btree_node_move_right(btree, node, right, n); | ||
763 | |||
764 | if (!buffer_dirty(path[level].bp_bh)) | ||
765 | nilfs_btnode_mark_dirty(path[level].bp_bh); | ||
766 | if (!buffer_dirty(path[level].bp_sib_bh)) | ||
767 | nilfs_btnode_mark_dirty(path[level].bp_sib_bh); | ||
768 | |||
769 | unlock_buffer(path[level].bp_bh); | ||
770 | unlock_buffer(path[level].bp_sib_bh); | ||
771 | |||
772 | newkey = nilfs_btree_node_get_key(btree, right, 0); | ||
773 | newptr = path[level].bp_newreq.bpr_ptr; | ||
774 | |||
775 | if (move) { | ||
776 | path[level].bp_index -= | ||
777 | nilfs_btree_node_get_nchildren(btree, node); | ||
778 | nilfs_btree_node_insert(btree, right, *keyp, *ptrp, | ||
779 | path[level].bp_index); | ||
780 | |||
781 | *keyp = nilfs_btree_node_get_key(btree, right, 0); | ||
782 | *ptrp = path[level].bp_newreq.bpr_ptr; | ||
783 | |||
784 | nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh); | ||
785 | path[level].bp_bh = path[level].bp_sib_bh; | ||
786 | path[level].bp_sib_bh = NULL; | ||
787 | } else { | ||
788 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); | ||
789 | |||
790 | *keyp = nilfs_btree_node_get_key(btree, right, 0); | ||
791 | *ptrp = path[level].bp_newreq.bpr_ptr; | ||
792 | |||
793 | nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh); | ||
794 | path[level].bp_sib_bh = NULL; | ||
795 | } | ||
796 | |||
797 | path[level + 1].bp_index++; | ||
798 | } | ||
799 | |||
/*
 * Grow the tree by one level: move all of the root's children into the
 * new child node (bp_sib_bh), raise the root's level, insert into the
 * child, and hand back the child's first key and its new block pointer
 * for insertion into the (now one-child) root.
 */
static void nilfs_btree_grow(struct nilfs_btree *btree,
			     struct nilfs_btree_path *path,
			     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *root, *child;
	int n;

	lock_buffer(path[level].bp_sib_bh);

	root = nilfs_btree_get_root(btree);
	child = nilfs_btree_get_sib_node(btree, path, level);

	n = nilfs_btree_node_get_nchildren(btree, root);

	/* empty the root into the new child */
	nilfs_btree_node_move_right(btree, root, child, n);
	nilfs_btree_node_set_level(btree, root, level + 1);

	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_sib_bh);

	path[level].bp_bh = path[level].bp_sib_bh;
	path[level].bp_sib_bh = NULL;

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);

	*keyp = nilfs_btree_node_get_key(btree, child, 0);
	*ptrp = path[level].bp_newreq.bpr_ptr;
}
830 | |||
/*
 * Suggest a disk location near @path for a new allocation: the left
 * neighbor's pointer when one exists, otherwise the parent node's own
 * block pointer; NILFS_BMAP_INVALID_PTR when no hint is available.
 */
static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
				   const struct nilfs_btree_path *path)
{
	struct nilfs_btree_node *node;
	int level;

	if (path == NULL)
		return NILFS_BMAP_INVALID_PTR;

	/* left sibling */
	level = NILFS_BTREE_LEVEL_NODE_MIN;
	if (path[level].bp_index > 0) {
		node = nilfs_btree_get_node(btree, path, level);
		return nilfs_btree_node_get_ptr(btree, node,
						path[level].bp_index - 1);
	}

	/* parent */
	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
	if (level <= nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_node(btree, path, level);
		return nilfs_btree_node_get_ptr(btree, node,
						path[level].bp_index);
	}

	return NILFS_BMAP_INVALID_PTR;
}
858 | |||
859 | static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, | ||
860 | const struct nilfs_btree_path *path, | ||
861 | __u64 key) | ||
862 | { | ||
863 | __u64 ptr; | ||
864 | |||
865 | ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); | ||
866 | if (ptr != NILFS_BMAP_INVALID_PTR) | ||
867 | /* sequential access */ | ||
868 | return ptr; | ||
869 | else { | ||
870 | ptr = nilfs_btree_find_near(btree, path); | ||
871 | if (ptr != NILFS_BMAP_INVALID_PTR) | ||
872 | /* near */ | ||
873 | return ptr; | ||
874 | } | ||
875 | /* block group */ | ||
876 | return nilfs_bmap_find_target_in_group(&btree->bt_bmap); | ||
877 | } | ||
878 | |||
879 | static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key, | ||
880 | __u64 ptr) | ||
881 | { | ||
882 | btree->bt_bmap.b_last_allocated_key = key; | ||
883 | btree->bt_bmap.b_last_allocated_ptr = ptr; | ||
884 | } | ||
885 | |||
/*
 * nilfs_btree_prepare_insert - plan an insertion and reserve resources
 * @btree: btree
 * @path: lookup path for @key (already filled by a preceding lookup)
 * @levelp: place to return the highest level the commit phase will touch
 * @key: key to insert
 * @ptr: pointer to insert (not consumed during the prepare phase)
 * @stats: place to return the number of blocks the insertion will add
 *
 * Walks from the bottom node level upward and records in
 * path[level].bp_op the operation the commit phase must run at each
 * level: a plain insert when the node has room, a carry into a
 * non-full sibling, a split into a newly allocated node, or growing a
 * new root.  All new block pointers and sibling buffers needed by
 * those operations are reserved here; on failure every reservation
 * made so far is rolled back before returning the error.
 */
static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int *levelp, __u64 key, __u64 ptr,
				      struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, level, ret;

	stats->bs_nblocks = 0;
	level = NILFS_BTREE_LEVEL_DATA;

	/* allocate a new ptr for data block */
	if (btree->bt_ops->btop_find_target != NULL)
		path[level].bp_newreq.bpr_ptr =
			btree->bt_ops->btop_find_target(btree, path, key);

	ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
		&btree->bt_bmap, &path[level].bp_newreq);
	if (ret < 0)
		goto err_out_data;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		/* node with a free slot: a plain insert ends the walk */
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		if (nilfs_btree_node_get_nchildren(btree, node) <
		    nilfs_btree_node_nchildren_max(btree, node)) {
			path[level].bp_op = nilfs_btree_do_insert;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1);
		pindex = path[level + 1].bp_index;

		/* left sibling */
		if (pindex > 0) {
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex - 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) <
			    nilfs_btree_node_nchildren_max(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_left;
				stats->bs_nblocks++;
				goto out;
			} else
				nilfs_bmap_put_block(&btree->bt_bmap, bh);
		}

		/* right sibling */
		if (pindex <
		    nilfs_btree_node_get_nchildren(btree, parent) - 1) {
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex + 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) <
			    nilfs_btree_node_nchildren_max(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_right;
				stats->bs_nblocks++;
				goto out;
			} else
				nilfs_bmap_put_block(&btree->bt_bmap, bh);
		}

		/* split */
		/* new node ptrs are allocated consecutively after the
		   one reserved for the level below */
		path[level].bp_newreq.bpr_ptr =
			path[level - 1].bp_newreq.bpr_ptr + 1;
		ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
			&btree->bt_bmap, &path[level].bp_newreq);
		if (ret < 0)
			goto err_out_child_node;
		ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
					       path[level].bp_newreq.bpr_ptr,
					       &bh);
		if (ret < 0)
			goto err_out_curr_node;

		stats->bs_nblocks++;

		lock_buffer(bh);
		nilfs_btree_node_init(btree,
				      (struct nilfs_btree_node *)bh->b_data,
				      0, level, 0, NULL, NULL);
		unlock_buffer(bh);
		path[level].bp_sib_bh = bh;
		path[level].bp_op = nilfs_btree_split;
	}

	/* root */
	node = nilfs_btree_get_root(btree);
	if (nilfs_btree_node_get_nchildren(btree, node) <
	    nilfs_btree_node_nchildren_max(btree, node)) {
		path[level].bp_op = nilfs_btree_do_insert;
		stats->bs_nblocks++;
		goto out;
	}

	/* grow */
	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
	ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
		&btree->bt_bmap, &path[level].bp_newreq);
	if (ret < 0)
		goto err_out_child_node;
	ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
				       path[level].bp_newreq.bpr_ptr, &bh);
	if (ret < 0)
		goto err_out_curr_node;

	lock_buffer(bh);
	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
			      0, level, 0, NULL, NULL);
	unlock_buffer(bh);
	path[level].bp_sib_bh = bh;
	path[level].bp_op = nilfs_btree_grow;

	level++;
	path[level].bp_op = nilfs_btree_do_insert;

	/* a newly-created node block and a data block are added */
	stats->bs_nblocks += 2;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error */
 err_out_curr_node:
	btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
						    &path[level].bp_newreq);
 err_out_child_node:
	/* release sibling buffers and abort ptr reservations made for
	   the node levels below the failing one */
	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
		nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
		btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
			&btree->bt_bmap, &path[level].bp_newreq);

	}

	/* level == NILFS_BTREE_LEVEL_DATA here: abort the data ptr too */
	btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
						    &path[level].bp_newreq);
 err_out_data:
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}
1043 | |||
/*
 * nilfs_btree_commit_insert - carry out the insertion planned by
 * nilfs_btree_prepare_insert()
 *
 * @ptr arrives as a buffer head cast to __u64 (see the matching cast
 * in nilfs_btree_commit_convert_and_insert()); the buffer is tagged
 * volatile and @ptr is then replaced with the disk pointer reserved
 * during prepare.  Each level's pointer allocation is committed just
 * before its scheduled bp_op runs, and the bmap is marked dirty.
 */
static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int maxlevel, __u64 key, __u64 ptr)
{
	int level;

	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
	if (btree->bt_ops->btop_set_target != NULL)
		btree->bt_ops->btop_set_target(btree, key, ptr);

	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
		/* commit the ptr reserved for the level below (level - 1),
		   which is what this level's operation will insert */
		if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
			btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
				&btree->bt_bmap, &path[level - 1].bp_newreq);
		}
		path[level].bp_op(btree, path, level, &key, &ptr);
	}

	if (!nilfs_bmap_dirty(&btree->bt_bmap))
		nilfs_bmap_set_dirty(&btree->bt_bmap);
}
1066 | |||
1067 | static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | ||
1068 | { | ||
1069 | struct nilfs_btree *btree; | ||
1070 | struct nilfs_btree_path *path; | ||
1071 | struct nilfs_bmap_stats stats; | ||
1072 | int level, ret; | ||
1073 | |||
1074 | btree = (struct nilfs_btree *)bmap; | ||
1075 | path = nilfs_btree_alloc_path(btree); | ||
1076 | if (path == NULL) | ||
1077 | return -ENOMEM; | ||
1078 | nilfs_btree_init_path(btree, path); | ||
1079 | |||
1080 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | ||
1081 | NILFS_BTREE_LEVEL_NODE_MIN); | ||
1082 | if (ret != -ENOENT) { | ||
1083 | if (ret == 0) | ||
1084 | ret = -EEXIST; | ||
1085 | goto out; | ||
1086 | } | ||
1087 | |||
1088 | ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); | ||
1089 | if (ret < 0) | ||
1090 | goto out; | ||
1091 | nilfs_btree_commit_insert(btree, path, level, key, ptr); | ||
1092 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | ||
1093 | |||
1094 | out: | ||
1095 | nilfs_btree_clear_path(btree, path); | ||
1096 | nilfs_btree_free_path(btree, path); | ||
1097 | return ret; | ||
1098 | } | ||
1099 | |||
/*
 * nilfs_btree_do_delete - remove the entry at bp_index from the node
 * at @level
 *
 * For a non-root node the buffer is locked across the removal and
 * marked dirty; if the node's first entry was removed (bp_index == 0),
 * the new first key is promoted to the ancestors.  The root node
 * needs no buffer locking or key promotion.
 */
static void nilfs_btree_do_delete(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;

	if (level < nilfs_btree_height(btree) - 1) {
		lock_buffer(path[level].bp_bh);
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		nilfs_btree_node_delete(btree, node, keyp, ptrp,
					path[level].bp_index);
		if (!buffer_dirty(path[level].bp_bh))
			nilfs_btnode_mark_dirty(path[level].bp_bh);
		unlock_buffer(path[level].bp_bh);
		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, node, 0));
	} else {
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_delete(btree, node, keyp, ptrp,
					path[level].bp_index);
	}
}
1123 | |||
/*
 * nilfs_btree_borrow_left - delete at @level, then rebalance with the
 * left sibling
 *
 * After the deletion, entries are moved from the left sibling into
 * this node until both hold roughly half of their combined children.
 * The node's new first key is promoted upward and bp_index is shifted
 * by the number of borrowed entries.
 */
static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int nchildren, lnchildren, n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	left = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);

	/* number of entries to move so the two nodes even out */
	n = (nchildren + lnchildren) / 2 - nchildren;

	nilfs_btree_node_move_right(btree, left, node, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, node, 0));

	nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
	path[level].bp_index += n;
}
1160 | |||
/*
 * nilfs_btree_borrow_right - delete at @level, then rebalance with the
 * right sibling
 *
 * After the deletion, entries are moved from the right sibling into
 * this node until both hold roughly half of their combined children.
 * The sibling's new first key is promoted via its own parent slot,
 * which is why the parent index is bumped and restored around the
 * promotion.
 */
static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
				     struct nilfs_btree_path *path,
				     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, rnchildren, n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);

	/* number of entries to move so the two nodes even out */
	n = (nchildren + rnchildren) / 2 - nchildren;

	nilfs_btree_node_move_left(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* temporarily point the parent slot at the right sibling so its
	   new first key is promoted into the correct entry */
	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, right, 0));
	path[level + 1].bp_index--;

	nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
}
1198 | |||
/*
 * nilfs_btree_concat_left - delete at @level, then merge this node
 * into its left sibling
 *
 * All remaining entries of this node are appended to the left sibling
 * and this node's block is freed.  The sibling becomes the current
 * node of the path, with bp_index adjusted past the sibling's original
 * entries.
 */
static void nilfs_btree_concat_left(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	left = nilfs_btree_get_sib_node(btree, path, level);

	n = nilfs_btree_node_get_nchildren(btree, node);

	nilfs_btree_node_move_left(btree, left, node, n);

	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
	path[level].bp_bh = path[level].bp_sib_bh;
	path[level].bp_sib_bh = NULL;
	path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
}
1229 | |||
/*
 * nilfs_btree_concat_right - delete at @level, then merge the right
 * sibling into this node
 *
 * All entries of the right sibling are appended to this node and the
 * sibling's block is freed.  The parent index is advanced so that the
 * deletion propagating to the level above removes the sibling's entry.
 */
static void nilfs_btree_concat_right(struct nilfs_btree *btree,
				     struct nilfs_btree_path *path,
				     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);

	n = nilfs_btree_node_get_nchildren(btree, right);

	nilfs_btree_node_move_left(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
	path[level + 1].bp_index++;
}
1259 | |||
/*
 * nilfs_btree_shrink - delete at @level, then reduce the tree height
 * by one
 *
 * The contents of the root's only child are pulled up into the root
 * node (whose level is lowered accordingly) and the child's block is
 * freed.
 */
static void nilfs_btree_shrink(struct nilfs_btree *btree,
			       struct nilfs_btree_path *path,
			       int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *root, *child;
	int n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	root = nilfs_btree_get_root(btree);
	child = nilfs_btree_get_nonroot_node(btree, path, level);

	/* drop the root's single entry, then absorb the child */
	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
	nilfs_btree_node_set_level(btree, root, level);
	n = nilfs_btree_node_get_nchildren(btree, child);
	nilfs_btree_node_move_left(btree, root, child, n);
	unlock_buffer(path[level].bp_bh);

	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
	path[level].bp_bh = NULL;
}
1282 | |||
1283 | |||
/*
 * nilfs_btree_prepare_delete - plan a deletion and reserve resources
 * @btree: btree
 * @path: lookup path for the key being deleted
 * @levelp: place to return the highest level the commit phase will touch
 * @stats: place to return the number of blocks the deletion will free
 *
 * Walks from the bottom node level upward and records in
 * path[level].bp_op the operation the commit phase must run at each
 * level: a plain delete when the node stays above its minimum fill, a
 * borrow from a sibling with spare entries, a concatenation with a
 * minimal sibling (which propagates the delete upward), or shrinking
 * the tree when the root's only child becomes small enough.  Each
 * level's old pointer is registered with bpop_prepare_end_ptr; on
 * failure all registrations and sibling buffers acquired so far are
 * released.
 */
static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int *levelp,
				      struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, level, ret;

	ret = 0;
	stats->bs_nblocks = 0;
	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		path[level].bp_oldreq.bpr_ptr =
			nilfs_btree_node_get_ptr(btree, node,
						 path[level].bp_index);
		if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
			ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
				&btree->bt_bmap, &path[level].bp_oldreq);
			if (ret < 0)
				goto err_out_child_node;
		}

		/* node stays above the minimum: a plain delete suffices */
		if (nilfs_btree_node_get_nchildren(btree, node) >
		    nilfs_btree_node_nchildren_min(btree, node)) {
			path[level].bp_op = nilfs_btree_do_delete;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1);
		pindex = path[level + 1].bp_index;

		if (pindex > 0) {
			/* left sibling */
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex - 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_curr_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) >
			    nilfs_btree_node_nchildren_min(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_borrow_left;
				stats->bs_nblocks++;
				goto out;
			} else {
				/* sibling minimal too: merge, and keep
				   walking up to delete the parent entry */
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_concat_left;
				stats->bs_nblocks++;
				/* continue; */
			}
		} else if (pindex <
			   nilfs_btree_node_get_nchildren(btree, parent) - 1) {
			/* right sibling */
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex + 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_curr_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) >
			    nilfs_btree_node_nchildren_min(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_borrow_right;
				stats->bs_nblocks++;
				goto out;
			} else {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_concat_right;
				stats->bs_nblocks++;
				/* continue; */
			}
		} else {
			/* no siblings */
			/* the only child of the root node */
			WARN_ON(level != nilfs_btree_height(btree) - 2);
			if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
				path[level].bp_op = nilfs_btree_shrink;
				stats->bs_nblocks += 2;
			} else {
				path[level].bp_op = nilfs_btree_do_delete;
				stats->bs_nblocks++;
			}

			goto out;

		}
	}

	node = nilfs_btree_get_root(btree);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
	if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
		ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
			&btree->bt_bmap, &path[level].bp_oldreq);
		if (ret < 0)
			goto err_out_child_node;
	}
	/* child of the root node is deleted */
	path[level].bp_op = nilfs_btree_do_delete;
	stats->bs_nblocks++;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error */
 err_out_curr_node:
	if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
		btree->bt_bmap.b_pops->bpop_abort_end_ptr(
			&btree->bt_bmap, &path[level].bp_oldreq);
 err_out_child_node:
	/* release sibling buffers and abort end-ptr registrations made
	   for the levels already prepared */
	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
			btree->bt_bmap.b_pops->bpop_abort_end_ptr(
				&btree->bt_bmap, &path[level].bp_oldreq);
	}
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}
1415 | |||
1416 | static void nilfs_btree_commit_delete(struct nilfs_btree *btree, | ||
1417 | struct nilfs_btree_path *path, | ||
1418 | int maxlevel) | ||
1419 | { | ||
1420 | int level; | ||
1421 | |||
1422 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { | ||
1423 | if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) | ||
1424 | btree->bt_bmap.b_pops->bpop_commit_end_ptr( | ||
1425 | &btree->bt_bmap, &path[level].bp_oldreq); | ||
1426 | path[level].bp_op(btree, path, level, NULL, NULL); | ||
1427 | } | ||
1428 | |||
1429 | if (!nilfs_bmap_dirty(&btree->bt_bmap)) | ||
1430 | nilfs_bmap_set_dirty(&btree->bt_bmap); | ||
1431 | } | ||
1432 | |||
1433 | static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) | ||
1434 | |||
1435 | { | ||
1436 | struct nilfs_btree *btree; | ||
1437 | struct nilfs_btree_path *path; | ||
1438 | struct nilfs_bmap_stats stats; | ||
1439 | int level, ret; | ||
1440 | |||
1441 | btree = (struct nilfs_btree *)bmap; | ||
1442 | path = nilfs_btree_alloc_path(btree); | ||
1443 | if (path == NULL) | ||
1444 | return -ENOMEM; | ||
1445 | nilfs_btree_init_path(btree, path); | ||
1446 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | ||
1447 | NILFS_BTREE_LEVEL_NODE_MIN); | ||
1448 | if (ret < 0) | ||
1449 | goto out; | ||
1450 | |||
1451 | ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); | ||
1452 | if (ret < 0) | ||
1453 | goto out; | ||
1454 | nilfs_btree_commit_delete(btree, path, level); | ||
1455 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | ||
1456 | |||
1457 | out: | ||
1458 | nilfs_btree_clear_path(btree, path); | ||
1459 | nilfs_btree_free_path(btree, path); | ||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | ||
1464 | { | ||
1465 | struct nilfs_btree *btree; | ||
1466 | struct nilfs_btree_path *path; | ||
1467 | int ret; | ||
1468 | |||
1469 | btree = (struct nilfs_btree *)bmap; | ||
1470 | path = nilfs_btree_alloc_path(btree); | ||
1471 | if (path == NULL) | ||
1472 | return -ENOMEM; | ||
1473 | nilfs_btree_init_path(btree, path); | ||
1474 | |||
1475 | ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); | ||
1476 | |||
1477 | nilfs_btree_clear_path(btree, path); | ||
1478 | nilfs_btree_free_path(btree, path); | ||
1479 | |||
1480 | return ret; | ||
1481 | } | ||
1482 | |||
/*
 * nilfs_btree_check_delete - check whether the btree becomes small
 * enough, once @key is deleted, to be handled in another form
 *
 * Returns nonzero when @key is the current last key and the next
 * largest key lies below bmap->b_low; returns 0 for trees taller than
 * three levels or with more than one bottom node, or a negative error
 * code when the child node block cannot be read.
 */
static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	struct nilfs_btree_node *root, *node;
	__u64 maxkey, nextmaxkey;
	__u64 ptr;
	int nchildren, ret;

	btree = (struct nilfs_btree *)bmap;
	root = nilfs_btree_get_root(btree);
	switch (nilfs_btree_height(btree)) {
	case 2:
		/* keys live directly in the root */
		bh = NULL;
		node = root;
		break;
	case 3:
		/* examine the single bottom-level node */
		nchildren = nilfs_btree_node_get_nchildren(btree, root);
		if (nchildren > 1)
			return 0;
		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
		ret = nilfs_bmap_get_block(bmap, ptr, &bh);
		if (ret < 0)
			return ret;
		node = (struct nilfs_btree_node *)bh->b_data;
		break;
	default:
		return 0;
	}

	/* largest key and the one that becomes largest after deletion */
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
	nextmaxkey = (nchildren > 1) ?
		nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
	if (bh != NULL)
		nilfs_bmap_put_block(bmap, bh);

	return (maxkey == key) && (nextmaxkey < bmap->b_low);
}
1522 | |||
/*
 * nilfs_btree_gather_data - copy key/pointer pairs out of a small btree
 * @bmap: bmap
 * @keys: array to receive the keys
 * @ptrs: array to receive the pointers
 * @nitems: capacity of @keys and @ptrs
 *
 * Works only on a tree of height 2, or height 3 with a single
 * bottom-level node; copies at most @nitems pairs and returns the
 * number copied, or a negative error code.
 */
static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
				   __u64 *keys, __u64 *ptrs, int nitems)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	struct nilfs_btree_node *node, *root;
	__le64 *dkeys;
	__le64 *dptrs;
	__u64 ptr;
	int nchildren, i, ret;

	btree = (struct nilfs_btree *)bmap;
	root = nilfs_btree_get_root(btree);
	switch (nilfs_btree_height(btree)) {
	case 2:
		/* entries live directly in the root */
		bh = NULL;
		node = root;
		break;
	case 3:
		nchildren = nilfs_btree_node_get_nchildren(btree, root);
		WARN_ON(nchildren > 1);
		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
		ret = nilfs_bmap_get_block(bmap, ptr, &bh);
		if (ret < 0)
			return ret;
		node = (struct nilfs_btree_node *)bh->b_data;
		break;
	default:
		/* taller trees are not supported; the assignment below
		   is dead (the function returns immediately) */
		node = NULL;
		return -EINVAL;
	}

	/* copy the on-disk (little-endian) entries into host order */
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	if (nchildren < nitems)
		nitems = nchildren;
	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	for (i = 0; i < nitems; i++) {
		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
	}

	if (bh != NULL)
		nilfs_bmap_put_block(bmap, bh);

	return nitems;
}
1570 | |||
/*
 * nilfs_btree_prepare_convert_and_insert - reserve the pointers needed
 * to convert a bmap into a btree
 * @bmap: bmap to be converted
 * @key: key of the entry that will be inserted after conversion
 * @dreq: allocation request for the new data block pointer
 * @nreq: allocation request for a child node pointer, or NULL when the
 *        converted tree fits in the root
 * @bhp: place to return the buffer of the new child node (NULL if none)
 * @stats: place to return the number of blocks the conversion will add
 *
 * On failure all reservations made so far are aborted.
 */
static int
nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
				       union nilfs_bmap_ptr_req *dreq,
				       union nilfs_bmap_ptr_req *nreq,
				       struct buffer_head **bhp,
				       struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	int ret;

	btree = (struct nilfs_btree *)bmap;
	stats->bs_nblocks = 0;

	/* for data */
	/* cannot find near ptr */
	if (btree->bt_ops->btop_find_target != NULL)
		dreq->bpr_ptr
			= btree->bt_ops->btop_find_target(btree, NULL, key);
	ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
	if (ret < 0)
		return ret;

	*bhp = NULL;
	stats->bs_nblocks++;
	if (nreq != NULL) {
		/* the node ptr is allocated right after the data ptr */
		nreq->bpr_ptr = dreq->bpr_ptr + 1;
		ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
		if (ret < 0)
			goto err_out_dreq;

		ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
		if (ret < 0)
			goto err_out_nreq;

		*bhp = bh;
		stats->bs_nblocks++;
	}

	/* success */
	return 0;

	/* error */
 err_out_nreq:
	bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
 err_out_dreq:
	bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
	stats->bs_nblocks = 0;
	return ret;

}
1622 | |||
/*
 * nilfs_btree_commit_convert_and_insert - finish converting a bmap into
 * a btree and insert the new entry
 *
 * Clears the old bmap state, commits the pointer reservations made by
 * nilfs_btree_prepare_convert_and_insert(), and rebuilds the tree from
 * the gathered @keys/@ptrs plus the new @key: either a two-level tree
 * with one child node (@nreq != NULL, child buffer in @bh) or a
 * single-level tree held entirely in the root.
 */
static void
nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
				      __u64 key, __u64 ptr,
				      const __u64 *keys, const __u64 *ptrs,
				      int n, __u64 low, __u64 high,
				      union nilfs_bmap_ptr_req *dreq,
				      union nilfs_bmap_ptr_req *nreq,
				      struct buffer_head *bh)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_node *node;
	__u64 tmpptr;

	/* free resources */
	if (bmap->b_ops->bop_clear != NULL)
		bmap->b_ops->bop_clear(bmap);

	/* ptr must be a pointer to a buffer head. */
	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));

	/* convert and insert */
	btree = (struct nilfs_btree *)bmap;
	nilfs_btree_init(bmap, low, high);
	if (nreq != NULL) {
		if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
			bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
			bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
		}

		/* create child node at level 1 */
		lock_buffer(bh);
		node = (struct nilfs_btree_node *)bh->b_data;
		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
		nilfs_btree_node_insert(btree, node,
					key, dreq->bpr_ptr, n);
		if (!buffer_dirty(bh))
			nilfs_btnode_mark_dirty(bh);
		if (!nilfs_bmap_dirty(bmap))
			nilfs_bmap_set_dirty(bmap);

		unlock_buffer(bh);
		nilfs_bmap_put_block(bmap, bh);

		/* create root node at level 2 */
		node = nilfs_btree_get_root(btree);
		tmpptr = nreq->bpr_ptr;
		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
				      2, 1, &keys[0], &tmpptr);
	} else {
		if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
			bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);

		/* create root node at level 1 */
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
				      1, n, keys, ptrs);
		nilfs_btree_node_insert(btree, node,
					key, dreq->bpr_ptr, n);
		if (!nilfs_bmap_dirty(bmap))
			nilfs_bmap_set_dirty(bmap);
	}

	if (btree->bt_ops->btop_set_target != NULL)
		btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
}
1688 | |||
1689 | /** | ||
1690 | * nilfs_btree_convert_and_insert - | ||
1691 | * @bmap: | ||
1692 | * @key: | ||
1693 | * @ptr: | ||
1694 | * @keys: | ||
1695 | * @ptrs: | ||
1696 | * @n: | ||
1697 | * @low: | ||
1698 | * @high: | ||
1699 | */ | ||
1700 | int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, | ||
1701 | __u64 key, __u64 ptr, | ||
1702 | const __u64 *keys, const __u64 *ptrs, | ||
1703 | int n, __u64 low, __u64 high) | ||
1704 | { | ||
1705 | struct buffer_head *bh; | ||
1706 | union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; | ||
1707 | struct nilfs_bmap_stats stats; | ||
1708 | int ret; | ||
1709 | |||
1710 | if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { | ||
1711 | di = &dreq; | ||
1712 | ni = NULL; | ||
1713 | } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( | ||
1714 | 1 << bmap->b_inode->i_blkbits)) { | ||
1715 | di = &dreq; | ||
1716 | ni = &nreq; | ||
1717 | } else { | ||
1718 | di = NULL; | ||
1719 | ni = NULL; | ||
1720 | BUG(); | ||
1721 | } | ||
1722 | |||
1723 | ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh, | ||
1724 | &stats); | ||
1725 | if (ret < 0) | ||
1726 | return ret; | ||
1727 | nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, | ||
1728 | low, high, di, ni, bh); | ||
1729 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | ||
1730 | return 0; | ||
1731 | } | ||
1732 | |||
1733 | static int nilfs_btree_propagate_p(struct nilfs_btree *btree, | ||
1734 | struct nilfs_btree_path *path, | ||
1735 | int level, | ||
1736 | struct buffer_head *bh) | ||
1737 | { | ||
1738 | while ((++level < nilfs_btree_height(btree) - 1) && | ||
1739 | !buffer_dirty(path[level].bp_bh)) | ||
1740 | nilfs_btnode_mark_dirty(path[level].bp_bh); | ||
1741 | |||
1742 | return 0; | ||
1743 | } | ||
1744 | |||
/*
 * nilfs_btree_prepare_update_v - prepare renewal of the virtual block
 * address of the node at @level.
 *
 * The new pointer request is the old pointer + 1; presumably virtual
 * block addresses are handed out sequentially by the allocator --
 * TODO confirm against nilfs_bmap_prepare_update().  If the buffer is
 * a btree node, its key in the btnode cache (which is indexed by the
 * virtual address) must be changed as well; failure of that step rolls
 * back the prepared bmap update.
 *
 * Returns 0 on success or a negative error code.
 */
static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
					struct nilfs_btree_path *path,
					int level)
{
	struct nilfs_btree_node *parent;
	int ret;

	/* the node's current pointer is read from its parent's slot */
	parent = nilfs_btree_get_node(btree, path, level + 1);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(btree, parent,
					 path[level + 1].bp_index);
	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
	ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
					&path[level].bp_oldreq,
					&path[level].bp_newreq);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(path[level].bp_bh)) {
		/* rekey the cached node block to its new virtual address */
		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
		path[level].bp_ctxt.bh = path[level].bp_bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		if (ret < 0) {
			/* undo the already-prepared bmap update */
			nilfs_bmap_abort_update(&btree->bt_bmap,
						&path[level].bp_oldreq,
						&path[level].bp_newreq);
			return ret;
		}
	}

	return 0;
}
1780 | |||
/*
 * nilfs_btree_commit_update_v - commit the address renewal prepared by
 * nilfs_btree_prepare_update_v() and store the new pointer in the
 * parent node's slot.
 */
static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
					struct nilfs_btree_path *path,
					int level)
{
	struct nilfs_btree_node *parent;

	nilfs_bmap_commit_update(&btree->bt_bmap,
				 &path[level].bp_oldreq,
				 &path[level].bp_newreq);

	if (buffer_nilfs_node(path[level].bp_bh)) {
		/*
		 * Finish rekeying the btnode cache; the key change may
		 * have replaced the buffer head, so pick up the new one.
		 */
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		path[level].bp_bh = path[level].bp_ctxt.bh;
	}
	/* volatile = address already renewed in this construction cycle */
	set_buffer_nilfs_volatile(path[level].bp_bh);

	parent = nilfs_btree_get_node(btree, path, level + 1);
	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
				 path[level].bp_newreq.bpr_ptr);
}
1803 | |||
1804 | static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, | ||
1805 | struct nilfs_btree_path *path, | ||
1806 | int level) | ||
1807 | { | ||
1808 | nilfs_bmap_abort_update(&btree->bt_bmap, | ||
1809 | &path[level].bp_oldreq, | ||
1810 | &path[level].bp_newreq); | ||
1811 | if (buffer_nilfs_node(path[level].bp_bh)) | ||
1812 | nilfs_btnode_abort_change_key( | ||
1813 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | ||
1814 | &path[level].bp_ctxt); | ||
1815 | } | ||
1816 | |||
/*
 * nilfs_btree_prepare_propagate_v - prepare virtual-address renewal for
 * every node on @path that needs it, starting at @minlevel.
 *
 * The node at @minlevel is prepared unless its buffer is already
 * volatile (address renewed earlier in this cycle).  Ancestors are then
 * prepared upward until an already-dirty node is met -- a dirty node is
 * handled by its own propagation pass.  On success the highest prepared
 * level is stored in *@maxlevelp; on error, all updates prepared so far
 * (including the conditional one at @minlevel) are rolled back.
 */
static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
					   struct nilfs_btree_path *path,
					   int minlevel,
					   int *maxlevelp)
{
	int level, ret;

	level = minlevel;
	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
		ret = nilfs_btree_prepare_update_v(btree, path, level);
		if (ret < 0)
			return ret;
	}
	while ((++level < nilfs_btree_height(btree) - 1) &&
	       !buffer_dirty(path[level].bp_bh)) {

		/* a clean ancestor must not be volatile already */
		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
		ret = nilfs_btree_prepare_update_v(btree, path, level);
		if (ret < 0)
			goto out;
	}

	/* success */
	*maxlevelp = level - 1;
	return 0;

	/* error */
 out:
	/* unwind fully-prepared intermediate levels... */
	while (--level > minlevel)
		nilfs_btree_abort_update_v(btree, path, level);
	/* ...and the bottom level, only if it was actually prepared */
	if (!buffer_nilfs_volatile(path[level].bp_bh))
		nilfs_btree_abort_update_v(btree, path, level);
	return ret;
}
1851 | |||
1852 | static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, | ||
1853 | struct nilfs_btree_path *path, | ||
1854 | int minlevel, | ||
1855 | int maxlevel, | ||
1856 | struct buffer_head *bh) | ||
1857 | { | ||
1858 | int level; | ||
1859 | |||
1860 | if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) | ||
1861 | nilfs_btree_commit_update_v(btree, path, minlevel); | ||
1862 | |||
1863 | for (level = minlevel + 1; level <= maxlevel; level++) | ||
1864 | nilfs_btree_commit_update_v(btree, path, level); | ||
1865 | } | ||
1866 | |||
/*
 * nilfs_btree_propagate_v - propagate dirtiness for bmaps using virtual
 * (DAT-mapped) block addresses: renew the virtual addresses of the
 * buffer's node and of its clean ancestors along @path.
 */
static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   int level,
				   struct buffer_head *bh)
{
	int maxlevel, ret;
	struct nilfs_btree_node *parent;
	__u64 ptr;

	/* hold an extra reference while the buffer sits in the path */
	get_bh(bh);
	path[level].bp_bh = bh;
	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
	if (ret < 0)
		goto out;

	if (buffer_nilfs_volatile(path[level].bp_bh)) {
		/*
		 * Address already renewed earlier this cycle; just mark
		 * the DAT entry of the current pointer dirty.
		 */
		parent = nilfs_btree_get_node(btree, path, level + 1);
		ptr = nilfs_btree_node_get_ptr(btree, parent,
					       path[level + 1].bp_index);
		ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
		if (ret < 0)
			goto out;
	}

	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);

 out:
	brelse(path[level].bp_bh);
	path[level].bp_bh = NULL;
	return ret;
}
1898 | |||
/*
 * nilfs_btree_propagate - bop_propagate method of the B-tree bmap.
 *
 * Looks up the path down to the node or data block held in @bh and
 * delegates the actual propagation to the pointer-type specific
 * operation (nilfs_btree_propagate_v or nilfs_btree_propagate_p).
 */
static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
				 struct buffer_head *bh)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	WARN_ON(!buffer_dirty(bh));

	btree = (struct nilfs_btree *)bmap;
	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -ENOMEM;
	nilfs_btree_init_path(btree, path);

	if (buffer_nilfs_node(bh)) {
		/* interior node: key and level come from its header */
		node = (struct nilfs_btree_node *)bh->b_data;
		key = nilfs_btree_node_get_key(btree, node, 0);
		level = nilfs_btree_node_get_level(btree, node);
	} else {
		/* data block: keyed by its offset within the file */
		key = nilfs_bmap_data_get_key(bmap, bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
	if (ret < 0) {
		/* a dirty buffer must be reachable from the tree */
		if (unlikely(ret == -ENOENT))
			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
			       __func__, (unsigned long long)key, level);
		goto out;
	}

	ret = btree->bt_ops->btop_propagate(btree, path, level, bh);

 out:
	nilfs_btree_clear_path(btree, path);
	nilfs_btree_free_path(btree, path);

	return ret;
}
1941 | |||
/*
 * nilfs_btree_propagate_gc - bop_propagate during garbage collection:
 * only mark the DAT entry for the block dirty.  b_blocknr appears to
 * hold the virtual block address in the GC path -- NOTE(review):
 * inferred from nilfs_btree_assign_gc(); confirm with the GC caller.
 */
static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
				    struct buffer_head *bh)
{
	return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
}
1947 | |||
1948 | static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | ||
1949 | struct list_head *lists, | ||
1950 | struct buffer_head *bh) | ||
1951 | { | ||
1952 | struct list_head *head; | ||
1953 | struct buffer_head *cbh; | ||
1954 | struct nilfs_btree_node *node, *cnode; | ||
1955 | __u64 key, ckey; | ||
1956 | int level; | ||
1957 | |||
1958 | get_bh(bh); | ||
1959 | node = (struct nilfs_btree_node *)bh->b_data; | ||
1960 | key = nilfs_btree_node_get_key(btree, node, 0); | ||
1961 | level = nilfs_btree_node_get_level(btree, node); | ||
1962 | list_for_each(head, &lists[level]) { | ||
1963 | cbh = list_entry(head, struct buffer_head, b_assoc_buffers); | ||
1964 | cnode = (struct nilfs_btree_node *)cbh->b_data; | ||
1965 | ckey = nilfs_btree_node_get_key(btree, cnode, 0); | ||
1966 | if (key < ckey) | ||
1967 | break; | ||
1968 | } | ||
1969 | list_add_tail(&bh->b_assoc_buffers, head); | ||
1970 | } | ||
1971 | |||
/*
 * nilfs_btree_lookup_dirty_buffers - collect dirty B-tree node buffers
 *
 * Scans the btnode cache for pages tagged dirty and gathers their dirty
 * buffers into per-level lists sorted by node key, then splices the
 * lists onto the tail of @listp from the lowest level upward so that
 * child nodes precede their ancestors in the output list.
 */
static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
					     struct list_head *listp)
{
	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	pgoff_t index = 0;
	int level, i;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		INIT_LIST_HEAD(&lists[level]);

	pagevec_init(&pvec, 0);

	/* walk all dirty-tagged pages, a pagevec batch at a time */
	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			/* visit every buffer on the page's circular list */
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh))
					nilfs_btree_add_dirty_buffer(btree,
								     lists, bh);
			} while ((bh = bh->b_this_page) != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		list_splice(&lists[level], listp->prev);
}
2009 | |||
/*
 * nilfs_btree_assign_p - assign a physical disk block number to the
 * block in *@bh, for bmaps storing physical addresses (the DAT).
 *
 * The node's slot in its parent is rewritten to @blocknr; if the buffer
 * is a cached btree node, its cache key (the old address) is changed to
 * @blocknr as well.  @binfo is filled in on-disk bi_dat format.
 */
static int nilfs_btree_assign_p(struct nilfs_btree *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	int ret;

	parent = nilfs_btree_get_node(btree, path, level + 1);
	ptr = nilfs_btree_node_get_ptr(btree, parent,
				       path[level + 1].bp_index);
	if (buffer_nilfs_node(*bh)) {
		/* rekey the cached node from its old address to @blocknr;
		   the buffer head may be replaced by the key change */
		path[level].bp_ctxt.oldkey = ptr;
		path[level].bp_ctxt.newkey = blocknr;
		path[level].bp_ctxt.bh = *bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		if (ret < 0)
			return ret;
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		*bh = path[level].bp_ctxt.bh;
	}

	nilfs_btree_node_set_ptr(btree, parent,
				 path[level + 1].bp_index, blocknr);

	key = nilfs_btree_node_get_key(btree, parent,
				       path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
	binfo->bi_dat.bi_level = level;

	return 0;
}
2051 | |||
/*
 * nilfs_btree_assign_v - assign a disk block number to the block in
 * *@bh, for bmaps using virtual block addresses.
 *
 * Records @blocknr as the on-disk location of the node's virtual
 * address through the ptr-op start callbacks (presumably updating the
 * DAT entry -- confirm against the DAT implementation), then fills
 * @binfo in on-disk bi_v format with the virtual address and key.
 */
static int nilfs_btree_assign_v(struct nilfs_btree *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	union nilfs_bmap_ptr_req req;
	int ret;

	parent = nilfs_btree_get_node(btree, path, level + 1);
	ptr = nilfs_btree_node_get_ptr(btree, parent,
				       path[level + 1].bp_index);
	req.bpr_ptr = ptr;
	ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
							    &req);
	if (ret < 0)
		return ret;
	btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
						     &req, blocknr);

	key = nilfs_btree_node_get_key(btree, parent,
				       path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);

	return 0;
}
2084 | |||
/*
 * nilfs_btree_assign - bop_assign method of the B-tree bmap.
 *
 * Looks up the path down to the node or data block in *@bh and
 * delegates the assignment of @blocknr to the pointer-type specific
 * operation (nilfs_btree_assign_v or nilfs_btree_assign_p).
 */
static int nilfs_btree_assign(struct nilfs_bmap *bmap,
			      struct buffer_head **bh,
			      sector_t blocknr,
			      union nilfs_binfo *binfo)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	btree = (struct nilfs_btree *)bmap;
	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -ENOMEM;
	nilfs_btree_init_path(btree, path);

	if (buffer_nilfs_node(*bh)) {
		/* interior node: key and level come from its header */
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(btree, node, 0);
		level = nilfs_btree_node_get_level(btree, node);
	} else {
		/* data block: keyed by its offset within the file */
		key = nilfs_bmap_data_get_key(bmap, *bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
	if (ret < 0) {
		/* the block being written must exist in the tree */
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	ret = btree->bt_ops->btop_assign(btree, path, level, bh,
					 blocknr, binfo);

 out:
	nilfs_btree_clear_path(btree, path);
	nilfs_btree_free_path(btree, path);

	return ret;
}
2126 | |||
/*
 * nilfs_btree_assign_gc - bop_assign during garbage collection.
 *
 * During GC, (*bh)->b_blocknr holds the block's virtual address; the
 * virtual-to-real binding is moved to @blocknr via nilfs_bmap_move_v()
 * and @binfo is filled in on-disk bi_v format.
 * NOTE(review): bi_vblocknr is set with a raw cpu_to_le64() here while
 * other paths use nilfs_bmap_ptr_to_dptr() -- verify the two agree.
 */
static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_node *node;
	__u64 key;
	int ret;

	btree = (struct nilfs_btree *)bmap;
	ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(btree, node, 0);
	} else
		key = nilfs_bmap_data_get_key(bmap, *bh);

	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);

	return 0;
}
2154 | |||
/*
 * nilfs_btree_mark - bop_mark method: mark the B-tree node block that
 * holds @key at @level dirty, and flag the whole bmap dirty as well.
 *
 * Returns 0 on success or a negative error code; -ENOENT from the
 * lookup or block read is treated as a bug (WARN_ON).
 */
static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	struct nilfs_btree_path *path;
	__u64 ptr;
	int ret;

	btree = (struct nilfs_btree *)bmap;
	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -ENOMEM;
	nilfs_btree_init_path(btree, path);

	/* look up one level above @key to obtain the node's pointer */
	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}
	ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	if (!buffer_dirty(bh))
		nilfs_btnode_mark_dirty(bh);
	nilfs_bmap_put_block(&btree->bt_bmap, bh);
	if (!nilfs_bmap_dirty(&btree->bt_bmap))
		nilfs_bmap_set_dirty(&btree->bt_bmap);

 out:
	nilfs_btree_clear_path(btree, path);
	nilfs_btree_free_path(btree, path);
	return ret;
}
2191 | |||
/* bmap operation table for regular (read/write) B-tree bmaps */
static const struct nilfs_bmap_operations nilfs_btree_ops = {
	.bop_lookup		=	nilfs_btree_lookup,
	.bop_insert		=	nilfs_btree_insert,
	.bop_delete		=	nilfs_btree_delete,
	.bop_clear		=	NULL,

	.bop_propagate		=	nilfs_btree_propagate,

	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,

	.bop_assign		=	nilfs_btree_assign,
	.bop_mark		=	nilfs_btree_mark,

	.bop_last_key		=	nilfs_btree_last_key,
	.bop_check_insert	=	NULL,
	.bop_check_delete	=	nilfs_btree_check_delete,
	.bop_gather_data	=	nilfs_btree_gather_data,
};
2210 | |||
/*
 * bmap operation table used during garbage collection; only propagate,
 * dirty-buffer lookup, and assign are needed, the rest stay NULL.
 */
static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
	.bop_lookup		=	NULL,
	.bop_insert		=	NULL,
	.bop_delete		=	NULL,
	.bop_clear		=	NULL,

	.bop_propagate		=	nilfs_btree_propagate_gc,

	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,

	.bop_assign		=	nilfs_btree_assign_gc,
	.bop_mark		=	NULL,

	.bop_last_key		=	NULL,
	.bop_check_insert	=	NULL,
	.bop_check_delete	=	NULL,
	.bop_gather_data	=	NULL,
};
2229 | |||
/* pointer operations for bmaps using virtual (DAT-mapped) addresses */
static const struct nilfs_btree_operations nilfs_btree_ops_v = {
	.btop_find_target	=	nilfs_btree_find_target_v,
	.btop_set_target	=	nilfs_btree_set_target_v,
	.btop_propagate		=	nilfs_btree_propagate_v,
	.btop_assign		=	nilfs_btree_assign_v,
};

/* pointer operations for the DAT itself, which uses physical addresses */
static const struct nilfs_btree_operations nilfs_btree_ops_p = {
	.btop_find_target	=	NULL,
	.btop_set_target	=	NULL,
	.btop_propagate		=	nilfs_btree_propagate_p,
	.btop_assign		=	nilfs_btree_assign_p,
};
2243 | |||
2244 | int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) | ||
2245 | { | ||
2246 | struct nilfs_btree *btree; | ||
2247 | |||
2248 | btree = (struct nilfs_btree *)bmap; | ||
2249 | bmap->b_ops = &nilfs_btree_ops; | ||
2250 | bmap->b_low = low; | ||
2251 | bmap->b_high = high; | ||
2252 | switch (bmap->b_inode->i_ino) { | ||
2253 | case NILFS_DAT_INO: | ||
2254 | btree->bt_ops = &nilfs_btree_ops_p; | ||
2255 | break; | ||
2256 | default: | ||
2257 | btree->bt_ops = &nilfs_btree_ops_v; | ||
2258 | break; | ||
2259 | } | ||
2260 | |||
2261 | return 0; | ||
2262 | } | ||
2263 | |||
2264 | void nilfs_btree_init_gc(struct nilfs_bmap *bmap) | ||
2265 | { | ||
2266 | bmap->b_low = NILFS_BMAP_LARGE_LOW; | ||
2267 | bmap->b_high = NILFS_BMAP_LARGE_HIGH; | ||
2268 | bmap->b_ops = &nilfs_btree_ops_gc; | ||
2269 | } | ||
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h new file mode 100644 index 000000000000..4766deb52fb1 --- /dev/null +++ b/fs/nilfs2/btree.h | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * btree.h - NILFS B-tree. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BTREE_H | ||
24 | #define _NILFS_BTREE_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/list.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "btnode.h" | ||
31 | #include "bmap.h" | ||
32 | |||
struct nilfs_btree;
struct nilfs_btree_path;

/**
 * struct nilfs_btree_operations - B-tree operation table
 * @btop_find_target: choose a target block address for a new allocation
 * @btop_set_target: record the last allocated address as the next hint
 * @btop_get_nilfs: get the nilfs object (NOTE(review): no implementation
 *	visible in btree.c -- possibly unused)
 * @btop_propagate: propagate dirtiness / renew addresses along a path
 * @btop_assign: assign a disk block number to a block being written out
 */
struct nilfs_btree_operations {
	__u64 (*btop_find_target)(const struct nilfs_btree *,
				  const struct nilfs_btree_path *, __u64);
	void (*btop_set_target)(struct nilfs_btree *, __u64, __u64);

	struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *);

	int (*btop_propagate)(struct nilfs_btree *,
			      struct nilfs_btree_path *,
			      int,
			      struct buffer_head *);
	int (*btop_assign)(struct nilfs_btree *,
			   struct nilfs_btree_path *,
			   int,
			   struct buffer_head **,
			   sector_t,
			   union nilfs_binfo *);
};
57 | |||
/**
 * struct nilfs_btree_node - B-tree node header (on-disk layout)
 * @bn_flags: flags (NILFS_BTREE_NODE_ROOT, ...)
 * @bn_level: level of this node in the tree
 * @bn_nchildren: number of children
 * @bn_pad: padding
 */
struct nilfs_btree_node {
	__u8 bn_flags;
	__u8 bn_level;
	__le16 bn_nchildren;
	__le32 bn_pad;
};

/* flags */
#define NILFS_BTREE_NODE_ROOT	0x01

/* level: 0 is reserved for data blocks; node levels start at 1 */
#define NILFS_BTREE_LEVEL_DATA		0
#define NILFS_BTREE_LEVEL_NODE_MIN	(NILFS_BTREE_LEVEL_DATA + 1)
#define NILFS_BTREE_LEVEL_MAX		14
79 | |||
/**
 * struct nilfs_btree - B-tree structure
 * @bt_bmap: bmap base structure (must stay first: the code casts
 *	struct nilfs_bmap * to struct nilfs_btree *)
 * @bt_ops: B-tree operation table
 */
struct nilfs_btree {
	struct nilfs_bmap bt_bmap;

	/* B-tree-specific members */
	const struct nilfs_btree_operations *bt_ops;
};


/*
 * Fan-out limits: each child slot costs one 64-bit key plus one 64-bit
 * pointer; the root node lives inside the inode's fixed bmap area.
 */
#define NILFS_BTREE_ROOT_SIZE		NILFS_BMAP_SIZE
#define NILFS_BTREE_ROOT_NCHILDREN_MAX					\
	((NILFS_BTREE_ROOT_SIZE - sizeof(struct nilfs_btree_node)) /	\
	 (sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */)))
#define NILFS_BTREE_ROOT_NCHILDREN_MIN	0
#define NILFS_BTREE_NODE_EXTRA_PAD_SIZE	(sizeof(__le64))
#define NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize)			\
	(((nodesize) - sizeof(struct nilfs_btree_node) -		\
		NILFS_BTREE_NODE_EXTRA_PAD_SIZE) /			\
	 (sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */)))
#define NILFS_BTREE_NODE_NCHILDREN_MIN(nodesize)			\
	((NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) - 1) / 2 + 1)
#define NILFS_BTREE_KEY_MIN	((__u64)0)
#define NILFS_BTREE_KEY_MAX	(~(__u64)0)
107 | |||
108 | |||
/* set up / tear down the slab cache for btree path objects */
int nilfs_btree_path_cache_init(void);
void nilfs_btree_path_cache_destroy(void);

/* attach B-tree operations to a bmap covering keys in [low, high] */
int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64);
/* convert a bmap into B-tree form and insert a new (key, ptr) entry */
int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
				   const __u64 *, const __u64 *,
				   int, __u64, __u64);
/* attach the restricted operation set used during garbage collection */
void nilfs_btree_init_gc(struct nilfs_bmap *);
116 | |||
117 | #endif /* _NILFS_BTREE_H */ | ||
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c new file mode 100644 index 000000000000..e90b60dfced9 --- /dev/null +++ b/fs/nilfs2/cpfile.c | |||
@@ -0,0 +1,925 @@ | |||
1 | /* | ||
2 | * cpfile.c - NILFS checkpoint file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/fs.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "mdt.h" | ||
30 | #include "cpfile.h" | ||
31 | |||
32 | |||
/* number of checkpoint entries stored in one cpfile block */
static inline unsigned long
nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile)
{
	return NILFS_MDT(cpfile)->mi_entries_per_block;
}
38 | |||
/* block number from the beginning of the file */
static unsigned long
nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
{
	/*
	 * Checkpoint numbers start at 1; mi_first_entry_offset entry
	 * slots at the front of block 0 are occupied by the header.
	 */
	__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
	do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
	return (unsigned long)tcno;
}
47 | |||
/* offset in block; do_div() returns the remainder of the division */
static unsigned long
nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
{
	__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
	return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
}
55 | |||
/*
 * Number of checkpoint slots from @curr to the end of its block,
 * clamped so the count does not run past @max.
 */
static unsigned long
nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
				  __u64 curr,
				  __u64 max)
{
	return min_t(__u64,
		     nilfs_cpfile_checkpoints_per_block(cpfile) -
		     nilfs_cpfile_get_offset(cpfile, curr),
		     max - curr);
}
66 | |||
/* true if @cno lives in the first block, which also holds the header */
static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile,
					   __u64 cno)
{
	return nilfs_cpfile_get_blkoff(cpfile, cno) == 0;
}
72 | |||
73 | static unsigned int | ||
74 | nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile, | ||
75 | struct buffer_head *bh, | ||
76 | void *kaddr, | ||
77 | unsigned int n) | ||
78 | { | ||
79 | struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); | ||
80 | unsigned int count; | ||
81 | |||
82 | count = le32_to_cpu(cp->cp_checkpoints_count) + n; | ||
83 | cp->cp_checkpoints_count = cpu_to_le32(count); | ||
84 | return count; | ||
85 | } | ||
86 | |||
/*
 * Decrease the valid-checkpoint count kept in the block's first entry
 * by @n and return the new value; warns (but proceeds) on underflow.
 */
static unsigned int
nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile,
					 struct buffer_head *bh,
					 void *kaddr,
					 unsigned int n)
{
	struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
	unsigned int count;

	WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n);
	count = le32_to_cpu(cp->cp_checkpoints_count) - n;
	cp->cp_checkpoints_count = cpu_to_le32(count);
	return count;
}
101 | |||
/* the cpfile header sits at the very top of the first block */
static inline struct nilfs_cpfile_header *
nilfs_cpfile_block_get_header(const struct inode *cpfile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	return kaddr + bh_offset(bh);
}
109 | |||
/* address of the checkpoint entry for @cno within the mapped block */
static struct nilfs_checkpoint *
nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno,
				  struct buffer_head *bh,
				  void *kaddr)
{
	return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) *
		NILFS_MDT(cpfile)->mi_entry_size;
}
118 | |||
119 | static void nilfs_cpfile_block_init(struct inode *cpfile, | ||
120 | struct buffer_head *bh, | ||
121 | void *kaddr) | ||
122 | { | ||
123 | struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); | ||
124 | size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; | ||
125 | int n = nilfs_cpfile_checkpoints_per_block(cpfile); | ||
126 | |||
127 | while (n-- > 0) { | ||
128 | nilfs_checkpoint_set_invalid(cp); | ||
129 | cp = (void *)cp + cpsz; | ||
130 | } | ||
131 | } | ||
132 | |||
/* read (never create) the first block, which contains the header */
static inline int nilfs_cpfile_get_header_block(struct inode *cpfile,
						struct buffer_head **bhp)
{
	return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
}
138 | |||
/*
 * Read the block containing checkpoint @cno, allocating it (and
 * invalidating all its slots via nilfs_cpfile_block_init) if @create.
 */
static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
						    __u64 cno,
						    int create,
						    struct buffer_head **bhp)
{
	return nilfs_mdt_get_block(cpfile,
				   nilfs_cpfile_get_blkoff(cpfile, cno),
				   create, nilfs_cpfile_block_init, bhp);
}
148 | |||
/* delete the whole block containing checkpoint @cno */
static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
						       __u64 cno)
{
	return nilfs_mdt_delete_block(cpfile,
				      nilfs_cpfile_get_blkoff(cpfile, cno));
}
155 | |||
/**
 * nilfs_cpfile_get_checkpoint - get a checkpoint
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @create: create flag
 * @cpp: pointer to a checkpoint
 * @bhp: pointer to a buffer head
 *
 * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint
 * specified by @cno. A new checkpoint will be created if @cno is the current
 * checkpoint number and @create is nonzero.
 *
 * Return Value: On success, 0 is returned, and the checkpoint and the
 * buffer head of the buffer on which the checkpoint is located are stored in
 * the place pointed by @cpp and @bhp, respectively. On error, one of the
 * following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 *
 * %-EINVAL - invalid checkpoint.
 */
int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
				__u64 cno,
				int create,
				struct nilfs_checkpoint **cpp,
				struct buffer_head **bhp)
{
	struct buffer_head *header_bh, *cp_bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	void *kaddr;
	int ret;

	/* only the current checkpoint number may be created */
	if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) ||
		     (cno < nilfs_mdt_cno(cpfile) && create)))
		return -EINVAL;

	down_write(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
	if (ret < 0)
		goto out_sem;
	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh);
	if (ret < 0)
		goto out_header;
	/*
	 * Non-atomic kmap: the mapping must stay valid until the caller
	 * releases it via nilfs_cpfile_put_checkpoint().
	 */
	kaddr = kmap(cp_bh->b_page);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	if (nilfs_checkpoint_invalid(cp)) {
		if (!create) {
			kunmap(cp_bh->b_page);
			brelse(cp_bh);
			ret = -ENOENT;
			goto out_header;
		}
		/* a newly-created checkpoint */
		nilfs_checkpoint_clear_invalid(cp);
		/*
		 * Per-block counts are not kept in the first block;
		 * presumably the header's total suffices there -- confirm
		 * against the checkpoint-deletion path.
		 */
		if (!nilfs_cpfile_is_in_first(cpfile, cno))
			nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
								 kaddr, 1);
		nilfs_mdt_mark_buffer_dirty(cp_bh);

		/* bump the global checkpoint count in the header */
		kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
		header = nilfs_cpfile_block_get_header(cpfile, header_bh,
						       kaddr);
		le64_add_cpu(&header->ch_ncheckpoints, 1);
		kunmap_atomic(kaddr, KM_USER0);
		nilfs_mdt_mark_buffer_dirty(header_bh);
		nilfs_mdt_mark_dirty(cpfile);
	}

	if (cpp != NULL)
		*cpp = cp;
	*bhp = cp_bh;

 out_header:
	brelse(header_bh);

 out_sem:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
241 | |||
/**
 * nilfs_cpfile_put_checkpoint - put a checkpoint
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @bh: buffer head
 *
 * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint
 * specified by @cno. @bh must be the buffer head which has been returned by
 * a previous call to nilfs_cpfile_get_checkpoint() with @cno.
 * It undoes the kmap() and the buffer reference taken by that call.
 */
void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
				 struct buffer_head *bh)
{
	kunmap(bh->b_page);
	brelse(bh);
}
258 | |||
/**
 * nilfs_cpfile_delete_checkpoints - delete checkpoints
 * @cpfile: inode of checkpoint file
 * @start: start checkpoint number
 * @end: end checkpoint number
 *
 * Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in
 * the period from @start to @end, excluding @end itself. The checkpoints
 * which have been already deleted are ignored.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EINVAL - invalid checkpoints.
 */
int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
				    __u64 start,
				    __u64 end)
{
	struct buffer_head *header_bh, *cp_bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
	__u64 cno;
	void *kaddr;
	unsigned long tnicps;	/* total number of invalidated checkpoints */
	int ret, ncps, nicps, count, i;

	if (unlikely(start == 0 || start > end)) {
		printk(KERN_ERR "%s: invalid range of checkpoint numbers: "
		       "[%llu, %llu)\n", __func__,
		       (unsigned long long)start, (unsigned long long)end);
		return -EINVAL;
	}

	/* cannot delete the latest checkpoint */
	if (start == nilfs_mdt_cno(cpfile) - 1)
		return -EPERM;

	down_write(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
	if (ret < 0)
		goto out_sem;
	tnicps = 0;

	/* walk the range one checkpoint block at a time */
	for (cno = start; cno < end; cno += ncps) {
		ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end);
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
		if (ret < 0) {
			if (ret != -ENOENT)
				goto out_sem;
			/* skip hole */
			ret = 0;
			continue;
		}

		kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
		cp = nilfs_cpfile_block_get_checkpoint(
			cpfile, cno, cp_bh, kaddr);
		nicps = 0;
		/* invalidate every still-valid checkpoint in this block */
		for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) {
			/* snapshots must have been cleared before deletion */
			WARN_ON(nilfs_checkpoint_snapshot(cp));
			if (!nilfs_checkpoint_invalid(cp)) {
				nilfs_checkpoint_set_invalid(cp);
				nicps++;
			}
		}
		if (nicps > 0) {
			tnicps += nicps;
			nilfs_mdt_mark_buffer_dirty(cp_bh);
			nilfs_mdt_mark_dirty(cpfile);
			if (!nilfs_cpfile_is_in_first(cpfile, cno) &&
			    (count = nilfs_cpfile_block_sub_valid_checkpoints(
				    cpfile, cp_bh, kaddr, nicps)) == 0) {
				/* make hole: no valid checkpoints remain in
				   this block, so the block itself can go */
				kunmap_atomic(kaddr, KM_USER0);
				brelse(cp_bh);
				ret = nilfs_cpfile_delete_checkpoint_block(
					cpfile, cno);
				if (ret == 0)
					continue;
				printk(KERN_ERR "%s: cannot delete block\n",
				       __func__);
				goto out_sem;
			}
		}

		kunmap_atomic(kaddr, KM_USER0);
		brelse(cp_bh);
	}

	/* subtract the grand total from the header count once at the end */
	if (tnicps > 0) {
		kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
		header = nilfs_cpfile_block_get_header(cpfile, header_bh,
						       kaddr);
		le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
		nilfs_mdt_mark_buffer_dirty(header_bh);
		nilfs_mdt_mark_dirty(cpfile);
		kunmap_atomic(kaddr, KM_USER0);
	}
	brelse(header_bh);

 out_sem:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
370 | |||
/*
 * nilfs_cpfile_checkpoint_to_cpinfo - convert an on-disk checkpoint entry
 * into the user-visible nilfs_cpinfo form, converting each field from
 * little-endian to CPU byte order.
 */
static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile,
					      struct nilfs_checkpoint *cp,
					      struct nilfs_cpinfo *ci)
{
	ci->ci_flags = le32_to_cpu(cp->cp_flags);
	ci->ci_cno = le64_to_cpu(cp->cp_cno);
	ci->ci_create = le64_to_cpu(cp->cp_create);
	ci->ci_nblk_inc = le64_to_cpu(cp->cp_nblk_inc);
	ci->ci_inodes_count = le64_to_cpu(cp->cp_inodes_count);
	ci->ci_blocks_count = le64_to_cpu(cp->cp_blocks_count);
	ci->ci_next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
}
383 | |||
/*
 * nilfs_cpfile_do_get_cpinfo - batch-read checkpoint info starting at *cnop.
 * Fills up to @nci entries of @ci with valid checkpoints, skipping holes and
 * invalidated entries. On success returns the number of entries stored and
 * advances *cnop past the last one returned, so it can be used as a cursor
 * for the next call.
 */
static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
					  struct nilfs_cpinfo *ci, size_t nci)
{
	struct nilfs_checkpoint *cp;
	struct buffer_head *bh;
	size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
	__u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
	void *kaddr;
	int n, ret;
	int ncps, i;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_read(&NILFS_MDT(cpfile)->mi_sem);

	/* advance block by block; ncps is the entry count within a block */
	for (n = 0; cno < cur_cno && n < nci; cno += ncps) {
		ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
		if (ret < 0) {
			if (ret != -ENOENT)
				goto out;
			continue; /* skip hole */
		}

		kaddr = kmap_atomic(bh->b_page, KM_USER0);
		cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
		for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
			if (!nilfs_checkpoint_invalid(cp))
				nilfs_cpfile_checkpoint_to_cpinfo(
					cpfile, cp, &ci[n++]);
		}
		kunmap_atomic(kaddr, KM_USER0);
		brelse(bh);
	}

	ret = n;
	/* update the cursor to just past the last returned checkpoint */
	if (n > 0)
		*cnop = ci[n - 1].ci_cno + 1;

 out:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
427 | |||
/*
 * nilfs_cpfile_do_get_ssinfo - batch-read snapshot info following the
 * on-disk snapshot list. *cnop == 0 starts from the list head in the cpfile
 * header; ~0 is the terminator cursor meaning the list was exhausted by a
 * previous call. Returns the number of entries stored in @ci and leaves the
 * next list position (or ~0) in *cnop.
 */
static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
					  struct nilfs_cpinfo *ci, size_t nci)
{
	struct buffer_head *bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	__u64 curr = *cnop, next;
	unsigned long curr_blkoff, next_blkoff;
	void *kaddr;
	int n = 0, ret;

	down_read(&NILFS_MDT(cpfile)->mi_sem);

	if (curr == 0) {
		/* fetch the first snapshot number from the header list */
		ret = nilfs_cpfile_get_header_block(cpfile, &bh);
		if (ret < 0)
			goto out;
		kaddr = kmap_atomic(bh->b_page, KM_USER0);
		header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
		curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
		kunmap_atomic(kaddr, KM_USER0);
		brelse(bh);
		if (curr == 0) {
			ret = 0;
			goto out;
		}
	} else if (unlikely(curr == ~(__u64)0)) {
		/* cursor already hit the terminator on a previous call */
		ret = 0;
		goto out;
	}

	curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr);
	ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh);
	if (unlikely(ret < 0)) {
		if (ret == -ENOENT)
			ret = 0; /* No snapshots (started from a hole block) */
		goto out;
	}
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	while (n < nci) {
		cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
		curr = ~(__u64)0; /* Terminator */
		if (unlikely(nilfs_checkpoint_invalid(cp) ||
			     !nilfs_checkpoint_snapshot(cp)))
			break;
		nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]);
		next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
		if (next == 0)
			break; /* reach end of the snapshot list */

		/* remap only when the next entry lives in another block */
		next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
		if (curr_blkoff != next_blkoff) {
			kunmap_atomic(kaddr, KM_USER0);
			brelse(bh);
			ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
								0, &bh);
			if (unlikely(ret < 0)) {
				WARN_ON(ret == -ENOENT);
				goto out;
			}
			kaddr = kmap_atomic(bh->b_page, KM_USER0);
		}
		curr = next;
		curr_blkoff = next_blkoff;
	}
	kunmap_atomic(kaddr, KM_USER0);
	brelse(bh);
	*cnop = curr;
	ret = n;

 out:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
502 | |||
503 | /** | ||
504 | * nilfs_cpfile_get_cpinfo - | ||
505 | * @cpfile: | ||
506 | * @cno: | ||
507 | * @ci: | ||
508 | * @nci: | ||
509 | */ | ||
510 | |||
511 | ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, | ||
512 | struct nilfs_cpinfo *ci, size_t nci) | ||
513 | { | ||
514 | switch (mode) { | ||
515 | case NILFS_CHECKPOINT: | ||
516 | return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); | ||
517 | case NILFS_SNAPSHOT: | ||
518 | return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); | ||
519 | default: | ||
520 | return -EINVAL; | ||
521 | } | ||
522 | } | ||
523 | |||
524 | /** | ||
525 | * nilfs_cpfile_delete_checkpoint - | ||
526 | * @cpfile: | ||
527 | * @cno: | ||
528 | */ | ||
529 | int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) | ||
530 | { | ||
531 | struct nilfs_cpinfo ci; | ||
532 | __u64 tcno = cno; | ||
533 | ssize_t nci; | ||
534 | int ret; | ||
535 | |||
536 | nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1); | ||
537 | if (nci < 0) | ||
538 | return nci; | ||
539 | else if (nci == 0 || ci.ci_cno != cno) | ||
540 | return -ENOENT; | ||
541 | |||
542 | /* cannot delete the latest checkpoint nor snapshots */ | ||
543 | ret = nilfs_cpinfo_snapshot(&ci); | ||
544 | if (ret < 0) | ||
545 | return ret; | ||
546 | else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1) | ||
547 | return -EPERM; | ||
548 | |||
549 | return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1); | ||
550 | } | ||
551 | |||
552 | static struct nilfs_snapshot_list * | ||
553 | nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile, | ||
554 | __u64 cno, | ||
555 | struct buffer_head *bh, | ||
556 | void *kaddr) | ||
557 | { | ||
558 | struct nilfs_cpfile_header *header; | ||
559 | struct nilfs_checkpoint *cp; | ||
560 | struct nilfs_snapshot_list *list; | ||
561 | |||
562 | if (cno != 0) { | ||
563 | cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); | ||
564 | list = &cp->cp_snapshot_list; | ||
565 | } else { | ||
566 | header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); | ||
567 | list = &header->ch_snapshot_list; | ||
568 | } | ||
569 | return list; | ||
570 | } | ||
571 | |||
/*
 * nilfs_cpfile_set_snapshot - turn checkpoint @cno into a snapshot.
 * The snapshot list is a doubly linked list threaded through the on-disk
 * checkpoint entries (anchored in the cpfile header) and kept sorted by
 * checkpoint number; this walks backwards from the list tail to find the
 * insertion point, then splices @cno in between @prev and @curr.
 * Returns 0 on success (including when @cno already is a snapshot).
 */
static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
{
	struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	struct nilfs_snapshot_list *list;
	__u64 curr, prev;
	unsigned long curr_blkoff, prev_blkoff;
	void *kaddr;
	int ret;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_write(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
	if (ret < 0)
		goto out_sem;
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	if (nilfs_checkpoint_invalid(cp)) {
		ret = -ENOENT;
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}
	if (nilfs_checkpoint_snapshot(cp)) {
		/* already a snapshot: nothing to do */
		ret = 0;
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}
	kunmap_atomic(kaddr, KM_USER0);

	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
	if (ret < 0)
		goto out_cp;
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
	list = &header->ch_snapshot_list;
	/* start the backward walk at the list head (curr == 0 == header) */
	curr_bh = header_bh;
	get_bh(curr_bh);
	curr = 0;
	curr_blkoff = 0;
	prev = le64_to_cpu(list->ssl_prev);
	/* walk towards smaller cnos until prev <= cno; keep the list sorted */
	while (prev > cno) {
		prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
		curr = prev;
		if (curr_blkoff != prev_blkoff) {
			/* moved into another checkpoint block: remap */
			kunmap_atomic(kaddr, KM_USER0);
			brelse(curr_bh);
			ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
								0, &curr_bh);
			if (ret < 0)
				goto out_header;
			kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
		}
		curr_blkoff = prev_blkoff;
		cp = nilfs_cpfile_block_get_checkpoint(
			cpfile, curr, curr_bh, kaddr);
		list = &cp->cp_snapshot_list;
		prev = le64_to_cpu(list->ssl_prev);
	}
	kunmap_atomic(kaddr, KM_USER0);

	if (prev != 0) {
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
							&prev_bh);
		if (ret < 0)
			goto out_curr;
	} else {
		/* prev == 0 means the header anchors the list */
		prev_bh = header_bh;
		get_bh(prev_bh);
	}

	/* splice @cno between @prev and @curr: curr->prev = cno */
	kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, curr, curr_bh, kaddr);
	list->ssl_prev = cpu_to_le64(cno);
	kunmap_atomic(kaddr, KM_USER0);

	/* cno->next = curr, cno->prev = prev; mark it a snapshot */
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
	cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
	nilfs_checkpoint_set_snapshot(cp);
	kunmap_atomic(kaddr, KM_USER0);

	/* prev->next = cno */
	kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, prev, prev_bh, kaddr);
	list->ssl_next = cpu_to_le64(cno);
	kunmap_atomic(kaddr, KM_USER0);

	/* bump the snapshot count kept in the header */
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
	le64_add_cpu(&header->ch_nsnapshots, 1);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(prev_bh);
	nilfs_mdt_mark_buffer_dirty(curr_bh);
	nilfs_mdt_mark_buffer_dirty(cp_bh);
	nilfs_mdt_mark_buffer_dirty(header_bh);
	nilfs_mdt_mark_dirty(cpfile);

	brelse(prev_bh);

 out_curr:
	brelse(curr_bh);

 out_header:
	brelse(header_bh);

 out_cp:
	brelse(cp_bh);

 out_sem:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
690 | |||
/*
 * nilfs_cpfile_clear_snapshot - turn snapshot @cno back into a plain
 * checkpoint by unlinking it from the on-disk doubly linked snapshot list
 * (next/prev neighbours are re-linked to each other; cno 0 in either
 * direction denotes the list head kept in the cpfile header).
 * Returns 0 on success (including when @cno is not a snapshot).
 */
static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
{
	struct buffer_head *header_bh, *next_bh, *prev_bh, *cp_bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	struct nilfs_snapshot_list *list;
	__u64 next, prev;
	void *kaddr;
	int ret;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_write(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
	if (ret < 0)
		goto out_sem;
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	if (nilfs_checkpoint_invalid(cp)) {
		ret = -ENOENT;
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}
	if (!nilfs_checkpoint_snapshot(cp)) {
		/* not a snapshot: nothing to clear */
		ret = 0;
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}

	/* remember both neighbours before dropping the mapping */
	list = &cp->cp_snapshot_list;
	next = le64_to_cpu(list->ssl_next);
	prev = le64_to_cpu(list->ssl_prev);
	kunmap_atomic(kaddr, KM_USER0);

	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
	if (ret < 0)
		goto out_cp;
	if (next != 0) {
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0,
							&next_bh);
		if (ret < 0)
			goto out_header;
	} else {
		/* next == 0: the neighbour is the header list anchor */
		next_bh = header_bh;
		get_bh(next_bh);
	}
	if (prev != 0) {
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
							&prev_bh);
		if (ret < 0)
			goto out_next;
	} else {
		/* prev == 0: the neighbour is the header list anchor */
		prev_bh = header_bh;
		get_bh(prev_bh);
	}

	/* next->prev = prev */
	kaddr = kmap_atomic(next_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, next, next_bh, kaddr);
	list->ssl_prev = cpu_to_le64(prev);
	kunmap_atomic(kaddr, KM_USER0);

	/* prev->next = next */
	kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, prev, prev_bh, kaddr);
	list->ssl_next = cpu_to_le64(next);
	kunmap_atomic(kaddr, KM_USER0);

	/* detach @cno and clear its snapshot flag */
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
	cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
	nilfs_checkpoint_clear_snapshot(cp);
	kunmap_atomic(kaddr, KM_USER0);

	/* drop the snapshot count kept in the header */
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
	le64_add_cpu(&header->ch_nsnapshots, -1);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(next_bh);
	nilfs_mdt_mark_buffer_dirty(prev_bh);
	nilfs_mdt_mark_buffer_dirty(cp_bh);
	nilfs_mdt_mark_buffer_dirty(header_bh);
	nilfs_mdt_mark_dirty(cpfile);

	brelse(prev_bh);

 out_next:
	brelse(next_bh);

 out_header:
	brelse(header_bh);

 out_cp:
	brelse(cp_bh);

 out_sem:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
793 | |||
/**
 * nilfs_cpfile_is_snapshot - test whether a checkpoint is a snapshot
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 *
 * Description: nilfs_cpfile_is_snapshot() tests the snapshot flag of the
 * on-disk checkpoint entry specified by @cno.
 *
 * Return Value: On success, 1 is returned if the checkpoint specified by
 * @cno is a snapshot, or 0 if not. On error, one of the following negative
 * error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 */
int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
{
	struct buffer_head *bh;
	struct nilfs_checkpoint *cp;
	void *kaddr;
	int ret;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_read(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
	if (ret < 0)
		goto out;
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
	ret = nilfs_checkpoint_snapshot(cp);
	kunmap_atomic(kaddr, KM_USER0);
	brelse(bh);

 out:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
835 | |||
/**
 * nilfs_cpfile_change_cpmode - change checkpoint mode
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @mode: mode of checkpoint
 *
 * Description: nilfs_cpfile_change_cpmode() changes the mode of the
 * checkpoint specified by @cno. The mode @mode is NILFS_CHECKPOINT or
 * NILFS_SNAPSHOT.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 */
int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
{
	struct the_nilfs *nilfs;
	int ret;

	nilfs = NILFS_MDT(cpfile)->mi_nilfs;

	switch (mode) {
	case NILFS_CHECKPOINT:
		/*
		 * Check for protecting existing snapshot mounts:
		 * bd_mount_sem is used to make this operation atomic and
		 * exclusive with a new mount job. Though it doesn't cover
		 * umount, it's enough for the purpose.
		 */
		down(&nilfs->ns_bdev->bd_mount_sem);
		if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
			/* Current implementation does not have to protect
			   plain read-only mounts since they are exclusive
			   with a read/write mount and are protected from the
			   cleaner. */
			ret = -EBUSY;
		} else
			ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
		up(&nilfs->ns_bdev->bd_mount_sem);
		return ret;
	case NILFS_SNAPSHOT:
		return nilfs_cpfile_set_snapshot(cpfile, cno);
	default:
		return -EINVAL;
	}
}
886 | |||
/**
 * nilfs_cpfile_get_stat - get checkpoint statistics
 * @cpfile: inode of checkpoint file
 * @cpstat: pointer to a structure of checkpoint statistics
 *
 * Description: nilfs_cpfile_get_stat() returns information about
 * checkpoints: the current checkpoint number and the total numbers of
 * checkpoints and snapshots recorded in the cpfile header.
 *
 * Return Value: On success, 0 is returned, and checkpoints information is
 * stored in the place pointed by @cpstat. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
{
	struct buffer_head *bh;
	struct nilfs_cpfile_header *header;
	void *kaddr;
	int ret;

	down_read(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_header_block(cpfile, &bh);
	if (ret < 0)
		goto out_sem;
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
	cpstat->cs_cno = nilfs_mdt_cno(cpfile);
	cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
	cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
	kunmap_atomic(kaddr, KM_USER0);
	brelse(bh);

 out_sem:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h new file mode 100644 index 000000000000..1a8a1008c342 --- /dev/null +++ b/fs/nilfs2/cpfile.h | |||
/*
 * cpfile.h - NILFS checkpoint file.
 *
 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Koji Sato <koji@osrg.net>.
 */

#ifndef _NILFS_CPFILE_H
#define _NILFS_CPFILE_H

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>

/* allocation mask used when reading cpfile metadata pages */
#define NILFS_CPFILE_GFP	NILFS_MDT_GFP


/* public interface of the checkpoint file (see cpfile.c) */
int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
				struct nilfs_checkpoint **,
				struct buffer_head **);
void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
int nilfs_cpfile_is_snapshot(struct inode *, __u64);
int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int,
				struct nilfs_cpinfo *, size_t);

#endif	/* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c new file mode 100644 index 000000000000..bb8a5818e7f1 --- /dev/null +++ b/fs/nilfs2/dat.c | |||
@@ -0,0 +1,430 @@ | |||
1 | /* | ||
2 | * dat.c - NILFS disk address translation. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/types.h> | ||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/errno.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "mdt.h" | ||
29 | #include "alloc.h" | ||
30 | #include "dat.h" | ||
31 | |||
32 | |||
33 | #define NILFS_CNO_MIN ((__u64)1) | ||
34 | #define NILFS_CNO_MAX (~(__u64)0) | ||
35 | |||
/*
 * Read (or create, if @create) the DAT entry block containing
 * @req->pr_entry_nr and pin its buffer head in req->pr_entry_bh.
 * Thin passthrough to the persistent-object allocator.
 */
static int nilfs_dat_prepare_entry(struct inode *dat,
				   struct nilfs_palloc_req *req, int create)
{
	return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
					    create, &req->pr_entry_bh);
}
42 | |||
/*
 * Mark the pinned entry buffer and the DAT inode dirty, then drop the
 * buffer reference taken by nilfs_dat_prepare_entry().
 */
static void nilfs_dat_commit_entry(struct inode *dat,
				   struct nilfs_palloc_req *req)
{
	nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
	nilfs_mdt_mark_dirty(dat);
	brelse(req->pr_entry_bh);
}
50 | |||
/* Undo nilfs_dat_prepare_entry(): release the buffer without dirtying. */
static void nilfs_dat_abort_entry(struct inode *dat,
				  struct nilfs_palloc_req *req)
{
	brelse(req->pr_entry_bh);
}
56 | |||
/*
 * Reserve a new DAT entry: first reserve allocator resources, then pin
 * the entry block (creating it if necessary).  If pinning fails, the
 * allocator reservation is rolled back before returning the error.
 */
int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err = nilfs_palloc_prepare_alloc_entry(dat, req);

	if (err < 0)
		return err;

	err = nilfs_dat_prepare_entry(dat, req, 1);
	if (err < 0)
		nilfs_palloc_abort_alloc_entry(dat, req);
	return err;
}
71 | |||
/*
 * Finish allocating a DAT entry.  The new entry is initialized with the
 * full lifetime [NILFS_CNO_MIN, NILFS_CNO_MAX) and no disk block assigned
 * yet (de_blocknr == 0 means "unmapped").
 */
void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
	entry->de_blocknr = cpu_to_le64(0);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_palloc_commit_alloc_entry(dat, req);
	nilfs_dat_commit_entry(dat, req);
}
88 | |||
/* Undo nilfs_dat_prepare_alloc(): unpin the entry, cancel the reservation. */
void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
	nilfs_palloc_abort_alloc_entry(dat, req);
}
94 | |||
/*
 * Reserve removal of a DAT entry: reserve the allocator-side free, then
 * pin the entry block.  The reservation is rolled back if pinning fails.
 */
int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err = nilfs_palloc_prepare_free_entry(dat, req);

	if (err < 0)
		return err;

	err = nilfs_dat_prepare_entry(dat, req, 0);
	if (err < 0) {
		nilfs_palloc_abort_free_entry(dat, req);
		return err;
	}
	return 0;
}
109 | |||
/*
 * Finish freeing a DAT entry: reset its lifetime to the empty interval
 * [CNO_MIN, CNO_MIN) and clear its block mapping, then complete both the
 * entry write-out and the allocator-side free.
 */
void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_blocknr = cpu_to_le64(0);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_dat_commit_entry(dat, req);
	nilfs_palloc_commit_free_entry(dat, req);
}
126 | |||
/* Undo nilfs_dat_prepare_free(): unpin the entry, cancel the free. */
void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
	nilfs_palloc_abort_free_entry(dat, req);
}
132 | |||
/*
 * Prepare to assign a disk block to @req->pr_entry_nr: pin the entry
 * block without creating it.  The entry must already exist — callers
 * pass allocated virtual block numbers, hence the WARN on -ENOENT.
 */
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	int ret;

	ret = nilfs_dat_prepare_entry(dat, req, 0);
	WARN_ON(ret == -ENOENT);
	return ret;
}
141 | |||
142 | void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, | ||
143 | sector_t blocknr) | ||
144 | { | ||
145 | struct nilfs_dat_entry *entry; | ||
146 | void *kaddr; | ||
147 | |||
148 | kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0); | ||
149 | entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, | ||
150 | req->pr_entry_bh, kaddr); | ||
151 | entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); | ||
152 | if (entry->de_blocknr != cpu_to_le64(0) || | ||
153 | entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) { | ||
154 | printk(KERN_CRIT | ||
155 | "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n", | ||
156 | __func__, (unsigned long long)req->pr_entry_nr, | ||
157 | (unsigned long long)le64_to_cpu(entry->de_start), | ||
158 | (unsigned long long)le64_to_cpu(entry->de_end), | ||
159 | (unsigned long long)le64_to_cpu(entry->de_blocknr)); | ||
160 | } | ||
161 | entry->de_blocknr = cpu_to_le64(blocknr); | ||
162 | kunmap_atomic(kaddr, KM_USER0); | ||
163 | |||
164 | nilfs_dat_commit_entry(dat, req); | ||
165 | } | ||
166 | |||
/* Undo nilfs_dat_prepare_start(): just release the pinned entry block. */
void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
}
171 | |||
/*
 * Prepare to terminate the lifetime of the entry for @req->pr_entry_nr.
 * If the entry has no disk block assigned (de_blocknr == 0), additionally
 * reserve the allocator resources needed to free the entry itself at
 * commit time (see nilfs_dat_commit_end()).
 */
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	__u64 start;
	sector_t blocknr;
	void *kaddr;
	int ret;

	ret = nilfs_dat_prepare_entry(dat, req, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);	/* entry should already exist */
		return ret;
	}

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	start = le64_to_cpu(entry->de_start);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	if (blocknr == 0) {
		/* unmapped entry: it will be freed entirely at commit */
		ret = nilfs_palloc_prepare_free_entry(dat, req);
		if (ret < 0) {
			nilfs_dat_abort_entry(dat, req);
			return ret;
		}
	}

	return 0;
}
203 | |||
/*
 * Finish terminating an entry's lifetime.  If @dead, end is set equal to
 * start (an empty lifetime: the block belongs to no checkpoint);
 * otherwise the lifetime is closed at the current checkpoint number.
 * Unmapped entries (de_blocknr == 0) are freed outright, completing the
 * reservation made in nilfs_dat_prepare_end().
 */
void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
			  int dead)
{
	struct nilfs_dat_entry *entry;
	__u64 start, end;
	sector_t blocknr;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	end = start = le64_to_cpu(entry->de_start);
	if (!dead) {
		end = nilfs_mdt_cno(dat);
		WARN_ON(start > end);	/* lifetime must not be inverted */
	}
	entry->de_end = cpu_to_le64(end);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	if (blocknr == 0)
		nilfs_dat_commit_free(dat, req);
	else
		nilfs_dat_commit_entry(dat, req);
}
229 | |||
/*
 * Undo nilfs_dat_prepare_end(), cancelling the entry-free reservation
 * when one was made for an unmapped entry.
 * NOTE(review): prepare_end reserves the free on blocknr == 0 alone,
 * while the cancel here additionally requires start == current cno —
 * presumably restricting the undo to entries born in the current
 * checkpoint; confirm against callers.
 */
void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	__u64 start;
	sector_t blocknr;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	start = le64_to_cpu(entry->de_start);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	if (start == nilfs_mdt_cno(dat) && blocknr == 0)
		nilfs_palloc_abort_free_entry(dat, req);
	nilfs_dat_abort_entry(dat, req);
}
248 | |||
249 | /** | ||
250 | * nilfs_dat_mark_dirty - | ||
251 | * @dat: DAT file inode | ||
252 | * @vblocknr: virtual block number | ||
253 | * | ||
254 | * Description: | ||
255 | * | ||
256 | * Return Value: On success, 0 is returned. On error, one of the following | ||
257 | * negative error codes is returned. | ||
258 | * | ||
259 | * %-EIO - I/O error. | ||
260 | * | ||
261 | * %-ENOMEM - Insufficient amount of memory available. | ||
262 | */ | ||
263 | int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) | ||
264 | { | ||
265 | struct nilfs_palloc_req req; | ||
266 | int ret; | ||
267 | |||
268 | req.pr_entry_nr = vblocknr; | ||
269 | ret = nilfs_dat_prepare_entry(dat, &req, 0); | ||
270 | if (ret == 0) | ||
271 | nilfs_dat_commit_entry(dat, &req); | ||
272 | return ret; | ||
273 | } | ||
274 | |||
/**
 * nilfs_dat_freev - free virtual block numbers
 * @dat: DAT file inode
 * @vblocknrs: array of virtual block numbers
 * @nitems: number of virtual block numbers
 *
 * Description: nilfs_dat_freev() frees the virtual block numbers specified by
 * @vblocknrs and @nitems.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The virtual block number have not been allocated.
 */
int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
{
	/* thin wrapper: the bookkeeping is entirely palloc's job */
	return nilfs_palloc_freev(dat, vblocknrs, nitems);
}
297 | |||
/**
 * nilfs_dat_move - change a block number
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 * @blocknr: block number
 *
 * Description: nilfs_dat_move() changes the block number associated with
 * @vblocknr to @blocknr.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EINVAL - @vblocknr has no disk block assigned (entry is unmapped).
 */
int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
{
	struct buffer_head *entry_bh;
	struct nilfs_dat_entry *entry;
	void *kaddr;
	int ret;

	ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
	if (ret < 0)
		return ret;
	kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
	if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
		/* moving an unmapped entry indicates metadata corruption */
		printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
		       (unsigned long long)vblocknr,
		       (unsigned long long)le64_to_cpu(entry->de_start),
		       (unsigned long long)le64_to_cpu(entry->de_end));
		kunmap_atomic(kaddr, KM_USER0);
		brelse(entry_bh);
		return -EINVAL;
	}
	WARN_ON(blocknr == 0);	/* 0 means "unmapped": never a valid target */
	entry->de_blocknr = cpu_to_le64(blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(entry_bh);
	nilfs_mdt_mark_dirty(dat);

	brelse(entry_bh);

	return 0;
}
346 | |||
/**
 * nilfs_dat_translate - translate a virtual block number to a block number
 * @dat: DAT file inode
 * @vblocknr: virtual block number
 * @blocknrp: pointer to a block number (may be NULL for an existence check)
 *
 * Description: nilfs_dat_translate() maps the virtual block number @vblocknr
 * to the corresponding block number.
 *
 * Return Value: On success, 0 is returned and the block number associated
 * with @vblocknr is stored in the place pointed by @blocknrp. On error, one
 * of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - A block number associated with @vblocknr does not exist.
 */
int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
{
	struct buffer_head *entry_bh;
	struct nilfs_dat_entry *entry;
	sector_t blocknr;
	void *kaddr;
	int ret;

	ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
	if (ret < 0)
		return ret;

	kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
	blocknr = le64_to_cpu(entry->de_blocknr);
	if (blocknr == 0) {
		/* entry exists but is not mapped to a disk block */
		ret = -ENOENT;
		goto out;
	}
	if (blocknrp != NULL)
		*blocknrp = blocknr;

 out:	/* ret is still 0 here on the success path */
	kunmap_atomic(kaddr, KM_USER0);
	brelse(entry_bh);
	return ret;
}
393 | |||
/**
 * nilfs_dat_get_vinfo - fill lifetime and location info of virtual blocks
 * @dat: DAT file inode
 * @vinfo: array of nilfs_vinfo with vi_vblocknr filled in by the caller
 * @nvi: number of elements in @vinfo
 *
 * For each requested virtual block number, stores the entry's start/end
 * checkpoint numbers and current disk block number.  Requests that fall
 * into the same DAT entry block are served under one block lookup/kmap.
 * NOTE(review): the batching assumes @vinfo is sorted by vi_vblocknr so
 * that requests sharing a block are adjacent — confirm with callers.
 *
 * Return Value: @nvi on success, or a negative error code on lookup
 * failure.
 */
ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo,
			    size_t nvi)
{
	struct buffer_head *entry_bh;
	struct nilfs_dat_entry *entry;
	__u64 first, last;
	void *kaddr;
	unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
	int i, j, n, ret;

	for (i = 0; i < nvi; i += n) {
		ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr,
						   0, &entry_bh);
		if (ret < 0)
			return ret;
		kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
		/* last virtual block number in this block */
		first = vinfo[i].vi_vblocknr;
		do_div(first, entries_per_block);	/* round down in place */
		first *= entries_per_block;
		last = first + entries_per_block - 1;
		for (j = i, n = 0;
		     j < nvi && vinfo[j].vi_vblocknr >= first &&
			     vinfo[j].vi_vblocknr <= last;
		     j++, n++) {
			entry = nilfs_palloc_block_get_entry(
				dat, vinfo[j].vi_vblocknr, entry_bh, kaddr);
			vinfo[j].vi_start = le64_to_cpu(entry->de_start);
			vinfo[j].vi_end = le64_to_cpu(entry->de_end);
			vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr);
		}
		kunmap_atomic(kaddr, KM_USER0);
		brelse(entry_bh);
	}

	return nvi;
}
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h new file mode 100644 index 000000000000..d9560654a4b7 --- /dev/null +++ b/fs/nilfs2/dat.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * dat.h - NILFS disk address translation. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_DAT_H | ||
24 | #define _NILFS_DAT_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/fs.h> | ||
29 | |||
30 | #define NILFS_DAT_GFP NILFS_MDT_GFP | ||
31 | |||
32 | struct nilfs_palloc_req; | ||
33 | |||
34 | int nilfs_dat_translate(struct inode *, __u64, sector_t *); | ||
35 | |||
36 | int nilfs_dat_prepare_alloc(struct inode *, struct nilfs_palloc_req *); | ||
37 | void nilfs_dat_commit_alloc(struct inode *, struct nilfs_palloc_req *); | ||
38 | void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *); | ||
39 | int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); | ||
40 | void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, | ||
41 | sector_t); | ||
42 | void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *); | ||
43 | int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); | ||
44 | void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); | ||
45 | void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); | ||
46 | |||
47 | int nilfs_dat_mark_dirty(struct inode *, __u64); | ||
48 | int nilfs_dat_freev(struct inode *, __u64 *, size_t); | ||
49 | int nilfs_dat_move(struct inode *, __u64, sector_t); | ||
50 | ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); | ||
51 | |||
52 | #endif /* _NILFS_DAT_H */ | ||
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c new file mode 100644 index 000000000000..54100acc1102 --- /dev/null +++ b/fs/nilfs2/dir.c | |||
@@ -0,0 +1,711 @@ | |||
1 | /* | ||
2 | * dir.c - NILFS directory entry operations | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net> | ||
21 | */ | ||
22 | /* | ||
23 | * linux/fs/ext2/dir.c | ||
24 | * | ||
25 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
26 | * Remy Card (card@masi.ibp.fr) | ||
27 | * Laboratoire MASI - Institut Blaise Pascal | ||
28 | * Universite Pierre et Marie Curie (Paris VI) | ||
29 | * | ||
30 | * from | ||
31 | * | ||
32 | * linux/fs/minix/dir.c | ||
33 | * | ||
34 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
35 | * | ||
36 | * ext2 directory handling functions | ||
37 | * | ||
38 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
39 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
40 | * | ||
41 | * All code that works with directory layout had been switched to pagecache | ||
42 | * and moved here. AV | ||
43 | */ | ||
44 | |||
45 | #include <linux/pagemap.h> | ||
46 | #include <linux/smp_lock.h> | ||
47 | #include "nilfs.h" | ||
48 | #include "page.h" | ||
49 | |||
/*
 * nilfs uses block-sized chunks. Arguably, sector-sized ones would be
 * more robust, but we have what we have
 */
static inline unsigned nilfs_chunk_size(struct inode *inode)
{
	/* one directory chunk == one file-system block */
	return inode->i_sb->s_blocksize;
}
58 | |||
/* Release a page obtained from nilfs_get_page(): kunmap and drop the ref. */
static inline void nilfs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
64 | |||
/* Number of pagecache pages spanned by the directory (rounded up). */
static inline unsigned long dir_pages(struct inode *inode)
{
	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
}
69 | |||
/*
 * Return the offset into page `page_nr' of the last valid
 * byte in that page, plus one.
 */
static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
{
	unsigned last_byte = inode->i_size;

	/* NOTE(review): assumes page_nr does not lie beyond the page holding
	 * i_size, otherwise the unsigned subtraction wraps; callers iterate
	 * within dir_pages() so this holds. */
	last_byte -= page_nr << PAGE_CACHE_SHIFT;
	if (last_byte > PAGE_CACHE_SIZE)
		last_byte = PAGE_CACHE_SIZE;
	return last_byte;
}
83 | |||
/*
 * Begin a write to [from, to) of a directory chunk in @page, with
 * AOP_FLAG_UNINTERRUPTIBLE so the operation is not restartable; used on
 * paths that must not fail halfway through an update.
 */
static int nilfs_prepare_chunk_uninterruptible(struct page *page,
					       struct address_space *mapping,
					       unsigned from, unsigned to)
{
	loff_t pos = page_offset(page) + from;
	return block_write_begin(NULL, mapping, pos, to - from,
				 AOP_FLAG_UNINTERRUPTIBLE, &page,
				 NULL, nilfs_get_block);
}
93 | |||
/* Begin an (interruptible) write to [from, to) of a directory chunk. */
static int nilfs_prepare_chunk(struct page *page,
			       struct address_space *mapping,
			       unsigned from, unsigned to)
{
	loff_t pos = page_offset(page) + from;
	return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
				 NULL, nilfs_get_block);
}
102 | |||
/*
 * Complete a chunk write started by nilfs_prepare_chunk*(): finish the
 * block write, extend i_size if the write grew the directory, request a
 * synchronous transaction for DIRSYNC directories, and register the
 * newly dirtied buffers with the log writer.  Unlocks @page.
 */
static int nilfs_commit_chunk(struct page *page,
			      struct address_space *mapping,
			      unsigned from, unsigned to)
{
	struct inode *dir = mapping->host;
	struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
	loff_t pos = page_offset(page) + from;
	unsigned len = to - from;
	unsigned nr_dirty, copied;
	int err;

	/* count clean buffers before block_write_end() dirties them */
	nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
	copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
	if (pos + copied > dir->i_size) {
		i_size_write(dir, pos + copied);
		mark_inode_dirty(dir);
	}
	if (IS_DIRSYNC(dir))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
	unlock_page(page);
	return err;
}
126 | |||
127 | static void nilfs_check_page(struct page *page) | ||
128 | { | ||
129 | struct inode *dir = page->mapping->host; | ||
130 | struct super_block *sb = dir->i_sb; | ||
131 | unsigned chunk_size = nilfs_chunk_size(dir); | ||
132 | char *kaddr = page_address(page); | ||
133 | unsigned offs, rec_len; | ||
134 | unsigned limit = PAGE_CACHE_SIZE; | ||
135 | struct nilfs_dir_entry *p; | ||
136 | char *error; | ||
137 | |||
138 | if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { | ||
139 | limit = dir->i_size & ~PAGE_CACHE_MASK; | ||
140 | if (limit & (chunk_size - 1)) | ||
141 | goto Ebadsize; | ||
142 | if (!limit) | ||
143 | goto out; | ||
144 | } | ||
145 | for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { | ||
146 | p = (struct nilfs_dir_entry *)(kaddr + offs); | ||
147 | rec_len = le16_to_cpu(p->rec_len); | ||
148 | |||
149 | if (rec_len < NILFS_DIR_REC_LEN(1)) | ||
150 | goto Eshort; | ||
151 | if (rec_len & 3) | ||
152 | goto Ealign; | ||
153 | if (rec_len < NILFS_DIR_REC_LEN(p->name_len)) | ||
154 | goto Enamelen; | ||
155 | if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) | ||
156 | goto Espan; | ||
157 | } | ||
158 | if (offs != limit) | ||
159 | goto Eend; | ||
160 | out: | ||
161 | SetPageChecked(page); | ||
162 | return; | ||
163 | |||
164 | /* Too bad, we had an error */ | ||
165 | |||
166 | Ebadsize: | ||
167 | nilfs_error(sb, "nilfs_check_page", | ||
168 | "size of directory #%lu is not a multiple of chunk size", | ||
169 | dir->i_ino | ||
170 | ); | ||
171 | goto fail; | ||
172 | Eshort: | ||
173 | error = "rec_len is smaller than minimal"; | ||
174 | goto bad_entry; | ||
175 | Ealign: | ||
176 | error = "unaligned directory entry"; | ||
177 | goto bad_entry; | ||
178 | Enamelen: | ||
179 | error = "rec_len is too small for name_len"; | ||
180 | goto bad_entry; | ||
181 | Espan: | ||
182 | error = "directory entry across blocks"; | ||
183 | bad_entry: | ||
184 | nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - " | ||
185 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | ||
186 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
187 | (unsigned long) le64_to_cpu(p->inode), | ||
188 | rec_len, p->name_len); | ||
189 | goto fail; | ||
190 | Eend: | ||
191 | p = (struct nilfs_dir_entry *)(kaddr + offs); | ||
192 | nilfs_error(sb, "nilfs_check_page", | ||
193 | "entry in directory #%lu spans the page boundary" | ||
194 | "offset=%lu, inode=%lu", | ||
195 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
196 | (unsigned long) le64_to_cpu(p->inode)); | ||
197 | fail: | ||
198 | SetPageChecked(page); | ||
199 | SetPageError(page); | ||
200 | } | ||
201 | |||
/*
 * Read directory page @n into the pagecache, kmap it, and validate its
 * records (once per page, cached via PageChecked).  Returns the mapped
 * page or ERR_PTR(-EIO); release with nilfs_put_page().
 */
static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
{
	struct address_space *mapping = dir->i_mapping;
	struct page *page = read_cache_page(mapping, n,
				(filler_t *)mapping->a_ops->readpage, NULL);
	if (!IS_ERR(page)) {
		wait_on_page_locked(page);
		kmap(page);
		if (!PageUptodate(page))
			goto fail;	/* read I/O failed */
		if (!PageChecked(page))
			nilfs_check_page(page);
		if (PageError(page))
			goto fail;	/* validation failed */
	}
	return page;

fail:
	nilfs_put_page(page);
	return ERR_PTR(-EIO);
}
223 | |||
224 | /* | ||
225 | * NOTE! unlike strncmp, nilfs_match returns 1 for success, 0 for failure. | ||
226 | * | ||
227 | * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller. | ||
228 | */ | ||
229 | static int | ||
230 | nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de) | ||
231 | { | ||
232 | if (len != de->name_len) | ||
233 | return 0; | ||
234 | if (!de->inode) | ||
235 | return 0; | ||
236 | return !memcmp(name, de->name, len); | ||
237 | } | ||
238 | |||
/*
 * p is at least 6 bytes before the end of page
 */
static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
{
	/* advance by the on-disk record length; callers guard rec_len != 0 */
	return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
}
246 | |||
/* Map on-disk NILFS file-type codes to the DT_* codes used by filldir. */
static unsigned char
nilfs_filetype_table[NILFS_FT_MAX] = {
	[NILFS_FT_UNKNOWN]	= DT_UNKNOWN,
	[NILFS_FT_REG_FILE]	= DT_REG,
	[NILFS_FT_DIR]		= DT_DIR,
	[NILFS_FT_CHRDEV]	= DT_CHR,
	[NILFS_FT_BLKDEV]	= DT_BLK,
	[NILFS_FT_FIFO]		= DT_FIFO,
	[NILFS_FT_SOCK]		= DT_SOCK,
	[NILFS_FT_SYMLINK]	= DT_LNK,
};
258 | |||
#define S_SHIFT 12	/* position of the S_IFMT file-type bits in i_mode */
/* Map the S_IFMT bits of i_mode to on-disk NILFS_FT_* type codes. */
static unsigned char
nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= NILFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= NILFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= NILFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= NILFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= NILFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= NILFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= NILFS_FT_SYMLINK,
};
270 | |||
271 | static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) | ||
272 | { | ||
273 | mode_t mode = inode->i_mode; | ||
274 | |||
275 | de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | ||
276 | } | ||
277 | |||
/*
 * nilfs_readdir - readdir callback (legacy filldir interface)
 *
 * Walks the directory one pagecache page at a time, emitting every live
 * entry (de->inode != 0) through @filldir.  f_pos encodes the byte
 * offset into the directory and is advanced per record.
 */
static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	loff_t pos = filp->f_pos;
	struct inode *inode = filp->f_dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	unsigned int offset = pos & ~PAGE_CACHE_MASK;
	unsigned long n = pos >> PAGE_CACHE_SHIFT;
	unsigned long npages = dir_pages(inode);
	/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
	unsigned char *types = NULL;
	int ret;

	/* position already past the last possible record: nothing to emit */
	if (pos > inode->i_size - NILFS_DIR_REC_LEN(1))
		goto success;

	types = nilfs_filetype_table;

	for ( ; n < npages; n++, offset = 0) {
		char *kaddr, *limit;
		struct nilfs_dir_entry *de;
		struct page *page = nilfs_get_page(inode, n);

		if (IS_ERR(page)) {
			nilfs_error(sb, __func__, "bad page in #%lu",
				    inode->i_ino);
			/* skip past the bad page but report the error */
			filp->f_pos += PAGE_CACHE_SIZE - offset;
			ret = -EIO;
			goto done;
		}
		kaddr = page_address(page);
		de = (struct nilfs_dir_entry *)(kaddr + offset);
		limit = kaddr + nilfs_last_byte(inode, n) -
			NILFS_DIR_REC_LEN(1);
		for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) {
			if (de->rec_len == 0) {
				/* corrupted chunk: would loop forever */
				nilfs_error(sb, __func__,
					    "zero-length directory entry");
				ret = -EIO;
				nilfs_put_page(page);
				goto done;
			}
			if (de->inode) {
				int over;
				unsigned char d_type = DT_UNKNOWN;

				if (types && de->file_type < NILFS_FT_MAX)
					d_type = types[de->file_type];

				offset = (char *)de - kaddr;
				over = filldir(dirent, de->name, de->name_len,
						(n<<PAGE_CACHE_SHIFT) | offset,
						le64_to_cpu(de->inode), d_type);
				if (over) {
					/* caller's buffer is full: stop */
					nilfs_put_page(page);
					goto success;
				}
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		nilfs_put_page(page);
	}

success:
	ret = 0;
done:
	return ret;
}
345 | |||
346 | /* | ||
347 | * nilfs_find_entry() | ||
348 | * | ||
349 | * finds an entry in the specified directory with the wanted name. It | ||
350 | * returns the page in which the entry was found, and the entry itself | ||
351 | * (as a parameter - res_dir). Page is returned mapped and unlocked. | ||
352 | * Entry is guaranteed to be valid. | ||
353 | */ | ||
354 | struct nilfs_dir_entry * | ||
355 | nilfs_find_entry(struct inode *dir, struct dentry *dentry, | ||
356 | struct page **res_page) | ||
357 | { | ||
358 | const char *name = dentry->d_name.name; | ||
359 | int namelen = dentry->d_name.len; | ||
360 | unsigned reclen = NILFS_DIR_REC_LEN(namelen); | ||
361 | unsigned long start, n; | ||
362 | unsigned long npages = dir_pages(dir); | ||
363 | struct page *page = NULL; | ||
364 | struct nilfs_inode_info *ei = NILFS_I(dir); | ||
365 | struct nilfs_dir_entry *de; | ||
366 | |||
367 | if (npages == 0) | ||
368 | goto out; | ||
369 | |||
370 | /* OFFSET_CACHE */ | ||
371 | *res_page = NULL; | ||
372 | |||
373 | start = ei->i_dir_start_lookup; | ||
374 | if (start >= npages) | ||
375 | start = 0; | ||
376 | n = start; | ||
377 | do { | ||
378 | char *kaddr; | ||
379 | page = nilfs_get_page(dir, n); | ||
380 | if (!IS_ERR(page)) { | ||
381 | kaddr = page_address(page); | ||
382 | de = (struct nilfs_dir_entry *)kaddr; | ||
383 | kaddr += nilfs_last_byte(dir, n) - reclen; | ||
384 | while ((char *) de <= kaddr) { | ||
385 | if (de->rec_len == 0) { | ||
386 | nilfs_error(dir->i_sb, __func__, | ||
387 | "zero-length directory entry"); | ||
388 | nilfs_put_page(page); | ||
389 | goto out; | ||
390 | } | ||
391 | if (nilfs_match(namelen, name, de)) | ||
392 | goto found; | ||
393 | de = nilfs_next_entry(de); | ||
394 | } | ||
395 | nilfs_put_page(page); | ||
396 | } | ||
397 | if (++n >= npages) | ||
398 | n = 0; | ||
399 | /* next page is past the blocks we've got */ | ||
400 | if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { | ||
401 | nilfs_error(dir->i_sb, __func__, | ||
402 | "dir %lu size %lld exceeds block cout %llu", | ||
403 | dir->i_ino, dir->i_size, | ||
404 | (unsigned long long)dir->i_blocks); | ||
405 | goto out; | ||
406 | } | ||
407 | } while (n != start); | ||
408 | out: | ||
409 | return NULL; | ||
410 | |||
411 | found: | ||
412 | *res_page = page; | ||
413 | ei->i_dir_start_lookup = n; | ||
414 | return de; | ||
415 | } | ||
416 | |||
417 | struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) | ||
418 | { | ||
419 | struct page *page = nilfs_get_page(dir, 0); | ||
420 | struct nilfs_dir_entry *de = NULL; | ||
421 | |||
422 | if (!IS_ERR(page)) { | ||
423 | de = nilfs_next_entry( | ||
424 | (struct nilfs_dir_entry *)page_address(page)); | ||
425 | *p = page; | ||
426 | } | ||
427 | return de; | ||
428 | } | ||
429 | |||
430 | ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry) | ||
431 | { | ||
432 | ino_t res = 0; | ||
433 | struct nilfs_dir_entry *de; | ||
434 | struct page *page; | ||
435 | |||
436 | de = nilfs_find_entry(dir, dentry, &page); | ||
437 | if (de) { | ||
438 | res = le64_to_cpu(de->inode); | ||
439 | kunmap(page); | ||
440 | page_cache_release(page); | ||
441 | } | ||
442 | return res; | ||
443 | } | ||
444 | |||
/*
 * Repoint an existing directory entry @de (on @page, inside @dir) at
 * @inode, journaling only the byte range of the entry.
 * Releases the page.
 */
void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
		    struct page *page, struct inode *inode)
{
	/* byte range of the entry within the page, for prepare/commit */
	unsigned from = (char *) de - (char *) page_address(page);
	unsigned to = from + le16_to_cpu(de->rec_len);
	struct address_space *mapping = page->mapping;
	int err;

	lock_page(page);
	/* uninterruptible variant: this path must not fail mid-rename */
	err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to);
	BUG_ON(err);
	de->inode = cpu_to_le64(inode->i_ino);
	/* refresh the cached file-type byte to match the new target */
	nilfs_set_de_type(de, inode);
	err = nilfs_commit_chunk(page, mapping, from, to);
	nilfs_put_page(page);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
	mark_inode_dirty(dir);
}
465 | |||
466 | /* | ||
467 | * Parent is locked. | ||
468 | */ | ||
469 | int nilfs_add_link(struct dentry *dentry, struct inode *inode) | ||
470 | { | ||
471 | struct inode *dir = dentry->d_parent->d_inode; | ||
472 | const char *name = dentry->d_name.name; | ||
473 | int namelen = dentry->d_name.len; | ||
474 | unsigned chunk_size = nilfs_chunk_size(dir); | ||
475 | unsigned reclen = NILFS_DIR_REC_LEN(namelen); | ||
476 | unsigned short rec_len, name_len; | ||
477 | struct page *page = NULL; | ||
478 | struct nilfs_dir_entry *de; | ||
479 | unsigned long npages = dir_pages(dir); | ||
480 | unsigned long n; | ||
481 | char *kaddr; | ||
482 | unsigned from, to; | ||
483 | int err; | ||
484 | |||
485 | /* | ||
486 | * We take care of directory expansion in the same loop. | ||
487 | * This code plays outside i_size, so it locks the page | ||
488 | * to protect that region. | ||
489 | */ | ||
490 | for (n = 0; n <= npages; n++) { | ||
491 | char *dir_end; | ||
492 | |||
493 | page = nilfs_get_page(dir, n); | ||
494 | err = PTR_ERR(page); | ||
495 | if (IS_ERR(page)) | ||
496 | goto out; | ||
497 | lock_page(page); | ||
498 | kaddr = page_address(page); | ||
499 | dir_end = kaddr + nilfs_last_byte(dir, n); | ||
500 | de = (struct nilfs_dir_entry *)kaddr; | ||
501 | kaddr += PAGE_CACHE_SIZE - reclen; | ||
502 | while ((char *)de <= kaddr) { | ||
503 | if ((char *)de == dir_end) { | ||
504 | /* We hit i_size */ | ||
505 | name_len = 0; | ||
506 | rec_len = chunk_size; | ||
507 | de->rec_len = cpu_to_le16(chunk_size); | ||
508 | de->inode = 0; | ||
509 | goto got_it; | ||
510 | } | ||
511 | if (de->rec_len == 0) { | ||
512 | nilfs_error(dir->i_sb, __func__, | ||
513 | "zero-length directory entry"); | ||
514 | err = -EIO; | ||
515 | goto out_unlock; | ||
516 | } | ||
517 | err = -EEXIST; | ||
518 | if (nilfs_match(namelen, name, de)) | ||
519 | goto out_unlock; | ||
520 | name_len = NILFS_DIR_REC_LEN(de->name_len); | ||
521 | rec_len = le16_to_cpu(de->rec_len); | ||
522 | if (!de->inode && rec_len >= reclen) | ||
523 | goto got_it; | ||
524 | if (rec_len >= name_len + reclen) | ||
525 | goto got_it; | ||
526 | de = (struct nilfs_dir_entry *)((char *)de + rec_len); | ||
527 | } | ||
528 | unlock_page(page); | ||
529 | nilfs_put_page(page); | ||
530 | } | ||
531 | BUG(); | ||
532 | return -EINVAL; | ||
533 | |||
534 | got_it: | ||
535 | from = (char *)de - (char *)page_address(page); | ||
536 | to = from + rec_len; | ||
537 | err = nilfs_prepare_chunk(page, page->mapping, from, to); | ||
538 | if (err) | ||
539 | goto out_unlock; | ||
540 | if (de->inode) { | ||
541 | struct nilfs_dir_entry *de1; | ||
542 | |||
543 | de1 = (struct nilfs_dir_entry *)((char *)de + name_len); | ||
544 | de1->rec_len = cpu_to_le16(rec_len - name_len); | ||
545 | de->rec_len = cpu_to_le16(name_len); | ||
546 | de = de1; | ||
547 | } | ||
548 | de->name_len = namelen; | ||
549 | memcpy(de->name, name, namelen); | ||
550 | de->inode = cpu_to_le64(inode->i_ino); | ||
551 | nilfs_set_de_type(de, inode); | ||
552 | err = nilfs_commit_chunk(page, page->mapping, from, to); | ||
553 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
554 | /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ | ||
555 | mark_inode_dirty(dir); | ||
556 | /* OFFSET_CACHE */ | ||
557 | out_put: | ||
558 | nilfs_put_page(page); | ||
559 | out: | ||
560 | return err; | ||
561 | out_unlock: | ||
562 | unlock_page(page); | ||
563 | goto out_put; | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * nilfs_delete_entry deletes a directory entry by merging it with the | ||
568 | * previous entry. Page is up-to-date. Releases the page. | ||
569 | */ | ||
570 | int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) | ||
571 | { | ||
572 | struct address_space *mapping = page->mapping; | ||
573 | struct inode *inode = mapping->host; | ||
574 | char *kaddr = page_address(page); | ||
575 | unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); | ||
576 | unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); | ||
577 | struct nilfs_dir_entry *pde = NULL; | ||
578 | struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); | ||
579 | int err; | ||
580 | |||
581 | while ((char *)de < (char *)dir) { | ||
582 | if (de->rec_len == 0) { | ||
583 | nilfs_error(inode->i_sb, __func__, | ||
584 | "zero-length directory entry"); | ||
585 | err = -EIO; | ||
586 | goto out; | ||
587 | } | ||
588 | pde = de; | ||
589 | de = nilfs_next_entry(de); | ||
590 | } | ||
591 | if (pde) | ||
592 | from = (char *)pde - (char *)page_address(page); | ||
593 | lock_page(page); | ||
594 | err = nilfs_prepare_chunk(page, mapping, from, to); | ||
595 | BUG_ON(err); | ||
596 | if (pde) | ||
597 | pde->rec_len = cpu_to_le16(to - from); | ||
598 | dir->inode = 0; | ||
599 | err = nilfs_commit_chunk(page, mapping, from, to); | ||
600 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | ||
601 | /* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ | ||
602 | mark_inode_dirty(inode); | ||
603 | out: | ||
604 | nilfs_put_page(page); | ||
605 | return err; | ||
606 | } | ||
607 | |||
608 | /* | ||
609 | * Set the first fragment of directory. | ||
610 | */ | ||
611 | int nilfs_make_empty(struct inode *inode, struct inode *parent) | ||
612 | { | ||
613 | struct address_space *mapping = inode->i_mapping; | ||
614 | struct page *page = grab_cache_page(mapping, 0); | ||
615 | unsigned chunk_size = nilfs_chunk_size(inode); | ||
616 | struct nilfs_dir_entry *de; | ||
617 | int err; | ||
618 | void *kaddr; | ||
619 | |||
620 | if (!page) | ||
621 | return -ENOMEM; | ||
622 | |||
623 | err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); | ||
624 | if (unlikely(err)) { | ||
625 | unlock_page(page); | ||
626 | goto fail; | ||
627 | } | ||
628 | kaddr = kmap_atomic(page, KM_USER0); | ||
629 | memset(kaddr, 0, chunk_size); | ||
630 | de = (struct nilfs_dir_entry *)kaddr; | ||
631 | de->name_len = 1; | ||
632 | de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1)); | ||
633 | memcpy(de->name, ".\0\0", 4); | ||
634 | de->inode = cpu_to_le64(inode->i_ino); | ||
635 | nilfs_set_de_type(de, inode); | ||
636 | |||
637 | de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); | ||
638 | de->name_len = 2; | ||
639 | de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1)); | ||
640 | de->inode = cpu_to_le64(parent->i_ino); | ||
641 | memcpy(de->name, "..\0", 4); | ||
642 | nilfs_set_de_type(de, inode); | ||
643 | kunmap_atomic(kaddr, KM_USER0); | ||
644 | err = nilfs_commit_chunk(page, mapping, 0, chunk_size); | ||
645 | fail: | ||
646 | page_cache_release(page); | ||
647 | return err; | ||
648 | } | ||
649 | |||
650 | /* | ||
651 | * routine to check that the specified directory is empty (for rmdir) | ||
652 | */ | ||
653 | int nilfs_empty_dir(struct inode *inode) | ||
654 | { | ||
655 | struct page *page = NULL; | ||
656 | unsigned long i, npages = dir_pages(inode); | ||
657 | |||
658 | for (i = 0; i < npages; i++) { | ||
659 | char *kaddr; | ||
660 | struct nilfs_dir_entry *de; | ||
661 | |||
662 | page = nilfs_get_page(inode, i); | ||
663 | if (IS_ERR(page)) | ||
664 | continue; | ||
665 | |||
666 | kaddr = page_address(page); | ||
667 | de = (struct nilfs_dir_entry *)kaddr; | ||
668 | kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); | ||
669 | |||
670 | while ((char *)de <= kaddr) { | ||
671 | if (de->rec_len == 0) { | ||
672 | nilfs_error(inode->i_sb, __func__, | ||
673 | "zero-length directory entry " | ||
674 | "(kaddr=%p, de=%p)\n", kaddr, de); | ||
675 | goto not_empty; | ||
676 | } | ||
677 | if (de->inode != 0) { | ||
678 | /* check for . and .. */ | ||
679 | if (de->name[0] != '.') | ||
680 | goto not_empty; | ||
681 | if (de->name_len > 2) | ||
682 | goto not_empty; | ||
683 | if (de->name_len < 2) { | ||
684 | if (de->inode != | ||
685 | cpu_to_le64(inode->i_ino)) | ||
686 | goto not_empty; | ||
687 | } else if (de->name[1] != '.') | ||
688 | goto not_empty; | ||
689 | } | ||
690 | de = nilfs_next_entry(de); | ||
691 | } | ||
692 | nilfs_put_page(page); | ||
693 | } | ||
694 | return 1; | ||
695 | |||
696 | not_empty: | ||
697 | nilfs_put_page(page); | ||
698 | return 0; | ||
699 | } | ||
700 | |||
/*
 * File operations for nilfs2 directories: readdir/ioctl/fsync are
 * nilfs-specific, seek and read fall back to the generic helpers.
 *
 * NOTE(review): this table appears to be read-only after init; it
 * could likely be declared "const struct file_operations", but the
 * extern declaration in the header must change in lockstep -- confirm
 * before altering the linkage.
 */
struct file_operations nilfs_dir_operations = {
	.llseek = generic_file_llseek,
	.read = generic_read_dir,
	.readdir = nilfs_readdir,
	.unlocked_ioctl = nilfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nilfs_ioctl,
#endif	/* CONFIG_COMPAT */
	.fsync = nilfs_sync_file,

};
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c new file mode 100644 index 000000000000..c6379e482781 --- /dev/null +++ b/fs/nilfs2/direct.c | |||
@@ -0,0 +1,436 @@ | |||
1 | /* | ||
2 | * direct.c - NILFS direct block pointer. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/errno.h> | ||
24 | #include "nilfs.h" | ||
25 | #include "page.h" | ||
26 | #include "direct.h" | ||
27 | #include "alloc.h" | ||
28 | |||
/*
 * Return the array of little-endian block pointers stored immediately
 * after the nilfs_direct_node header inside the bmap's embedded data.
 */
static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct)
{
	return (__le64 *)
		((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1);
}
34 | |||
/*
 * Read the block pointer stored at slot @key.  No bounds check:
 * callers must guarantee key <= NILFS_DIRECT_KEY_MAX.
 */
static inline __u64
nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key)
{
	return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key));
}
40 | |||
/*
 * Store block pointer @ptr at slot @key (no bounds check; see
 * nilfs_direct_get_ptr).
 */
static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct,
					__u64 key, __u64 ptr)
{
	*(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr);
}
46 | |||
47 | static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, | ||
48 | __u64 key, int level, __u64 *ptrp) | ||
49 | { | ||
50 | struct nilfs_direct *direct; | ||
51 | __u64 ptr; | ||
52 | |||
53 | direct = (struct nilfs_direct *)bmap; | ||
54 | if ((key > NILFS_DIRECT_KEY_MAX) || | ||
55 | (level != 1) || /* XXX: use macro for level 1 */ | ||
56 | ((ptr = nilfs_direct_get_ptr(direct, key)) == | ||
57 | NILFS_BMAP_INVALID_PTR)) | ||
58 | return -ENOENT; | ||
59 | |||
60 | if (ptrp != NULL) | ||
61 | *ptrp = ptr; | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static __u64 | ||
66 | nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) | ||
67 | { | ||
68 | __u64 ptr; | ||
69 | |||
70 | ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); | ||
71 | if (ptr != NILFS_BMAP_INVALID_PTR) | ||
72 | /* sequential access */ | ||
73 | return ptr; | ||
74 | else | ||
75 | /* block group */ | ||
76 | return nilfs_bmap_find_target_in_group(&direct->d_bmap); | ||
77 | } | ||
78 | |||
79 | static void nilfs_direct_set_target_v(struct nilfs_direct *direct, | ||
80 | __u64 key, __u64 ptr) | ||
81 | { | ||
82 | direct->d_bmap.b_last_allocated_key = key; | ||
83 | direct->d_bmap.b_last_allocated_ptr = ptr; | ||
84 | } | ||
85 | |||
86 | static int nilfs_direct_prepare_insert(struct nilfs_direct *direct, | ||
87 | __u64 key, | ||
88 | union nilfs_bmap_ptr_req *req, | ||
89 | struct nilfs_bmap_stats *stats) | ||
90 | { | ||
91 | int ret; | ||
92 | |||
93 | if (direct->d_ops->dop_find_target != NULL) | ||
94 | req->bpr_ptr = direct->d_ops->dop_find_target(direct, key); | ||
95 | ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap, | ||
96 | req); | ||
97 | if (ret < 0) | ||
98 | return ret; | ||
99 | |||
100 | stats->bs_nblocks = 1; | ||
101 | return 0; | ||
102 | } | ||
103 | |||
/*
 * Finish inserting a block pointer.  Note that @ptr here is not a
 * disk address: it carries the buffer head of the new block cast to
 * an integer; the buffer is tagged "volatile" until a real block
 * number is assigned later.  The on-disk slot receives the pointer
 * reserved in @req, not @ptr.
 */
static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
				       union nilfs_bmap_ptr_req *req,
				       __u64 key, __u64 ptr)
{
	struct buffer_head *bh;

	/* ptr must be a pointer to a buffer head. */
	bh = (struct buffer_head *)((unsigned long)ptr);
	set_buffer_nilfs_volatile(bh);

	if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL)
		direct->d_bmap.b_pops->bpop_commit_alloc_ptr(
			&direct->d_bmap, req);
	nilfs_direct_set_ptr(direct, key, req->bpr_ptr);

	if (!nilfs_bmap_dirty(&direct->d_bmap))
		nilfs_bmap_set_dirty(&direct->d_bmap);

	/* record the allocation so the next insert can follow on */
	if (direct->d_ops->dop_set_target != NULL)
		direct->d_ops->dop_set_target(direct, key, req->bpr_ptr);
}
125 | |||
126 | static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | ||
127 | { | ||
128 | struct nilfs_direct *direct; | ||
129 | union nilfs_bmap_ptr_req req; | ||
130 | struct nilfs_bmap_stats stats; | ||
131 | int ret; | ||
132 | |||
133 | direct = (struct nilfs_direct *)bmap; | ||
134 | if (key > NILFS_DIRECT_KEY_MAX) | ||
135 | return -ENOENT; | ||
136 | if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) | ||
137 | return -EEXIST; | ||
138 | |||
139 | ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); | ||
140 | if (ret < 0) | ||
141 | return ret; | ||
142 | nilfs_direct_commit_insert(direct, &req, key, ptr); | ||
143 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | ||
144 | |||
145 | return 0; | ||
146 | } | ||
147 | |||
/*
 * Prepare to delete the pointer at @key: give the pointer-op table a
 * chance to reserve whatever it needs to end the block's lifetime.
 * On success @stats reports that one block will be released.
 */
static int nilfs_direct_prepare_delete(struct nilfs_direct *direct,
				       union nilfs_bmap_ptr_req *req,
				       __u64 key,
				       struct nilfs_bmap_stats *stats)
{
	int ret;

	if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
		req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
		ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr(
			&direct->d_bmap, req);
		if (ret < 0)
			return ret;
	}

	stats->bs_nblocks = 1;
	return 0;
}
166 | |||
/*
 * Finish the deletion prepared above: commit the pointer release via
 * the op table (when provided) and mark the slot invalid.
 */
static void nilfs_direct_commit_delete(struct nilfs_direct *direct,
				       union nilfs_bmap_ptr_req *req,
				       __u64 key)
{
	if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL)
		direct->d_bmap.b_pops->bpop_commit_end_ptr(
			&direct->d_bmap, req);
	nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
}
176 | |||
177 | static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) | ||
178 | { | ||
179 | struct nilfs_direct *direct; | ||
180 | union nilfs_bmap_ptr_req req; | ||
181 | struct nilfs_bmap_stats stats; | ||
182 | int ret; | ||
183 | |||
184 | direct = (struct nilfs_direct *)bmap; | ||
185 | if ((key > NILFS_DIRECT_KEY_MAX) || | ||
186 | nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) | ||
187 | return -ENOENT; | ||
188 | |||
189 | ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); | ||
190 | if (ret < 0) | ||
191 | return ret; | ||
192 | nilfs_direct_commit_delete(direct, &req, key); | ||
193 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | ||
199 | { | ||
200 | struct nilfs_direct *direct; | ||
201 | __u64 key, lastkey; | ||
202 | |||
203 | direct = (struct nilfs_direct *)bmap; | ||
204 | lastkey = NILFS_DIRECT_KEY_MAX + 1; | ||
205 | for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) | ||
206 | if (nilfs_direct_get_ptr(direct, key) != | ||
207 | NILFS_BMAP_INVALID_PTR) | ||
208 | lastkey = key; | ||
209 | |||
210 | if (lastkey == NILFS_DIRECT_KEY_MAX + 1) | ||
211 | return -ENOENT; | ||
212 | |||
213 | *keyp = lastkey; | ||
214 | |||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key) | ||
219 | { | ||
220 | return key > NILFS_DIRECT_KEY_MAX; | ||
221 | } | ||
222 | |||
223 | static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, | ||
224 | __u64 *keys, __u64 *ptrs, int nitems) | ||
225 | { | ||
226 | struct nilfs_direct *direct; | ||
227 | __u64 key; | ||
228 | __u64 ptr; | ||
229 | int n; | ||
230 | |||
231 | direct = (struct nilfs_direct *)bmap; | ||
232 | if (nitems > NILFS_DIRECT_NBLOCKS) | ||
233 | nitems = NILFS_DIRECT_NBLOCKS; | ||
234 | n = 0; | ||
235 | for (key = 0; key < nitems; key++) { | ||
236 | ptr = nilfs_direct_get_ptr(direct, key); | ||
237 | if (ptr != NILFS_BMAP_INVALID_PTR) { | ||
238 | keys[n] = key; | ||
239 | ptrs[n] = ptr; | ||
240 | n++; | ||
241 | } | ||
242 | } | ||
243 | return n; | ||
244 | } | ||
245 | |||
/*
 * nilfs_direct_delete_and_convert - delete @key and rebuild @bmap as
 * a direct mapping in one step.
 *
 * @keys/@ptrs hold the @n surviving (key, ptr) pairs gathered from
 * the previous mapping (presumably a btree being shrunk -- the caller
 * decides).  They are written into the direct pointer array except
 * the pair matching @key, which is dropped; unlisted slots become
 * invalid.  Finally the bmap is re-initialized as a direct mapping
 * for the [@low, @high] key range.
 */
int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
				    __u64 key, __u64 *keys, __u64 *ptrs,
				    int n, __u64 low, __u64 high)
{
	struct nilfs_direct *direct;
	__le64 *dptrs;
	int ret, i, j;

	/* no need to allocate any resource for conversion */

	/* delete */
	ret = bmap->b_ops->bop_delete(bmap, key);
	if (ret < 0)
		return ret;

	/* free resources */
	if (bmap->b_ops->bop_clear != NULL)
		bmap->b_ops->bop_clear(bmap);

	/* convert */
	direct = (struct nilfs_direct *)bmap;
	dptrs = nilfs_direct_dptrs(direct);
	for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) {
		if ((j < n) && (i == keys[j])) {
			/* keep every survivor except the deleted key */
			dptrs[i] = (i != key) ?
				nilfs_bmap_ptr_to_dptr(ptrs[j]) :
				NILFS_BMAP_INVALID_PTR;
			j++;
		} else
			dptrs[i] = NILFS_BMAP_INVALID_PTR;
	}

	nilfs_direct_init(bmap, low, high);

	return 0;
}
282 | |||
/*
 * Propagate dirtiness of buffer @bh into the mapping.  On the first
 * touch (buffer not yet "volatile") the virtual block address is
 * re-registered through a prepare/commit update pair and the slot is
 * repointed at the new address; on later touches the existing address
 * is merely marked dirty.
 */
static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
				    struct buffer_head *bh)
{
	union nilfs_bmap_ptr_req oldreq, newreq;
	__u64 key;
	__u64 ptr;
	int ret;

	key = nilfs_bmap_data_get_key(&direct->d_bmap, bh);
	ptr = nilfs_direct_get_ptr(direct, key);
	if (!buffer_nilfs_volatile(bh)) {
		oldreq.bpr_ptr = ptr;
		newreq.bpr_ptr = ptr;
		ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq,
						&newreq);
		if (ret < 0)
			return ret;
		nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq);
		set_buffer_nilfs_volatile(bh);
		nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr);
	} else
		ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr);

	return ret;
}
308 | |||
309 | static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, | ||
310 | struct buffer_head *bh) | ||
311 | { | ||
312 | struct nilfs_direct *direct; | ||
313 | |||
314 | direct = (struct nilfs_direct *)bmap; | ||
315 | return (direct->d_ops->dop_propagate != NULL) ? | ||
316 | direct->d_ops->dop_propagate(direct, bh) : | ||
317 | 0; | ||
318 | } | ||
319 | |||
/*
 * Assign the real disk block number for a virtual pointer: register
 * @blocknr for @ptr through the pointer-op table (start of the
 * block's on-disk lifetime) and record the virtual block number plus
 * file block offset in @binfo for the segment log.
 */
static int nilfs_direct_assign_v(struct nilfs_direct *direct,
				 __u64 key, __u64 ptr,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	union nilfs_bmap_ptr_req req;
	int ret;

	req.bpr_ptr = ptr;
	ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr(
		&direct->d_bmap, &req);
	if (ret < 0)
		return ret;
	direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap,
						     &req, blocknr);

	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);

	return 0;
}
342 | |||
/*
 * Assign for physical mappings: rewrite the slot with the final block
 * number directly and record the file offset at level 0 in @binfo.
 * @ptr and @bh are unused but keep the dop_assign signature.
 */
static int nilfs_direct_assign_p(struct nilfs_direct *direct,
				 __u64 key, __u64 ptr,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	nilfs_direct_set_ptr(direct, key, blocknr);

	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
	binfo->bi_dat.bi_level = 0;

	return 0;
}
356 | |||
357 | static int nilfs_direct_assign(struct nilfs_bmap *bmap, | ||
358 | struct buffer_head **bh, | ||
359 | sector_t blocknr, | ||
360 | union nilfs_binfo *binfo) | ||
361 | { | ||
362 | struct nilfs_direct *direct; | ||
363 | __u64 key; | ||
364 | __u64 ptr; | ||
365 | |||
366 | direct = (struct nilfs_direct *)bmap; | ||
367 | key = nilfs_bmap_data_get_key(bmap, *bh); | ||
368 | if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { | ||
369 | printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, | ||
370 | (unsigned long long)key); | ||
371 | return -EINVAL; | ||
372 | } | ||
373 | ptr = nilfs_direct_get_ptr(direct, key); | ||
374 | if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { | ||
375 | printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, | ||
376 | (unsigned long long)ptr); | ||
377 | return -EINVAL; | ||
378 | } | ||
379 | |||
380 | return direct->d_ops->dop_assign(direct, key, ptr, bh, | ||
381 | blocknr, binfo); | ||
382 | } | ||
383 | |||
/*
 * bmap operation table shared by both flavours of direct mapping;
 * hooks that do not apply here are left NULL.
 */
static const struct nilfs_bmap_operations nilfs_direct_ops = {
	.bop_lookup = nilfs_direct_lookup,
	.bop_insert = nilfs_direct_insert,
	.bop_delete = nilfs_direct_delete,
	.bop_clear = NULL,

	.bop_propagate = nilfs_direct_propagate,

	.bop_lookup_dirty_buffers = NULL,

	.bop_assign = nilfs_direct_assign,
	.bop_mark = NULL,

	.bop_last_key = nilfs_direct_last_key,
	.bop_check_insert = nilfs_direct_check_insert,
	.bop_check_delete = NULL,
	.bop_gather_data = nilfs_direct_gather_data,
};
402 | |||
403 | |||
/* Operations for virtual block addresses -- used by every file except
 * the DAT itself (see nilfs_direct_init). */
static const struct nilfs_direct_operations nilfs_direct_ops_v = {
	.dop_find_target = nilfs_direct_find_target_v,
	.dop_set_target = nilfs_direct_set_target_v,
	.dop_propagate = nilfs_direct_propagate_v,
	.dop_assign = nilfs_direct_assign_v,
};
410 | |||
/* Operations for physical block addresses -- used by the DAT file,
 * which needs no target hints or propagation. */
static const struct nilfs_direct_operations nilfs_direct_ops_p = {
	.dop_find_target = NULL,
	.dop_set_target = NULL,
	.dop_propagate = NULL,
	.dop_assign = nilfs_direct_assign_p,
};
417 | |||
418 | int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) | ||
419 | { | ||
420 | struct nilfs_direct *direct; | ||
421 | |||
422 | direct = (struct nilfs_direct *)bmap; | ||
423 | bmap->b_ops = &nilfs_direct_ops; | ||
424 | bmap->b_low = low; | ||
425 | bmap->b_high = high; | ||
426 | switch (bmap->b_inode->i_ino) { | ||
427 | case NILFS_DAT_INO: | ||
428 | direct->d_ops = &nilfs_direct_ops_p; | ||
429 | break; | ||
430 | default: | ||
431 | direct->d_ops = &nilfs_direct_ops_v; | ||
432 | break; | ||
433 | } | ||
434 | |||
435 | return 0; | ||
436 | } | ||
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h new file mode 100644 index 000000000000..45d2c5cda812 --- /dev/null +++ b/fs/nilfs2/direct.h | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * direct.h - NILFS direct block pointer. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_DIRECT_H | ||
24 | #define _NILFS_DIRECT_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include "bmap.h" | ||
29 | |||
30 | |||
31 | struct nilfs_direct; | ||
32 | |||
33 | /** | ||
34 | * struct nilfs_direct_operations - direct mapping operation table | ||
35 | */ | ||
36 | struct nilfs_direct_operations { | ||
37 | __u64 (*dop_find_target)(const struct nilfs_direct *, __u64); | ||
38 | void (*dop_set_target)(struct nilfs_direct *, __u64, __u64); | ||
39 | int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *); | ||
40 | int (*dop_assign)(struct nilfs_direct *, __u64, __u64, | ||
41 | struct buffer_head **, sector_t, | ||
42 | union nilfs_binfo *); | ||
43 | }; | ||
44 | |||
45 | /** | ||
46 | * struct nilfs_direct_node - direct node | ||
47 | * @dn_flags: flags | ||
48 | * @dn_pad: padding | ||
49 | */ | ||
50 | struct nilfs_direct_node { | ||
51 | __u8 dn_flags; | ||
52 | __u8 pad[7]; | ||
53 | }; | ||
54 | |||
55 | /** | ||
56 | * struct nilfs_direct - direct mapping | ||
57 | * @d_bmap: bmap structure | ||
58 | * @d_ops: direct mapping operation table | ||
59 | */ | ||
60 | struct nilfs_direct { | ||
61 | struct nilfs_bmap d_bmap; | ||
62 | |||
63 | /* direct-mapping-specific members */ | ||
64 | const struct nilfs_direct_operations *d_ops; | ||
65 | }; | ||
66 | |||
67 | |||
/* Number of __le64 block-pointer slots that fit in the bmap union
 * after the one-slot nilfs_direct_node header. */
#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1)
#define NILFS_DIRECT_KEY_MIN 0
#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)


int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64);
int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *,
				    __u64 *, int, __u64, __u64);
76 | |||
77 | |||
78 | #endif /* _NILFS_DIRECT_H */ | ||
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c new file mode 100644 index 000000000000..6bd84a0d8238 --- /dev/null +++ b/fs/nilfs2/file.c | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * file.c - NILFS regular file handling primitives including fsync(). | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | |||
24 | #include <linux/fs.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "segment.h" | ||
29 | |||
30 | int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | ||
31 | { | ||
32 | /* | ||
33 | * Called from fsync() system call | ||
34 | * This is the only entry point that can catch write and synch | ||
35 | * timing for both data blocks and intermediate blocks. | ||
36 | * | ||
37 | * This function should be implemented when the writeback function | ||
38 | * will be implemented. | ||
39 | */ | ||
40 | struct inode *inode = dentry->d_inode; | ||
41 | int err; | ||
42 | |||
43 | if (!nilfs_inode_dirty(inode)) | ||
44 | return 0; | ||
45 | |||
46 | if (datasync) | ||
47 | err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, | ||
48 | LLONG_MAX); | ||
49 | else | ||
50 | err = nilfs_construct_segment(inode->i_sb); | ||
51 | |||
52 | return err; | ||
53 | } | ||
54 | |||
55 | static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
56 | { | ||
57 | struct page *page = vmf->page; | ||
58 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | ||
59 | struct nilfs_transaction_info ti; | ||
60 | int ret; | ||
61 | |||
62 | if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) | ||
63 | return VM_FAULT_SIGBUS; /* -ENOSPC */ | ||
64 | |||
65 | lock_page(page); | ||
66 | if (page->mapping != inode->i_mapping || | ||
67 | page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { | ||
68 | unlock_page(page); | ||
69 | return VM_FAULT_NOPAGE; /* make the VM retry the fault */ | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * check to see if the page is mapped already (no holes) | ||
74 | */ | ||
75 | if (PageMappedToDisk(page)) { | ||
76 | unlock_page(page); | ||
77 | goto mapped; | ||
78 | } | ||
79 | if (page_has_buffers(page)) { | ||
80 | struct buffer_head *bh, *head; | ||
81 | int fully_mapped = 1; | ||
82 | |||
83 | bh = head = page_buffers(page); | ||
84 | do { | ||
85 | if (!buffer_mapped(bh)) { | ||
86 | fully_mapped = 0; | ||
87 | break; | ||
88 | } | ||
89 | } while (bh = bh->b_this_page, bh != head); | ||
90 | |||
91 | if (fully_mapped) { | ||
92 | SetPageMappedToDisk(page); | ||
93 | unlock_page(page); | ||
94 | goto mapped; | ||
95 | } | ||
96 | } | ||
97 | unlock_page(page); | ||
98 | |||
99 | /* | ||
100 | * fill hole blocks | ||
101 | */ | ||
102 | ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); | ||
103 | /* never returns -ENOMEM, but may return -ENOSPC */ | ||
104 | if (unlikely(ret)) | ||
105 | return VM_FAULT_SIGBUS; | ||
106 | |||
107 | ret = block_page_mkwrite(vma, vmf, nilfs_get_block); | ||
108 | if (unlikely(ret)) { | ||
109 | nilfs_transaction_abort(inode->i_sb); | ||
110 | return ret; | ||
111 | } | ||
112 | nilfs_transaction_commit(inode->i_sb); | ||
113 | |||
114 | mapped: | ||
115 | SetPageChecked(page); | ||
116 | wait_on_page_writeback(page); | ||
117 | return 0; | ||
118 | } | ||
119 | |||
/*
 * VM operations: reads use the generic fault handler; write faults go
 * through nilfs_page_mkwrite() so hole blocks are allocated inside a
 * transaction before the page is dirtied.
 */
struct vm_operations_struct nilfs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= nilfs_page_mkwrite,
};
124 | |||
125 | static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
126 | { | ||
127 | file_accessed(file); | ||
128 | vma->vm_ops = &nilfs_file_vm_ops; | ||
129 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
130 | return 0; | ||
131 | } | ||
132 | |||
/*
 * We have mostly NULL's here: the current defaults are ok for
 * the nilfs filesystem.
 */
struct file_operations nilfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
	.unlocked_ioctl	= nilfs_ioctl,
#ifdef CONFIG_COMPAT
	/* the same handler serves 32-bit callers; NOTE(review):
	 * presumably the ioctl argument layouts are compat-safe --
	 * confirm against nilfs_ioctl() */
	.compat_ioctl	= nilfs_ioctl,
#endif	/* CONFIG_COMPAT */
	.mmap		= nilfs_file_mmap,
	.open		= generic_file_open,
	/* .release	= nilfs_release_file, */
	.fsync		= nilfs_sync_file,
	.splice_read	= generic_file_splice_read,
};
153 | |||
/* Inode operations for regular files; everything not listed here
 * falls back to the VFS defaults. */
struct inode_operations nilfs_file_inode_operations = {
	.truncate	= nilfs_truncate,
	.setattr	= nilfs_setattr,
	.permission	= nilfs_permission,
};
159 | |||
160 | /* end of file */ | ||
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c new file mode 100644 index 000000000000..93383c5cee90 --- /dev/null +++ b/fs/nilfs2/gcdat.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * gcdat.c - NILFS shadow DAT inode for GC | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * and Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/buffer_head.h> | ||
26 | #include "nilfs.h" | ||
27 | #include "page.h" | ||
28 | #include "mdt.h" | ||
29 | |||
/**
 * nilfs_init_gcdat_inode - set up the shadow DAT inode for a GC pass
 * @nilfs: the_nilfs object owning both the real DAT and the shadow
 *
 * Copies the visible state of the DAT inode -- block count, flags,
 * inode state (with the NILFS_I_GCDAT marker added) and the bmap
 * root -- into the shadow gcdat inode, then duplicates the dirty
 * pages of both the data mapping and the btnode cache so garbage
 * collection can operate on a stable copy.
 *
 * Return: 0 on success, or a negative error code propagated from
 * nilfs_copy_dirty_pages().
 */
int nilfs_init_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
	struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat);
	int err;

	gcdat->i_state = 0;	/* start from a clean VFS inode state */
	gcdat->i_blocks = dat->i_blocks;
	gii->i_flags = dii->i_flags;
	gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT);
	gii->i_cno = 0;
	nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap);
	/* copy the data pages first, then the btree-node pages */
	err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping);
	if (unlikely(err))
		return err;

	return nilfs_copy_dirty_pages(&gii->i_btnode_cache,
				      &dii->i_btnode_cache);
}
49 | |||
/**
 * nilfs_commit_gcdat_inode - merge the GC result back into the real DAT
 * @nilfs: the_nilfs object
 *
 * Under the DAT's mi_sem held for writing: copies block count, flags
 * and state back from the shadow inode (dropping the NILFS_I_GCDAT
 * marker), commits the shadow bmap, then discards the DAT's stale
 * dirty pages and moves the shadow's pages -- both the data mapping
 * and the btnode cache -- back into the DAT.
 */
void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
	struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat);
	struct address_space *mapping = dat->i_mapping;
	struct address_space *gmapping = gcdat->i_mapping;

	down_write(&NILFS_MDT(dat)->mi_sem);
	dat->i_blocks = gcdat->i_blocks;
	dii->i_flags = gii->i_flags;
	dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT);

	nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap);

	/* the DAT's own dirty pages are obsolete now; replace them
	   with the pages GC produced in the shadow */
	nilfs_clear_dirty_pages(mapping);
	nilfs_copy_back_pages(mapping, gmapping);
	/* note: mdt dirty flags should be cleared by segctor. */

	nilfs_clear_dirty_pages(&dii->i_btnode_cache);
	nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache);

	up_write(&NILFS_MDT(dat)->mi_sem);
}
73 | |||
/**
 * nilfs_clear_gcdat_inode - dispose of the shadow DAT after a GC pass
 * @nilfs: the_nilfs object
 *
 * Marks the shadow inode cleared and truncates every page from both
 * its data mapping and its btnode cache.
 */
void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *gcdat = nilfs->ns_gc_dat;
	struct nilfs_inode_info *gii = NILFS_I(gcdat);

	gcdat->i_state = I_CLEAR;
	gii->i_flags = 0;

	truncate_inode_pages(gcdat->i_mapping, 0);
	truncate_inode_pages(&gii->i_btnode_cache, 0);
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c new file mode 100644 index 000000000000..19d2102b6a69 --- /dev/null +++ b/fs/nilfs2/gcinode.c | |||
@@ -0,0 +1,288 @@ | |||
1 | /* | ||
2 | * gcinode.c - dummy inodes to buffer blocks for garbage collection | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * and Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * Revised by Ryusuke Konishi <ryusuke@osrg.net>. | ||
23 | * | ||
24 | */ | ||
25 | /* | ||
26 | * This file adds the cache of on-disk blocks to be moved in garbage | ||
27 | * collection. The disk blocks are held with dummy inodes (called | ||
28 | * gcinodes), and this file provides lookup function of the dummy | ||
29 | * inodes and their buffer read function. | ||
30 | * | ||
31 | * Since NILFS2 keeps up multiple checkpoints/snapshots accross GC, it | ||
32 | * has to treat blocks that belong to a same file but have different | ||
33 | * checkpoint numbers. To avoid interference among generations, dummy | ||
34 | * inodes are managed separatly from actual inodes, and their lookup | ||
35 | * function (nilfs_gc_iget) is designed to be specified with a | ||
36 | * checkpoint number argument as well as an inode number. | ||
37 | * | ||
38 | * Buffers and pages held by the dummy inodes will be released each | ||
39 | * time after they are copied to a new log. Dirty blocks made on the | ||
40 | * current generation and the blocks to be moved by GC never overlap | ||
41 | * because the dirty blocks make a new generation; they rather must be | ||
42 | * written individually. | ||
43 | */ | ||
44 | |||
45 | #include <linux/buffer_head.h> | ||
46 | #include <linux/mpage.h> | ||
47 | #include <linux/hash.h> | ||
48 | #include <linux/swap.h> | ||
49 | #include "nilfs.h" | ||
50 | #include "page.h" | ||
51 | #include "mdt.h" | ||
52 | #include "dat.h" | ||
53 | #include "ifile.h" | ||
54 | |||
/* Empty aops: gcinode pages never go through normal address-space
 * operations; their buffers are filled directly via submit_bh() below. */
static struct address_space_operations def_gcinode_aops = {};
/* XXX need def_gcinode_iops/fops? */
57 | |||
/*
 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
 * @inode - gc inode
 * @blkoff - dummy offset treated as the key for the page cache
 * @pbn - physical block number of the block
 * @vbn - virtual block number of the block, 0 for non-virtual block
 * @out_bh - indirect pointer to a buffer_head struct to receive the results
 *
 * Description: nilfs_gccache_submit_read_data() registers the data buffer
 * specified by @pbn to the GC pagecache with the key @blkoff.
 * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
 *
 * Return Value: On success, 0 is returned. On Error, one of the following
 * negative error code is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The block specified with @pbn does not exist.
 */
int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
				   sector_t pbn, __u64 vbn,
				   struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	int err;

	/* grab returns the buffer with its page locked and referenced */
	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
	if (unlikely(!bh))
		return -ENOMEM;

	if (buffer_uptodate(bh))
		goto out;

	if (pbn == 0) {
		struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
					  /* use original dat, not gc dat. */
		err = nilfs_dat_translate(dat_inode, vbn, &pbn);
		if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
			brelse(bh);
			goto failed;
		}
	}

	/* re-check under the buffer lock: a concurrent read may have
	   completed between the check above and here */
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		goto out;
	}

	if (!buffer_mapped(bh)) {
		bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
		set_buffer_mapped(bh);
	}
	bh->b_blocknr = pbn;
	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(READ, bh);
	/* re-key the buffer by its virtual block number.
	 * NOTE(review): this relies on submit_bh() having already
	 * captured the physical sector for the in-flight read --
	 * confirm against the block layer before touching this. */
	if (vbn)
		bh->b_blocknr = vbn;
 out:
	err = 0;
	*out_bh = bh;

 failed:
	/* both the success and error paths end up here: drop the page
	   lock and the page reference taken by nilfs_grab_buffer() */
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	return err;
}
128 | |||
129 | /* | ||
130 | * nilfs_gccache_submit_read_node() - add node buffer and submit read request | ||
131 | * @inode - gc inode | ||
132 | * @pbn - physical block number for the block | ||
133 | * @vbn - virtual block number for the block | ||
134 | * @out_bh - indirect pointer to a buffer_head struct to receive the results | ||
135 | * | ||
136 | * Description: nilfs_gccache_submit_read_node() registers the node buffer | ||
137 | * specified by @vbn to the GC pagecache. @pbn can be supplied by the | ||
138 | * caller to avoid translation of the disk block address. | ||
139 | * | ||
140 | * Return Value: On success, 0 is returned. On Error, one of the following | ||
141 | * negative error code is returned. | ||
142 | * | ||
143 | * %-EIO - I/O error. | ||
144 | * | ||
145 | * %-ENOMEM - Insufficient amount of memory available. | ||
146 | */ | ||
147 | int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, | ||
148 | __u64 vbn, struct buffer_head **out_bh) | ||
149 | { | ||
150 | int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, | ||
151 | vbn ? : pbn, pbn, out_bh, 0); | ||
152 | if (ret == -EEXIST) /* internal code (cache hit) */ | ||
153 | ret = 0; | ||
154 | return ret; | ||
155 | } | ||
156 | |||
/*
 * nilfs_gccache_wait_and_mark_dirty - wait for a GC buffer read to
 * complete and mark the buffer dirty so it is written into a new log.
 *
 * Returns 0 on success, %-EIO if the read failed, or %-EEXIST when
 * the buffer was already dirty (NOTE(review): presumably an internal
 * "nothing to do" code for the caller -- confirm in segment.c).
 */
int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
{
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh))
		return -EIO;
	if (buffer_dirty(bh))
		return -EEXIST;

	/* btree-node buffers and data/mdt buffers are dirtied through
	   different helpers */
	if (buffer_nilfs_node(bh))
		nilfs_btnode_mark_dirty(bh);
	else
		nilfs_mdt_mark_buffer_dirty(bh);
	return 0;
}
171 | |||
172 | /* | ||
173 | * nilfs_init_gccache() - allocate and initialize gc_inode hash table | ||
174 | * @nilfs - the_nilfs | ||
175 | * | ||
176 | * Return Value: On success, 0. | ||
177 | * On error, a negative error code is returned. | ||
178 | */ | ||
179 | int nilfs_init_gccache(struct the_nilfs *nilfs) | ||
180 | { | ||
181 | int loop; | ||
182 | |||
183 | BUG_ON(nilfs->ns_gc_inodes_h); | ||
184 | |||
185 | INIT_LIST_HEAD(&nilfs->ns_gc_inodes); | ||
186 | |||
187 | nilfs->ns_gc_inodes_h = | ||
188 | kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, | ||
189 | GFP_NOFS); | ||
190 | if (nilfs->ns_gc_inodes_h == NULL) | ||
191 | return -ENOMEM; | ||
192 | |||
193 | for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) | ||
194 | INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * nilfs_destroy_gccache() - free gc_inode hash table | ||
200 | * @nilfs - the nilfs | ||
201 | */ | ||
202 | void nilfs_destroy_gccache(struct the_nilfs *nilfs) | ||
203 | { | ||
204 | if (nilfs->ns_gc_inodes_h) { | ||
205 | nilfs_remove_all_gcinode(nilfs); | ||
206 | kfree(nilfs->ns_gc_inodes_h); | ||
207 | nilfs->ns_gc_inodes_h = NULL; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, | ||
212 | __u64 cno) | ||
213 | { | ||
214 | struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); | ||
215 | struct nilfs_inode_info *ii; | ||
216 | |||
217 | if (!inode) | ||
218 | return NULL; | ||
219 | |||
220 | inode->i_op = NULL; | ||
221 | inode->i_fop = NULL; | ||
222 | inode->i_mapping->a_ops = &def_gcinode_aops; | ||
223 | |||
224 | ii = NILFS_I(inode); | ||
225 | ii->i_cno = cno; | ||
226 | ii->i_flags = 0; | ||
227 | ii->i_state = 1 << NILFS_I_GCINODE; | ||
228 | ii->i_bh = NULL; | ||
229 | nilfs_bmap_init_gc(ii->i_bmap); | ||
230 | |||
231 | return inode; | ||
232 | } | ||
233 | |||
234 | static unsigned long ihash(ino_t ino, __u64 cno) | ||
235 | { | ||
236 | return hash_long((unsigned long)((ino << 2) + cno), | ||
237 | NILFS_GCINODE_HASH_BITS); | ||
238 | } | ||
239 | |||
/*
 * nilfs_gc_iget() - find or create gc inode with specified (ino,cno)
 *
 * Looks the pair up in the ns_gc_inodes_h hash table; on a miss, a
 * new dummy inode is allocated and linked into both the hash chain
 * and the ns_gc_inodes list.  Returns NULL only when allocation
 * fails.
 *
 * NOTE(review): no locking is visible here; callers presumably
 * serialize gc inode lookups -- confirm against the cleaner code.
 */
struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
{
	struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno);
	struct hlist_node *node;
	struct inode *inode;

	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno)
			return inode;
	}

	inode = alloc_gcinode(nilfs, ino, cno);
	if (likely(inode)) {
		hlist_add_head(&inode->i_hash, head);
		list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
	}
	return inode;
}
261 | |||
/*
 * nilfs_clear_gcinode() - clear and free a gc inode
 *
 * Clears the MDT state first, then destroys the inode itself; the
 * order of the two calls matters.
 */
void nilfs_clear_gcinode(struct inode *inode)
{
	nilfs_mdt_clear(inode);
	nilfs_mdt_destroy(inode);
}
270 | |||
/*
 * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs
 *
 * Walks every hash bucket with the _safe iterator because each inode
 * is unlinked and freed during the walk.
 */
void nilfs_remove_all_gcinode(struct the_nilfs *nilfs)
{
	struct hlist_head *head = nilfs->ns_gc_inodes_h;
	struct hlist_node *node, *n;
	struct inode *inode;
	int loop;

	for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) {
		hlist_for_each_entry_safe(inode, node, n, head, i_hash) {
			hlist_del_init(&inode->i_hash);
			list_del_init(&NILFS_I(inode)->i_dirty);
			nilfs_clear_gcinode(inode); /* might sleep */
		}
	}
}
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c new file mode 100644 index 000000000000..de86401f209f --- /dev/null +++ b/fs/nilfs2/ifile.c | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * ifile.c - NILFS inode file | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Amagai Yoshiji <amagai@osrg.net>. | ||
21 | * Revised by Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "mdt.h" | ||
29 | #include "alloc.h" | ||
30 | #include "ifile.h" | ||
31 | |||
32 | /** | ||
33 | * nilfs_ifile_create_inode - create a new disk inode | ||
34 | * @ifile: ifile inode | ||
35 | * @out_ino: pointer to a variable to store inode number | ||
36 | * @out_bh: buffer_head contains newly allocated disk inode | ||
37 | * | ||
38 | * Return Value: On success, 0 is returned and the newly allocated inode | ||
39 | * number is stored in the place pointed by @ino, and buffer_head pointer | ||
40 | * that contains newly allocated disk inode structure is stored in the | ||
41 | * place pointed by @out_bh | ||
42 | * On error, one of the following negative error codes is returned. | ||
43 | * | ||
44 | * %-EIO - I/O error. | ||
45 | * | ||
46 | * %-ENOMEM - Insufficient amount of memory available. | ||
47 | * | ||
48 | * %-ENOSPC - No inode left. | ||
49 | */ | ||
50 | int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino, | ||
51 | struct buffer_head **out_bh) | ||
52 | { | ||
53 | struct nilfs_palloc_req req; | ||
54 | int ret; | ||
55 | |||
56 | req.pr_entry_nr = 0; /* 0 says find free inode from beginning of | ||
57 | a group. dull code!! */ | ||
58 | req.pr_entry_bh = NULL; | ||
59 | |||
60 | ret = nilfs_palloc_prepare_alloc_entry(ifile, &req); | ||
61 | if (!ret) { | ||
62 | ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1, | ||
63 | &req.pr_entry_bh); | ||
64 | if (ret < 0) | ||
65 | nilfs_palloc_abort_alloc_entry(ifile, &req); | ||
66 | } | ||
67 | if (ret < 0) { | ||
68 | brelse(req.pr_entry_bh); | ||
69 | return ret; | ||
70 | } | ||
71 | nilfs_palloc_commit_alloc_entry(ifile, &req); | ||
72 | nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); | ||
73 | nilfs_mdt_mark_dirty(ifile); | ||
74 | *out_ino = (ino_t)req.pr_entry_nr; | ||
75 | *out_bh = req.pr_entry_bh; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
/**
 * nilfs_ifile_delete_inode - delete a disk inode
 * @ifile: ifile inode
 * @ino: inode number
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The inode number @ino have not been allocated.
 */
int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
{
	struct nilfs_palloc_req req = {
		.pr_entry_nr = ino, .pr_entry_bh = NULL
	};
	struct nilfs_inode *raw_inode;
	void *kaddr;
	int ret;

	ret = nilfs_palloc_prepare_free_entry(ifile, &req);
	if (!ret) {
		ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 0,
						   &req.pr_entry_bh);
		if (ret < 0)
			nilfs_palloc_abort_free_entry(ifile, &req);
	}
	if (ret < 0) {
		/* pr_entry_bh may still be NULL; brelse(NULL) is a no-op */
		brelse(req.pr_entry_bh);
		return ret;
	}

	/* zero the on-disk inode's flags while the entry is mapped;
	   atomic mapping -- no sleeping between kmap/kunmap */
	kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0);
	raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
						 req.pr_entry_bh, kaddr);
	raw_inode->i_flags = 0;
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
	brelse(req.pr_entry_bh);

	nilfs_palloc_commit_free_entry(ifile, &req);

	return 0;
}
127 | |||
128 | int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, | ||
129 | struct buffer_head **out_bh) | ||
130 | { | ||
131 | struct super_block *sb = ifile->i_sb; | ||
132 | int err; | ||
133 | |||
134 | if (unlikely(!NILFS_VALID_INODE(sb, ino))) { | ||
135 | nilfs_error(sb, __func__, "bad inode number: %lu", | ||
136 | (unsigned long) ino); | ||
137 | return -EINVAL; | ||
138 | } | ||
139 | |||
140 | err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); | ||
141 | if (unlikely(err)) { | ||
142 | if (err == -EINVAL) | ||
143 | nilfs_error(sb, __func__, "ifile is broken"); | ||
144 | else | ||
145 | nilfs_warning(sb, __func__, | ||
146 | "unable to read inode: %lu", | ||
147 | (unsigned long) ino); | ||
148 | } | ||
149 | return err; | ||
150 | } | ||
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h new file mode 100644 index 000000000000..5d30a35679b5 --- /dev/null +++ b/fs/nilfs2/ifile.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * ifile.h - NILFS inode file | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Amagai Yoshiji <amagai@osrg.net> | ||
21 | * Revised by Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifndef _NILFS_IFILE_H | ||
26 | #define _NILFS_IFILE_H | ||
27 | |||
28 | #include <linux/fs.h> | ||
29 | #include <linux/buffer_head.h> | ||
30 | #include <linux/nilfs2_fs.h> | ||
31 | #include "mdt.h" | ||
32 | #include "alloc.h" | ||
33 | |||
/* gfp mask for ifile pages; simply reuses the common MDT mask */
#define NILFS_IFILE_GFP	NILFS_MDT_GFP
35 | |||
/*
 * nilfs_ifile_map_inode - map the on-disk inode entry for @ino
 *
 * Maps the page of @ibh with kmap() (may sleep) and returns a pointer
 * to the raw nilfs_inode within it.  Must be paired with
 * nilfs_ifile_unmap_inode() on the same buffer.
 */
static inline struct nilfs_inode *
nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
{
	void *kaddr = kmap(ibh->b_page);
	return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
}
42 | |||
/*
 * nilfs_ifile_unmap_inode - undo nilfs_ifile_map_inode()
 *
 * Only the buffer's page is needed; @ifile and @ino are kept in the
 * signature for symmetry with the map function.
 */
static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
					   struct buffer_head *ibh)
{
	kunmap(ibh->b_page);
}
48 | |||
/* Implemented in ifile.c */
int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
int nilfs_ifile_delete_inode(struct inode *, ino_t);
int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
52 | |||
53 | #endif /* _NILFS_IFILE_H */ | ||
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c new file mode 100644 index 000000000000..49ab4a49bb4f --- /dev/null +++ b/fs/nilfs2/inode.c | |||
@@ -0,0 +1,785 @@ | |||
1 | /* | ||
2 | * inode.c - NILFS inode operations. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/mpage.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include <linux/uio.h> | ||
28 | #include "nilfs.h" | ||
29 | #include "segment.h" | ||
30 | #include "page.h" | ||
31 | #include "mdt.h" | ||
32 | #include "cpfile.h" | ||
33 | #include "ifile.h" | ||
34 | |||
35 | |||
/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode: inode struct of the target file
 * @blkoff: file block number
 * @bh_result: buffer head to be mapped on
 * @create: indicate whether allocating the block or not when it has not
 *          been allocated yet.
 *
 * This function does not issue actual read request of the specified data
 * block. It is done by VFS.
 * Bulk read for direct-io is not supported yet. (should be supported)
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	unsigned long blknum = 0;
	int err = 0, ret;
	struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode));

	/* This exclusion control is a workaround; should be revised */
	down_read(&NILFS_MDT(dat)->mi_sem);	/* XXX */
	ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum);
	up_read(&NILFS_MDT(dat)->mi_sem);	/* XXX */
	if (ret == 0) {	/* found: hand the existing block back to VFS */
		map_bh(bh_result, inode->i_sb, blknum);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		/* block allocation is done inside a transaction so the
		   segment constructor sees a consistent bmap */
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * The get_block() function could be called
				 * from multiple callers for an inode.
				 * However, the page having this block must
				 * be locked in this case.
				 */
				printk(KERN_WARNING
				       "nilfs_get_block: a race condition "
				       "while inserting a data block. "
				       "(inode number=%lu, file block "
				       "offset=%llu)\n",
				       inode->i_ino,
				       (unsigned long long)blkoff);
				err = 0;
			} else if (err == -EINVAL) {
				nilfs_error(inode->i_sb, __func__,
					    "broken bmap (inode=%lu)\n",
					    inode->i_ino);
				err = -EIO;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
						      to proper value */
	} else if (ret == -ENOENT) {
		/* not found is not error (e.g. hole); must return without
		   the mapped state flag. */
		;
	} else {
		err = ret;
	}

out:
	return err;
}
115 | |||
/**
 * nilfs_readpage() - implement readpage() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read
 * @page: the page to be read
 *
 * Delegates to the generic mpage reader; blocks are mapped through
 * nilfs_get_block() with create == 0 (lookup only, no allocation).
 */
static int nilfs_readpage(struct file *file, struct page *page)
{
	return mpage_readpage(page, nilfs_get_block);
}
126 | |||
/**
 * nilfs_readpages() - implement readpages() method of nilfs_aops {}
 * address_space_operations.
 * @file: file struct of the file to be read
 * @mapping: address_space struct used for reading multiple pages
 * @pages: the pages to be read
 * @nr_pages: number of pages to be read
 *
 * Readahead entry point; delegates to the generic mpage reader using
 * nilfs_get_block() for block lookup.
 */
static int nilfs_readpages(struct file *file, struct address_space *mapping,
			   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
}
140 | |||
141 | static int nilfs_writepages(struct address_space *mapping, | ||
142 | struct writeback_control *wbc) | ||
143 | { | ||
144 | struct inode *inode = mapping->host; | ||
145 | int err = 0; | ||
146 | |||
147 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
148 | err = nilfs_construct_dsync_segment(inode->i_sb, inode, | ||
149 | wbc->range_start, | ||
150 | wbc->range_end); | ||
151 | return err; | ||
152 | } | ||
153 | |||
154 | static int nilfs_writepage(struct page *page, struct writeback_control *wbc) | ||
155 | { | ||
156 | struct inode *inode = page->mapping->host; | ||
157 | int err; | ||
158 | |||
159 | redirty_page_for_writepage(wbc, page); | ||
160 | unlock_page(page); | ||
161 | |||
162 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
163 | err = nilfs_construct_segment(inode->i_sb); | ||
164 | if (unlikely(err)) | ||
165 | return err; | ||
166 | } else if (wbc->for_reclaim) | ||
167 | nilfs_flush_segment(inode->i_sb, inode->i_ino); | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | static int nilfs_set_page_dirty(struct page *page) | ||
173 | { | ||
174 | int ret = __set_page_dirty_buffers(page); | ||
175 | |||
176 | if (ret) { | ||
177 | struct inode *inode = page->mapping->host; | ||
178 | struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); | ||
179 | unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); | ||
180 | |||
181 | nilfs_set_file_dirty(sbi, inode, nr_dirty); | ||
182 | } | ||
183 | return ret; | ||
184 | } | ||
185 | |||
186 | static int nilfs_write_begin(struct file *file, struct address_space *mapping, | ||
187 | loff_t pos, unsigned len, unsigned flags, | ||
188 | struct page **pagep, void **fsdata) | ||
189 | |||
190 | { | ||
191 | struct inode *inode = mapping->host; | ||
192 | int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); | ||
193 | |||
194 | if (unlikely(err)) | ||
195 | return err; | ||
196 | |||
197 | *pagep = NULL; | ||
198 | err = block_write_begin(file, mapping, pos, len, flags, pagep, | ||
199 | fsdata, nilfs_get_block); | ||
200 | if (unlikely(err)) | ||
201 | nilfs_transaction_abort(inode->i_sb); | ||
202 | return err; | ||
203 | } | ||
204 | |||
/*
 * nilfs_write_end - write_end() method of nilfs_aops; pairs with
 * nilfs_write_begin() and commits the transaction opened there.
 * Returns the commit error if any, otherwise the number of bytes copied.
 */
static int nilfs_write_end(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, unsigned copied,
			   struct page *page, void *fsdata)
{
	struct inode *inode = mapping->host;
	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
	unsigned nr_dirty;
	int err;

	/* Count still-clean buffers BEFORE generic_write_end() dirties
	   them, so only the newly dirtied blocks are accounted below. */
	nr_dirty = nilfs_page_count_clean_buffers(page, start,
						  start + copied);
	copied = generic_write_end(file, mapping, pos, len, copied, page,
				   fsdata);
	nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty);
	err = nilfs_transaction_commit(inode->i_sb);
	return err ? : copied;	/* a commit error takes precedence */
}
222 | |||
223 | static ssize_t | ||
224 | nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | ||
225 | loff_t offset, unsigned long nr_segs) | ||
226 | { | ||
227 | struct file *file = iocb->ki_filp; | ||
228 | struct inode *inode = file->f_mapping->host; | ||
229 | ssize_t size; | ||
230 | |||
231 | if (rw == WRITE) | ||
232 | return 0; | ||
233 | |||
234 | /* Needs synchronization with the cleaner */ | ||
235 | size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | ||
236 | offset, nr_segs, nilfs_get_block, NULL); | ||
237 | return size; | ||
238 | } | ||
239 | |||
/*
 * Address-space operations shared by regular files, directories and
 * symlinks.  The commented-out entries are placeholders for methods
 * not implemented yet.
 */
struct address_space_operations nilfs_aops = {
	.writepage		= nilfs_writepage,
	.readpage		= nilfs_readpage,
	/* .sync_page		= nilfs_sync_page, */
	.writepages		= nilfs_writepages,
	.set_page_dirty		= nilfs_set_page_dirty,
	.readpages		= nilfs_readpages,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	/* .releasepage	= nilfs_releasepage, */
	.invalidatepage		= block_invalidatepage,
	.direct_IO		= nilfs_direct_IO,
};
253 | |||
/**
 * nilfs_new_inode - allocate a fresh inode for a new file
 * @dir: parent directory inode
 * @mode: file type and permission bits of the new inode
 *
 * Allocates an inode number from the ifile, initializes ownership,
 * timestamps, flags and (for regular files, directories and symlinks)
 * the block mapping, then hashes the inode and marks it dirty.
 *
 * Return: the new inode, or an ERR_PTR() on failure.
 */
struct inode *nilfs_new_inode(struct inode *dir, int mode)
{
	struct super_block *sb = dir->i_sb;
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct inode *inode;
	struct nilfs_inode_info *ii;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	/* Forbid FS-recursing allocations on this mapping so page cache
	   allocation cannot re-enter the filesystem */
	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);

	ii = NILFS_I(inode);
	ii->i_state = 1 << NILFS_I_NEW;

	err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */

	atomic_inc(&sbi->s_inodes_count);

	inode->i_uid = current_fsuid();
	if (dir->i_mode & S_ISGID) {
		/* setgid directory: inherit group; a new subdirectory
		   also inherits the setgid bit itself */
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current_fsgid();

	inode->i_mode = mode;
	inode->i_ino = ino;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		/* NULL raw inode presumably initializes an empty bmap
		   for the new file -- confirm against nilfs_bmap_read() */
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_bmap;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	/* persistent flags are inherited from the parent directory */
	ii->i_flags = NILFS_I(dir)->i_flags;
	if (S_ISLNK(mode))
		ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL);
	if (!S_ISDIR(mode))
		ii->i_flags &= ~NILFS_DIRSYNC_FL;

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
#ifdef CONFIG_NILFS_FS_POSIX_ACL
	ii->i_acl = NULL;
	ii->i_default_acl = NULL;
#endif
	ii->i_cno = 0;
	nilfs_set_inode_flags(inode);
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
	insert_inode_hash(inode);

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		goto failed_acl; /* never occur. When supporting
				    nilfs_init_acl(), proper cancellation of
				    above jobs should be considered */

	mark_inode_dirty(inode);
	return inode;

failed_acl:
failed_bmap:
	inode->i_nlink = 0;
	iput(inode);  /* raw_inode will be deleted through
			 generic_delete_inode() */
	goto failed;

failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);  /* if i_nlink == 1, generic_forget_inode() will be
			 called */
failed:
	return ERR_PTR(err);
}
344 | |||
345 | void nilfs_free_inode(struct inode *inode) | ||
346 | { | ||
347 | struct super_block *sb = inode->i_sb; | ||
348 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
349 | |||
350 | clear_inode(inode); | ||
351 | /* XXX: check error code? Is there any thing I can do? */ | ||
352 | (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); | ||
353 | atomic_dec(&sbi->s_inodes_count); | ||
354 | } | ||
355 | |||
/**
 * nilfs_set_inode_flags - propagate NILFS on-disk flags to VFS inode flags
 * @inode: target inode
 *
 * Translates the persistent NILFS_*_FL bits kept in nilfs_inode_info
 * into the in-core S_* flags, and re-applies the ~__GFP_FS restriction
 * on the inode's page cache mapping.
 */
void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;

	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
			    S_DIRSYNC);
	if (flags & NILFS_SYNC_FL)
		inode->i_flags |= S_SYNC;
	if (flags & NILFS_APPEND_FL)
		inode->i_flags |= S_APPEND;
	if (flags & NILFS_IMMUTABLE_FL)
		inode->i_flags |= S_IMMUTABLE;
	/* When NILFS_ATIME_DISABLE is defined, the condition below is
	   compiled out and S_NOATIME is set unconditionally. */
#ifndef NILFS_ATIME_DISABLE
	if (flags & NILFS_NOATIME_FL)
#endif
		inode->i_flags |= S_NOATIME;
	if (flags & NILFS_DIRSYNC_FL)
		inode->i_flags |= S_DIRSYNC;
	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
}
377 | |||
/**
 * nilfs_read_inode_common - fill an in-core inode from its on-disk image
 * @inode: in-core inode to initialize
 * @raw_inode: on-disk inode entry (little-endian fields)
 *
 * Return: 0 on success, -EINVAL if the entry describes a deleted inode
 * (zero link count and mode), or a negative error from
 * nilfs_bmap_read().
 */
int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
	inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	/* atime mirrors mtime -- the on-disk inode apparently has no
	   separate atime field; TODO confirm against nilfs2_fs.h */
	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
	if (inode->i_nlink == 0 && inode->i_mode == 0)
		return -EINVAL; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_cno = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}
418 | |||
419 | static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | ||
420 | struct inode *inode) | ||
421 | { | ||
422 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
423 | struct inode *dat = nilfs_dat_inode(sbi->s_nilfs); | ||
424 | struct buffer_head *bh; | ||
425 | struct nilfs_inode *raw_inode; | ||
426 | int err; | ||
427 | |||
428 | down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
429 | err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); | ||
430 | if (unlikely(err)) | ||
431 | goto bad_inode; | ||
432 | |||
433 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); | ||
434 | |||
435 | #ifdef CONFIG_NILFS_FS_POSIX_ACL | ||
436 | ii->i_acl = NILFS_ACL_NOT_CACHED; | ||
437 | ii->i_default_acl = NILFS_ACL_NOT_CACHED; | ||
438 | #endif | ||
439 | if (nilfs_read_inode_common(inode, raw_inode)) | ||
440 | goto failed_unmap; | ||
441 | |||
442 | if (S_ISREG(inode->i_mode)) { | ||
443 | inode->i_op = &nilfs_file_inode_operations; | ||
444 | inode->i_fop = &nilfs_file_operations; | ||
445 | inode->i_mapping->a_ops = &nilfs_aops; | ||
446 | } else if (S_ISDIR(inode->i_mode)) { | ||
447 | inode->i_op = &nilfs_dir_inode_operations; | ||
448 | inode->i_fop = &nilfs_dir_operations; | ||
449 | inode->i_mapping->a_ops = &nilfs_aops; | ||
450 | } else if (S_ISLNK(inode->i_mode)) { | ||
451 | inode->i_op = &nilfs_symlink_inode_operations; | ||
452 | inode->i_mapping->a_ops = &nilfs_aops; | ||
453 | } else { | ||
454 | inode->i_op = &nilfs_special_inode_operations; | ||
455 | init_special_inode( | ||
456 | inode, inode->i_mode, | ||
457 | new_decode_dev(le64_to_cpu(raw_inode->i_device_code))); | ||
458 | } | ||
459 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); | ||
460 | brelse(bh); | ||
461 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
462 | nilfs_set_inode_flags(inode); | ||
463 | return 0; | ||
464 | |||
465 | failed_unmap: | ||
466 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); | ||
467 | brelse(bh); | ||
468 | |||
469 | bad_inode: | ||
470 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
471 | return err; | ||
472 | } | ||
473 | |||
474 | struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) | ||
475 | { | ||
476 | struct inode *inode; | ||
477 | int err; | ||
478 | |||
479 | inode = iget_locked(sb, ino); | ||
480 | if (unlikely(!inode)) | ||
481 | return ERR_PTR(-ENOMEM); | ||
482 | if (!(inode->i_state & I_NEW)) | ||
483 | return inode; | ||
484 | |||
485 | err = __nilfs_read_inode(sb, ino, inode); | ||
486 | if (unlikely(err)) { | ||
487 | iget_failed(inode); | ||
488 | return ERR_PTR(err); | ||
489 | } | ||
490 | unlock_new_inode(inode); | ||
491 | return inode; | ||
492 | } | ||
493 | |||
/**
 * nilfs_write_inode_common - copy an in-core inode to its on-disk image
 * @inode: in-core inode to write out
 * @raw_inode: on-disk inode entry to fill (little-endian fields)
 * @has_bmap: nonzero to also serialize the block mapping
 *
 * When @has_bmap is zero and the inode is a device node, the device
 * number is stored in place of the bmap data.
 */
void nilfs_write_inode_common(struct inode *inode,
			      struct nilfs_inode *raw_inode, int has_bmap)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	raw_inode->i_uid = cpu_to_le32(inode->i_uid);
	raw_inode->i_gid = cpu_to_le32(inode->i_gid);
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
	raw_inode->i_size = cpu_to_le64(inode->i_size);
	raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
	raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
	raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
	raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
	raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);

	raw_inode->i_flags = cpu_to_le32(ii->i_flags);
	raw_inode->i_generation = cpu_to_le32(inode->i_generation);

	if (has_bmap)
		nilfs_bmap_write(ii->i_bmap, raw_inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		raw_inode->i_device_code =
			cpu_to_le64(new_encode_dev(inode->i_rdev));
	/* When extending inode, nilfs->ns_inode_size should be checked
	   for substitutions of appended fields */
}
521 | |||
/**
 * nilfs_update_inode - copy the in-core inode into its ifile buffer
 * @inode: inode to write out
 * @ibh: buffer head holding the inode's on-disk entry
 *
 * A freshly created inode's entry is zeroed first.  The bmap is NOT
 * serialized here (has_bmap == 0); see the workaround note below.
 */
void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
{
	ino_t ino = inode->i_ino;
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_inode *raw_inode;

	raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh);

	/* The buffer is guarded with lock_buffer() by the caller */
	if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
		memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size);
	set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);

	nilfs_write_inode_common(inode, raw_inode, 0);
		/* XXX: call with has_bmap = 0 is a workaround to avoid
		   deadlock of bmap. This delays update of i_bmap to just
		   before writing */
	nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh);
}
543 | |||
544 | #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ | ||
545 | |||
/**
 * nilfs_truncate_bmap - remove the block mapping beyond a given offset
 * @ii: nilfs inode info whose bmap is truncated
 * @from: first file block offset to remove
 *
 * Works backwards in chunks of at most NILFS_MAX_TRUNCATE_BLOCKS,
 * relaxing lock pressure between passes; a single -ENOMEM per pass is
 * retried once after the relax.  Errors are reported via
 * nilfs_error()/nilfs_warning() since truncation has no return value.
 */
static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	unsigned long b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;		/* bmap is already empty */
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;		/* nothing mapped beyond the new end */

	/* chop off at most NILFS_MAX_TRUNCATE_BLOCKS per pass */
	b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

failed:
	if (ret == -EINVAL)
		nilfs_error(ii->vfs_inode.i_sb, __func__,
			    "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino);
	else
		nilfs_warning(ii->vfs_inode.i_sb, __func__,
			      "failed to truncate bmap (ino=%lu, err=%d)",
			      ii->vfs_inode.i_ino, ret);
}
580 | |||
/**
 * nilfs_truncate - truncate() method for nilfs inodes
 * @inode: inode whose data is cut down to the already-updated i_size
 *
 * Zeroes the tail of the last partial block and releases the mapped
 * blocks beyond the new end, all inside a transaction.
 */
void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	/* first whole block beyond the new size */
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	/* zero the portion of the last block past i_size */
	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
	nilfs_transaction_commit(sb);
	/* May construct a logical segment and may fail in sync mode.
	   But truncate has no return value. */
}
611 | |||
/**
 * nilfs_delete_inode - delete_inode() method; free a no-longer-linked inode
 * @inode: inode being destroyed
 *
 * Bad inodes are merely cleared.  Otherwise the page cache and the
 * entire block mapping are torn down and the ifile entry released,
 * within a transaction.
 */
void nilfs_delete_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (unlikely(is_bad_inode(inode))) {
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);

	/* drop the whole block mapping, then the ifile entry */
	nilfs_truncate_bmap(ii, 0);
	nilfs_free_inode(inode);
	/* nilfs_free_inode() marks inode buffer dirty */
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/* May construct a logical segment and may fail in sync mode.
	   But delete_inode has no return value. */
}
638 | |||
639 | int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
640 | { | ||
641 | struct nilfs_transaction_info ti; | ||
642 | struct inode *inode = dentry->d_inode; | ||
643 | struct super_block *sb = inode->i_sb; | ||
644 | int err; | ||
645 | |||
646 | err = inode_change_ok(inode, iattr); | ||
647 | if (err) | ||
648 | return err; | ||
649 | |||
650 | err = nilfs_transaction_begin(sb, &ti, 0); | ||
651 | if (unlikely(err)) | ||
652 | return err; | ||
653 | err = inode_setattr(inode, iattr); | ||
654 | if (!err && (iattr->ia_valid & ATTR_MODE)) | ||
655 | err = nilfs_acl_chmod(inode); | ||
656 | if (likely(!err)) | ||
657 | err = nilfs_transaction_commit(sb); | ||
658 | else | ||
659 | nilfs_transaction_abort(sb); | ||
660 | |||
661 | return err; | ||
662 | } | ||
663 | |||
/**
 * nilfs_load_inode_block - get (and cache) the buffer of @inode's entry
 * @sbi: nilfs super block info
 * @inode: target inode
 * @pbh: place to return the buffer head, with an extra reference taken
 *
 * ii->i_bh caches the ifile buffer.  Because reading the block may
 * sleep, s_inode_lock is dropped around the read and i_bh is
 * re-checked afterwards to resolve races with a concurrent loader.
 *
 * Return: 0 on success or a negative error code.
 */
int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
			   struct buffer_head **pbh)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&sbi->s_inode_lock);
	/* ii->i_bh is protected by s_inode_lock, taken here */
	if (ii->i_bh == NULL) {
		spin_unlock(&sbi->s_inode_lock);
		err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino,
						  pbh);
		if (unlikely(err))
			return err;
		spin_lock(&sbi->s_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else {
			/* another task installed the buffer meanwhile;
			   drop ours and use the cached one */
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);
	spin_unlock(&sbi->s_inode_lock);
	return 0;
}
692 | |||
693 | int nilfs_inode_dirty(struct inode *inode) | ||
694 | { | ||
695 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
696 | struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); | ||
697 | int ret = 0; | ||
698 | |||
699 | if (!list_empty(&ii->i_dirty)) { | ||
700 | spin_lock(&sbi->s_inode_lock); | ||
701 | ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || | ||
702 | test_bit(NILFS_I_BUSY, &ii->i_state); | ||
703 | spin_unlock(&sbi->s_inode_lock); | ||
704 | } | ||
705 | return ret; | ||
706 | } | ||
707 | |||
/**
 * nilfs_set_file_dirty - account dirty blocks and queue @inode for writeout
 * @sbi: nilfs super block info
 * @inode: inode whose data became dirty
 * @nr_dirty: number of newly dirtied blocks to account
 *
 * Adds @nr_dirty to the filesystem-wide dirty block counter and, on
 * the first dirtying, moves the inode onto the s_dirty_files list with
 * a reference held.
 *
 * Return: 0 on success, -EINVAL if the inode is being freed.
 */
int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
			 unsigned nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;	/* already queued/accounted as dirty */

	spin_lock(&sbi->s_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/* Because this routine may race with nilfs_dispose_list(),
		   we have to check NILFS_I_QUEUED here, too. */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/* This will happen when somebody is freeing
			   this inode. */
			nilfs_warning(sbi->s_super, __func__,
				      "cannot get inode (ino=%lu)\n",
				      inode->i_ino);
			spin_unlock(&sbi->s_inode_lock);
			return -EINVAL; /* NILFS_I_DIRTY may remain for
					   freeing inode */
		}
		list_del(&ii->i_dirty);
		list_add_tail(&ii->i_dirty, &sbi->s_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&sbi->s_inode_lock);
	return 0;
}
740 | |||
/**
 * nilfs_mark_inode_dirty - write the in-core inode into its ifile block
 * @inode: inode to flush into the ifile
 *
 * Loads (or reuses) the cached inode block, copies the inode under
 * buffer lock, then marks both the buffer and the ifile dirty.
 *
 * Return: 0 on success or a negative error code.
 */
int nilfs_mark_inode_dirty(struct inode *inode)
{
	struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
	struct buffer_head *ibh;
	int err;

	err = nilfs_load_inode_block(sbi, inode, &ibh);
	if (unlikely(err)) {
		nilfs_warning(inode->i_sb, __func__,
			      "failed to reget inode block.\n");
		return err;
	}
	/* nilfs_update_inode() requires the buffer locked */
	lock_buffer(ibh);
	nilfs_update_inode(inode, ibh);
	unlock_buffer(ibh);
	nilfs_mdt_mark_buffer_dirty(ibh);
	nilfs_mdt_mark_dirty(sbi->s_ifile);
	brelse(ibh);
	return 0;
}
761 | |||
/**
 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
 * @inode: inode of the file to be registered.
 *
 * nilfs_dirty_inode() loads an inode block containing the specified
 * @inode and copies data from a nilfs_inode to a corresponding inode
 * entry in the inode block. This operation is excluded from the segment
 * construction. This function can be called both as a single operation
 * and as a part of indivisible file operations.
 */
void nilfs_dirty_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;

	if (is_bad_inode(inode)) {
		nilfs_warning(inode->i_sb, __func__,
			      "tried to mark bad_inode dirty. ignored.\n");
		dump_stack();
		return;
	}
	/* wrap the inode-block update in its own transaction */
	nilfs_transaction_begin(inode->i_sb, &ti, 0);
	nilfs_mark_inode_dirty(inode);
	nilfs_transaction_commit(inode->i_sb); /* never fails */
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c new file mode 100644 index 000000000000..108d281ebca5 --- /dev/null +++ b/fs/nilfs2/ioctl.c | |||
@@ -0,0 +1,654 @@ | |||
1 | /* | ||
2 | * ioctl.c - NILFS ioctl operations. | ||
3 | * | ||
4 | * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/fs.h> | ||
24 | #include <linux/wait.h> | ||
25 | #include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ | ||
26 | #include <linux/capability.h> /* capable() */ | ||
27 | #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "nilfs.h" | ||
30 | #include "segment.h" | ||
31 | #include "bmap.h" | ||
32 | #include "cpfile.h" | ||
33 | #include "sufile.h" | ||
34 | #include "dat.h" | ||
35 | |||
36 | |||
37 | static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, | ||
38 | struct nilfs_argv *argv, int dir, | ||
39 | ssize_t (*dofunc)(struct the_nilfs *, | ||
40 | __u64 *, int, | ||
41 | void *, size_t, size_t)) | ||
42 | { | ||
43 | void *buf; | ||
44 | void __user *base = (void __user *)(unsigned long)argv->v_base; | ||
45 | size_t maxmembs, total, n; | ||
46 | ssize_t nr; | ||
47 | int ret, i; | ||
48 | __u64 pos, ppos; | ||
49 | |||
50 | if (argv->v_nmembs == 0) | ||
51 | return 0; | ||
52 | |||
53 | if (argv->v_size > PAGE_SIZE) | ||
54 | return -EINVAL; | ||
55 | |||
56 | buf = (void *)__get_free_pages(GFP_NOFS, 0); | ||
57 | if (unlikely(!buf)) | ||
58 | return -ENOMEM; | ||
59 | maxmembs = PAGE_SIZE / argv->v_size; | ||
60 | |||
61 | ret = 0; | ||
62 | total = 0; | ||
63 | pos = argv->v_index; | ||
64 | for (i = 0; i < argv->v_nmembs; i += n) { | ||
65 | n = (argv->v_nmembs - i < maxmembs) ? | ||
66 | argv->v_nmembs - i : maxmembs; | ||
67 | if ((dir & _IOC_WRITE) && | ||
68 | copy_from_user(buf, base + argv->v_size * i, | ||
69 | argv->v_size * n)) { | ||
70 | ret = -EFAULT; | ||
71 | break; | ||
72 | } | ||
73 | ppos = pos; | ||
74 | nr = dofunc(nilfs, &pos, argv->v_flags, buf, argv->v_size, | ||
75 | n); | ||
76 | if (nr < 0) { | ||
77 | ret = nr; | ||
78 | break; | ||
79 | } | ||
80 | if ((dir & _IOC_READ) && | ||
81 | copy_to_user(base + argv->v_size * i, buf, | ||
82 | argv->v_size * nr)) { | ||
83 | ret = -EFAULT; | ||
84 | break; | ||
85 | } | ||
86 | total += nr; | ||
87 | if ((size_t)nr < n) | ||
88 | break; | ||
89 | if (pos == ppos) | ||
90 | pos += n; | ||
91 | } | ||
92 | argv->v_nmembs = total; | ||
93 | |||
94 | free_pages((unsigned long)buf, 0); | ||
95 | return ret; | ||
96 | } | ||
97 | |||
98 | static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, | ||
99 | unsigned int cmd, void __user *argp) | ||
100 | { | ||
101 | struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; | ||
102 | struct nilfs_transaction_info ti; | ||
103 | struct nilfs_cpmode cpmode; | ||
104 | int ret; | ||
105 | |||
106 | if (!capable(CAP_SYS_ADMIN)) | ||
107 | return -EPERM; | ||
108 | if (copy_from_user(&cpmode, argp, sizeof(cpmode))) | ||
109 | return -EFAULT; | ||
110 | |||
111 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
112 | ret = nilfs_cpfile_change_cpmode( | ||
113 | cpfile, cpmode.cm_cno, cpmode.cm_mode); | ||
114 | if (unlikely(ret < 0)) { | ||
115 | nilfs_transaction_abort(inode->i_sb); | ||
116 | return ret; | ||
117 | } | ||
118 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
119 | return ret; | ||
120 | } | ||
121 | |||
122 | static int | ||
123 | nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, | ||
124 | unsigned int cmd, void __user *argp) | ||
125 | { | ||
126 | struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; | ||
127 | struct nilfs_transaction_info ti; | ||
128 | __u64 cno; | ||
129 | int ret; | ||
130 | |||
131 | if (!capable(CAP_SYS_ADMIN)) | ||
132 | return -EPERM; | ||
133 | if (copy_from_user(&cno, argp, sizeof(cno))) | ||
134 | return -EFAULT; | ||
135 | |||
136 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
137 | ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); | ||
138 | if (unlikely(ret < 0)) { | ||
139 | nilfs_transaction_abort(inode->i_sb); | ||
140 | return ret; | ||
141 | } | ||
142 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | static ssize_t | ||
147 | nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
148 | void *buf, size_t size, size_t nmembs) | ||
149 | { | ||
150 | return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, | ||
151 | nmembs); | ||
152 | } | ||
153 | |||
154 | static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, | ||
155 | unsigned int cmd, void __user *argp) | ||
156 | { | ||
157 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
158 | struct nilfs_argv argv; | ||
159 | int ret; | ||
160 | |||
161 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
162 | return -EFAULT; | ||
163 | |||
164 | down_read(&nilfs->ns_segctor_sem); | ||
165 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
166 | nilfs_ioctl_do_get_cpinfo); | ||
167 | up_read(&nilfs->ns_segctor_sem); | ||
168 | if (ret < 0) | ||
169 | return ret; | ||
170 | |||
171 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
172 | ret = -EFAULT; | ||
173 | return ret; | ||
174 | } | ||
175 | |||
176 | static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, | ||
177 | unsigned int cmd, void __user *argp) | ||
178 | { | ||
179 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
180 | struct nilfs_cpstat cpstat; | ||
181 | int ret; | ||
182 | |||
183 | down_read(&nilfs->ns_segctor_sem); | ||
184 | ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); | ||
185 | up_read(&nilfs->ns_segctor_sem); | ||
186 | if (ret < 0) | ||
187 | return ret; | ||
188 | |||
189 | if (copy_to_user(argp, &cpstat, sizeof(cpstat))) | ||
190 | ret = -EFAULT; | ||
191 | return ret; | ||
192 | } | ||
193 | |||
194 | static ssize_t | ||
195 | nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
196 | void *buf, size_t size, size_t nmembs) | ||
197 | { | ||
198 | return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); | ||
199 | } | ||
200 | |||
201 | static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, | ||
202 | unsigned int cmd, void __user *argp) | ||
203 | { | ||
204 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
205 | struct nilfs_argv argv; | ||
206 | int ret; | ||
207 | |||
208 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
209 | return -EFAULT; | ||
210 | |||
211 | down_read(&nilfs->ns_segctor_sem); | ||
212 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
213 | nilfs_ioctl_do_get_suinfo); | ||
214 | up_read(&nilfs->ns_segctor_sem); | ||
215 | if (ret < 0) | ||
216 | return ret; | ||
217 | |||
218 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
219 | ret = -EFAULT; | ||
220 | return ret; | ||
221 | } | ||
222 | |||
223 | static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, | ||
224 | unsigned int cmd, void __user *argp) | ||
225 | { | ||
226 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
227 | struct nilfs_sustat sustat; | ||
228 | int ret; | ||
229 | |||
230 | down_read(&nilfs->ns_segctor_sem); | ||
231 | ret = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); | ||
232 | up_read(&nilfs->ns_segctor_sem); | ||
233 | if (ret < 0) | ||
234 | return ret; | ||
235 | |||
236 | if (copy_to_user(argp, &sustat, sizeof(sustat))) | ||
237 | ret = -EFAULT; | ||
238 | return ret; | ||
239 | } | ||
240 | |||
241 | static ssize_t | ||
242 | nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
243 | void *buf, size_t size, size_t nmembs) | ||
244 | { | ||
245 | return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); | ||
246 | } | ||
247 | |||
248 | static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, | ||
249 | unsigned int cmd, void __user *argp) | ||
250 | { | ||
251 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
252 | struct nilfs_argv argv; | ||
253 | int ret; | ||
254 | |||
255 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
256 | return -EFAULT; | ||
257 | |||
258 | down_read(&nilfs->ns_segctor_sem); | ||
259 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
260 | nilfs_ioctl_do_get_vinfo); | ||
261 | up_read(&nilfs->ns_segctor_sem); | ||
262 | if (ret < 0) | ||
263 | return ret; | ||
264 | |||
265 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
266 | ret = -EFAULT; | ||
267 | return ret; | ||
268 | } | ||
269 | |||
270 | static ssize_t | ||
271 | nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
272 | void *buf, size_t size, size_t nmembs) | ||
273 | { | ||
274 | struct inode *dat = nilfs_dat_inode(nilfs); | ||
275 | struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; | ||
276 | struct nilfs_bdesc *bdescs = buf; | ||
277 | int ret, i; | ||
278 | |||
279 | for (i = 0; i < nmembs; i++) { | ||
280 | ret = nilfs_bmap_lookup_at_level(bmap, | ||
281 | bdescs[i].bd_offset, | ||
282 | bdescs[i].bd_level + 1, | ||
283 | &bdescs[i].bd_blocknr); | ||
284 | if (ret < 0) { | ||
285 | if (ret != -ENOENT) | ||
286 | return ret; | ||
287 | bdescs[i].bd_blocknr = 0; | ||
288 | } | ||
289 | } | ||
290 | return nmembs; | ||
291 | } | ||
292 | |||
293 | static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, | ||
294 | unsigned int cmd, void __user *argp) | ||
295 | { | ||
296 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
297 | struct nilfs_argv argv; | ||
298 | int ret; | ||
299 | |||
300 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
301 | return -EFAULT; | ||
302 | |||
303 | down_read(&nilfs->ns_segctor_sem); | ||
304 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
305 | nilfs_ioctl_do_get_bdescs); | ||
306 | up_read(&nilfs->ns_segctor_sem); | ||
307 | if (ret < 0) | ||
308 | return ret; | ||
309 | |||
310 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
311 | ret = -EFAULT; | ||
312 | return ret; | ||
313 | } | ||
314 | |||
/*
 * nilfs_ioctl_move_inode_block - submit a read for one block to be moved
 * @inode:   GC inode the block belongs to
 * @vdesc:   descriptor of the block (virtual/real address, offset, flags)
 * @buffers: list collecting the submitted buffers via b_assoc_buffers
 *
 * Submits an asynchronous read of the block into the GC cache;
 * vd_flags == 0 selects the data-block path, otherwise the node-block
 * path.  On success the buffer (with @vdesc stashed in b_private) is
 * appended to @buffers for the caller to wait on.
 */
static int nilfs_ioctl_move_inode_block(struct inode *inode,
					struct nilfs_vdesc *vdesc,
					struct list_head *buffers)
{
	struct buffer_head *bh;
	int ret;

	if (vdesc->vd_flags == 0)
		ret = nilfs_gccache_submit_read_data(
			inode, vdesc->vd_offset, vdesc->vd_blocknr,
			vdesc->vd_vblocknr, &bh);
	else
		ret = nilfs_gccache_submit_read_node(
			inode, vdesc->vd_blocknr, vdesc->vd_vblocknr, &bh);

	if (unlikely(ret < 0)) {
		/* -ENOENT means the caller passed a stale or invalid
		   virtual block address; log it loudly. */
		if (ret == -ENOENT)
			printk(KERN_CRIT
			       "%s: invalid virtual block address (%s): "
			       "ino=%llu, cno=%llu, offset=%llu, "
			       "blocknr=%llu, vblocknr=%llu\n",
			       __func__, vdesc->vd_flags ? "node" : "data",
			       (unsigned long long)vdesc->vd_ino,
			       (unsigned long long)vdesc->vd_cno,
			       (unsigned long long)vdesc->vd_offset,
			       (unsigned long long)vdesc->vd_blocknr,
			       (unsigned long long)vdesc->vd_vblocknr);
		return ret;
	}
	/* Remember the descriptor for error reporting after I/O. */
	bh->b_private = vdesc;
	list_add_tail(&bh->b_assoc_buffers, buffers);
	return 0;
}
348 | |||
/*
 * nilfs_ioctl_do_move_blocks - read blocks to be relocated into the GC cache
 *
 * The vdesc array in @buf is expected to be grouped by (ino, cno).
 * Pass 1 fetches the GC inode once per group and submits reads for all
 * of its blocks; pass 2 waits for the reads and marks every buffer
 * dirty so the blocks are rewritten.  On any failure, all buffers
 * still queued are released.
 */
static ssize_t
nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags,
			   void *buf, size_t size, size_t nmembs)
{
	struct inode *inode;
	struct nilfs_vdesc *vdesc;
	struct buffer_head *bh, *n;
	LIST_HEAD(buffers);
	ino_t ino;
	__u64 cno;
	int i, ret;

	/* Pass 1: submit reads, one GC inode lookup per (ino, cno) run. */
	for (i = 0, vdesc = buf; i < nmembs; ) {
		ino = vdesc->vd_ino;
		cno = vdesc->vd_cno;
		inode = nilfs_gc_iget(nilfs, ino, cno);
		if (unlikely(inode == NULL)) {
			ret = -ENOMEM;
			goto failed;
		}
		do {
			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
							   &buffers);
			if (unlikely(ret < 0))
				goto failed;
			vdesc++;
		} while (++i < nmembs &&
			 vdesc->vd_ino == ino && vdesc->vd_cno == cno);
	}

	/* Pass 2: wait for each read and dirty the buffer. */
	list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) {
		ret = nilfs_gccache_wait_and_mark_dirty(bh);
		if (unlikely(ret < 0)) {
			/* -EEXIST reports a conflicting buffer; dump the
			   descriptor stashed in b_private by pass 1. */
			if (ret == -EEXIST) {
				vdesc = bh->b_private;
				printk(KERN_CRIT
				       "%s: conflicting %s buffer: "
				       "ino=%llu, cno=%llu, offset=%llu, "
				       "blocknr=%llu, vblocknr=%llu\n",
				       __func__,
				       vdesc->vd_flags ? "node" : "data",
				       (unsigned long long)vdesc->vd_ino,
				       (unsigned long long)vdesc->vd_cno,
				       (unsigned long long)vdesc->vd_offset,
				       (unsigned long long)vdesc->vd_blocknr,
				       (unsigned long long)vdesc->vd_vblocknr);
			}
			goto failed;
		}
		list_del_init(&bh->b_assoc_buffers);
		bh->b_private = NULL;
		brelse(bh);
	}
	return nmembs;

 failed:
	/* Release every buffer still on the list. */
	list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) {
		list_del_init(&bh->b_assoc_buffers);
		bh->b_private = NULL;
		brelse(bh);
	}
	return ret;
}
412 | |||
/* GC stage 1 wrapper: copy in the vdesc array and read the blocks to
   be relocated (see nilfs_ioctl_do_move_blocks). */
static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs,
					  struct nilfs_argv *argv,
					  int dir)
{
	return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
				     nilfs_ioctl_do_move_blocks);
}
420 | |||
421 | static ssize_t | ||
422 | nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, | ||
423 | int flags, void *buf, size_t size, | ||
424 | size_t nmembs) | ||
425 | { | ||
426 | struct inode *cpfile = nilfs->ns_cpfile; | ||
427 | struct nilfs_period *periods = buf; | ||
428 | int ret, i; | ||
429 | |||
430 | for (i = 0; i < nmembs; i++) { | ||
431 | ret = nilfs_cpfile_delete_checkpoints( | ||
432 | cpfile, periods[i].p_start, periods[i].p_end); | ||
433 | if (ret < 0) | ||
434 | return ret; | ||
435 | } | ||
436 | return nmembs; | ||
437 | } | ||
438 | |||
/* GC stage 2 wrapper: copy in checkpoint periods and delete them
   (see nilfs_ioctl_do_delete_checkpoints). */
static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
						 struct nilfs_argv *argv,
						 int dir)
{
	return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
				     nilfs_ioctl_do_delete_checkpoints);
}
446 | |||
447 | static ssize_t | ||
448 | nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
449 | void *buf, size_t size, size_t nmembs) | ||
450 | { | ||
451 | int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); | ||
452 | |||
453 | return (ret < 0) ? ret : nmembs; | ||
454 | } | ||
455 | |||
/* GC stage 3 wrapper: copy in virtual block numbers and free them in
   the DAT (see nilfs_ioctl_do_free_vblocknrs). */
static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
					     struct nilfs_argv *argv,
					     int dir)
{
	return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
				     nilfs_ioctl_do_free_vblocknrs);
}
463 | |||
/*
 * nilfs_ioctl_do_mark_blocks_dirty - dirty live DAT blocks for rewriting
 *
 * For each descriptor, re-resolves the block's current address through
 * the bmap and, when it still matches the address the caller recorded
 * (bd_oblocknr), marks the block dirty so it is copied to a new
 * segment.  Blocks whose address changed meanwhile are dead and are
 * skipped.
 */
static ssize_t
nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp,
				 int flags, void *buf, size_t size,
				 size_t nmembs)
{
	struct inode *dat = nilfs_dat_inode(nilfs);
	struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
	struct nilfs_bdesc *bdescs = buf;
	int ret, i;

	for (i = 0; i < nmembs; i++) {
		/* XXX: use macro or inline func to check liveness */
		ret = nilfs_bmap_lookup_at_level(bmap,
						 bdescs[i].bd_offset,
						 bdescs[i].bd_level + 1,
						 &bdescs[i].bd_blocknr);
		if (ret < 0) {
			if (ret != -ENOENT)
				return ret;
			/* hole: treat as unmapped (blocknr 0) */
			bdescs[i].bd_blocknr = 0;
		}
		if (bdescs[i].bd_blocknr != bdescs[i].bd_oblocknr)
			/* skip dead block */
			continue;
		if (bdescs[i].bd_level == 0) {
			/* level 0: a data block of the DAT file itself */
			ret = nilfs_mdt_mark_block_dirty(dat,
							 bdescs[i].bd_offset);
			if (ret < 0) {
				WARN_ON(ret == -ENOENT);
				return ret;
			}
		} else {
			/* higher level: an intermediate bmap block */
			ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset,
					      bdescs[i].bd_level);
			if (ret < 0) {
				WARN_ON(ret == -ENOENT);
				return ret;
			}
		}
	}
	return nmembs;
}
506 | |||
/* GC stage 4 wrapper: copy in block descriptors and dirty the live ones
   (see nilfs_ioctl_do_mark_blocks_dirty). */
static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
						struct nilfs_argv *argv,
						int dir)
{
	return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
				     nilfs_ioctl_do_mark_blocks_dirty);
}
514 | |||
515 | static ssize_t | ||
516 | nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
517 | void *buf, size_t size, size_t nmembs) | ||
518 | { | ||
519 | struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); | ||
520 | int ret; | ||
521 | |||
522 | if (unlikely(!sbi)) | ||
523 | return -EROFS; | ||
524 | ret = nilfs_segctor_add_segments_to_be_freed( | ||
525 | NILFS_SC(sbi), buf, nmembs); | ||
526 | nilfs_put_writer(nilfs); | ||
527 | |||
528 | return (ret < 0) ? ret : nmembs; | ||
529 | } | ||
530 | |||
/* GC stage 5 wrapper: copy in segment numbers and queue them for freeing
   (see nilfs_ioctl_do_free_segments). */
static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs,
					    struct nilfs_argv *argv,
					    int dir)
{
	return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
				     nilfs_ioctl_do_free_segments);
}
538 | |||
/*
 * nilfs_ioctl_prepare_clean_segments - run the five GC preparation stages
 * @nilfs: the_nilfs object
 * @argp:  user pointer to an array of five nilfs_argv descriptors, one
 *         per stage (move blocks, delete checkpoints, free vblocknrs,
 *         mark blocks dirty, free segments)
 *
 * Each stage is attempted in order; the comments below explain why a
 * failure at each point can still be aborted safely.  On any failure
 * the collected GC inodes are dropped and the error is logged.
 */
int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
				       void __user *argp)
{
	struct nilfs_argv argv[5];
	const char *msg;
	int dir, ret;

	if (copy_from_user(argv, argp, sizeof(argv)))
		return -EFAULT;

	/* All five stages copy data *into* the kernel. */
	dir = _IOC_WRITE;
	ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir);
	if (ret < 0) {
		msg = "cannot read source blocks";
		goto failed;
	}
	ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir);
	if (ret < 0) {
		/*
		 * can safely abort because checkpoints can be removed
		 * independently.
		 */
		msg = "cannot delete checkpoints";
		goto failed;
	}
	ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir);
	if (ret < 0) {
		/*
		 * can safely abort because DAT file is updated atomically
		 * using a copy-on-write technique.
		 */
		msg = "cannot delete virtual blocks from DAT file";
		goto failed;
	}
	ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir);
	if (ret < 0) {
		/*
		 * can safely abort because the operation is nondestructive.
		 */
		msg = "cannot mark copying blocks dirty";
		goto failed;
	}
	ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir);
	if (ret < 0) {
		/*
		 * can safely abort because this operation is atomic.
		 */
		msg = "cannot set segments to be freed";
		goto failed;
	}
	return 0;

 failed:
	nilfs_remove_all_gcinode(nilfs);
	printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n",
	       msg, ret);
	return ret;
}
597 | |||
598 | static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | ||
599 | unsigned int cmd, void __user *argp) | ||
600 | { | ||
601 | if (!capable(CAP_SYS_ADMIN)) | ||
602 | return -EPERM; | ||
603 | return nilfs_clean_segments(inode->i_sb, argp); | ||
604 | } | ||
605 | |||
606 | static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, | ||
607 | unsigned int cmd, void __user *argp) | ||
608 | { | ||
609 | __u64 cno; | ||
610 | int ret; | ||
611 | |||
612 | ret = nilfs_construct_segment(inode->i_sb); | ||
613 | if (ret < 0) | ||
614 | return ret; | ||
615 | |||
616 | if (argp != NULL) { | ||
617 | cno = NILFS_SB(inode->i_sb)->s_nilfs->ns_cno - 1; | ||
618 | if (copy_to_user(argp, &cno, sizeof(cno))) | ||
619 | return -EFAULT; | ||
620 | } | ||
621 | return 0; | ||
622 | } | ||
623 | |||
624 | long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
625 | { | ||
626 | struct inode *inode = filp->f_dentry->d_inode; | ||
627 | void __user *argp = (void * __user *)arg; | ||
628 | |||
629 | switch (cmd) { | ||
630 | case NILFS_IOCTL_CHANGE_CPMODE: | ||
631 | return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); | ||
632 | case NILFS_IOCTL_DELETE_CHECKPOINT: | ||
633 | return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); | ||
634 | case NILFS_IOCTL_GET_CPINFO: | ||
635 | return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); | ||
636 | case NILFS_IOCTL_GET_CPSTAT: | ||
637 | return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); | ||
638 | case NILFS_IOCTL_GET_SUINFO: | ||
639 | return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); | ||
640 | case NILFS_IOCTL_GET_SUSTAT: | ||
641 | return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); | ||
642 | case NILFS_IOCTL_GET_VINFO: | ||
643 | /* XXX: rename to ??? */ | ||
644 | return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); | ||
645 | case NILFS_IOCTL_GET_BDESCS: | ||
646 | return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); | ||
647 | case NILFS_IOCTL_CLEAN_SEGMENTS: | ||
648 | return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); | ||
649 | case NILFS_IOCTL_SYNC: | ||
650 | return nilfs_ioctl_sync(inode, filp, cmd, argp); | ||
651 | default: | ||
652 | return -ENOTTY; | ||
653 | } | ||
654 | } | ||
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c new file mode 100644 index 000000000000..47dd815433fd --- /dev/null +++ b/fs/nilfs2/mdt.c | |||
@@ -0,0 +1,563 @@ | |||
1 | /* | ||
2 | * mdt.c - meta data file for NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | |||
23 | #include <linux/buffer_head.h> | ||
24 | #include <linux/mpage.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include <linux/backing-dev.h> | ||
28 | #include <linux/swap.h> | ||
29 | #include "nilfs.h" | ||
30 | #include "segment.h" | ||
31 | #include "page.h" | ||
32 | #include "mdt.h" | ||
33 | |||
34 | |||
35 | #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) | ||
36 | |||
37 | #define INIT_UNUSED_INODE_FIELDS | ||
38 | |||
/*
 * nilfs_mdt_insert_new_block - format a freshly created metadata block
 * @inode:      metadata file inode
 * @block:      block offset within the file
 * @bh:         buffer to initialize (page-locked by the caller)
 * @init_block: optional callback to format the zeroed block
 *
 * Registers the block in the bmap, zero-fills it, runs @init_block,
 * and marks buffer and inode dirty.  Returns 0 or a bmap error.
 */
static int
nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
			   struct buffer_head *bh,
			   void (*init_block)(struct inode *,
					      struct buffer_head *, void *))
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	void *kaddr;
	int ret;

	/* Caller exclude read accesses using page lock */

	/* set_buffer_new(bh); */
	bh->b_blocknr = 0;

	/* Register in the bmap before touching the buffer contents. */
	ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
	if (unlikely(ret))
		return ret;

	set_buffer_mapped(bh);

	/* Zero-fill and let the caller format the block; the atomic
	   kmap section must stay short. */
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
	if (init_block)
		init_block(inode, bh, kaddr);
	flush_dcache_page(bh->b_page);
	kunmap_atomic(kaddr, KM_USER0);

	set_buffer_uptodate(bh);
	nilfs_mark_buffer_dirty(bh);
	nilfs_mdt_mark_dirty(inode);
	return 0;
}
72 | |||
/*
 * nilfs_mdt_create_block - allocate and format a new metadata block
 * @inode:      metadata file inode
 * @block:      block offset to create
 * @out_bh:     receives a referenced buffer on success
 * @init_block: optional formatter passed to nilfs_mdt_insert_new_block()
 *
 * Creates the block inside a transaction; fails with -EEXIST if a live
 * buffer already occupies the slot, -EROFS if no writable superblock
 * is available.
 */
static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
				  struct buffer_head **out_bh,
				  void (*init_block)(struct inode *,
						     struct buffer_head *,
						     void *))
{
	struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
	struct nilfs_sb_info *writer = NULL;
	struct super_block *sb = inode->i_sb;
	struct nilfs_transaction_info ti;
	struct buffer_head *bh;
	int err;

	/* If this inode has no superblock attached, borrow a writable
	   one from the_nilfs for the transaction. */
	if (!sb) {
		writer = nilfs_get_writer(nilfs);
		if (!writer) {
			err = -EROFS;
			goto out;
		}
		sb = writer->s_super;
	}

	nilfs_transaction_begin(sb, &ti, 0);

	err = -ENOMEM;
	bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
	if (unlikely(!bh))
		goto failed_unlock;

	/* A buffer that is uptodate or mapped means the block exists. */
	err = -EEXIST;
	if (buffer_uptodate(bh) || buffer_mapped(bh))
		goto failed_bh;
#if 0
	/* The uptodate flag is not protected by the page lock, but
	   the mapped flag is.  Thus, we don't have to wait the buffer. */
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		goto failed_bh;
#endif

	bh->b_bdev = nilfs->ns_bdev;
	err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
	if (likely(!err)) {
		get_bh(bh);	/* extra reference handed to the caller */
		*out_bh = bh;
	}

 failed_bh:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	brelse(bh);

 failed_unlock:
	/* Commit on success (which may itself fail), abort otherwise. */
	if (likely(!err))
		err = nilfs_transaction_commit(sb);
	else
		nilfs_transaction_abort(sb);
	if (writer)
		nilfs_put_writer(nilfs);
 out:
	return err;
}
135 | |||
/*
 * nilfs_mdt_submit_block - submit a read for one metadata block
 * @inode:  metadata file inode
 * @blkoff: block offset within the file
 * @mode:   READ (blocking) or READA (readahead, non-blocking lock)
 * @out_bh: receives a referenced buffer
 *
 * Returns 0 when a read was submitted, -EEXIST (internal code) when the
 * buffer was already uptodate (no I/O needed), -EBUSY when a readahead
 * could not take the buffer lock, or a bmap/allocation error.
 */
static int
nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
		       int mode, struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	unsigned long blknum = 0;
	int ret = -ENOMEM;

	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
	if (unlikely(!bh))
		goto failed;

	ret = -EEXIST; /* internal code */
	if (buffer_uptodate(bh))
		goto out;

	/* Readahead must never block on a contended buffer lock. */
	if (mode == READA) {
		if (!trylock_buffer(bh)) {
			ret = -EBUSY;
			goto failed_bh;
		}
	} else /* mode == READ */
		lock_buffer(bh);

	/* Re-check under the buffer lock: another reader may have won. */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		goto out;
	}
	if (!buffer_mapped(bh)) { /* unused buffer */
		ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff,
					&blknum);
		if (unlikely(ret)) {
			unlock_buffer(bh);
			goto failed_bh;
		}
		bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
		bh->b_blocknr = blknum;
		set_buffer_mapped(bh);
	}

	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);	/* reference consumed by I/O completion */
	submit_bh(mode, bh);
	ret = 0;
 out:
	get_bh(bh);	/* extra reference returned via *out_bh */
	*out_bh = bh;

 failed_bh:
	/* Note: the "out" path falls through here on purpose to drop
	   the page lock and page reference taken by nilfs_grab_buffer. */
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	brelse(bh);
 failed:
	return ret;
}
191 | |||
/*
 * nilfs_mdt_read_block - read a metadata block with readahead
 * @inode:  metadata file inode
 * @block:  block offset to read
 * @out_bh: receives a referenced, uptodate buffer on success
 *
 * Reads @block synchronously and opportunistically issues readahead
 * for the following NILFS_MDT_MAX_RA_BLOCKS blocks.  Returns 0 on
 * success, -EIO if the read failed, or an error from submission.
 */
static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
				struct buffer_head **out_bh)
{
	struct buffer_head *first_bh, *bh;
	unsigned long blkoff;
	int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
	int err;

	err = nilfs_mdt_submit_block(inode, block, READ, &first_bh);
	if (err == -EEXIST) /* internal code */
		goto out;	/* already uptodate; no I/O was issued */

	if (unlikely(err))
		goto failed;

	/* Kick off readahead for the blocks that follow. */
	blkoff = block + 1;
	for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
		err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
		if (likely(!err || err == -EEXIST))
			brelse(bh);	/* readahead ref not kept */
		else if (err != -EBUSY)
			break; /* abort readahead if bmap lookup failed */

		/* Stop early once the primary read has completed. */
		if (!buffer_locked(first_bh))
			goto out_no_wait;
	}

	wait_on_buffer(first_bh);

 out_no_wait:
	err = -EIO;
	if (!buffer_uptodate(first_bh))
		goto failed_bh;
 out:
	*out_bh = first_bh;
	return 0;

 failed_bh:
	brelse(first_bh);
 failed:
	return err;
}
234 | |||
/**
 * nilfs_mdt_get_block - read or create a buffer on meta data file.
 * @inode: inode of the meta data file
 * @blkoff: block offset
 * @create: create flag
 * @init_block: initializer used for newly allocated block
 * @out_bh: output of a pointer to the buffer_head
 *
 * nilfs_mdt_get_block() looks up the specified buffer and tries to create
 * a new buffer if @create is not zero. On success, the returned buffer is
 * assured to be either existing or formatted using a buffer lock on success.
 * @out_bh is substituted only when zero is returned.
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
 *
 * %-EROFS - Read only filesystem (for create mode)
 */
int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
			void (*init_block)(struct inode *,
					   struct buffer_head *, void *),
			struct buffer_head **out_bh)
{
	int ret;

	/* Should be rewritten with merging nilfs_mdt_read_block() */
 retry:
	ret = nilfs_mdt_read_block(inode, blkoff, out_bh);
	/* A hole (-ENOENT) is only an error when creation is disabled. */
	if (!create || ret != -ENOENT)
		return ret;

	ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block);
	if (unlikely(ret == -EEXIST)) {
		/* Lost the race with a concurrent creator: re-read. */
		/* create = 0; */ /* limit read-create loop retries */
		goto retry;
	}
	return ret;
}
281 | |||
282 | /** | ||
283 | * nilfs_mdt_delete_block - make a hole on the meta data file. | ||
284 | * @inode: inode of the meta data file | ||
285 | * @block: block offset | ||
286 | * | ||
287 | * Return Value: On success, zero is returned. | ||
288 | * On error, one of the following negative error code is returned. | ||
289 | * | ||
290 | * %-ENOMEM - Insufficient memory available. | ||
291 | * | ||
292 | * %-EIO - I/O error | ||
293 | * | ||
294 | * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) | ||
295 | */ | ||
296 | int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) | ||
297 | { | ||
298 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
299 | int err; | ||
300 | |||
301 | err = nilfs_bmap_delete(ii->i_bmap, block); | ||
302 | if (likely(!err)) { | ||
303 | nilfs_mdt_mark_dirty(inode); | ||
304 | nilfs_mdt_forget_block(inode, block); | ||
305 | } | ||
306 | return err; | ||
307 | } | ||
308 | |||
/**
 * nilfs_mdt_forget_block - discard dirty state and try to remove the page
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
 * tries to release the page including the buffer from a page cache.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-EBUSY - page has an active buffer.
 *
 * %-ENOENT - page cache has no page addressed by the offset.
 */
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
	/* Index of the page that holds @block. */
	pgoff_t index = (pgoff_t)block >>
		(PAGE_CACHE_SHIFT - inode->i_blkbits);
	struct page *page;
	unsigned long first_block;
	int ret = 0;
	int still_dirty;

	/* Lock the page so no one else touches its buffers meanwhile. */
	page = find_lock_page(inode->i_mapping, index);
	if (!page)
		return -ENOENT;

	/* Let any in-flight write-back of this page finish first. */
	wait_on_page_writeback(page);

	/* Block number of the first buffer in this page. */
	first_block = (unsigned long)index <<
		(PAGE_CACHE_SHIFT - inode->i_blkbits);
	if (page_has_buffers(page)) {
		struct buffer_head *bh;

		bh = nilfs_page_get_nth_block(page, block - first_block);
		nilfs_forget_buffer(bh);
	}
	/* Sample the dirty state before dropping the lock and reference. */
	still_dirty = PageDirty(page);
	unlock_page(page);
	page_cache_release(page);

	/*
	 * The page cannot be released if it is still dirty (other buffers
	 * in the page remain dirty) or if invalidation fails.
	 */
	if (still_dirty ||
	    invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
		ret = -EBUSY;
	return ret;
}
356 | |||
/**
 * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty.
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * Reads in the block at @block and marks both the buffer and the inode
 * dirty so the segment constructor will pick them up.
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
 */
int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
{
	struct buffer_head *bh;
	int ret;

	ret = nilfs_mdt_read_block(inode, block, &bh);
	if (ret)
		return ret;

	nilfs_mark_buffer_dirty(bh);
	nilfs_mdt_mark_dirty(inode);
	brelse(bh);
	return 0;
}
386 | |||
387 | int nilfs_mdt_fetch_dirty(struct inode *inode) | ||
388 | { | ||
389 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
390 | |||
391 | if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { | ||
392 | set_bit(NILFS_I_DIRTY, &ii->i_state); | ||
393 | return 1; | ||
394 | } | ||
395 | return test_bit(NILFS_I_DIRTY, &ii->i_state); | ||
396 | } | ||
397 | |||
/*
 * writepage method for meta data files.  The page itself is never written
 * out here; it is redirtied and, depending on the write-back mode, segment
 * construction is requested instead.
 */
static int
nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = container_of(page->mapping,
					   struct inode, i_data);
	struct super_block *sb = inode->i_sb;
	struct nilfs_sb_info *writer = NULL;
	int err = 0;

	/* Keep the page dirty; actual writing is deferred. */
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);

	if (page->mapping->assoc_mapping)
		return 0; /* Do not request flush for shadow page cache */
	if (!sb) {
		/*
		 * Some meta data inodes have no i_sb (see
		 * nilfs_mdt_new_common()); borrow the current writer's
		 * super block, failing with -EROFS if there is none.
		 */
		writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs);
		if (!writer)
			return -EROFS;
		sb = writer->s_super;
	}

	if (wbc->sync_mode == WB_SYNC_ALL)
		err = nilfs_construct_segment(sb);
	else if (wbc->for_reclaim)
		nilfs_flush_segment(sb, inode->i_ino);

	/* Drop the writer reference taken above, if any. */
	if (writer)
		nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
	return err;
}
428 | |||
429 | |||
/* Meta data files only implement writepage; see nilfs_mdt_write_page(). */
static struct address_space_operations def_mdt_aops = {
	.writepage		= nilfs_mdt_write_page,
};

/* No inode/file operations are needed for these pseudo inodes. */
static struct inode_operations def_mdt_iops;
static struct file_operations def_mdt_fops;
436 | |||
/*
 * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile,
 * ifile, or gcinodes. This allows the B-tree code and segment constructor
 * to treat them like regular files, and this helps to simplify the
 * implementation.
 * On the other hand, some of the pseudo inodes have an irregular point:
 * They don't have valid inode->i_sb pointer because their lifetimes are
 * longer than those of the super block structs; they may continue for
 * several consecutive mounts/umounts.  This would need discussions.
 */
/*
 * nilfs_mdt_new_common - allocate and hand-initialize a pseudo inode.
 * @nilfs: the_nilfs object the meta data file belongs to
 * @sb: super block instance (may be NULL; see the comment above)
 * @ino: inode number to assign
 * @gfp_mask: gfp mask applied to the inode's page cache
 *
 * Returns the new inode, or NULL on allocation failure.  The inode is
 * built by hand rather than through the inode cache, so all fields that
 * matter are initialized explicitly here.
 */
struct inode *
nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
		     ino_t ino, gfp_t gfp_mask)
{
	struct inode *inode = nilfs_alloc_inode(sb);

	if (!inode)
		return NULL;
	else {
		struct address_space * const mapping = &inode->i_data;
		struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS);

		if (!mi) {
			nilfs_destroy_inode(inode);
			return NULL;
		}
		mi->mi_nilfs = nilfs;
		init_rwsem(&mi->mi_sem);

		inode->i_sb = sb; /* sb may be NULL for some meta data files */
		inode->i_blkbits = nilfs->ns_blocksize_bits;
		inode->i_flags = 0;
		atomic_set(&inode->i_count, 1);
		inode->i_nlink = 1;
		inode->i_ino = ino;
		inode->i_mode = S_IFREG;
		/* NILFS_MDT() retrieves the private data from i_private. */
		inode->i_private = mi;

#ifdef INIT_UNUSED_INODE_FIELDS
		/* Fields that the inode cache would normally clear. */
		atomic_set(&inode->i_writecount, 0);
		inode->i_size = 0;
		inode->i_blocks = 0;
		inode->i_bytes = 0;
		inode->i_generation = 0;
#ifdef CONFIG_QUOTA
		memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
		inode->i_pipe = NULL;
		inode->i_bdev = NULL;
		inode->i_cdev = NULL;
		inode->i_rdev = 0;
#ifdef CONFIG_SECURITY
		inode->i_security = NULL;
#endif
		inode->dirtied_when = 0;

		INIT_LIST_HEAD(&inode->i_list);
		INIT_LIST_HEAD(&inode->i_sb_list);
		inode->i_state = 0;
#endif

		spin_lock_init(&inode->i_lock);
		mutex_init(&inode->i_mutex);
		init_rwsem(&inode->i_alloc_sem);

		mapping->host = NULL; /* instead of inode */
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, gfp_mask);
		mapping->assoc_mapping = NULL;
		mapping->backing_dev_info = nilfs->ns_bdi;

		inode->i_mapping = mapping;
	}

	return inode;
}
513 | |||
514 | struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, | ||
515 | ino_t ino, gfp_t gfp_mask) | ||
516 | { | ||
517 | struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); | ||
518 | |||
519 | if (!inode) | ||
520 | return NULL; | ||
521 | |||
522 | inode->i_op = &def_mdt_iops; | ||
523 | inode->i_fop = &def_mdt_fops; | ||
524 | inode->i_mapping->a_ops = &def_mdt_aops; | ||
525 | return inode; | ||
526 | } | ||
527 | |||
528 | void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, | ||
529 | unsigned header_size) | ||
530 | { | ||
531 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
532 | |||
533 | mi->mi_entry_size = entry_size; | ||
534 | mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; | ||
535 | mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); | ||
536 | } | ||
537 | |||
538 | void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) | ||
539 | { | ||
540 | shadow->i_mapping->assoc_mapping = orig->i_mapping; | ||
541 | NILFS_I(shadow)->i_btnode_cache.assoc_mapping = | ||
542 | &NILFS_I(orig)->i_btnode_cache; | ||
543 | } | ||
544 | |||
545 | void nilfs_mdt_clear(struct inode *inode) | ||
546 | { | ||
547 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
548 | |||
549 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | ||
550 | truncate_inode_pages(inode->i_mapping, 0); | ||
551 | |||
552 | nilfs_bmap_clear(ii->i_bmap); | ||
553 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); | ||
554 | } | ||
555 | |||
556 | void nilfs_mdt_destroy(struct inode *inode) | ||
557 | { | ||
558 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | ||
559 | |||
560 | kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ | ||
561 | kfree(mdi); | ||
562 | nilfs_destroy_inode(inode); | ||
563 | } | ||
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h new file mode 100644 index 000000000000..df683e0bca6a --- /dev/null +++ b/fs/nilfs2/mdt.h | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | * mdt.h - NILFS meta data file prototype and definitions | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_MDT_H | ||
24 | #define _NILFS_MDT_H | ||
25 | |||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/blockgroup_lock.h> | ||
28 | #include "nilfs.h" | ||
29 | #include "page.h" | ||
30 | |||
/**
 * struct nilfs_mdt_info - on-memory private data of meta data files
 * @mi_nilfs: back pointer to the_nilfs struct
 * @mi_sem: reader/writer semaphore for meta data operations
 * @mi_bgl: per-blockgroup locking
 * @mi_entry_size: size of an entry
 * @mi_first_entry_offset: offset to the first entry
 * @mi_entries_per_block: number of entries in a block
 * @mi_blocks_per_group: number of blocks in a group
 * @mi_blocks_per_desc_block: number of blocks per descriptor block
 */
struct nilfs_mdt_info {
	struct the_nilfs *mi_nilfs;
	struct rw_semaphore mi_sem;
	struct blockgroup_lock *mi_bgl;
	unsigned mi_entry_size;
	unsigned mi_first_entry_offset;
	unsigned long mi_entries_per_block;
	unsigned long mi_blocks_per_group;
	unsigned long mi_blocks_per_desc_block;
};

/* The mdt private data is stashed in inode->i_private. */
static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
{
	return inode->i_private;
}

/*
 * Resolve the_nilfs object for @inode.  Meta data inodes may carry a NULL
 * i_sb (their lifetime can exceed the super block's); in that case fall
 * back to the back pointer kept in the mdt private data.
 */
static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs;
}
64 | |||
/* Default GFP flags using highmem */
#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)

int nilfs_mdt_get_block(struct inode *, unsigned long, int,
			void (*init_block)(struct inode *,
					   struct buffer_head *, void *),
			struct buffer_head **);
int nilfs_mdt_delete_block(struct inode *, unsigned long);
int nilfs_mdt_forget_block(struct inode *, unsigned long);
int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
int nilfs_mdt_fetch_dirty(struct inode *);

struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t,
			    gfp_t);
struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
				   ino_t, gfp_t);
void nilfs_mdt_destroy(struct inode *);
void nilfs_mdt_clear(struct inode *);
void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
void nilfs_mdt_set_shadow(struct inode *, struct inode *);


#define nilfs_mdt_mark_buffer_dirty(bh)	nilfs_mark_buffer_dirty(bh)

/* Set the inode-level dirty flag; tested first to avoid a redundant set. */
static inline void nilfs_mdt_mark_dirty(struct inode *inode)
{
	if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
		set_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
}

/* Clear the inode-level dirty flag. */
static inline void nilfs_mdt_clear_dirty(struct inode *inode)
{
	clear_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
}

/* Current checkpoint number of the file system this inode belongs to. */
static inline __u64 nilfs_mdt_cno(struct inode *inode)
{
	return NILFS_MDT(inode)->mi_nilfs->ns_cno;
}

/* Per-blockgroup lock: hashes @bg into the NR_BG_LOCKS-entry lock array. */
#define nilfs_mdt_bgl_lock(inode, bg) \
	(&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)


/* Read the on-disk inode image at byte offset @n inside @bh into @inode. */
static inline int
nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh,
			    unsigned n)
{
	return nilfs_read_inode_common(
		inode, (struct nilfs_inode *)(bh->b_data + n));
}

/* Write @inode back to the on-disk inode image at offset @n inside @bh. */
static inline void
nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh,
			     unsigned n)
{
	nilfs_write_inode_common(
		inode, (struct nilfs_inode *)(bh->b_data + n), 1);
}

#endif	/* _NILFS_MDT_H */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c new file mode 100644 index 000000000000..df70dadb336f --- /dev/null +++ b/fs/nilfs2/namei.c | |||
@@ -0,0 +1,474 @@ | |||
1 | /* | ||
2 | * namei.c - NILFS pathname lookup operations. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | /* | ||
24 | * linux/fs/ext2/namei.c | ||
25 | * | ||
26 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
27 | * Remy Card (card@masi.ibp.fr) | ||
28 | * Laboratoire MASI - Institut Blaise Pascal | ||
29 | * Universite Pierre et Marie Curie (Paris VI) | ||
30 | * | ||
31 | * from | ||
32 | * | ||
33 | * linux/fs/minix/namei.c | ||
34 | * | ||
35 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
36 | * | ||
37 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
38 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
39 | */ | ||
40 | |||
41 | #include <linux/pagemap.h> | ||
42 | #include "nilfs.h" | ||
43 | |||
44 | |||
/*
 * Add a directory entry for a freshly created non-directory inode and
 * instantiate the dentry.  On failure the link count taken at creation
 * is dropped and the inode is released.
 */
static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int err = nilfs_add_link(dentry, inode);

	if (err) {
		inode_dec_link_count(inode);
		iput(inode);
		return err;
	}
	d_instantiate(dentry, inode);
	return 0;
}
56 | |||
57 | /* | ||
58 | * Methods themselves. | ||
59 | */ | ||
60 | |||
61 | static struct dentry * | ||
62 | nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
63 | { | ||
64 | struct inode *inode; | ||
65 | ino_t ino; | ||
66 | |||
67 | if (dentry->d_name.len > NILFS_NAME_LEN) | ||
68 | return ERR_PTR(-ENAMETOOLONG); | ||
69 | |||
70 | ino = nilfs_inode_by_name(dir, dentry); | ||
71 | inode = NULL; | ||
72 | if (ino) { | ||
73 | inode = nilfs_iget(dir->i_sb, ino); | ||
74 | if (IS_ERR(inode)) | ||
75 | return ERR_CAST(inode); | ||
76 | } | ||
77 | return d_splice_alias(inode, dentry); | ||
78 | } | ||
79 | |||
80 | struct dentry *nilfs_get_parent(struct dentry *child) | ||
81 | { | ||
82 | unsigned long ino; | ||
83 | struct inode *inode; | ||
84 | struct dentry dotdot; | ||
85 | |||
86 | dotdot.d_name.name = ".."; | ||
87 | dotdot.d_name.len = 2; | ||
88 | |||
89 | ino = nilfs_inode_by_name(child->d_inode, &dotdot); | ||
90 | if (!ino) | ||
91 | return ERR_PTR(-ENOENT); | ||
92 | |||
93 | inode = nilfs_iget(child->d_inode->i_sb, ino); | ||
94 | if (IS_ERR(inode)) | ||
95 | return ERR_CAST(inode); | ||
96 | return d_obtain_alias(inode); | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * By the time this is called, we already have created | ||
101 | * the directory cache entry for the new file, but it | ||
102 | * is so far negative - it has no inode. | ||
103 | * | ||
104 | * If the create succeeds, we fill in the inode information | ||
105 | * with d_instantiate(). | ||
106 | */ | ||
107 | static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, | ||
108 | struct nameidata *nd) | ||
109 | { | ||
110 | struct inode *inode; | ||
111 | struct nilfs_transaction_info ti; | ||
112 | int err; | ||
113 | |||
114 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
115 | if (err) | ||
116 | return err; | ||
117 | inode = nilfs_new_inode(dir, mode); | ||
118 | err = PTR_ERR(inode); | ||
119 | if (!IS_ERR(inode)) { | ||
120 | inode->i_op = &nilfs_file_inode_operations; | ||
121 | inode->i_fop = &nilfs_file_operations; | ||
122 | inode->i_mapping->a_ops = &nilfs_aops; | ||
123 | mark_inode_dirty(inode); | ||
124 | err = nilfs_add_nondir(dentry, inode); | ||
125 | } | ||
126 | if (!err) | ||
127 | err = nilfs_transaction_commit(dir->i_sb); | ||
128 | else | ||
129 | nilfs_transaction_abort(dir->i_sb); | ||
130 | |||
131 | return err; | ||
132 | } | ||
133 | |||
134 | static int | ||
135 | nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) | ||
136 | { | ||
137 | struct inode *inode; | ||
138 | struct nilfs_transaction_info ti; | ||
139 | int err; | ||
140 | |||
141 | if (!new_valid_dev(rdev)) | ||
142 | return -EINVAL; | ||
143 | |||
144 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
145 | if (err) | ||
146 | return err; | ||
147 | inode = nilfs_new_inode(dir, mode); | ||
148 | err = PTR_ERR(inode); | ||
149 | if (!IS_ERR(inode)) { | ||
150 | init_special_inode(inode, inode->i_mode, rdev); | ||
151 | mark_inode_dirty(inode); | ||
152 | err = nilfs_add_nondir(dentry, inode); | ||
153 | } | ||
154 | if (!err) | ||
155 | err = nilfs_transaction_commit(dir->i_sb); | ||
156 | else | ||
157 | nilfs_transaction_abort(dir->i_sb); | ||
158 | |||
159 | return err; | ||
160 | } | ||
161 | |||
162 | static int nilfs_symlink(struct inode *dir, struct dentry *dentry, | ||
163 | const char *symname) | ||
164 | { | ||
165 | struct nilfs_transaction_info ti; | ||
166 | struct super_block *sb = dir->i_sb; | ||
167 | unsigned l = strlen(symname)+1; | ||
168 | struct inode *inode; | ||
169 | int err; | ||
170 | |||
171 | if (l > sb->s_blocksize) | ||
172 | return -ENAMETOOLONG; | ||
173 | |||
174 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
175 | if (err) | ||
176 | return err; | ||
177 | |||
178 | inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO); | ||
179 | err = PTR_ERR(inode); | ||
180 | if (IS_ERR(inode)) | ||
181 | goto out; | ||
182 | |||
183 | /* slow symlink */ | ||
184 | inode->i_op = &nilfs_symlink_inode_operations; | ||
185 | inode->i_mapping->a_ops = &nilfs_aops; | ||
186 | err = page_symlink(inode, symname, l); | ||
187 | if (err) | ||
188 | goto out_fail; | ||
189 | |||
190 | /* mark_inode_dirty(inode); */ | ||
191 | /* nilfs_new_inode() and page_symlink() do this */ | ||
192 | |||
193 | err = nilfs_add_nondir(dentry, inode); | ||
194 | out: | ||
195 | if (!err) | ||
196 | err = nilfs_transaction_commit(dir->i_sb); | ||
197 | else | ||
198 | nilfs_transaction_abort(dir->i_sb); | ||
199 | |||
200 | return err; | ||
201 | |||
202 | out_fail: | ||
203 | inode_dec_link_count(inode); | ||
204 | iput(inode); | ||
205 | goto out; | ||
206 | } | ||
207 | |||
208 | static int nilfs_link(struct dentry *old_dentry, struct inode *dir, | ||
209 | struct dentry *dentry) | ||
210 | { | ||
211 | struct inode *inode = old_dentry->d_inode; | ||
212 | struct nilfs_transaction_info ti; | ||
213 | int err; | ||
214 | |||
215 | if (inode->i_nlink >= NILFS_LINK_MAX) | ||
216 | return -EMLINK; | ||
217 | |||
218 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
219 | if (err) | ||
220 | return err; | ||
221 | |||
222 | inode->i_ctime = CURRENT_TIME; | ||
223 | inode_inc_link_count(inode); | ||
224 | atomic_inc(&inode->i_count); | ||
225 | |||
226 | err = nilfs_add_nondir(dentry, inode); | ||
227 | if (!err) | ||
228 | err = nilfs_transaction_commit(dir->i_sb); | ||
229 | else | ||
230 | nilfs_transaction_abort(dir->i_sb); | ||
231 | |||
232 | return err; | ||
233 | } | ||
234 | |||
/* Create directory @dentry in @dir inside a single NILFS transaction. */
static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct inode *inode;
	struct nilfs_transaction_info ti;
	int err;

	if (dir->i_nlink >= NILFS_LINK_MAX)
		return -EMLINK;

	err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
	if (err)
		return err;

	/* Parent gains a link for the new directory's ".." entry. */
	inode_inc_link_count(dir);

	inode = nilfs_new_inode(dir, S_IFDIR | mode);
	err = PTR_ERR(inode);
	if (IS_ERR(inode))
		goto out_dir;

	inode->i_op = &nilfs_dir_inode_operations;
	inode->i_fop = &nilfs_dir_operations;
	inode->i_mapping->a_ops = &nilfs_aops;

	/* Second link on the new directory for its own "." entry. */
	inode_inc_link_count(inode);

	err = nilfs_make_empty(inode, dir);
	if (err)
		goto out_fail;

	err = nilfs_add_link(dentry, inode);
	if (err)
		goto out_fail;

	d_instantiate(dentry, inode);
out:
	if (!err)
		err = nilfs_transaction_commit(dir->i_sb);
	else
		nilfs_transaction_abort(dir->i_sb);

	return err;

out_fail:
	/* Drop both link counts taken above, then release the inode. */
	inode_dec_link_count(inode);
	inode_dec_link_count(inode);
	iput(inode);
out_dir:
	/* Undo the parent's ".." accounting. */
	inode_dec_link_count(dir);
	goto out;
}
286 | |||
/* Remove the directory entry @dentry from @dir inside a transaction. */
static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	struct nilfs_dir_entry *de;
	struct page *page;
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
	if (err)
		return err;

	err = -ENOENT;
	de = nilfs_find_entry(dir, dentry, &page);
	if (!de)
		goto out;

	inode = dentry->d_inode;
	err = -EIO;
	/* The on-disk entry and the dentry must name the same inode. */
	if (le64_to_cpu(de->inode) != inode->i_ino)
		goto out;

	if (!inode->i_nlink) {
		/* Repair a bogus zero link count before decrementing. */
		nilfs_warning(inode->i_sb, __func__,
			      "deleting nonexistent file (%lu), %d\n",
			      inode->i_ino, inode->i_nlink);
		inode->i_nlink = 1;
	}
	err = nilfs_delete_entry(de, page);
	if (err)
		goto out;

	inode->i_ctime = dir->i_ctime;
	inode_dec_link_count(inode);
	err = 0;
out:
	if (!err)
		err = nilfs_transaction_commit(dir->i_sb);
	else
		nilfs_transaction_abort(dir->i_sb);

	return err;
}
330 | |||
331 | static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) | ||
332 | { | ||
333 | struct inode *inode = dentry->d_inode; | ||
334 | struct nilfs_transaction_info ti; | ||
335 | int err; | ||
336 | |||
337 | err = nilfs_transaction_begin(dir->i_sb, &ti, 0); | ||
338 | if (err) | ||
339 | return err; | ||
340 | |||
341 | err = -ENOTEMPTY; | ||
342 | if (nilfs_empty_dir(inode)) { | ||
343 | err = nilfs_unlink(dir, dentry); | ||
344 | if (!err) { | ||
345 | inode->i_size = 0; | ||
346 | inode_dec_link_count(inode); | ||
347 | inode_dec_link_count(dir); | ||
348 | } | ||
349 | } | ||
350 | if (!err) | ||
351 | err = nilfs_transaction_commit(dir->i_sb); | ||
352 | else | ||
353 | nilfs_transaction_abort(dir->i_sb); | ||
354 | |||
355 | return err; | ||
356 | } | ||
357 | |||
/*
 * Rename @old_dentry in @old_dir to @new_dentry in @new_dir inside a
 * single NILFS transaction, following the ext2 rename scheme.
 */
static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct page *dir_page = NULL;
	struct nilfs_dir_entry *dir_de = NULL;
	struct page *old_page;
	struct nilfs_dir_entry *old_de;
	struct nilfs_transaction_info ti;
	int err;

	err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
	if (unlikely(err))
		return err;

	err = -ENOENT;
	old_de = nilfs_find_entry(old_dir, old_dentry, &old_page);
	if (!old_de)
		goto out;

	if (S_ISDIR(old_inode->i_mode)) {
		/* A moved directory needs its ".." entry repointed later. */
		err = -EIO;
		dir_de = nilfs_dotdot(old_inode, &dir_page);
		if (!dir_de)
			goto out_old;
	}

	if (new_inode) {
		struct page *new_page;
		struct nilfs_dir_entry *new_de;

		/* A directory may only replace an empty directory. */
		err = -ENOTEMPTY;
		if (dir_de && !nilfs_empty_dir(new_inode))
			goto out_dir;

		err = -ENOENT;
		new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
		if (!new_de)
			goto out_dir;
		inode_inc_link_count(old_inode);
		nilfs_set_link(new_dir, new_de, new_page, old_inode);
		new_inode->i_ctime = CURRENT_TIME;
		if (dir_de)
			drop_nlink(new_inode); /* replaced dir loses an extra link */
		inode_dec_link_count(new_inode);
	} else {
		if (dir_de) {
			err = -EMLINK;
			if (new_dir->i_nlink >= NILFS_LINK_MAX)
				goto out_dir;
		}
		inode_inc_link_count(old_inode);
		err = nilfs_add_link(new_dentry, old_inode);
		if (err) {
			inode_dec_link_count(old_inode);
			goto out_dir;
		}
		if (dir_de)
			inode_inc_link_count(new_dir);
	}

	/*
	 * Like most other Unix systems, set the ctime for inodes on a
	 * rename.
	 * inode_dec_link_count() will mark the inode dirty.
	 */
	old_inode->i_ctime = CURRENT_TIME;

	nilfs_delete_entry(old_de, old_page);
	inode_dec_link_count(old_inode);

	if (dir_de) {
		/* Repoint the moved directory's ".." at its new parent. */
		nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
		inode_dec_link_count(old_dir);
	}

	err = nilfs_transaction_commit(old_dir->i_sb);
	return err;

out_dir:
	if (dir_de) {
		kunmap(dir_page);
		page_cache_release(dir_page);
	}
out_old:
	kunmap(old_page);
	page_cache_release(old_page);
out:
	nilfs_transaction_abort(old_dir->i_sb);
	return err;
}
450 | |||
/* Inode operations for NILFS directories. */
struct inode_operations nilfs_dir_inode_operations = {
	.create		= nilfs_create,
	.lookup		= nilfs_lookup,
	.link		= nilfs_link,
	.unlink		= nilfs_unlink,
	.symlink	= nilfs_symlink,
	.mkdir		= nilfs_mkdir,
	.rmdir		= nilfs_rmdir,
	.mknod		= nilfs_mknod,
	.rename		= nilfs_rename,
	.setattr	= nilfs_setattr,
	.permission	= nilfs_permission,
};

/* Inode operations for special files (device nodes, fifos, sockets). */
struct inode_operations nilfs_special_inode_operations = {
	.setattr	= nilfs_setattr,
	.permission	= nilfs_permission,
};

/* Inode operations for symlinks; uses the generic page-based helpers. */
struct inode_operations nilfs_symlink_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= page_follow_link_light,
	.put_link	= page_put_link,
};
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h new file mode 100644 index 000000000000..7558c977db02 --- /dev/null +++ b/fs/nilfs2/nilfs.h | |||
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * nilfs.h - NILFS local header file. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net> | ||
21 | * Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_H | ||
25 | #define _NILFS_H | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/blkdev.h> | ||
31 | #include <linux/nilfs2_fs.h> | ||
32 | #include "the_nilfs.h" | ||
33 | #include "sb.h" | ||
34 | #include "bmap.h" | ||
35 | #include "bmap_union.h" | ||
36 | |||
37 | /* | ||
38 | * NILFS filesystem version | ||
39 | */ | ||
40 | #define NILFS_VERSION "2.0.5" | ||
41 | |||
42 | /* | ||
43 | * nilfs inode data in memory | ||
44 | */ | ||
45 | struct nilfs_inode_info { | ||
46 | __u32 i_flags; | ||
47 | unsigned long i_state; /* Dynamic state flags */ | ||
48 | struct nilfs_bmap *i_bmap; | ||
49 | union nilfs_bmap_union i_bmap_union; | ||
50 | __u64 i_xattr; /* sector_t ??? */ | ||
51 | __u32 i_dir_start_lookup; | ||
52 | __u64 i_cno; /* check point number for GC inode */ | ||
53 | struct address_space i_btnode_cache; | ||
54 | struct list_head i_dirty; /* List for connecting dirty files */ | ||
55 | |||
56 | #ifdef CONFIG_NILFS_XATTR | ||
57 | /* | ||
58 | * Extended attributes can be read independently of the main file | ||
59 | * data. Taking i_sem even when reading would cause contention | ||
60 | * between readers of EAs and writers of regular file data, so | ||
61 | * instead we synchronize on xattr_sem when reading or changing | ||
62 | * EAs. | ||
63 | */ | ||
64 | struct rw_semaphore xattr_sem; | ||
65 | #endif | ||
66 | #ifdef CONFIG_NILFS_POSIX_ACL | ||
67 | struct posix_acl *i_acl; | ||
68 | struct posix_acl *i_default_acl; | ||
69 | #endif | ||
70 | struct buffer_head *i_bh; /* i_bh contains a new or dirty | ||
71 | disk inode */ | ||
72 | struct inode vfs_inode; | ||
73 | }; | ||
74 | |||
75 | static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode) | ||
76 | { | ||
77 | return container_of(inode, struct nilfs_inode_info, vfs_inode); | ||
78 | } | ||
79 | |||
80 | static inline struct nilfs_inode_info * | ||
81 | NILFS_BMAP_I(const struct nilfs_bmap *bmap) | ||
82 | { | ||
83 | return container_of((union nilfs_bmap_union *)bmap, | ||
84 | struct nilfs_inode_info, | ||
85 | i_bmap_union); | ||
86 | } | ||
87 | |||
88 | static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) | ||
89 | { | ||
90 | struct nilfs_inode_info *ii = | ||
91 | container_of(btnc, struct nilfs_inode_info, i_btnode_cache); | ||
92 | return &ii->vfs_inode; | ||
93 | } | ||
94 | |||
95 | static inline struct inode *NILFS_AS_I(struct address_space *mapping) | ||
96 | { | ||
97 | return (mapping->host) ? : | ||
98 | container_of(mapping, struct inode, i_data); | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * Dynamic state flags of NILFS on-memory inode (i_state) | ||
103 | */ | ||
104 | enum { | ||
105 | NILFS_I_NEW = 0, /* Inode is newly created */ | ||
106 | NILFS_I_DIRTY, /* The file is dirty */ | ||
107 | NILFS_I_QUEUED, /* inode is in dirty_files list */ | ||
108 | NILFS_I_BUSY, /* inode is grabbed by a segment | ||
109 | constructor */ | ||
110 | NILFS_I_COLLECTED, /* All dirty blocks are collected */ | ||
111 | NILFS_I_UPDATED, /* The file has been written back */ | ||
112 | NILFS_I_INODE_DIRTY, /* write_inode is requested */ | ||
113 | NILFS_I_BMAP, /* has bmap and btnode_cache */ | ||
114 | NILFS_I_GCINODE, /* inode for GC, on memory only */ | ||
115 | NILFS_I_GCDAT, /* shadow DAT, on memory only */ | ||
116 | }; | ||
117 | |||
118 | /* | ||
119 | * Macros to check inode numbers | ||
120 | */ | ||
121 | #define NILFS_MDT_INO_BITS \ | ||
122 | ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ | ||
123 | 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ | ||
124 | 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) | ||
125 | |||
126 | #define NILFS_SYS_INO_BITS \ | ||
127 | ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) | ||
128 | |||
129 | #define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) | ||
130 | |||
131 | #define NILFS_MDT_INODE(sb, ino) \ | ||
132 | ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) | ||
133 | #define NILFS_VALID_INODE(sb, ino) \ | ||
134 | ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) | ||
135 | |||
136 | /** | ||
137 | * struct nilfs_transaction_info: context information for synchronization | ||
138 | * @ti_magic: Magic number | ||
139 | * @ti_save: Backup of journal_info field of task_struct | ||
140 | * @ti_flags: Flags | ||
141 | * @ti_count: Nest level | ||
142 | * @ti_garbage: List of inode to be put when releasing semaphore | ||
143 | */ | ||
144 | struct nilfs_transaction_info { | ||
145 | u32 ti_magic; | ||
146 | void *ti_save; | ||
147 | /* This should never be used. If this happens, | ||
148 | one of the other filesystems has a bug. */ | ||
149 | unsigned short ti_flags; | ||
150 | unsigned short ti_count; | ||
151 | struct list_head ti_garbage; | ||
152 | }; | ||
153 | |||
154 | /* ti_magic */ | ||
155 | #define NILFS_TI_MAGIC 0xd9e392fb | ||
156 | |||
157 | /* ti_flags */ | ||
158 | #define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */ | ||
159 | #define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the | ||
160 | end of transaction. */ | ||
161 | #define NILFS_TI_GC 0x0004 /* GC context */ | ||
162 | #define NILFS_TI_COMMIT 0x0008 /* Change happened or not */ | ||
163 | #define NILFS_TI_WRITER 0x0010 /* Constructor context */ | ||
164 | |||
165 | |||
166 | int nilfs_transaction_begin(struct super_block *, | ||
167 | struct nilfs_transaction_info *, int); | ||
168 | int nilfs_transaction_commit(struct super_block *); | ||
169 | void nilfs_transaction_abort(struct super_block *); | ||
170 | |||
171 | static inline void nilfs_set_transaction_flag(unsigned int flag) | ||
172 | { | ||
173 | struct nilfs_transaction_info *ti = current->journal_info; | ||
174 | |||
175 | ti->ti_flags |= flag; | ||
176 | } | ||
177 | |||
178 | static inline int nilfs_test_transaction_flag(unsigned int flag) | ||
179 | { | ||
180 | struct nilfs_transaction_info *ti = current->journal_info; | ||
181 | |||
182 | if (ti == NULL || ti->ti_magic != NILFS_TI_MAGIC) | ||
183 | return 0; | ||
184 | return !!(ti->ti_flags & flag); | ||
185 | } | ||
186 | |||
187 | static inline int nilfs_doing_gc(void) | ||
188 | { | ||
189 | return nilfs_test_transaction_flag(NILFS_TI_GC); | ||
190 | } | ||
191 | |||
192 | static inline int nilfs_doing_construction(void) | ||
193 | { | ||
194 | return nilfs_test_transaction_flag(NILFS_TI_WRITER); | ||
195 | } | ||
196 | |||
197 | static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) | ||
198 | { | ||
199 | return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * function prototypes | ||
204 | */ | ||
205 | #ifdef CONFIG_NILFS_POSIX_ACL | ||
206 | #error "NILFS: not yet supported POSIX ACL" | ||
207 | extern int nilfs_permission(struct inode *, int, struct nameidata *); | ||
208 | extern int nilfs_acl_chmod(struct inode *); | ||
209 | extern int nilfs_init_acl(struct inode *, struct inode *); | ||
210 | #else | ||
211 | #define nilfs_permission NULL | ||
212 | |||
213 | static inline int nilfs_acl_chmod(struct inode *inode) | ||
214 | { | ||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) | ||
219 | { | ||
220 | inode->i_mode &= ~current_umask(); | ||
221 | return 0; | ||
222 | } | ||
223 | #endif | ||
224 | |||
225 | #define NILFS_ATIME_DISABLE | ||
226 | |||
227 | /* dir.c */ | ||
228 | extern int nilfs_add_link(struct dentry *, struct inode *); | ||
229 | extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *); | ||
230 | extern int nilfs_make_empty(struct inode *, struct inode *); | ||
231 | extern struct nilfs_dir_entry * | ||
232 | nilfs_find_entry(struct inode *, struct dentry *, struct page **); | ||
233 | extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); | ||
234 | extern int nilfs_empty_dir(struct inode *); | ||
235 | extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); | ||
236 | extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, | ||
237 | struct page *, struct inode *); | ||
238 | |||
239 | /* file.c */ | ||
240 | extern int nilfs_sync_file(struct file *, struct dentry *, int); | ||
241 | |||
242 | /* ioctl.c */ | ||
243 | long nilfs_ioctl(struct file *, unsigned int, unsigned long); | ||
244 | int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); | ||
245 | |||
246 | /* inode.c */ | ||
247 | extern struct inode *nilfs_new_inode(struct inode *, int); | ||
248 | extern void nilfs_free_inode(struct inode *); | ||
249 | extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
250 | extern void nilfs_set_inode_flags(struct inode *); | ||
251 | extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); | ||
252 | extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); | ||
253 | extern struct inode *nilfs_iget(struct super_block *, unsigned long); | ||
254 | extern void nilfs_update_inode(struct inode *, struct buffer_head *); | ||
255 | extern void nilfs_truncate(struct inode *); | ||
256 | extern void nilfs_delete_inode(struct inode *); | ||
257 | extern int nilfs_setattr(struct dentry *, struct iattr *); | ||
258 | extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, | ||
259 | struct buffer_head **); | ||
260 | extern int nilfs_inode_dirty(struct inode *); | ||
261 | extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, | ||
262 | unsigned); | ||
263 | extern int nilfs_mark_inode_dirty(struct inode *); | ||
264 | extern void nilfs_dirty_inode(struct inode *); | ||
265 | |||
266 | /* namei.c */ | ||
267 | extern struct dentry *nilfs_get_parent(struct dentry *); | ||
268 | |||
269 | /* super.c */ | ||
270 | extern struct inode *nilfs_alloc_inode(struct super_block *); | ||
271 | extern void nilfs_destroy_inode(struct inode *); | ||
272 | extern void nilfs_error(struct super_block *, const char *, const char *, ...) | ||
273 | __attribute__ ((format (printf, 3, 4))); | ||
274 | extern void nilfs_warning(struct super_block *, const char *, const char *, ...) | ||
275 | __attribute__ ((format (printf, 3, 4))); | ||
276 | extern struct nilfs_super_block * | ||
277 | nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); | ||
278 | extern int nilfs_store_magic_and_option(struct super_block *, | ||
279 | struct nilfs_super_block *, char *); | ||
280 | extern int nilfs_commit_super(struct nilfs_sb_info *, int); | ||
281 | extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); | ||
282 | extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); | ||
283 | |||
284 | /* gcinode.c */ | ||
285 | int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, | ||
286 | struct buffer_head **); | ||
287 | int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, | ||
288 | struct buffer_head **); | ||
289 | int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); | ||
290 | int nilfs_init_gccache(struct the_nilfs *); | ||
291 | void nilfs_destroy_gccache(struct the_nilfs *); | ||
292 | void nilfs_clear_gcinode(struct inode *); | ||
293 | struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); | ||
294 | void nilfs_remove_all_gcinode(struct the_nilfs *); | ||
295 | |||
296 | /* gcdat.c */ | ||
297 | int nilfs_init_gcdat_inode(struct the_nilfs *); | ||
298 | void nilfs_commit_gcdat_inode(struct the_nilfs *); | ||
299 | void nilfs_clear_gcdat_inode(struct the_nilfs *); | ||
300 | |||
301 | /* | ||
302 | * Inodes and files operations | ||
303 | */ | ||
304 | extern struct file_operations nilfs_dir_operations; | ||
305 | extern struct inode_operations nilfs_file_inode_operations; | ||
306 | extern struct file_operations nilfs_file_operations; | ||
307 | extern struct address_space_operations nilfs_aops; | ||
308 | extern struct inode_operations nilfs_dir_inode_operations; | ||
309 | extern struct inode_operations nilfs_special_inode_operations; | ||
310 | extern struct inode_operations nilfs_symlink_inode_operations; | ||
311 | |||
312 | /* | ||
313 | * filesystem type | ||
314 | */ | ||
315 | extern struct file_system_type nilfs_fs_type; | ||
316 | |||
317 | |||
318 | #endif /* _NILFS_H */ | ||
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c new file mode 100644 index 000000000000..1bfbba9c0e9a --- /dev/null +++ b/fs/nilfs2/page.c | |||
@@ -0,0 +1,540 @@ | |||
1 | /* | ||
2 | * page.c - buffer/page management specific to NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | ||
21 | * Seiji Kihara <kihara@osrg.net>. | ||
22 | */ | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | #include <linux/writeback.h> | ||
26 | #include <linux/swap.h> | ||
27 | #include <linux/bitops.h> | ||
28 | #include <linux/page-flags.h> | ||
29 | #include <linux/list.h> | ||
30 | #include <linux/highmem.h> | ||
31 | #include <linux/pagevec.h> | ||
32 | #include "nilfs.h" | ||
33 | #include "page.h" | ||
34 | #include "mdt.h" | ||
35 | |||
36 | |||
37 | #define NILFS_BUFFER_INHERENT_BITS \ | ||
38 | ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ | ||
39 | (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) | ||
40 | |||
41 | static struct buffer_head * | ||
42 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | ||
43 | int blkbits, unsigned long b_state) | ||
44 | |||
45 | { | ||
46 | unsigned long first_block; | ||
47 | struct buffer_head *bh; | ||
48 | |||
49 | if (!page_has_buffers(page)) | ||
50 | create_empty_buffers(page, 1 << blkbits, b_state); | ||
51 | |||
52 | first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); | ||
53 | bh = nilfs_page_get_nth_block(page, block - first_block); | ||
54 | |||
55 | touch_buffer(bh); | ||
56 | wait_on_buffer(bh); | ||
57 | return bh; | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * Since the page cache of B-tree node pages or data page cache of pseudo | ||
62 | * inodes does not have a valid mapping->host pointer, calling | ||
63 | * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; | ||
64 | * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). | ||
65 | * To avoid this problem, the old style mark_buffer_dirty() is used instead. | ||
66 | */ | ||
67 | void nilfs_mark_buffer_dirty(struct buffer_head *bh) | ||
68 | { | ||
69 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | ||
70 | __set_page_dirty_nobuffers(bh->b_page); | ||
71 | } | ||
72 | |||
73 | struct buffer_head *nilfs_grab_buffer(struct inode *inode, | ||
74 | struct address_space *mapping, | ||
75 | unsigned long blkoff, | ||
76 | unsigned long b_state) | ||
77 | { | ||
78 | int blkbits = inode->i_blkbits; | ||
79 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | ||
80 | struct page *page, *opage; | ||
81 | struct buffer_head *bh, *obh; | ||
82 | |||
83 | page = grab_cache_page(mapping, index); | ||
84 | if (unlikely(!page)) | ||
85 | return NULL; | ||
86 | |||
87 | bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); | ||
88 | if (unlikely(!bh)) { | ||
89 | unlock_page(page); | ||
90 | page_cache_release(page); | ||
91 | return NULL; | ||
92 | } | ||
93 | if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { | ||
94 | /* | ||
95 | * Shadow page cache uses assoc_mapping to point its original | ||
96 | * page cache. The following code tries the original cache | ||
97 | * if the given cache is a shadow and it didn't hit. | ||
98 | */ | ||
99 | opage = find_lock_page(mapping->assoc_mapping, index); | ||
100 | if (!opage) | ||
101 | return bh; | ||
102 | |||
103 | obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, | ||
104 | b_state); | ||
105 | if (buffer_uptodate(obh)) { | ||
106 | nilfs_copy_buffer(bh, obh); | ||
107 | if (buffer_dirty(obh)) { | ||
108 | nilfs_mark_buffer_dirty(bh); | ||
109 | if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) | ||
110 | nilfs_mdt_mark_dirty(inode); | ||
111 | } | ||
112 | } | ||
113 | brelse(obh); | ||
114 | unlock_page(opage); | ||
115 | page_cache_release(opage); | ||
116 | } | ||
117 | return bh; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * nilfs_forget_buffer - discard dirty state | ||
122 | * @bh: buffer head of the buffer to be discarded | ||
123 | * Clears the dirty/uptodate states of @bh and drops its reference. | ||
124 | */ | ||
125 | void nilfs_forget_buffer(struct buffer_head *bh) | ||
126 | { | ||
127 | struct page *page = bh->b_page; | ||
128 | |||
129 | lock_buffer(bh); | ||
130 | clear_buffer_nilfs_volatile(bh); | ||
131 | if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) | ||
132 | __nilfs_clear_page_dirty(page); | ||
133 | |||
134 | clear_buffer_uptodate(bh); | ||
135 | clear_buffer_mapped(bh); | ||
136 | bh->b_blocknr = -1; | ||
137 | ClearPageUptodate(page); | ||
138 | ClearPageMappedToDisk(page); | ||
139 | unlock_buffer(bh); | ||
140 | brelse(bh); | ||
141 | } | ||
142 | |||
143 | /** | ||
144 | * nilfs_copy_buffer -- copy buffer data and flags | ||
145 | * @dbh: destination buffer | ||
146 | * @sbh: source buffer | ||
147 | */ | ||
148 | void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) | ||
149 | { | ||
150 | void *kaddr0, *kaddr1; | ||
151 | unsigned long bits; | ||
152 | struct page *spage = sbh->b_page, *dpage = dbh->b_page; | ||
153 | struct buffer_head *bh; | ||
154 | |||
155 | kaddr0 = kmap_atomic(spage, KM_USER0); | ||
156 | kaddr1 = kmap_atomic(dpage, KM_USER1); | ||
157 | memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); | ||
158 | kunmap_atomic(kaddr1, KM_USER1); | ||
159 | kunmap_atomic(kaddr0, KM_USER0); | ||
160 | |||
161 | dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; | ||
162 | dbh->b_blocknr = sbh->b_blocknr; | ||
163 | dbh->b_bdev = sbh->b_bdev; | ||
164 | |||
165 | bh = dbh; | ||
166 | bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); | ||
167 | while ((bh = bh->b_this_page) != dbh) { | ||
168 | lock_buffer(bh); | ||
169 | bits &= bh->b_state; | ||
170 | unlock_buffer(bh); | ||
171 | } | ||
172 | if (bits & (1UL << BH_Uptodate)) | ||
173 | SetPageUptodate(dpage); | ||
174 | else | ||
175 | ClearPageUptodate(dpage); | ||
176 | if (bits & (1UL << BH_Mapped)) | ||
177 | SetPageMappedToDisk(dpage); | ||
178 | else | ||
179 | ClearPageMappedToDisk(dpage); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | ||
184 | * @page: page to be checked | ||
185 | * | ||
186 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | ||
187 | * Otherwise, it returns non-zero value. | ||
188 | */ | ||
189 | int nilfs_page_buffers_clean(struct page *page) | ||
190 | { | ||
191 | struct buffer_head *bh, *head; | ||
192 | |||
193 | bh = head = page_buffers(page); | ||
194 | do { | ||
195 | if (buffer_dirty(bh)) | ||
196 | return 0; | ||
197 | bh = bh->b_this_page; | ||
198 | } while (bh != head); | ||
199 | return 1; | ||
200 | } | ||
201 | |||
202 | void nilfs_page_bug(struct page *page) | ||
203 | { | ||
204 | struct address_space *m; | ||
205 | unsigned long ino = 0; | ||
206 | |||
207 | if (unlikely(!page)) { | ||
208 | printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); | ||
209 | return; | ||
210 | } | ||
211 | |||
212 | m = page->mapping; | ||
213 | if (m) { | ||
214 | struct inode *inode = NILFS_AS_I(m); | ||
215 | if (inode != NULL) | ||
216 | ino = inode->i_ino; | ||
217 | } | ||
218 | printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " | ||
219 | "mapping=%p ino=%lu\n", | ||
220 | page, atomic_read(&page->_count), | ||
221 | (unsigned long long)page->index, page->flags, m, ino); | ||
222 | |||
223 | if (page_has_buffers(page)) { | ||
224 | struct buffer_head *bh, *head; | ||
225 | int i = 0; | ||
226 | |||
227 | bh = head = page_buffers(page); | ||
228 | do { | ||
229 | printk(KERN_CRIT | ||
230 | " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", | ||
231 | i++, bh, atomic_read(&bh->b_count), | ||
232 | (unsigned long long)bh->b_blocknr, bh->b_state); | ||
233 | bh = bh->b_this_page; | ||
234 | } while (bh != head); | ||
235 | } | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * nilfs_alloc_private_page - allocate a private page with buffer heads | ||
240 | * | ||
241 | * Return Value: On success, a pointer to the allocated page is returned. | ||
242 | * On error, NULL is returned. | ||
243 | */ | ||
244 | struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, | ||
245 | unsigned long state) | ||
246 | { | ||
247 | struct buffer_head *bh, *head, *tail; | ||
248 | struct page *page; | ||
249 | |||
250 | page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ | ||
251 | if (unlikely(!page)) | ||
252 | return NULL; | ||
253 | |||
254 | lock_page(page); | ||
255 | head = alloc_page_buffers(page, size, 0); | ||
256 | if (unlikely(!head)) { | ||
257 | unlock_page(page); | ||
258 | __free_page(page); | ||
259 | return NULL; | ||
260 | } | ||
261 | |||
262 | bh = head; | ||
263 | do { | ||
264 | bh->b_state = (1UL << BH_NILFS_Allocated) | state; | ||
265 | tail = bh; | ||
266 | bh->b_bdev = bdev; | ||
267 | bh = bh->b_this_page; | ||
268 | } while (bh); | ||
269 | |||
270 | tail->b_this_page = head; | ||
271 | attach_page_buffers(page, head); | ||
272 | |||
273 | return page; | ||
274 | } | ||
275 | |||
276 | void nilfs_free_private_page(struct page *page) | ||
277 | { | ||
278 | BUG_ON(!PageLocked(page)); | ||
279 | BUG_ON(page->mapping); | ||
280 | |||
281 | if (page_has_buffers(page) && !try_to_free_buffers(page)) | ||
282 | NILFS_PAGE_BUG(page, "failed to free page"); | ||
283 | |||
284 | unlock_page(page); | ||
285 | __free_page(page); | ||
286 | } | ||
287 | |||
288 | /** | ||
289 | * nilfs_copy_page -- copy the page with buffers | ||
290 | * @dst: destination page | ||
291 | * @src: source page | ||
292 | * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. | ||
293 | * | ||
294 | * This function is for both data pages and btnode pages. The dirty flag | ||
295 | * should be handled by the caller. The page must not be under I/O. | ||
296 | * Both the src and dst pages must be locked. | ||
297 | */ | ||
298 | static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) | ||
299 | { | ||
300 | struct buffer_head *dbh, *dbufs, *sbh, *sbufs; | ||
301 | unsigned long mask = NILFS_BUFFER_INHERENT_BITS; | ||
302 | |||
303 | BUG_ON(PageWriteback(dst)); | ||
304 | |||
305 | sbh = sbufs = page_buffers(src); | ||
306 | if (!page_has_buffers(dst)) | ||
307 | create_empty_buffers(dst, sbh->b_size, 0); | ||
308 | |||
309 | if (copy_dirty) | ||
310 | mask |= (1UL << BH_Dirty); | ||
311 | |||
312 | dbh = dbufs = page_buffers(dst); | ||
313 | do { | ||
314 | lock_buffer(sbh); | ||
315 | lock_buffer(dbh); | ||
316 | dbh->b_state = sbh->b_state & mask; | ||
317 | dbh->b_blocknr = sbh->b_blocknr; | ||
318 | dbh->b_bdev = sbh->b_bdev; | ||
319 | sbh = sbh->b_this_page; | ||
320 | dbh = dbh->b_this_page; | ||
321 | } while (dbh != dbufs); | ||
322 | |||
323 | copy_highpage(dst, src); | ||
324 | |||
325 | if (PageUptodate(src) && !PageUptodate(dst)) | ||
326 | SetPageUptodate(dst); | ||
327 | else if (!PageUptodate(src) && PageUptodate(dst)) | ||
328 | ClearPageUptodate(dst); | ||
329 | if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) | ||
330 | SetPageMappedToDisk(dst); | ||
331 | else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) | ||
332 | ClearPageMappedToDisk(dst); | ||
333 | |||
334 | do { | ||
335 | unlock_buffer(sbh); | ||
336 | unlock_buffer(dbh); | ||
337 | sbh = sbh->b_this_page; | ||
338 | dbh = dbh->b_this_page; | ||
339 | } while (dbh != dbufs); | ||
340 | } | ||
341 | |||
342 | int nilfs_copy_dirty_pages(struct address_space *dmap, | ||
343 | struct address_space *smap) | ||
344 | { | ||
345 | struct pagevec pvec; | ||
346 | unsigned int i; | ||
347 | pgoff_t index = 0; | ||
348 | int err = 0; | ||
349 | |||
350 | pagevec_init(&pvec, 0); | ||
351 | repeat: | ||
352 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | ||
353 | PAGEVEC_SIZE)) | ||
354 | return 0; | ||
355 | |||
356 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
357 | struct page *page = pvec.pages[i], *dpage; | ||
358 | |||
359 | lock_page(page); | ||
360 | if (unlikely(!PageDirty(page))) | ||
361 | NILFS_PAGE_BUG(page, "inconsistent dirty state"); | ||
362 | |||
363 | dpage = grab_cache_page(dmap, page->index); | ||
364 | if (unlikely(!dpage)) { | ||
365 | /* No empty page is added to the page cache */ | ||
366 | err = -ENOMEM; | ||
367 | unlock_page(page); | ||
368 | break; | ||
369 | } | ||
370 | if (unlikely(!page_has_buffers(page))) | ||
371 | NILFS_PAGE_BUG(page, | ||
372 | "found empty page in dat page cache"); | ||
373 | |||
374 | nilfs_copy_page(dpage, page, 1); | ||
375 | __set_page_dirty_nobuffers(dpage); | ||
376 | |||
377 | unlock_page(dpage); | ||
378 | page_cache_release(dpage); | ||
379 | unlock_page(page); | ||
380 | } | ||
381 | pagevec_release(&pvec); | ||
382 | cond_resched(); | ||
383 | |||
384 | if (likely(!err)) | ||
385 | goto repeat; | ||
386 | return err; | ||
387 | } | ||
388 | |||
389 | /** | ||
390 | * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache | ||
391 | * @dmap: destination page cache | ||
392 | * @smap: source page cache | ||
393 | * | ||
394 | * No pages must be added to the cache during this process. | ||
395 | * This must be ensured by the caller. | ||
396 | */ | ||
397 | void nilfs_copy_back_pages(struct address_space *dmap, | ||
398 | struct address_space *smap) | ||
399 | { | ||
400 | struct pagevec pvec; | ||
401 | unsigned int i, n; | ||
402 | pgoff_t index = 0; | ||
403 | int err; | ||
404 | |||
405 | pagevec_init(&pvec, 0); | ||
406 | repeat: | ||
407 | n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); | ||
408 | if (!n) | ||
409 | return; | ||
410 | index = pvec.pages[n - 1]->index + 1; | ||
411 | |||
412 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
413 | struct page *page = pvec.pages[i], *dpage; | ||
414 | pgoff_t offset = page->index; | ||
415 | |||
416 | lock_page(page); | ||
417 | dpage = find_lock_page(dmap, offset); | ||
418 | if (dpage) { | ||
419 | /* override existing page on the destination cache */ | ||
420 | WARN_ON(PageDirty(dpage)); | ||
421 | nilfs_copy_page(dpage, page, 0); | ||
422 | unlock_page(dpage); | ||
423 | page_cache_release(dpage); | ||
424 | } else { | ||
425 | struct page *page2; | ||
426 | |||
427 | /* move the page to the destination cache */ | ||
428 | spin_lock_irq(&smap->tree_lock); | ||
429 | page2 = radix_tree_delete(&smap->page_tree, offset); | ||
430 | WARN_ON(page2 != page); | ||
431 | |||
432 | smap->nrpages--; | ||
433 | spin_unlock_irq(&smap->tree_lock); | ||
434 | |||
435 | spin_lock_irq(&dmap->tree_lock); | ||
436 | err = radix_tree_insert(&dmap->page_tree, offset, page); | ||
437 | if (unlikely(err < 0)) { | ||
438 | WARN_ON(err == -EEXIST); | ||
439 | page->mapping = NULL; | ||
440 | page_cache_release(page); /* for cache */ | ||
441 | } else { | ||
442 | page->mapping = dmap; | ||
443 | dmap->nrpages++; | ||
444 | if (PageDirty(page)) | ||
445 | radix_tree_tag_set(&dmap->page_tree, | ||
446 | offset, | ||
447 | PAGECACHE_TAG_DIRTY); | ||
448 | } | ||
449 | spin_unlock_irq(&dmap->tree_lock); | ||
450 | } | ||
451 | unlock_page(page); | ||
452 | } | ||
453 | pagevec_release(&pvec); | ||
454 | cond_resched(); | ||
455 | |||
456 | goto repeat; | ||
457 | } | ||
458 | |||
459 | void nilfs_clear_dirty_pages(struct address_space *mapping) | ||
460 | { | ||
461 | struct pagevec pvec; | ||
462 | unsigned int i; | ||
463 | pgoff_t index = 0; | ||
464 | |||
465 | pagevec_init(&pvec, 0); | ||
466 | |||
467 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | ||
468 | PAGEVEC_SIZE)) { | ||
469 | for (i = 0; i < pagevec_count(&pvec); i++) { | ||
470 | struct page *page = pvec.pages[i]; | ||
471 | struct buffer_head *bh, *head; | ||
472 | |||
473 | lock_page(page); | ||
474 | ClearPageUptodate(page); | ||
475 | ClearPageMappedToDisk(page); | ||
476 | bh = head = page_buffers(page); | ||
477 | do { | ||
478 | lock_buffer(bh); | ||
479 | clear_buffer_dirty(bh); | ||
480 | clear_buffer_nilfs_volatile(bh); | ||
481 | clear_buffer_uptodate(bh); | ||
482 | clear_buffer_mapped(bh); | ||
483 | unlock_buffer(bh); | ||
484 | bh = bh->b_this_page; | ||
485 | } while (bh != head); | ||
486 | |||
487 | __nilfs_clear_page_dirty(page); | ||
488 | unlock_page(page); | ||
489 | } | ||
490 | pagevec_release(&pvec); | ||
491 | cond_resched(); | ||
492 | } | ||
493 | } | ||
494 | |||
495 | unsigned nilfs_page_count_clean_buffers(struct page *page, | ||
496 | unsigned from, unsigned to) | ||
497 | { | ||
498 | unsigned block_start, block_end; | ||
499 | struct buffer_head *bh, *head; | ||
500 | unsigned nc = 0; | ||
501 | |||
502 | for (bh = head = page_buffers(page), block_start = 0; | ||
503 | bh != head || !block_start; | ||
504 | block_start = block_end, bh = bh->b_this_page) { | ||
505 | block_end = block_start + bh->b_size; | ||
506 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | ||
507 | nc++; | ||
508 | } | ||
509 | return nc; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * NILFS2 needs clear_page_dirty() in the following two cases: | ||
514 | * | ||
515 | * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears | ||
516 | * page dirty flags when it copies back pages from the shadow cache | ||
517 | * (gcdat->{i_mapping,i_btnode_cache}) to its original cache | ||
518 | * (dat->{i_mapping,i_btnode_cache}). | ||
519 | * | ||
520 | * 2) Some B-tree operations like insertion or deletion may dispose buffers | ||
521 | * in dirty state, and this needs to cancel the dirty state of their pages. | ||
522 | */ | ||
523 | int __nilfs_clear_page_dirty(struct page *page) | ||
524 | { | ||
525 | struct address_space *mapping = page->mapping; | ||
526 | |||
527 | if (mapping) { | ||
528 | spin_lock_irq(&mapping->tree_lock); | ||
529 | if (test_bit(PG_dirty, &page->flags)) { | ||
530 | radix_tree_tag_clear(&mapping->page_tree, | ||
531 | page_index(page), | ||
532 | PAGECACHE_TAG_DIRTY); | ||
533 | spin_unlock_irq(&mapping->tree_lock); | ||
534 | return clear_page_dirty_for_io(page); | ||
535 | } | ||
536 | spin_unlock_irq(&mapping->tree_lock); | ||
537 | return 0; | ||
538 | } | ||
539 | return TestClearPageDirty(page); | ||
540 | } | ||
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h new file mode 100644 index 000000000000..8abca4d1c1f8 --- /dev/null +++ b/fs/nilfs2/page.h | |||
@@ -0,0 +1,76 @@ | |||
1 | /* | ||
2 | * page.h - buffer/page management specific to NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | ||
21 | * Seiji Kihara <kihara@osrg.net>. | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_PAGE_H | ||
25 | #define _NILFS_PAGE_H | ||
26 | |||
27 | #include <linux/buffer_head.h> | ||
28 | #include "nilfs.h" | ||
29 | |||
30 | /* | ||
31 | * Extended buffer state bits | ||
32 | */ | ||
33 | enum { | ||
34 | BH_NILFS_Allocated = BH_PrivateStart, | ||
35 | BH_NILFS_Node, | ||
36 | BH_NILFS_Volatile, | ||
37 | }; | ||
38 | |||
39 | BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ | ||
40 | BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ | ||
41 | BUFFER_FNS(NILFS_Volatile, nilfs_volatile) | ||
42 | |||
43 | |||
44 | void nilfs_mark_buffer_dirty(struct buffer_head *bh); | ||
45 | int __nilfs_clear_page_dirty(struct page *); | ||
46 | |||
47 | struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, | ||
48 | unsigned long, unsigned long); | ||
49 | void nilfs_forget_buffer(struct buffer_head *); | ||
50 | void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); | ||
51 | int nilfs_page_buffers_clean(struct page *); | ||
52 | void nilfs_page_bug(struct page *); | ||
53 | struct page *nilfs_alloc_private_page(struct block_device *, int, | ||
54 | unsigned long); | ||
55 | void nilfs_free_private_page(struct page *); | ||
56 | |||
57 | int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); | ||
58 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); | ||
59 | void nilfs_clear_dirty_pages(struct address_space *); | ||
60 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); | ||
61 | |||
/*
 * NILFS_PAGE_BUG(page, m, a...) - dump the state of @page and halt.
 * NOTE(review): the printf-style message @m and its arguments @a are
 * currently ignored; only nilfs_page_bug() and BUG() are executed.
 */
#define NILFS_PAGE_BUG(page, m, a...) \
	do { nilfs_page_bug(page); BUG(); } while (0)
64 | |||
65 | static inline struct buffer_head * | ||
66 | nilfs_page_get_nth_block(struct page *page, unsigned int count) | ||
67 | { | ||
68 | struct buffer_head *bh = page_buffers(page); | ||
69 | |||
70 | while (count-- > 0) | ||
71 | bh = bh->b_this_page; | ||
72 | get_bh(bh); | ||
73 | return bh; | ||
74 | } | ||
75 | |||
76 | #endif /* _NILFS_PAGE_H */ | ||
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c new file mode 100644 index 000000000000..6ade0963fc1d --- /dev/null +++ b/fs/nilfs2/recovery.c | |||
@@ -0,0 +1,929 @@ | |||
1 | /* | ||
2 | * recovery.c - NILFS recovery logic | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | |||
23 | #include <linux/buffer_head.h> | ||
24 | #include <linux/blkdev.h> | ||
25 | #include <linux/swap.h> | ||
26 | #include <linux/crc32.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "segment.h" | ||
29 | #include "sufile.h" | ||
30 | #include "page.h" | ||
31 | #include "seglist.h" | ||
32 | #include "segbuf.h" | ||
33 | |||
/*
 * Segment check result
 */
enum {
	NILFS_SEG_VALID,		/* partial segment passed the checks */
	NILFS_SEG_NO_SUPER_ROOT,	/* valid but carries no super root */
	NILFS_SEG_FAIL_IO,		/* read error while checking */
	NILFS_SEG_FAIL_MAGIC,		/* bad segment summary magic */
	NILFS_SEG_FAIL_SEQ,		/* unexpected sequence number */
	NILFS_SEG_FAIL_CHECKSUM_SEGSUM,	/* summary checksum mismatch */
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT, /* super root checksum mismatch */
	NILFS_SEG_FAIL_CHECKSUM_FULL,	/* full payload checksum mismatch */
	NILFS_SEG_FAIL_CONSISTENCY,	/* other inconsistency detected */
};
48 | |||
49 | /* work structure for recovery */ | ||
50 | struct nilfs_recovery_block { | ||
51 | ino_t ino; /* Inode number of the file that this block | ||
52 | belongs to */ | ||
53 | sector_t blocknr; /* block number */ | ||
54 | __u64 vblocknr; /* virtual block number */ | ||
55 | unsigned long blkoff; /* File offset of the data block (per block) */ | ||
56 | struct list_head list; | ||
57 | }; | ||
58 | |||
59 | |||
60 | static int nilfs_warn_segment_error(int err) | ||
61 | { | ||
62 | switch (err) { | ||
63 | case NILFS_SEG_FAIL_IO: | ||
64 | printk(KERN_WARNING | ||
65 | "NILFS warning: I/O error on loading last segment\n"); | ||
66 | return -EIO; | ||
67 | case NILFS_SEG_FAIL_MAGIC: | ||
68 | printk(KERN_WARNING | ||
69 | "NILFS warning: Segment magic number invalid\n"); | ||
70 | break; | ||
71 | case NILFS_SEG_FAIL_SEQ: | ||
72 | printk(KERN_WARNING | ||
73 | "NILFS warning: Sequence number mismatch\n"); | ||
74 | break; | ||
75 | case NILFS_SEG_FAIL_CHECKSUM_SEGSUM: | ||
76 | printk(KERN_WARNING | ||
77 | "NILFS warning: Checksum error in segment summary\n"); | ||
78 | break; | ||
79 | case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT: | ||
80 | printk(KERN_WARNING | ||
81 | "NILFS warning: Checksum error in super root\n"); | ||
82 | break; | ||
83 | case NILFS_SEG_FAIL_CHECKSUM_FULL: | ||
84 | printk(KERN_WARNING | ||
85 | "NILFS warning: Checksum error in segment payload\n"); | ||
86 | break; | ||
87 | case NILFS_SEG_FAIL_CONSISTENCY: | ||
88 | printk(KERN_WARNING | ||
89 | "NILFS warning: Inconsistent segment\n"); | ||
90 | break; | ||
91 | case NILFS_SEG_NO_SUPER_ROOT: | ||
92 | printk(KERN_WARNING | ||
93 | "NILFS warning: No super root in the last segment\n"); | ||
94 | break; | ||
95 | } | ||
96 | return -EINVAL; | ||
97 | } | ||
98 | |||
99 | static void store_segsum_info(struct nilfs_segsum_info *ssi, | ||
100 | struct nilfs_segment_summary *sum, | ||
101 | unsigned int blocksize) | ||
102 | { | ||
103 | ssi->flags = le16_to_cpu(sum->ss_flags); | ||
104 | ssi->seg_seq = le64_to_cpu(sum->ss_seq); | ||
105 | ssi->ctime = le64_to_cpu(sum->ss_create); | ||
106 | ssi->next = le64_to_cpu(sum->ss_next); | ||
107 | ssi->nblocks = le32_to_cpu(sum->ss_nblocks); | ||
108 | ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo); | ||
109 | ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes); | ||
110 | |||
111 | ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); | ||
112 | ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * calc_crc_cont - check CRC of blocks continuously | ||
117 | * @sbi: nilfs_sb_info | ||
118 | * @bhs: buffer head of start block | ||
119 | * @sum: place to store result | ||
120 | * @offset: offset bytes in the first block | ||
121 | * @check_bytes: number of bytes to be checked | ||
122 | * @start: DBN of start block | ||
123 | * @nblock: number of blocks to be checked | ||
124 | */ | ||
125 | static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, | ||
126 | u32 *sum, unsigned long offset, u64 check_bytes, | ||
127 | sector_t start, unsigned long nblock) | ||
128 | { | ||
129 | unsigned long blocksize = sbi->s_super->s_blocksize; | ||
130 | unsigned long size; | ||
131 | u32 crc; | ||
132 | |||
133 | BUG_ON(offset >= blocksize); | ||
134 | check_bytes -= offset; | ||
135 | size = min_t(u64, check_bytes, blocksize - offset); | ||
136 | crc = crc32_le(sbi->s_nilfs->ns_crc_seed, | ||
137 | (unsigned char *)bhs->b_data + offset, size); | ||
138 | if (--nblock > 0) { | ||
139 | do { | ||
140 | struct buffer_head *bh | ||
141 | = sb_bread(sbi->s_super, ++start); | ||
142 | if (!bh) | ||
143 | return -EIO; | ||
144 | check_bytes -= size; | ||
145 | size = min_t(u64, check_bytes, blocksize); | ||
146 | crc = crc32_le(crc, bh->b_data, size); | ||
147 | brelse(bh); | ||
148 | } while (--nblock > 0); | ||
149 | } | ||
150 | *sum = crc; | ||
151 | return 0; | ||
152 | } | ||
153 | |||
/**
 * nilfs_read_super_root_block - read super root block
 * @sb: super_block
 * @sr_block: disk block number of the super root block
 * @pbh: address of a buffer_head pointer to return super root buffer
 * @check: CRC check flag (nonzero verifies sr_bytes and sr_sum)
 *
 * On success, 0 is returned and *@pbh holds the buffer containing the
 * super root; the caller must release it with brelse().  On failure,
 * *@pbh is NULL and a negative error code from
 * nilfs_warn_segment_error() is returned (-EIO on read failure,
 * -EINVAL otherwise).
 */
int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
				struct buffer_head **pbh, int check)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *sr;
	u32 crc;
	int ret;

	*pbh = NULL;
	bh_sr = sb_bread(sb, sr_block);
	if (unlikely(!bh_sr)) {
		ret = NILFS_SEG_FAIL_IO;
		goto failed;
	}

	sr = (struct nilfs_super_root *)bh_sr->b_data;
	if (check) {
		/* sr_bytes must be sane before it is used as the CRC
		   length; a corrupt value is reported as a checksum
		   failure */
		unsigned bytes = le16_to_cpu(sr->sr_bytes);

		if (bytes == 0 || bytes > sb->s_blocksize) {
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
			goto failed_bh;
		}
		/* CRC covers sr_bytes bytes, skipping the sr_sum field */
		if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
				  sizeof(sr->sr_sum), bytes, sr_block, 1)) {
			ret = NILFS_SEG_FAIL_IO;
			goto failed_bh;
		}
		if (crc != le32_to_cpu(sr->sr_sum)) {
			ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
			goto failed_bh;
		}
	}
	*pbh = bh_sr;
	return 0;

 failed_bh:
	brelse(bh_sr);

 failed:
	return nilfs_warn_segment_error(ret);
}
203 | |||
/**
 * load_segment_summary - read segment summary of the specified partial segment
 * @sbi: nilfs_sb_info
 * @pseg_start: start disk block number of partial segment
 * @seg_seq: sequence number requested
 * @ssi: pointer to nilfs_segsum_info struct to store information
 * @full_check: full check flag
 * (0: only checks segment summary CRC, 1: data CRC)
 *
 * Returns 0 when the summary is valid, or one of the NILFS_SEG_FAIL_*
 * codes describing the first problem found.  @ssi is filled in as soon
 * as the magic number check passes, even if a later check fails.
 */
static int
load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
		     u64 seg_seq, struct nilfs_segsum_info *ssi,
		     int full_check)
{
	struct buffer_head *bh_sum;
	struct nilfs_segment_summary *sum;
	unsigned long offset, nblock;
	u64 check_bytes;
	u32 crc, crc_sum;
	int ret = NILFS_SEG_FAIL_IO;

	bh_sum = sb_bread(sbi->s_super, pseg_start);
	if (!bh_sum)
		goto out;

	sum = (struct nilfs_segment_summary *)bh_sum->b_data;

	/* Check consistency of segment summary */
	if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
		ret = NILFS_SEG_FAIL_MAGIC;
		goto failed;
	}
	store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
	if (seg_seq != ssi->seg_seq) {
		ret = NILFS_SEG_FAIL_SEQ;
		goto failed;
	}
	if (full_check) {
		/* Checksum the whole partial segment, skipping only the
		   ss_datasum field itself */
		offset = sizeof(sum->ss_datasum);
		check_bytes =
			((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits);
		nblock = ssi->nblocks;
		crc_sum = le32_to_cpu(sum->ss_datasum);
		ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
	} else { /* only checks segment summary */
		offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum);
		check_bytes = ssi->sumbytes;
		nblock = ssi->nsumblk;
		crc_sum = le32_to_cpu(sum->ss_sumsum);
		ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM;
	}

	if (unlikely(nblock == 0 ||
		     nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
		/* This limits the number of blocks read in the CRC check */
		ret = NILFS_SEG_FAIL_CONSISTENCY;
		goto failed;
	}
	if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes,
			  pseg_start, nblock)) {
		ret = NILFS_SEG_FAIL_IO;
		goto failed;
	}
	if (crc == crc_sum)
		ret = 0;
 failed:
	brelse(bh_sum);
 out:
	return ret;
}
274 | |||
275 | static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, | ||
276 | unsigned int *offset, unsigned int bytes) | ||
277 | { | ||
278 | void *ptr; | ||
279 | sector_t blocknr; | ||
280 | |||
281 | BUG_ON((*pbh)->b_size < *offset); | ||
282 | if (bytes > (*pbh)->b_size - *offset) { | ||
283 | blocknr = (*pbh)->b_blocknr; | ||
284 | brelse(*pbh); | ||
285 | *pbh = sb_bread(sb, blocknr + 1); | ||
286 | if (unlikely(!*pbh)) | ||
287 | return NULL; | ||
288 | *offset = 0; | ||
289 | } | ||
290 | ptr = (*pbh)->b_data + *offset; | ||
291 | *offset += bytes; | ||
292 | return ptr; | ||
293 | } | ||
294 | |||
/*
 * segsum_skip - skip @count items of @bytes bytes each in a segment summary
 *
 * Advances *@offset (and *@pbh, when the skipped items span into the
 * following summary blocks) past @count items.  NOTE(review): when a
 * new block is read, the sb_bread() result is stored into *@pbh without
 * a NULL check here; the caller is expected to test *@pbh afterwards.
 */
static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
			unsigned int *offset, unsigned int bytes,
			unsigned long count)
{
	unsigned int rest_item_in_current_block
		= ((*pbh)->b_size - *offset) / bytes;

	if (count <= rest_item_in_current_block) {
		/* Everything to skip fits in the current summary block */
		*offset += bytes * count;
	} else {
		sector_t blocknr = (*pbh)->b_blocknr;
		unsigned int nitem_per_block = (*pbh)->b_size / bytes;
		unsigned int bcnt;

		/* Number of blocks to advance, and the offset of the
		   first unskipped item within the last of them */
		count -= rest_item_in_current_block;
		bcnt = DIV_ROUND_UP(count, nitem_per_block);
		*offset = bytes * (count - (bcnt - 1) * nitem_per_block);

		brelse(*pbh);
		*pbh = sb_bread(sb, blocknr + bcnt);
	}
}
317 | |||
/*
 * collect_blocks_from_segsum - gather data block descriptors from a
 * partial segment summary
 * @sbi: nilfs_sb_info
 * @sum_blocknr: disk block number of the segment summary
 * @ssi: decoded summary information of the partial segment
 * @head: list to which nilfs_recovery_block entries are appended
 *
 * Walks the finfo/binfo arrays of the summary and queues one
 * nilfs_recovery_block per data block; node blocks are skipped.
 * Returns 0 on success, -EIO on read failure, or -ENOMEM on allocation
 * failure.  Entries already queued are left on @head for the caller to
 * dispose of.
 */
static int
collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
			   struct nilfs_segsum_info *ssi,
			   struct list_head *head)
{
	struct buffer_head *bh;
	unsigned int offset;
	unsigned long nfinfo = ssi->nfinfo;
	sector_t blocknr = sum_blocknr + ssi->nsumblk;
	ino_t ino;
	int err = -EIO;

	if (!nfinfo)
		return 0;

	bh = sb_bread(sbi->s_super, sum_blocknr);
	if (unlikely(!bh))
		goto out;

	/* Finfo entries start right after the segment summary header */
	offset = le16_to_cpu(
		((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
	for (;;) {
		unsigned long nblocks, ndatablk, nnodeblk;
		struct nilfs_finfo *finfo;

		finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
		if (unlikely(!finfo))
			goto out;

		ino = le64_to_cpu(finfo->fi_ino);
		nblocks = le32_to_cpu(finfo->fi_nblocks);
		ndatablk = le32_to_cpu(finfo->fi_ndatablk);
		nnodeblk = nblocks - ndatablk;

		/* Queue one recovery descriptor per data block */
		while (ndatablk-- > 0) {
			struct nilfs_recovery_block *rb;
			struct nilfs_binfo_v *binfo;

			binfo = segsum_get(sbi->s_super, &bh, &offset,
					   sizeof(*binfo));
			if (unlikely(!binfo))
				goto out;

			rb = kmalloc(sizeof(*rb), GFP_NOFS);
			if (unlikely(!rb)) {
				err = -ENOMEM;
				goto out;
			}
			rb->ino = ino;
			rb->blocknr = blocknr++;
			rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
			rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
			/* INIT_LIST_HEAD(&rb->list); */
			list_add_tail(&rb->list, head);
		}
		if (--nfinfo == 0)
			break;
		blocknr += nnodeblk; /* always 0 for the data sync segments */
		/* segsum_skip() may NULL *pbh on read failure, hence the
		   check below */
		segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
			    nnodeblk);
		if (unlikely(!bh))
			goto out;
	}
	err = 0;
 out:
	brelse(bh); /* brelse(NULL) is just ignored */
	return err;
}
386 | |||
387 | static void dispose_recovery_list(struct list_head *head) | ||
388 | { | ||
389 | while (!list_empty(head)) { | ||
390 | struct nilfs_recovery_block *rb | ||
391 | = list_entry(head->next, | ||
392 | struct nilfs_recovery_block, list); | ||
393 | list_del(&rb->list); | ||
394 | kfree(rb); | ||
395 | } | ||
396 | } | ||
397 | |||
398 | void nilfs_dispose_segment_list(struct list_head *head) | ||
399 | { | ||
400 | while (!list_empty(head)) { | ||
401 | struct nilfs_segment_entry *ent | ||
402 | = list_entry(head->next, | ||
403 | struct nilfs_segment_entry, list); | ||
404 | list_del(&ent->list); | ||
405 | nilfs_free_segment_entry(ent); | ||
406 | } | ||
407 | } | ||
408 | |||
/*
 * nilfs_prepare_segment_for_recovery - set up segments for the recovery write
 * @nilfs: the_nilfs
 * @ri: recovery information carrying the segments involved
 *
 * Frees the segment following the latest super root, marks the segments
 * written after that super root as garbage so they are not immediately
 * reallocated, and allocates a fresh segment to receive the recovered
 * log.  On success, updates nilfs->ns_pseg_offset, ns_seg_seq,
 * ns_segnum and ns_nextnum accordingly.
 */
static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
					      struct nilfs_recovery_info *ri)
{
	struct list_head *head = &ri->ri_used_segments;
	struct nilfs_segment_entry *ent, *n;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 segnum[4];
	time_t mtime;
	int err;
	int i;

	segnum[0] = nilfs->ns_segnum;
	segnum[1] = nilfs->ns_nextnum;
	segnum[2] = ri->ri_segnum;
	segnum[3] = ri->ri_nextnum;

	/*
	 * Releasing the next segment of the latest super root.
	 * The next segment is invalidated by this recovery.
	 */
	err = nilfs_sufile_free(sufile, segnum[1]);
	if (unlikely(err))
		goto failed;

	err = -ENOMEM;
	for (i = 1; i < 4; i++) {
		ent = nilfs_alloc_segment_entry(segnum[i]);
		if (unlikely(!ent))
			goto failed;
		list_add_tail(&ent->list, head);
	}

	/*
	 * Collecting segments written after the latest super root.
	 * These are marked dirty to avoid being reallocated in the next write.
	 */
	mtime = get_seconds();
	list_for_each_entry_safe(ent, n, head, list) {
		/* The current segment stays in use; skip it */
		if (ent->segnum == segnum[0]) {
			list_del(&ent->list);
			nilfs_free_segment_entry(ent);
			continue;
		}
		err = nilfs_open_segment_entry(ent, sufile);
		if (unlikely(err))
			goto failed;
		if (!nilfs_segment_usage_dirty(ent->raw_su)) {
			/* make the segment garbage */
			ent->raw_su->su_nblocks = cpu_to_le32(0);
			ent->raw_su->su_lastmod = cpu_to_le32(mtime);
			nilfs_segment_usage_set_dirty(ent->raw_su);
		}
		list_del(&ent->list);
		nilfs_close_segment_entry(ent, sufile);
		nilfs_free_segment_entry(ent);
	}

	/* Allocate new segments for recovery */
	err = nilfs_sufile_alloc(sufile, &segnum[0]);
	if (unlikely(err))
		goto failed;

	nilfs->ns_pseg_offset = 0;
	nilfs->ns_seg_seq = ri->ri_seq + 2;
	nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
	return 0;

 failed:
	/* No need to recover sufile because it will be destroyed on error */
	return err;
}
480 | |||
481 | static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, | ||
482 | struct nilfs_recovery_block *rb, | ||
483 | struct page *page) | ||
484 | { | ||
485 | struct buffer_head *bh_org; | ||
486 | void *kaddr; | ||
487 | |||
488 | bh_org = sb_bread(sbi->s_super, rb->blocknr); | ||
489 | if (unlikely(!bh_org)) | ||
490 | return -EIO; | ||
491 | |||
492 | kaddr = kmap_atomic(page, KM_USER0); | ||
493 | memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); | ||
494 | kunmap_atomic(kaddr, KM_USER0); | ||
495 | brelse(bh_org); | ||
496 | return 0; | ||
497 | } | ||
498 | |||
/*
 * recover_dsync_blocks - write salvaged data blocks back into their files
 * @sbi: nilfs_sb_info
 * @head: list of nilfs_recovery_block entries (consumed by this call)
 * @nr_salvaged_blocks: counter incremented once per block written back
 *
 * For each queued block: loads the inode, prepares the page with
 * block_write_begin(), copies the block contents from disk, and marks
 * the file dirty.  Per-entry failures are logged and the first error is
 * returned after the whole list has been processed; every list entry is
 * freed regardless of the outcome.
 */
static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
				struct list_head *head,
				unsigned long *nr_salvaged_blocks)
{
	struct inode *inode;
	struct nilfs_recovery_block *rb, *n;
	unsigned blocksize = sbi->s_super->s_blocksize;
	struct page *page;
	loff_t pos;
	int err = 0, err2 = 0;

	list_for_each_entry_safe(rb, n, head, list) {
		inode = nilfs_iget(sbi->s_super, rb->ino);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			inode = NULL;
			goto failed_inode;
		}

		pos = rb->blkoff << inode->i_blkbits;
		page = NULL;
		err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
					0, &page, NULL, nilfs_get_block);
		if (unlikely(err))
			goto failed_inode;

		err = nilfs_recovery_copy_block(sbi, rb, page);
		if (unlikely(err))
			goto failed_page;

		err = nilfs_set_file_dirty(sbi, inode, 1);
		if (unlikely(err))
			goto failed_page;

		block_write_end(NULL, inode->i_mapping, pos, blocksize,
				blocksize, page, NULL);

		unlock_page(page);
		page_cache_release(page);

		(*nr_salvaged_blocks)++;
		goto next;

 failed_page:
		unlock_page(page);
		page_cache_release(page);

 failed_inode:
		/* Log this entry's failure, keep the first error code,
		   and carry on with the rest of the list */
		printk(KERN_WARNING
		       "NILFS warning: error recovering data block "
		       "(err=%d, ino=%lu, block-offset=%llu)\n",
		       err, rb->ino, (unsigned long long)rb->blkoff);
		if (!err2)
			err2 = err;
 next:
		iput(inode); /* iput(NULL) is just ignored */
		list_del_init(&rb->list);
		kfree(rb);
	}
	return err2;
}
560 | |||
/**
 * nilfs_do_roll_forward - salvage logical segments newer than the latest
 * checkpoint
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info
 *
 * Scans partial segments from ri->ri_lsegs_start up to the segment of
 * the latest super root, collecting and replaying data-sync logs.  Sets
 * ri->ri_need_recovery when at least one block was salvaged.
 */
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
				 struct nilfs_sb_info *sbi,
				 struct nilfs_recovery_info *ri)
{
	struct nilfs_segsum_info ssi;
	sector_t pseg_start;
	sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
	unsigned long nsalvaged_blocks = 0;
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	int empty_seg = 0;
	int err = 0, ret;
	LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */
	enum {
		RF_INIT_ST,
		RF_DSYNC_ST, /* scanning data-sync segments */
	};
	int state = RF_INIT_ST;

	nilfs_attach_writer(nilfs, sbi);
	pseg_start = ri->ri_lsegs_start;
	seg_seq = ri->ri_lsegs_start_seq;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {

		ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO) {
				err = -EIO;
				goto failed;
			}
			goto strayed;
		}
		/* A super root must not appear before ri_lsegs_end */
		if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
			goto confused;

		/* Found a valid partial segment; do recovery actions */
		nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
		empty_seg = 0;
		nilfs->ns_ctime = ssi.ctime;
		if (!(ssi.flags & NILFS_SS_GC))
			nilfs->ns_nongc_ctime = ssi.ctime;

		switch (state) {
		case RF_INIT_ST:
			/* Only a log that both begins here and is
			   data-sync starts a salvage run */
			if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
				goto try_next_pseg;
			state = RF_DSYNC_ST;
			/* Fall through */
		case RF_DSYNC_ST:
			if (!NILFS_SEG_DSYNC(&ssi))
				goto confused;

			err = collect_blocks_from_segsum(
				sbi, pseg_start, &ssi, &dsync_blocks);
			if (unlikely(err))
				goto failed;
			/* Replay the collected blocks once the logical
			   log is complete */
			if (NILFS_SEG_LOGEND(&ssi)) {
				err = recover_dsync_blocks(
					sbi, &dsync_blocks, &nsalvaged_blocks);
				if (unlikely(err))
					goto failed;
				state = RF_INIT_ST;
			}
			break; /* Fall through to try_next_pseg */
		}

 try_next_pseg:
		if (pseg_start == ri->ri_lsegs_end)
			break;
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		if (pseg_start == ri->ri_lsegs_end)
			break;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			break;
		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

	if (nsalvaged_blocks) {
		printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
		       sbi->s_super->s_id, nsalvaged_blocks);
		ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
	}
 out:
	dispose_recovery_list(&dsync_blocks);
	nilfs_detach_writer(sbi->s_nilfs, sbi);
	return err;

 confused:
	err = -EINVAL;
 failed:
	printk(KERN_ERR
	       "NILFS (device %s): Error roll-forwarding "
	       "(err=%d, pseg block=%llu). ",
	       sbi->s_super->s_id, err, (unsigned long long)pseg_start);
	goto out;
}
678 | |||
679 | static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, | ||
680 | struct nilfs_sb_info *sbi, | ||
681 | struct nilfs_recovery_info *ri) | ||
682 | { | ||
683 | struct buffer_head *bh; | ||
684 | int err; | ||
685 | |||
686 | if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) != | ||
687 | nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) | ||
688 | return; | ||
689 | |||
690 | bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start); | ||
691 | BUG_ON(!bh); | ||
692 | memset(bh->b_data, 0, bh->b_size); | ||
693 | set_buffer_dirty(bh); | ||
694 | err = sync_dirty_buffer(bh); | ||
695 | if (unlikely(err)) | ||
696 | printk(KERN_WARNING | ||
697 | "NILFS warning: buffer sync write failed during " | ||
698 | "post-cleaning of recovery.\n"); | ||
699 | brelse(bh); | ||
700 | } | ||
701 | |||
702 | /** | ||
703 | * nilfs_recover_logical_segments - salvage logical segments written after | ||
704 | * the latest super root | ||
705 | * @nilfs: the_nilfs | ||
706 | * @sbi: nilfs_sb_info | ||
707 | * @ri: pointer to a nilfs_recovery_info struct to store search results. | ||
708 | * | ||
709 | * Return Value: On success, 0 is returned. On error, one of the following | ||
710 | * negative error code is returned. | ||
711 | * | ||
712 | * %-EINVAL - Inconsistent filesystem state. | ||
713 | * | ||
714 | * %-EIO - I/O error | ||
715 | * | ||
716 | * %-ENOSPC - No space left on device (only in a panic state). | ||
717 | * | ||
718 | * %-ERESTARTSYS - Interrupted. | ||
719 | * | ||
720 | * %-ENOMEM - Insufficient memory available. | ||
721 | */ | ||
int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
				   struct nilfs_sb_info *sbi,
				   struct nilfs_recovery_info *ri)
{
	int err;

	/* Nothing to salvage if no logical segment follows the latest
	   super root */
	if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
		return 0;

	err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
	if (unlikely(err)) {
		printk(KERN_ERR
		       "NILFS: error loading the latest checkpoint.\n");
		return err;
	}

	err = nilfs_do_roll_forward(nilfs, sbi, ri);
	if (unlikely(err))
		goto failed;

	if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
		/* Blocks were salvaged; persist them through a regular
		   segment construction */
		err = nilfs_prepare_segment_for_recovery(nilfs, ri);
		if (unlikely(err)) {
			printk(KERN_ERR "NILFS: Error preparing segments for "
			       "recovery.\n");
			goto failed;
		}

		err = nilfs_attach_segment_constructor(sbi);
		if (unlikely(err))
			goto failed;

		set_nilfs_discontinued(nilfs);
		err = nilfs_construct_segment(sbi->s_super);
		nilfs_detach_segment_constructor(sbi);

		if (unlikely(err)) {
			printk(KERN_ERR "NILFS: Oops! recovery failed. "
			       "(err=%d)\n", err);
			goto failed;
		}

		nilfs_finish_roll_forward(nilfs, sbi, ri);
	}

	nilfs_detach_checkpoint(sbi);
	return 0;

 failed:
	nilfs_detach_checkpoint(sbi);
	/* Drop cached metadata so the half-recovered state is not reused */
	nilfs_mdt_clear(nilfs->ns_cpfile);
	nilfs_mdt_clear(nilfs->ns_sufile);
	nilfs_mdt_clear(nilfs->ns_dat);
	return err;
}
777 | |||
/**
 * nilfs_search_super_root - search the latest valid super root
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: pointer to a nilfs_recovery_info struct to store search results.
 *
 * nilfs_search_super_root() looks for the latest super-root from a partial
 * segment pointed by the superblock. It sets up struct the_nilfs through
 * this search. It fills nilfs_recovery_info (ri) required for recovery.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-EINVAL - No valid segment found
 *
 * %-EIO - I/O error
 */
int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
			    struct nilfs_recovery_info *ri)
{
	struct nilfs_segsum_info ssi;
	sector_t pseg_start, pseg_end, sr_pseg_start = 0;
	sector_t seg_start, seg_end; /* range of full segment (block number) */
	u64 seg_seq;
	__u64 segnum, nextnum = 0;
	__u64 cno;
	struct nilfs_segment_entry *ent;
	LIST_HEAD(segments);
	int empty_seg = 0, scan_newer = 0;
	int ret;

	/* Start scanning from the log position recorded in the superblock */
	pseg_start = nilfs->ns_last_pseg;
	seg_seq = nilfs->ns_last_seq;
	cno = nilfs->ns_last_cno;
	segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);

	/* Calculate range of segment */
	nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);

	for (;;) {
		/* Load segment summary */
		ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
		if (ret) {
			if (ret == NILFS_SEG_FAIL_IO)
				goto failed;
			goto strayed;
		}
		pseg_end = pseg_start + ssi.nblocks - 1;
		if (unlikely(pseg_end > seg_end)) {
			/* Log claims to extend past its full segment */
			ret = NILFS_SEG_FAIL_CONSISTENCY;
			goto strayed;
		}

		/* A valid partial segment */
		ri->ri_pseg_start = pseg_start;
		ri->ri_seq = seg_seq;
		ri->ri_segnum = segnum;
		nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
		ri->ri_nextnum = nextnum;
		empty_seg = 0;

		if (!NILFS_SEG_HAS_SR(&ssi)) {
			if (!scan_newer) {
				/* This will never happen because a superblock
				   (last_segment) always points to a pseg
				   having a super root. */
				ret = NILFS_SEG_FAIL_CONSISTENCY;
				goto failed;
			}
			/* Record the range of logs written after the last
			   super root; these are roll-forward candidates */
			if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
				ri->ri_lsegs_start = pseg_start;
				ri->ri_lsegs_start_seq = seg_seq;
			}
			if (NILFS_SEG_LOGEND(&ssi))
				ri->ri_lsegs_end = pseg_start;
			goto try_next_pseg;
		}

		/* A valid super root was found. */
		ri->ri_cno = cno++;
		ri->ri_super_root = pseg_end;
		ri->ri_lsegs_start = ri->ri_lsegs_end = 0;

		/* Segments traversed before this super root are settled */
		nilfs_dispose_segment_list(&segments);
		nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
			+ ssi.nblocks - seg_start;
		nilfs->ns_seg_seq = seg_seq;
		nilfs->ns_segnum = segnum;
		nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */
		nilfs->ns_ctime = ssi.ctime;
		nilfs->ns_nextnum = nextnum;

		if (scan_newer)
			ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
		else {
			/* After a clean unmount no newer logs can exist */
			if (nilfs->ns_mount_state & NILFS_VALID_FS)
				goto super_root_found;
			scan_newer = 1;
		}

		/* reset region for roll-forward */
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 try_next_pseg:
		/* Standing on a course, or met an inconsistent state */
		pseg_start += ssi.nblocks;
		if (pseg_start < seg_end)
			continue;
		goto feed_segment;

 strayed:
		/* Off the trail */
		if (!scan_newer)
			/*
			 * This can happen if a checkpoint was written without
			 * barriers, or as a result of an I/O failure.
			 */
			goto failed;

 feed_segment:
		/* Looking to the next full segment */
		if (empty_seg++)
			goto super_root_found; /* found a valid super root */

		/* Remember the traversed segment for later disposal */
		ent = nilfs_alloc_segment_entry(segnum);
		if (unlikely(!ent)) {
			ret = -ENOMEM;
			goto failed;
		}
		list_add_tail(&ent->list, &segments);

		seg_seq++;
		segnum = nextnum;
		nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
		pseg_start = seg_start;
	}

 super_root_found:
	/* Updating pointers relating to the latest checkpoint */
	list_splice(&segments, ri->ri_used_segments.prev);
	nilfs->ns_last_pseg = sr_pseg_start;
	nilfs->ns_last_seq = nilfs->ns_seg_seq;
	nilfs->ns_last_cno = ri->ri_cno;
	return 0;

 failed:
	nilfs_dispose_segment_list(&segments);
	return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
}
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h new file mode 100644 index 000000000000..adccd4fc654e --- /dev/null +++ b/fs/nilfs2/sb.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * sb.h - NILFS on-memory super block structure. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_SB | ||
25 | #define _NILFS_SB | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/fs.h> | ||
29 | |||
/*
 * Mount options
 */
struct nilfs_mount_options {
	unsigned long mount_opt;	/* NILFS_MOUNT_* option flag bits */
	__u64 snapshot_cno;		/* checkpoint number of the snapshot
					   to mount; presumably 0 when not a
					   snapshot mount — TODO confirm */
};
37 | |||
38 | struct the_nilfs; | ||
39 | struct nilfs_sc_info; | ||
40 | |||
/*
 * NILFS super-block data in memory
 */
struct nilfs_sb_info {
	/* Snapshot status */
	__u64 s_snapshot_cno;		/* Checkpoint number */
	atomic_t s_inodes_count;	/* number of inodes in this mount */
	atomic_t s_blocks_count;	/* Reserved (might be deleted) */

	/* Mount options */
	unsigned long s_mount_opt;	/* NILFS_MOUNT_* flag bits (see the
					   nilfs_*_opt macros below) */
	uid_t s_resuid;			/* uid allowed to use reserved blocks */
	gid_t s_resgid;			/* gid allowed to use reserved blocks */

	unsigned long s_interval;	/* construction interval */
	unsigned long s_watermark;	/* threshold of data amount
					   for the segment construction */

	/* Fundamental members */
	struct super_block *s_super;	/* reverse pointer to super_block */
	struct the_nilfs *s_nilfs;	/* back pointer to the core object */
	struct list_head s_list;	/* list head for nilfs->ns_supers */

	/* Segment constructor */
	struct list_head s_dirty_files;	/* dirty files list */
	struct nilfs_sc_info *s_sc_info; /* segment constructor info */
	spinlock_t s_inode_lock;	/* Lock for the nilfs inode.
					   It covers s_dirty_files list */

	/* Metadata files */
	struct inode *s_ifile;		/* index file inode */

	/* Inode allocator */
	spinlock_t s_next_gen_lock;	/* protects s_next_generation */
	u32 s_next_generation;		/* i_generation for new inodes */
};
77 | |||
78 | static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb) | ||
79 | { | ||
80 | return sb->s_fs_info; | ||
81 | } | ||
82 | |||
83 | static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi) | ||
84 | { | ||
85 | return sbi->s_sc_info; | ||
86 | } | ||
87 | |||
/*
 * Bit operations for the mount option
 * (@opt / @mask are flag names given without the NILFS_MOUNT_ prefix)
 */
/* Clear the given option bit(s) */
#define nilfs_clear_opt(sbi, opt)  \
	do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
/* Set the given option bit(s) */
#define nilfs_set_opt(sbi, opt)  \
	do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0)
/* Non-zero iff the given option bit(s) are set */
#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt)
/* Replace the bits selected by @mask with the value of @opt */
#define nilfs_write_opt(sbi, mask, opt)  \
	do { (sbi)->s_mount_opt =  \
		(((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) |  \
		 NILFS_MOUNT_##opt);  \
	} while (0)
101 | |||
102 | #endif /* _NILFS_SB */ | ||
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c new file mode 100644 index 000000000000..1e68821b4a9b --- /dev/null +++ b/fs/nilfs2/segbuf.c | |||
@@ -0,0 +1,439 @@ | |||
1 | /* | ||
2 | * segbuf.c - NILFS segment buffer | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/writeback.h> | ||
26 | #include <linux/crc32.h> | ||
27 | #include "page.h" | ||
28 | #include "segbuf.h" | ||
29 | #include "seglist.h" | ||
30 | |||
31 | |||
32 | static struct kmem_cache *nilfs_segbuf_cachep; | ||
33 | |||
34 | static void nilfs_segbuf_init_once(void *obj) | ||
35 | { | ||
36 | memset(obj, 0, sizeof(struct nilfs_segment_buffer)); | ||
37 | } | ||
38 | |||
39 | int __init nilfs_init_segbuf_cache(void) | ||
40 | { | ||
41 | nilfs_segbuf_cachep = | ||
42 | kmem_cache_create("nilfs2_segbuf_cache", | ||
43 | sizeof(struct nilfs_segment_buffer), | ||
44 | 0, SLAB_RECLAIM_ACCOUNT, | ||
45 | nilfs_segbuf_init_once); | ||
46 | |||
47 | return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0; | ||
48 | } | ||
49 | |||
/* Tear down the segment-buffer slab cache created at module init. */
void nilfs_destroy_segbuf_cache(void)
{
	kmem_cache_destroy(nilfs_segbuf_cachep);
}
54 | |||
55 | struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) | ||
56 | { | ||
57 | struct nilfs_segment_buffer *segbuf; | ||
58 | |||
59 | segbuf = kmem_cache_alloc(nilfs_segbuf_cachep, GFP_NOFS); | ||
60 | if (unlikely(!segbuf)) | ||
61 | return NULL; | ||
62 | |||
63 | segbuf->sb_super = sb; | ||
64 | INIT_LIST_HEAD(&segbuf->sb_list); | ||
65 | INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); | ||
66 | INIT_LIST_HEAD(&segbuf->sb_payload_buffers); | ||
67 | return segbuf; | ||
68 | } | ||
69 | |||
/* Return @segbuf to the slab cache (caller must have released buffers). */
void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf)
{
	kmem_cache_free(nilfs_segbuf_cachep, segbuf);
}
74 | |||
/*
 * Bind @segbuf to full segment @segnum: compute the segment's disk block
 * range, place the partial segment at @offset blocks into it, and record
 * how many blocks remain up to the end of the full segment.
 */
void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum,
		      unsigned long offset, struct the_nilfs *nilfs)
{
	segbuf->sb_segnum = segnum;
	nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start,
				&segbuf->sb_fseg_end);

	segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset;
	/* +1 because sb_fseg_end is inclusive */
	segbuf->sb_rest_blocks =
		segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
}
86 | |||
/*
 * Record the successor segment: both its number (for in-memory chaining)
 * and its start block (written into the on-disk summary as ss_next).
 */
void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf,
				  __u64 nextnum, struct the_nilfs *nilfs)
{
	segbuf->sb_nextnum = nextnum;
	segbuf->sb_sum.next = nilfs_get_segment_start_blocknr(nilfs, nextnum);
}
93 | |||
94 | int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) | ||
95 | { | ||
96 | struct buffer_head *bh; | ||
97 | |||
98 | bh = sb_getblk(segbuf->sb_super, | ||
99 | segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk); | ||
100 | if (unlikely(!bh)) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | nilfs_segbuf_add_segsum_buffer(segbuf, bh); | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, | ||
108 | struct buffer_head **bhp) | ||
109 | { | ||
110 | struct buffer_head *bh; | ||
111 | |||
112 | bh = sb_getblk(segbuf->sb_super, | ||
113 | segbuf->sb_pseg_start + segbuf->sb_sum.nblocks); | ||
114 | if (unlikely(!bh)) | ||
115 | return -ENOMEM; | ||
116 | |||
117 | nilfs_segbuf_add_payload_buffer(segbuf, bh); | ||
118 | *bhp = bh; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
/*
 * Re-initialize @segbuf for a fresh log: zero the block counters, allocate
 * the first summary block, and seed the on-memory summary with @flags and
 * @ctime.  Counters must be zeroed *before* nilfs_segbuf_extend_segsum(),
 * which bumps nblocks/nsumblk to 1 and positions the block accordingly.
 * Returns 0 on success or -ENOMEM from the summary-block allocation.
 */
int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
		       time_t ctime)
{
	int err;

	segbuf->sb_sum.nblocks = segbuf->sb_sum.nsumblk = 0;
	err = nilfs_segbuf_extend_segsum(segbuf);
	if (unlikely(err))
		return err;

	segbuf->sb_sum.flags = flags;
	/* start with just the fixed-size summary header; grows as
	   file information is appended */
	segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
	segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
	segbuf->sb_sum.ctime = ctime;

	segbuf->sb_io_error = 0;
	return 0;
}
140 | |||
/*
 * Setup segment summary
 *
 * Copies the on-memory summary (sb_sum) into the on-disk segment summary
 * header held in the first summary block.  The two checksum fields
 * (ss_datasum, ss_sumsum) are NOT written here; they are filled in later
 * by the CRC routines below.
 */
void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf)
{
	struct nilfs_segment_summary *raw_sum;
	struct buffer_head *bh_sum;

	/* header lives at the start of the first summary block */
	bh_sum = list_entry(segbuf->sb_segsum_buffers.next,
			    struct buffer_head, b_assoc_buffers);
	raw_sum = (struct nilfs_segment_summary *)bh_sum->b_data;

	raw_sum->ss_magic    = cpu_to_le32(NILFS_SEGSUM_MAGIC);
	raw_sum->ss_bytes    = cpu_to_le16(sizeof(*raw_sum));
	raw_sum->ss_flags    = cpu_to_le16(segbuf->sb_sum.flags);
	raw_sum->ss_seq      = cpu_to_le64(segbuf->sb_sum.seg_seq);
	raw_sum->ss_create   = cpu_to_le64(segbuf->sb_sum.ctime);
	raw_sum->ss_next     = cpu_to_le64(segbuf->sb_sum.next);
	raw_sum->ss_nblocks  = cpu_to_le32(segbuf->sb_sum.nblocks);
	raw_sum->ss_nfinfo   = cpu_to_le32(segbuf->sb_sum.nfinfo);
	raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes);
	raw_sum->ss_pad      = 0;
}
164 | |||
/*
 * CRC calculation routines
 */

/*
 * Compute the summary checksum (ss_sumsum): a CRC32 over sumbytes bytes of
 * segment summary, starting just past the two leading checksum fields of
 * the header so the checksum does not cover itself, then continuing over
 * the remaining summary blocks.  Assumes sumbytes covers at least the two
 * checksum fields — guaranteed by nilfs_segbuf_reset() seeding sumbytes
 * with the full header size.
 */
void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf,
				     u32 seed)
{
	struct buffer_head *bh;
	struct nilfs_segment_summary *raw_sum;
	unsigned long size, bytes = segbuf->sb_sum.sumbytes;
	u32 crc;

	bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head,
			b_assoc_buffers);

	raw_sum = (struct nilfs_segment_summary *)bh->b_data;
	size = min_t(unsigned long, bytes, bh->b_size);
	/* first block: skip ss_datasum and ss_sumsum at the head */
	crc = crc32_le(seed,
		       (unsigned char *)raw_sum +
		       sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum),
		       size - (sizeof(raw_sum->ss_datasum) +
			       sizeof(raw_sum->ss_sumsum)));

	/* remaining summary blocks: full (or final partial) contents */
	list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers,
				     b_assoc_buffers) {
		bytes -= size;
		size = min_t(unsigned long, bytes, bh->b_size);
		crc = crc32_le(crc, bh->b_data, size);
	}
	raw_sum->ss_sumsum = cpu_to_le32(crc);
}
195 | |||
/*
 * Compute the data checksum (ss_datasum): a CRC32 over the whole partial
 * segment — the first summary block minus the ss_datasum field itself,
 * every remaining summary block in full, and every payload buffer.
 * Payload pages are mapped with kmap_atomic since they may be highmem.
 * Must run after ss_sumsum is filled in, as that field is covered here.
 */
void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
				   u32 seed)
{
	struct buffer_head *bh;
	struct nilfs_segment_summary *raw_sum;
	void *kaddr;
	u32 crc;

	bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head,
			b_assoc_buffers);
	raw_sum = (struct nilfs_segment_summary *)bh->b_data;
	/* skip only ss_datasum so the checksum does not cover itself */
	crc = crc32_le(seed,
		       (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum),
		       bh->b_size - sizeof(raw_sum->ss_datasum));

	list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers,
				     b_assoc_buffers) {
		crc = crc32_le(crc, bh->b_data, bh->b_size);
	}
	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		kaddr = kmap_atomic(bh->b_page, KM_USER0);
		crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
		kunmap_atomic(kaddr, KM_USER0);
	}
	raw_sum->ss_datasum = cpu_to_le32(crc);
}
222 | |||
/*
 * Release every buffer head on @list (a b_assoc_buffers chain), unlinking
 * each from the list first.  Buffers on nilfs-allocated clone pages also
 * drop the per-buffer page-cache reference, and the private page itself
 * is freed once its refcount drops to the baseline.
 */
void nilfs_release_buffers(struct list_head *list)
{
	struct buffer_head *bh, *n;

	list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
		list_del_init(&bh->b_assoc_buffers);
		if (buffer_nilfs_allocated(bh)) {
			struct page *clone_page = bh->b_page;

			/* remove clone page */
			brelse(bh);
			page_cache_release(clone_page); /* for each bh */
			/* <= 2: presumably only the allocation and the
			   page-private references remain — TODO confirm */
			if (page_count(clone_page) <= 2) {
				lock_page(clone_page);
				nilfs_free_private_page(clone_page);
			}
			continue;
		}
		brelse(bh);
	}
}
244 | |||
245 | /* | ||
246 | * BIO operations | ||
247 | */ | ||
248 | static void nilfs_end_bio_write(struct bio *bio, int err) | ||
249 | { | ||
250 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
251 | struct nilfs_write_info *wi = bio->bi_private; | ||
252 | |||
253 | if (err == -EOPNOTSUPP) { | ||
254 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
255 | bio_put(bio); | ||
256 | /* to be detected by submit_seg_bio() */ | ||
257 | } | ||
258 | |||
259 | if (!uptodate) | ||
260 | atomic_inc(&wi->err); | ||
261 | |||
262 | bio_put(bio); | ||
263 | complete(&wi->bio_event); | ||
264 | } | ||
265 | |||
/*
 * Submit the bio currently accumulated in @wi.
 *
 * If the backing device is congested and at least one bio is already in
 * flight, reap one completion first as a simple throttle; an error seen
 * there aborts this submission with -EIO.  An extra reference is taken
 * around submit_bio() so BIO_EOPNOTSUPP (set by nilfs_end_bio_write())
 * can be inspected safely after the call.  On success the submission
 * window (start/end/rest_blocks/nr_vecs) is advanced; on any failure
 * wi->bio is cleared and the caller must not reuse it.
 */
static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
{
	struct bio *bio = wi->bio;
	int err;

	if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) {
		/* throttle: wait for one in-flight bio to complete */
		wait_for_completion(&wi->bio_event);
		wi->nbio--;
		if (unlikely(atomic_read(&wi->err))) {
			bio_put(bio);
			err = -EIO;
			goto failed;
		}
	}

	bio->bi_end_io = nilfs_end_bio_write;
	bio->bi_private = wi;
	bio_get(bio);	/* keep bio alive across the flag check below */
	submit_bio(mode, bio);
	if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
		bio_put(bio);
		err = -EOPNOTSUPP;
		goto failed;
	}
	wi->nbio++;
	bio_put(bio);	/* release the extra reference taken above */

	/* advance the window to the blocks not yet submitted */
	wi->bio = NULL;
	wi->rest_blocks -= wi->end - wi->start;
	wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
	wi->start = wi->end;
	return 0;

 failed:
	wi->bio = NULL;
	return err;
}
303 | |||
304 | /** | ||
305 | * nilfs_alloc_seg_bio - allocate a bio for writing segment. | ||
306 | * @sb: super block | ||
307 | * @start: beginning disk block number of this BIO. | ||
308 | * @nr_vecs: request size of page vector. | ||
309 | * | ||
310 | * alloc_seg_bio() allocates a new BIO structure and initialize it. | ||
311 | * | ||
312 | * Return Value: On success, pointer to the struct bio is returned. | ||
313 | * On error, NULL is returned. | ||
314 | */ | ||
315 | static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, | ||
316 | int nr_vecs) | ||
317 | { | ||
318 | struct bio *bio; | ||
319 | |||
320 | bio = bio_alloc(GFP_NOWAIT, nr_vecs); | ||
321 | if (bio == NULL) { | ||
322 | while (!bio && (nr_vecs >>= 1)) | ||
323 | bio = bio_alloc(GFP_NOWAIT, nr_vecs); | ||
324 | } | ||
325 | if (likely(bio)) { | ||
326 | bio->bi_bdev = sb->s_bdev; | ||
327 | bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); | ||
328 | } | ||
329 | return bio; | ||
330 | } | ||
331 | |||
/*
 * Initialize the write context @wi for writing out @segbuf.
 * The caller must have set wi->sb, wi->bdi and wi->bh_sr beforehand
 * (see the struct nilfs_write_info comment in segbuf.h).
 */
void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
				struct nilfs_write_info *wi)
{
	wi->bio = NULL;
	wi->rest_blocks = segbuf->sb_sum.nblocks;
	wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev);
	wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
	wi->start = wi->end = 0;
	wi->nbio = 0;
	wi->blocknr = segbuf->sb_pseg_start;

	atomic_set(&wi->err, 0);
	init_completion(&wi->bio_event);
}
346 | |||
/*
 * Queue one buffer head for writing: add its page to the current bio,
 * allocating a fresh bio if none is pending.  If the bio is full
 * (bio_add_page() takes less than the whole buffer) the bio is submitted
 * and the same bh is retried on a new bio.
 * Returns 0 on success or a negative error from allocation/submission.
 */
static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh,
			   int mode)
{
	int len, err;

	BUG_ON(wi->nr_vecs <= 0);
 repeat:
	if (!wi->bio) {
		wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end,
					      wi->nr_vecs);
		if (unlikely(!wi->bio))
			return -ENOMEM;
	}

	len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
	if (len == bh->b_size) {
		wi->end++;
		return 0;
	}
	/* bio is FULL */
	err = nilfs_submit_seg_bio(wi, mode);
	/* never submit current bh */
	if (likely(!err))
		goto repeat;
	return err;
}
373 | |||
/*
 * Write out @segbuf: submit all segment-summary buffers, then all payload
 * buffers.  The final bio is submitted with BIO_RW_SYNCIO set so the last
 * request of the log is not left sitting in the I/O scheduler.
 * On failure wi->err is bumped (nilfs_segbuf_wait() also checks it) and
 * the negative error from the submission path is returned.
 */
int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
		       struct nilfs_write_info *wi)
{
	struct buffer_head *bh;
	int res, rw = WRITE;

	list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
		res = nilfs_submit_bh(wi, bh, rw);
		if (unlikely(res))
			goto failed_bio;
	}

	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		res = nilfs_submit_bh(wi, bh, rw);
		if (unlikely(res))
			goto failed_bio;
	}

	if (wi->bio) {
		/*
		 * Last BIO is always sent through the following
		 * submission.
		 */
		rw |= (1 << BIO_RW_SYNCIO);
		res = nilfs_submit_seg_bio(wi, rw);
		if (unlikely(res))
			goto failed_bio;
	}

	res = 0;
 out:
	return res;

 failed_bio:
	atomic_inc(&wi->err);
	goto out;
}
411 | |||
/**
 * nilfs_segbuf_wait - wait for completion of requested BIOs
 * @segbuf: segment buffer whose I/O status is updated on error
 * @wi: nilfs_write_info
 *
 * Reaps one completion per bio submitted through @wi.  If any completion
 * reported an error, the segment buffer is flagged (sb_io_error) and the
 * failure is reported to the caller.
 *
 * Return Value: On Success, 0 is returned. On Error, one of the following
 * negative error code is returned.
 *
 * %-EIO - I/O error
 */
int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf,
		      struct nilfs_write_info *wi)
{
	int err = 0;

	if (!wi->nbio)
		return 0;	/* nothing was submitted */

	do {
		wait_for_completion(&wi->bio_event);
	} while (--wi->nbio > 0);

	if (unlikely(atomic_read(&wi->err) > 0)) {
		printk(KERN_ERR "NILFS: IO error writing segment\n");
		err = -EIO;
		segbuf->sb_io_error = 1;
	}
	return err;
}
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h new file mode 100644 index 000000000000..0c3076f4e592 --- /dev/null +++ b/fs/nilfs2/segbuf.h | |||
@@ -0,0 +1,201 @@ | |||
1 | /* | ||
2 | * segbuf.h - NILFS Segment buffer prototypes and definitions | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | #ifndef _NILFS_SEGBUF_H | ||
24 | #define _NILFS_SEGBUF_H | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/bio.h> | ||
29 | #include <linux/completion.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | |||
32 | /** | ||
33 | * struct nilfs_segsum_info - On-memory segment summary | ||
34 | * @flags: Flags | ||
35 | * @nfinfo: Number of file information structures | ||
36 | * @nblocks: Number of blocks included in the partial segment | ||
37 | * @nsumblk: Number of summary blocks | ||
38 | * @sumbytes: Byte count of segment summary | ||
39 | * @nfileblk: Total number of file blocks | ||
40 | * @seg_seq: Segment sequence number | ||
41 | * @ctime: Creation time | ||
42 | * @next: Block number of the next full segment | ||
43 | */ | ||
44 | struct nilfs_segsum_info { | ||
45 | unsigned int flags; | ||
46 | unsigned long nfinfo; | ||
47 | unsigned long nblocks; | ||
48 | unsigned long nsumblk; | ||
49 | unsigned long sumbytes; | ||
50 | unsigned long nfileblk; | ||
51 | u64 seg_seq; | ||
52 | time_t ctime; | ||
53 | sector_t next; | ||
54 | }; | ||
55 | |||
/* macro for the flags */
/* log carries a super root block */
#define NILFS_SEG_HAS_SR(sum)    ((sum)->flags & NILFS_SS_SR)
/* log begins a logical segment */
#define NILFS_SEG_LOGBGN(sum)    ((sum)->flags & NILFS_SS_LOGBGN)
/* log ends a logical segment */
#define NILFS_SEG_LOGEND(sum)    ((sum)->flags & NILFS_SS_LOGEND)
/* log was written by a data-sync (fdatasync) construction */
#define NILFS_SEG_DSYNC(sum)     ((sum)->flags & NILFS_SS_SYNDT)
/* log both begins and ends a logical segment */
#define NILFS_SEG_SIMPLEX(sum)  \
	(((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) ==  \
	 (NILFS_SS_LOGBGN | NILFS_SS_LOGEND))

/* log contains nothing but its summary blocks */
#define NILFS_SEG_EMPTY(sum)	((sum)->nblocks == (sum)->nsumblk)
66 | |||
67 | /** | ||
68 | * struct nilfs_segment_buffer - Segment buffer | ||
69 | * @sb_super: back pointer to a superblock struct | ||
70 | * @sb_list: List head to chain this structure | ||
71 | * @sb_sum: On-memory segment summary | ||
72 | * @sb_segnum: Index number of the full segment | ||
73 | * @sb_nextnum: Index number of the next full segment | ||
74 | * @sb_fseg_start: Start block number of the full segment | ||
75 | * @sb_fseg_end: End block number of the full segment | ||
76 | * @sb_pseg_start: Disk block number of partial segment | ||
77 | * @sb_rest_blocks: Number of residual blocks in the current segment | ||
78 | * @sb_segsum_buffers: List of buffers for segment summaries | ||
79 | * @sb_payload_buffers: List of buffers for segment payload | ||
80 | * @sb_io_error: I/O error status | ||
81 | */ | ||
82 | struct nilfs_segment_buffer { | ||
83 | struct super_block *sb_super; | ||
84 | struct list_head sb_list; | ||
85 | |||
86 | /* Segment information */ | ||
87 | struct nilfs_segsum_info sb_sum; | ||
88 | __u64 sb_segnum; | ||
89 | __u64 sb_nextnum; | ||
90 | sector_t sb_fseg_start, sb_fseg_end; | ||
91 | sector_t sb_pseg_start; | ||
92 | unsigned sb_rest_blocks; | ||
93 | |||
94 | /* Buffers */ | ||
95 | struct list_head sb_segsum_buffers; | ||
96 | struct list_head sb_payload_buffers; /* including super root */ | ||
97 | |||
98 | /* io status */ | ||
99 | int sb_io_error; | ||
100 | }; | ||
101 | |||
102 | #define NILFS_LIST_SEGBUF(head) \ | ||
103 | list_entry((head), struct nilfs_segment_buffer, sb_list) | ||
104 | #define NILFS_NEXT_SEGBUF(segbuf) NILFS_LIST_SEGBUF((segbuf)->sb_list.next) | ||
105 | #define NILFS_PREV_SEGBUF(segbuf) NILFS_LIST_SEGBUF((segbuf)->sb_list.prev) | ||
106 | #define NILFS_LAST_SEGBUF(head) NILFS_LIST_SEGBUF((head)->prev) | ||
107 | #define NILFS_FIRST_SEGBUF(head) NILFS_LIST_SEGBUF((head)->next) | ||
108 | #define NILFS_SEGBUF_IS_LAST(segbuf, head) ((segbuf)->sb_list.next == (head)) | ||
109 | |||
110 | #define nilfs_for_each_segbuf_before(s, t, h) \ | ||
111 | for ((s) = NILFS_FIRST_SEGBUF(h); (s) != (t); \ | ||
112 | (s) = NILFS_NEXT_SEGBUF(s)) | ||
113 | |||
114 | #define NILFS_SEGBUF_FIRST_BH(head) \ | ||
115 | (list_entry((head)->next, struct buffer_head, b_assoc_buffers)) | ||
116 | #define NILFS_SEGBUF_NEXT_BH(bh) \ | ||
117 | (list_entry((bh)->b_assoc_buffers.next, struct buffer_head, \ | ||
118 | b_assoc_buffers)) | ||
119 | #define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head) | ||
120 | |||
121 | |||
122 | int __init nilfs_init_segbuf_cache(void); | ||
123 | void nilfs_destroy_segbuf_cache(void); | ||
124 | struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); | ||
125 | void nilfs_segbuf_free(struct nilfs_segment_buffer *); | ||
126 | void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, | ||
127 | struct the_nilfs *); | ||
128 | void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, | ||
129 | struct the_nilfs *); | ||
130 | int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); | ||
131 | int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); | ||
132 | int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, | ||
133 | struct buffer_head **); | ||
134 | void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); | ||
135 | void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32); | ||
136 | void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32); | ||
137 | |||
/* Append @bh to the summary-block list, counting it in both the total
 * block count and the summary-block count. */
static inline void
nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
			       struct buffer_head *bh)
{
	list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_segsum_buffers);
	segbuf->sb_sum.nblocks++;
	segbuf->sb_sum.nsumblk++;
}
146 | |||
/* Append @bh to the payload-block list and count it in the total. */
static inline void
nilfs_segbuf_add_payload_buffer(struct nilfs_segment_buffer *segbuf,
				struct buffer_head *bh)
{
	list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_payload_buffers);
	segbuf->sb_sum.nblocks++;
}
154 | |||
/* Append a file-data buffer: takes its own reference on @bh (released
 * later via nilfs_release_buffers()) and counts it as a file block. */
static inline void
nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf,
			     struct buffer_head *bh)
{
	get_bh(bh);
	nilfs_segbuf_add_payload_buffer(segbuf, bh);
	segbuf->sb_sum.nfileblk++;
}
163 | |||
164 | void nilfs_release_buffers(struct list_head *); | ||
165 | |||
/* Release every summary and payload buffer attached to @segbuf. */
static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
{
	nilfs_release_buffers(&segbuf->sb_segsum_buffers);
	nilfs_release_buffers(&segbuf->sb_payload_buffers);
}
171 | |||
/*
 * Per-write-out context used by the segbuf submission routines
 * (nilfs_segbuf_prepare_write / _write / _wait).
 */
struct nilfs_write_info {
	struct bio *bio;		/* bio being filled, NULL if none */
	int start, end; /* The region to be submitted */
	int rest_blocks;		/* blocks not yet submitted */
	int max_pages;			/* device limit on vecs per bio */
	int nr_vecs;			/* vec count for the next bio */
	sector_t blocknr;		/* disk block of region start */

	int nbio;			/* number of bios in flight */
	atomic_t err;			/* count of failed bios */
	struct completion bio_event;
					/* completion event of segment write */

	/*
	 * The following fields must be set explicitly
	 */
	struct super_block *sb;
	struct backing_dev_info *bdi;	/* backing dev info */
	struct buffer_head *bh_sr;	/* super root buffer — presumably
					   consumed by the segment
					   constructor; TODO confirm */
};
192 | |||
193 | |||
194 | void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, | ||
195 | struct nilfs_write_info *); | ||
196 | int nilfs_segbuf_write(struct nilfs_segment_buffer *, | ||
197 | struct nilfs_write_info *); | ||
198 | int nilfs_segbuf_wait(struct nilfs_segment_buffer *, | ||
199 | struct nilfs_write_info *); | ||
200 | |||
201 | #endif /* _NILFS_SEGBUF_H */ | ||
diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h new file mode 100644 index 000000000000..d39df9144e99 --- /dev/null +++ b/fs/nilfs2/seglist.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * seglist.h - expediential structure and routines to handle list of segments | ||
3 | * (would be removed in a future release) | ||
4 | * | ||
5 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | * | ||
21 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | * | ||
23 | */ | ||
24 | #ifndef _NILFS_SEGLIST_H | ||
25 | #define _NILFS_SEGLIST_H | ||
26 | |||
27 | #include <linux/fs.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "sufile.h" | ||
31 | |||
/*
 * struct nilfs_segment_entry - list node wrapping one segment usage entry
 *
 * Couples a segment number with the sufile buffer/record that backs its
 * usage information while the entry sits on a temporary list.
 */
struct nilfs_segment_entry {
	__u64			segnum;

#define NILFS_SLH_FREED		0x0001	/* The segment was freed provisionally.
					   It must be cancelled if
					   construction aborted */

	unsigned		flags;
	struct list_head	list;		/* link for a caller-owned list */
	struct buffer_head     *bh_su;		/* sufile buffer; non-NULL while open */
	struct nilfs_segment_usage *raw_su;	/* usage record inside bh_su */
};


/* Frees every entry on the given list (see nilfs_free_segment_entry()). */
void nilfs_dispose_segment_list(struct list_head *);
47 | |||
48 | static inline struct nilfs_segment_entry * | ||
49 | nilfs_alloc_segment_entry(__u64 segnum) | ||
50 | { | ||
51 | struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); | ||
52 | |||
53 | if (likely(ent)) { | ||
54 | ent->segnum = segnum; | ||
55 | ent->flags = 0; | ||
56 | ent->bh_su = NULL; | ||
57 | ent->raw_su = NULL; | ||
58 | INIT_LIST_HEAD(&ent->list); | ||
59 | } | ||
60 | return ent; | ||
61 | } | ||
62 | |||
/*
 * nilfs_open_segment_entry - attach the on-disk usage record to an entry
 * @ent: segment entry
 * @sufile: segment usage file inode
 *
 * Fills @ent->raw_su and @ent->bh_su via nilfs_sufile_get_segment_usage();
 * returns its error code (0 on success).  Pair with
 * nilfs_close_segment_entry().
 */
static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent,
					   struct inode *sufile)
{
	return nilfs_sufile_get_segment_usage(sufile, ent->segnum,
					      &ent->raw_su, &ent->bh_su);
}
69 | |||
70 | static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent, | ||
71 | struct inode *sufile) | ||
72 | { | ||
73 | if (!ent->bh_su) | ||
74 | return; | ||
75 | nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su); | ||
76 | ent->bh_su = NULL; | ||
77 | ent->raw_su = NULL; | ||
78 | } | ||
79 | |||
/*
 * nilfs_free_segment_entry - free an entry allocated by
 * nilfs_alloc_segment_entry().  The caller must have closed the entry
 * (or never opened it) so that no sufile buffer reference is leaked.
 */
static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent)
{
	kfree(ent);
}
84 | |||
85 | #endif /* _NILFS_SEGLIST_H */ | ||
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c new file mode 100644 index 000000000000..fb70ec3be20e --- /dev/null +++ b/fs/nilfs2/segment.c | |||
@@ -0,0 +1,2977 @@ | |||
1 | /* | ||
2 | * segment.c - NILFS segment constructor. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | #include <linux/buffer_head.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include <linux/bio.h> | ||
28 | #include <linux/completion.h> | ||
29 | #include <linux/blkdev.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | #include <linux/freezer.h> | ||
32 | #include <linux/kthread.h> | ||
33 | #include <linux/crc32.h> | ||
34 | #include <linux/pagevec.h> | ||
35 | #include "nilfs.h" | ||
36 | #include "btnode.h" | ||
37 | #include "page.h" | ||
38 | #include "segment.h" | ||
39 | #include "sufile.h" | ||
40 | #include "cpfile.h" | ||
41 | #include "ifile.h" | ||
42 | #include "seglist.h" | ||
43 | #include "segbuf.h" | ||
44 | |||
45 | |||
/*
 * Segment constructor
 */
#define SC_N_INODEVEC	16   /* Size of locally allocated inode vector */

#define SC_MAX_SEGDELTA 64   /* Upper limit of the number of segments
				appended in collection retry loop */

/* Construction mode */
enum {
	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
	SC_LSEG_DSYNC,	/* Flush data blocks of a given file and make
			   a logical segment without a super root */
	SC_FLUSH_FILE,	/* Flush data files, leads to segment writes without
			   creating a checkpoint */
	SC_FLUSH_DAT,	/* Flush DAT file. This also creates segments without
			   a checkpoint */
};

/* Stage numbers of dirty block collection */
enum {
	NILFS_ST_INIT = 0,
	NILFS_ST_GC,	/* Collecting dirty blocks for GC */
	NILFS_ST_FILE,	/* Regular files */
	NILFS_ST_IFILE,	/* ifile (per-checkpoint inode metadata file) */
	NILFS_ST_CPFILE, /* checkpoint file */
	NILFS_ST_SUFILE, /* segment usage file */
	NILFS_ST_DAT,	/* DAT (disk address translation) file */
	NILFS_ST_SR,	/* Super root */
	NILFS_ST_DSYNC,	/* Data sync blocks */
	NILFS_ST_DONE,
};

/* State flags of collection */
#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
/* Flags that survive a collection retry (see NILFS_CF_HISTORY_MASK users) */
#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED)
83 | |||
/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
	/* Enumerate/queue a dirty data block of @inode */
	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	/* Propagate a dirty b-tree node block (may be NULL for dsync mode) */
	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	/* Queue a dirty bmap (node) block for writing */
	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	/* Emit the per-block info of a data block into the segment summary */
	void (*write_data_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
	/* Emit the per-block info of a node block into the segment summary */
	void (*write_node_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
};
99 | |||
/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
			       int);

/*
 * Wrap-safe 32-bit sequence-number comparisons (same idiom as the
 * kernel's time_after()/time_before() macros): the subtraction is done
 * in signed arithmetic so the result stays correct across wraparound.
 */
#define nilfs_cnt32_gt(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) &&	\
	 ((__s32)(b) - (__s32)(a) < 0))
#define nilfs_cnt32_ge(a, b)   \
	(typecheck(__u32, a) && typecheck(__u32, b) &&	\
	 ((__s32)(a) - (__s32)(b) >= 0))
#define nilfs_cnt32_lt(a, b)  nilfs_cnt32_gt(b, a)
#define nilfs_cnt32_le(a, b)  nilfs_cnt32_ge(b, a)
117 | |||
/*
 * Transaction
 */
/* Slab cache for struct nilfs_transaction_info; created/destroyed by
   nilfs_init_transaction_cache()/nilfs_destroy_transaction_cache(). */
static struct kmem_cache *nilfs_transaction_cachep;
122 | |||
123 | /** | ||
124 | * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info | ||
125 | * | ||
126 | * nilfs_init_transaction_cache() creates a slab cache for the struct | ||
127 | * nilfs_transaction_info. | ||
128 | * | ||
129 | * Return Value: On success, it returns 0. On error, one of the following | ||
130 | * negative error code is returned. | ||
131 | * | ||
132 | * %-ENOMEM - Insufficient memory available. | ||
133 | */ | ||
134 | int nilfs_init_transaction_cache(void) | ||
135 | { | ||
136 | nilfs_transaction_cachep = | ||
137 | kmem_cache_create("nilfs2_transaction_cache", | ||
138 | sizeof(struct nilfs_transaction_info), | ||
139 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
140 | return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0; | ||
141 | } | ||
142 | |||
/**
 * nilfs_destroy_transaction_cache - destroy the cache for transaction info
 *
 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
 * nilfs_transaction_info.
 */
void nilfs_destroy_transaction_cache(void)
{
	kmem_cache_destroy(nilfs_transaction_cachep);
}
153 | |||
/*
 * nilfs_prepare_segment_lock - hook a transaction info onto current
 * @ti: caller-supplied transaction info, or NULL to allocate one
 *
 * Return: the new nesting count (> 0) when an NILFS transaction is
 * already active on this task, 0 when this call installed the outermost
 * transaction info, or -ENOMEM on allocation failure.
 */
static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *save = NULL;

	if (cur_ti) {
		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
			return ++cur_ti->ti_count;	/* nested call */
		else {
			/*
			 * If journal_info field is occupied by other FS,
			 * it is saved and will be restored on
			 * nilfs_transaction_commit().
			 */
			printk(KERN_WARNING
			       "NILFS warning: journal info from a different "
			       "FS\n");
			save = current->journal_info;
		}
	}
	if (!ti) {
		/* mark for freeing at commit/abort time */
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	} else {
		ti->ti_flags = 0;
	}
	ti->ti_count = 0;
	ti->ti_save = save;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}
188 | |||
/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make a segment construction and write tasks
 * exclusive. The function is used with nilfs_transaction_commit() in pairs.
 * The region enclosed by these two functions can be nested. To avoid a
 * deadlock, the semaphore is only acquired or released in the outermost call.
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it. It is initialized and hooked onto the current task in
 * the outermost call. If a pre-allocated struct is given to @ti, it is used
 * instead; otherwise a new struct is assigned from a slab.
 *
 * When @vacancy_check flag is set, this function will check the amount of
 * free space, and will wait for the GC to reclaim disk space if low capacity.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ENOSPC - No space left on device
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct nilfs_sb_info *sbi;
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(ti);

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0)
		return 0;	/* nested: outermost call holds the semaphore */

	sbi = NILFS_SB(sb);
	nilfs = sbi->s_nilfs;
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}
	return 0;

 failed:
	/* undo what nilfs_prepare_segment_lock() installed on current */
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return ret;
}
246 | |||
/**
 * nilfs_transaction_commit - commit indivisible file operations.
 * @sb: super block
 *
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin(). This is only performed
 * in outermost call of this function. If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor. If a sync flag is set, it starts construction
 * directly.
 */
int nilfs_transaction_commit(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_sb_info *sbi;
	struct nilfs_sc_info *sci;
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	ti->ti_flags |= NILFS_TI_COMMIT;
	if (ti->ti_count > 0) {
		ti->ti_count--;	/* nested call: just unwind one level */
		return 0;
	}
	sbi = NILFS_SB(sb);
	sci = NILFS_SC(sbi);
	if (sci != NULL) {
		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
		/* too many dirty blocks accumulated: flush eagerly */
		if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) >
		    sci->sc_watermark)
			nilfs_segctor_do_flush(sci, 0);
	}
	up_read(&sbi->s_nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;

	/* synchronous construction must run after the semaphore is dropped */
	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return err;
}
289 | |||
/*
 * nilfs_transaction_abort - abandon the current transaction nesting level.
 * @sb: super block
 *
 * Counterpart of nilfs_transaction_commit() for error paths: unwinds one
 * nesting level, and in the outermost call releases the segment semaphore
 * and restores/frees the transaction info without scheduling any
 * construction work.
 */
void nilfs_transaction_abort(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	if (ti->ti_count > 0) {
		ti->ti_count--;
		return;
	}
	up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem);

	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
}
305 | |||
/*
 * nilfs_relax_pressure_in_lock - give a pending flush a chance to run
 * @sb: super block
 *
 * Called while the task holds the segment semaphore for reading inside a
 * transaction.  If a flush request is pending, temporarily drops the read
 * lock, re-takes it for writing to run the flush, then downgrades back to
 * read mode so the caller's transaction remains valid on return.
 */
void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct the_nilfs *nilfs = sbi->s_nilfs;

	if (!sci || !sci->sc_flush_request)
		return;

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	down_write(&nilfs->ns_segctor_sem);
	/* re-check: another task may have serviced the flush meanwhile */
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		/* temporarily act as the writer for the flush */
		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}
329 | |||
/*
 * nilfs_transaction_lock - take the segment semaphore as the writer
 * @sbi: nilfs super block info
 * @ti: caller-provided transaction info to install on current
 * @gcflag: nonzero when locking on behalf of the garbage collector
 *
 * Installs @ti as the task's journal info and acquires the segment
 * semaphore for writing, first servicing any prioritized flush requests
 * (looping with yield() so the flusher can make progress).
 */
static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;

	WARN_ON(cur_ti);	/* must not nest inside another transaction */
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	INIT_LIST_HEAD(&ti->ti_garbage);
	current->journal_info = ti;

	for (;;) {
		down_write(&sbi->s_nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags))
			break;

		/* a prioritized flush is pending; run it before proceeding */
		nilfs_segctor_do_immediate_flush(NILFS_SC(sbi));

		up_write(&sbi->s_nilfs->ns_segctor_sem);
		yield();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;
}
357 | |||
/*
 * nilfs_transaction_unlock - release the writer lock taken by
 * nilfs_transaction_lock() and dispose of inodes collected on
 * ti_garbage during the locked section.
 */
static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);	/* writer lock never nests */

	up_write(&sbi->s_nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;
	if (!list_empty(&ti->ti_garbage))
		nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
}
370 | |||
/*
 * nilfs_segctor_map_segsum_entry - reserve @bytes in the segment summary
 * @sci: segment constructor info
 * @ssp: cursor (buffer + offset) into the summary area; advanced by @bytes
 * @bytes: size of the entry to map
 *
 * Returns a pointer to the reserved region.  Entries never straddle a
 * block boundary: if @bytes does not fit in the current summary block,
 * the cursor moves to the next block (which must already exist —
 * enforced by the BUG_ON).
 */
static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned blocksize = sci->sc_super->s_blocksize;
	void *p;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		ssp->offset = 0;
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}
	p = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;
	return p;
}
389 | |||
/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 *
 * Re-initializes the current segment buffer for a new log (marking it as
 * a GC log when garbage collection is in progress) and rewinds the finfo
 * and binfo cursors to just past the segment summary header.
 * Returns 0 on success or the error from nilfs_segbuf_reset().
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *sumbh;
	unsigned sumbytes;
	unsigned flags = 0;
	int err;

	if (nilfs_doing_gc())
		flags = NILFS_SS_GC;
	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime);
	if (unlikely(err))
		return err;

	/* both cursors start right after the summary header */
	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sumbytes = segbuf->sb_sum.sumbytes;
	sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes;
	sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}
415 | |||
/*
 * nilfs_segctor_feed_segment - advance to the next segment buffer
 *
 * Accounts the blocks accumulated in the current segment buffer, then
 * makes the next buffer in sc_segbufs current and resets it.  Returns
 * -E2BIG (an internal code, not reported to userspace) when the current
 * buffer was the last one, i.e. the construction window is full.
 */
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /* The current segment is filled up
				  (internal code) */
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}
425 | |||
/*
 * nilfs_segctor_add_super_root - append a super root block to the log
 *
 * Ensures the current segment buffer has room for one more payload block
 * (advancing to the next buffer if not), extends the payload with the
 * super root block, and flags the log as carrying a super root.
 */
static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;	/* feed switched buffers */
	}
	err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root);
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}
442 | |||
443 | /* | ||
444 | * Functions for making segment summary and payloads | ||
445 | */ | ||
446 | static int nilfs_segctor_segsum_block_required( | ||
447 | struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp, | ||
448 | unsigned binfo_size) | ||
449 | { | ||
450 | unsigned blocksize = sci->sc_super->s_blocksize; | ||
451 | /* Size of finfo and binfo is enough small against blocksize */ | ||
452 | |||
453 | return ssp->offset + binfo_size + | ||
454 | (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) > | ||
455 | blocksize; | ||
456 | } | ||
457 | |||
/*
 * nilfs_segctor_begin_finfo - open a finfo entry for @inode
 *
 * Reserves space for a finfo header at the current finfo cursor (the
 * header itself is filled in later by nilfs_segctor_end_finfo(), once
 * the block counts are known) and positions the binfo cursor after it.
 */
static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

	/* any on-disk inode contributing blocks makes this log a delta */
	if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
	/* skip finfo */
}
470 | |||
/*
 * nilfs_segctor_end_finfo - close the finfo entry opened by
 * nilfs_segctor_begin_finfo()
 *
 * Writes the finfo header (inode number, block counts, checkpoint
 * number) at the saved finfo cursor, updates the summary byte count of
 * the current segment buffer, and advances the finfo cursor past all the
 * binfo entries written for this file.  A no-op if no blocks were
 * collected for the file.
 */
static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);
	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
						 sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(ii->i_cno);

	/* total summary bytes = full blocks already used + tail offset */
	segbuf = sci->sc_curseg;
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}
495 | |||
/*
 * nilfs_segctor_add_file_block - collect one dirty block of a file
 * @sci: segment constructor info
 * @bh: dirty buffer to collect
 * @inode: owning inode
 * @binfo_size: size of the binfo entry this block needs in the summary
 *
 * Ensures there is room both for the block itself (payload) and for its
 * binfo entry (summary) in the current segment buffer; when the buffer
 * is full, closes the open finfo and moves to the next buffer, retrying.
 * Returns 0 on success, -E2BIG when the construction window is full, or
 * an error from extending the summary area.
 */
static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	/* +1 accounts for a possible extra summary block */
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}
530 | |||
531 | static int nilfs_handle_bmap_error(int err, const char *fname, | ||
532 | struct inode *inode, struct super_block *sb) | ||
533 | { | ||
534 | if (err == -EINVAL) { | ||
535 | nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", | ||
536 | inode->i_ino); | ||
537 | err = -EIO; | ||
538 | } | ||
539 | return err; | ||
540 | } | ||
541 | |||
542 | /* | ||
543 | * Callback functions that enumerate, mark, and collect dirty blocks | ||
544 | */ | ||
/*
 * nilfs_collect_file_data - collect callback for data blocks of regular
 * files: propagate dirtiness through the bmap, then queue the block with
 * a nilfs_binfo_v entry and count it as a data block.
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
				   struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __func__, inode,
					       sci->sc_super);

	err = nilfs_segctor_add_file_block(sci, bh, inode,
					   sizeof(struct nilfs_binfo_v));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}
561 | |||
562 | static int nilfs_collect_file_node(struct nilfs_sc_info *sci, | ||
563 | struct buffer_head *bh, | ||
564 | struct inode *inode) | ||
565 | { | ||
566 | int err; | ||
567 | |||
568 | err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); | ||
569 | if (unlikely(err < 0)) | ||
570 | return nilfs_handle_bmap_error(err, __func__, inode, | ||
571 | sci->sc_super); | ||
572 | return 0; | ||
573 | } | ||
574 | |||
/*
 * nilfs_collect_file_bmap - queue a dirty bmap (node) block of a regular
 * file; its binfo is a single virtual block number (__le64).
 */
static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}
582 | |||
/*
 * nilfs_write_file_data_binfo - emit the binfo of a regular-file data
 * block (a full nilfs_binfo_v) into the segment summary.
 */
static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*binfo_v));
	*binfo_v = binfo->bi_v;
}
591 | |||
/*
 * nilfs_write_file_node_binfo - emit the binfo of a regular-file node
 * block (just its virtual block number) into the segment summary.
 */
static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*vblocknr));
	*vblocknr = binfo->bi_v.bi_vblocknr;
}
600 | |||
/* Collection/summary operations for regular files (and other non-DAT files) */
struct nilfs_sc_operations nilfs_sc_file_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};
608 | |||
/*
 * nilfs_collect_dat_data - collect callback for DAT data blocks: like
 * nilfs_collect_file_data() but a DAT block's binfo is only a block
 * offset (__le64), since the DAT is not itself virtually mapped.
 */
static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (unlikely(err < 0))
		return nilfs_handle_bmap_error(err, __func__, inode,
					       sci->sc_super);

	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}
624 | |||
/*
 * nilfs_collect_dat_bmap - queue a dirty bmap block of the DAT file; its
 * binfo is a full nilfs_binfo_dat entry.
 */
static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}
632 | |||
/*
 * nilfs_write_dat_data_binfo - emit the binfo of a DAT data block (its
 * file block offset) into the segment summary.
 */
static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
							sizeof(*blkoff));
	*blkoff = binfo->bi_dat.bi_blkoff;
}
641 | |||
/*
 * nilfs_write_dat_node_binfo - emit the binfo of a DAT node block (a
 * full nilfs_binfo_dat) into the segment summary.
 */
static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *binfo_dat =
		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
	*binfo_dat = binfo->bi_dat;
}
650 | |||
/* Collection/summary operations for the DAT metadata file */
struct nilfs_sc_operations nilfs_sc_dat_ops = {
	.collect_data = nilfs_collect_dat_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_dat_bmap,
	.write_data_binfo = nilfs_write_dat_data_binfo,
	.write_node_binfo = nilfs_write_dat_node_binfo,
};
658 | |||
/* Operations for data-sync mode: data blocks only, no node/bmap output */
struct nilfs_sc_operations nilfs_sc_dsync_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = NULL,
};
666 | |||
/*
 * nilfs_lookup_dirty_data_buffers - gather dirty data buffers of @inode
 * @inode: inode to scan
 * @listp: list the collected buffers are appended to (via b_assoc_buffers)
 * @nlimit: stop after this many buffers have been collected
 * @start: first byte of the range to scan (0 for a full scan)
 * @end: last byte of the range (LLONG_MAX for a full scan)
 *
 * Walks pages tagged dirty in the page cache, creating buffers on pages
 * that have none, and queues each dirty buffer with an extra reference.
 * Returns the number of buffers collected (which is @nlimit when the
 * limit was hit).
 */
static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
					      struct list_head *listp,
					      size_t nlimit,
					      loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = 0, last = ULONG_MAX;
	size_t ndirties = 0;
	int i;

	if (unlikely(start != 0 || end != LLONG_MAX)) {
		/*
		 * A valid range is given for sync-ing data pages. The
		 * range is rounded to per-page; extra dirty buffers
		 * may be included if blocksize < pagesize.
		 */
		index = start >> PAGE_SHIFT;
		last = end >> PAGE_SHIFT;
	}
	pagevec_init(&pvec, 0);
 repeat:
	if (unlikely(index > last) ||
	    !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min_t(pgoff_t, last - index,
				      PAGEVEC_SIZE - 1) + 1))
		return ndirties;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct buffer_head *bh, *head;
		struct page *page = pvec.pages[i];

		/* lookup may return pages past the requested range */
		if (unlikely(page->index > last))
			break;

		if (mapping->host) {
			lock_page(page);
			if (!page_has_buffers(page))
				create_empty_buffers(page,
						     1 << inode->i_blkbits, 0);
			unlock_page(page);
		}

		bh = head = page_buffers(page);
		do {
			if (!buffer_dirty(bh))
				continue;
			get_bh(bh);	/* dropped when the list is released */
			list_add_tail(&bh->b_assoc_buffers, listp);
			ndirties++;
			if (unlikely(ndirties >= nlimit)) {
				pagevec_release(&pvec);
				cond_resched();
				return ndirties;
			}
		} while (bh = bh->b_this_page, bh != head);
	}
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;
}
728 | |||
/*
 * nilfs_lookup_dirty_node_buffers - gather dirty b-tree node buffers
 * @inode: inode whose btnode cache is scanned
 * @listp: list the collected buffers are appended to
 *
 * Like nilfs_lookup_dirty_data_buffers() but scans the inode's private
 * btnode page cache, with no range or count limit.
 */
static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
					    struct list_head *listp)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct address_space *mapping = &ii->i_btnode_cache;
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh)) {
					get_bh(bh);
					list_add_tail(&bh->b_assoc_buffers,
						      listp);
				}
				bh = bh->b_this_page;
			} while (bh != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}
758 | |||
/*
 * nilfs_dispose_list - drop inode references queued on a dirty list
 * @sbi: nilfs super block info
 * @head: list of nilfs_inode_info linked via i_dirty
 * @force: nonzero to unconditionally release, dropping any held i_bh
 *
 * Inodes are detached under s_inode_lock in batches of SC_N_INODEVEC and
 * iput outside the lock (iput may sleep).  Without @force, inodes that
 * are still dirty are re-queued on s_dirty_files instead of released.
 */
static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
			       struct list_head *head, int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&sbi->s_inode_lock);
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			list_del_init(&ii->i_dirty);
			if (force) {
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				/* still dirty: keep it on the dirty list */
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty,
					      &sbi->s_dirty_files);
				continue;
			}
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;	/* release this batch first */
		}
		spin_unlock(&sbi->s_inode_lock);

		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}
791 | |||
792 | static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) | ||
793 | { | ||
794 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
795 | int ret = 0; | ||
796 | |||
797 | if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) | ||
798 | ret++; | ||
799 | if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) | ||
800 | ret++; | ||
801 | if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) | ||
802 | ret++; | ||
803 | if (ret || nilfs_doing_gc()) | ||
804 | if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) | ||
805 | ret++; | ||
806 | return ret; | ||
807 | } | ||
808 | |||
809 | static int nilfs_segctor_clean(struct nilfs_sc_info *sci) | ||
810 | { | ||
811 | return list_empty(&sci->sc_dirty_files) && | ||
812 | !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && | ||
813 | list_empty(&sci->sc_cleaning_segments) && | ||
814 | (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); | ||
815 | } | ||
816 | |||
817 | static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) | ||
818 | { | ||
819 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
820 | int ret = 0; | ||
821 | |||
822 | if (nilfs_test_metadata_dirty(sbi)) | ||
823 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); | ||
824 | |||
825 | spin_lock(&sbi->s_inode_lock); | ||
826 | if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) | ||
827 | ret++; | ||
828 | |||
829 | spin_unlock(&sbi->s_inode_lock); | ||
830 | return ret; | ||
831 | } | ||
832 | |||
833 | static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) | ||
834 | { | ||
835 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
836 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
837 | |||
838 | nilfs_mdt_clear_dirty(sbi->s_ifile); | ||
839 | nilfs_mdt_clear_dirty(nilfs->ns_cpfile); | ||
840 | nilfs_mdt_clear_dirty(nilfs->ns_sufile); | ||
841 | nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); | ||
842 | } | ||
843 | |||
844 | static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) | ||
845 | { | ||
846 | struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; | ||
847 | struct buffer_head *bh_cp; | ||
848 | struct nilfs_checkpoint *raw_cp; | ||
849 | int err; | ||
850 | |||
851 | /* XXX: this interface will be changed */ | ||
852 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, | ||
853 | &raw_cp, &bh_cp); | ||
854 | if (likely(!err)) { | ||
855 | /* The following code is duplicated with cpfile. But, it is | ||
856 | needed to collect the checkpoint even if it was not newly | ||
857 | created */ | ||
858 | nilfs_mdt_mark_buffer_dirty(bh_cp); | ||
859 | nilfs_mdt_mark_dirty(nilfs->ns_cpfile); | ||
860 | nilfs_cpfile_put_checkpoint( | ||
861 | nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); | ||
862 | } else | ||
863 | WARN_ON(err == -EINVAL || err == -ENOENT); | ||
864 | |||
865 | return err; | ||
866 | } | ||
867 | |||
868 | static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) | ||
869 | { | ||
870 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
871 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
872 | struct buffer_head *bh_cp; | ||
873 | struct nilfs_checkpoint *raw_cp; | ||
874 | int err; | ||
875 | |||
876 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, | ||
877 | &raw_cp, &bh_cp); | ||
878 | if (unlikely(err)) { | ||
879 | WARN_ON(err == -EINVAL || err == -ENOENT); | ||
880 | goto failed_ibh; | ||
881 | } | ||
882 | raw_cp->cp_snapshot_list.ssl_next = 0; | ||
883 | raw_cp->cp_snapshot_list.ssl_prev = 0; | ||
884 | raw_cp->cp_inodes_count = | ||
885 | cpu_to_le64(atomic_read(&sbi->s_inodes_count)); | ||
886 | raw_cp->cp_blocks_count = | ||
887 | cpu_to_le64(atomic_read(&sbi->s_blocks_count)); | ||
888 | raw_cp->cp_nblk_inc = | ||
889 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); | ||
890 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); | ||
891 | raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); | ||
892 | |||
893 | if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) | ||
894 | nilfs_checkpoint_clear_minor(raw_cp); | ||
895 | else | ||
896 | nilfs_checkpoint_set_minor(raw_cp); | ||
897 | |||
898 | nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); | ||
899 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); | ||
900 | return 0; | ||
901 | |||
902 | failed_ibh: | ||
903 | return err; | ||
904 | } | ||
905 | |||
906 | static void nilfs_fill_in_file_bmap(struct inode *ifile, | ||
907 | struct nilfs_inode_info *ii) | ||
908 | |||
909 | { | ||
910 | struct buffer_head *ibh; | ||
911 | struct nilfs_inode *raw_inode; | ||
912 | |||
913 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) { | ||
914 | ibh = ii->i_bh; | ||
915 | BUG_ON(!ibh); | ||
916 | raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, | ||
917 | ibh); | ||
918 | nilfs_bmap_write(ii->i_bmap, raw_inode); | ||
919 | nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, | ||
924 | struct inode *ifile) | ||
925 | { | ||
926 | struct nilfs_inode_info *ii; | ||
927 | |||
928 | list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { | ||
929 | nilfs_fill_in_file_bmap(ifile, ii); | ||
930 | set_bit(NILFS_I_COLLECTED, &ii->i_state); | ||
931 | } | ||
932 | } | ||
933 | |||
934 | /* | ||
935 | * CRC calculation routines | ||
936 | */ | ||
937 | static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) | ||
938 | { | ||
939 | struct nilfs_super_root *raw_sr = | ||
940 | (struct nilfs_super_root *)bh_sr->b_data; | ||
941 | u32 crc; | ||
942 | |||
943 | crc = crc32_le(seed, | ||
944 | (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), | ||
945 | NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); | ||
946 | raw_sr->sr_sum = cpu_to_le32(crc); | ||
947 | } | ||
948 | |||
949 | static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci, | ||
950 | u32 seed) | ||
951 | { | ||
952 | struct nilfs_segment_buffer *segbuf; | ||
953 | |||
954 | if (sci->sc_super_root) | ||
955 | nilfs_fill_in_super_root_crc(sci->sc_super_root, seed); | ||
956 | |||
957 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
958 | nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); | ||
959 | nilfs_segbuf_fill_in_data_crc(segbuf, seed); | ||
960 | } | ||
961 | } | ||
962 | |||
963 | static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, | ||
964 | struct the_nilfs *nilfs) | ||
965 | { | ||
966 | struct buffer_head *bh_sr = sci->sc_super_root; | ||
967 | struct nilfs_super_root *raw_sr = | ||
968 | (struct nilfs_super_root *)bh_sr->b_data; | ||
969 | unsigned isz = nilfs->ns_inode_size; | ||
970 | |||
971 | raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); | ||
972 | raw_sr->sr_nongc_ctime | ||
973 | = cpu_to_le64(nilfs_doing_gc() ? | ||
974 | nilfs->ns_nongc_ctime : sci->sc_seg_ctime); | ||
975 | raw_sr->sr_flags = 0; | ||
976 | |||
977 | nilfs_mdt_write_inode_direct( | ||
978 | nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); | ||
979 | nilfs_mdt_write_inode_direct( | ||
980 | nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); | ||
981 | nilfs_mdt_write_inode_direct( | ||
982 | nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); | ||
983 | } | ||
984 | |||
985 | static void nilfs_redirty_inodes(struct list_head *head) | ||
986 | { | ||
987 | struct nilfs_inode_info *ii; | ||
988 | |||
989 | list_for_each_entry(ii, head, i_dirty) { | ||
990 | if (test_bit(NILFS_I_COLLECTED, &ii->i_state)) | ||
991 | clear_bit(NILFS_I_COLLECTED, &ii->i_state); | ||
992 | } | ||
993 | } | ||
994 | |||
995 | static void nilfs_drop_collected_inodes(struct list_head *head) | ||
996 | { | ||
997 | struct nilfs_inode_info *ii; | ||
998 | |||
999 | list_for_each_entry(ii, head, i_dirty) { | ||
1000 | if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) | ||
1001 | continue; | ||
1002 | |||
1003 | clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state); | ||
1004 | set_bit(NILFS_I_UPDATED, &ii->i_state); | ||
1005 | } | ||
1006 | } | ||
1007 | |||
/*
 * nilfs_segctor_cancel_free_segments - undo tentative sufile frees
 * @sci:    segment constructor information
 * @sufile: sufile inode
 *
 * Cancels the free operation for every entry on sc_cleaning_segments
 * that was marked NILFS_SLH_FREED by
 * nilfs_segctor_prepare_free_segments().  Since that function flags
 * entries in list order, the flagged entries form a prefix of the list,
 * so the walk stops at the first entry without the flag.
 */
static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci,
					       struct inode *sufile)

{
	struct list_head *head = &sci->sc_cleaning_segments;
	struct nilfs_segment_entry *ent;
	int err;

	list_for_each_entry(ent, head, list) {
		if (!(ent->flags & NILFS_SLH_FREED))
			break;
		err = nilfs_sufile_cancel_free(sufile, ent->segnum);
		WARN_ON(err); /* do not happen */
		ent->flags &= ~NILFS_SLH_FREED;
	}
}
1024 | |||
1025 | static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, | ||
1026 | struct inode *sufile) | ||
1027 | { | ||
1028 | struct list_head *head = &sci->sc_cleaning_segments; | ||
1029 | struct nilfs_segment_entry *ent; | ||
1030 | int err; | ||
1031 | |||
1032 | list_for_each_entry(ent, head, list) { | ||
1033 | err = nilfs_sufile_free(sufile, ent->segnum); | ||
1034 | if (unlikely(err)) | ||
1035 | return err; | ||
1036 | ent->flags |= NILFS_SLH_FREED; | ||
1037 | } | ||
1038 | return 0; | ||
1039 | } | ||
1040 | |||
/* Dispose of the cleaning-segment list once its frees no longer need to
   be cancellable. */
static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci)
{
	nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
}
1045 | |||
/*
 * nilfs_segctor_apply_buffers - apply a collect callback to listed buffers
 * @sci:     segment constructor information
 * @inode:   inode owning the buffers
 * @listp:   list of buffers linked through b_assoc_buffers
 * @collect: per-buffer callback, or NULL to just dispose of the list
 *
 * With a callback, each buffer is detached, passed to @collect and
 * released; on callback failure the remaining buffers are disposed of
 * and the error is returned.  With @collect == NULL, control falls
 * straight through to the disposal loop and 0 is returned.  In all
 * cases the list is empty and every buffer reference dropped on return.
 */
static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
				       struct inode *inode,
				       struct list_head *listp,
				       int (*collect)(struct nilfs_sc_info *,
						      struct buffer_head *,
						      struct inode *))
{
	struct buffer_head *bh, *n;
	int err = 0;

	if (collect) {
		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
			list_del_init(&bh->b_assoc_buffers);
			err = collect(sci, bh, inode);
			brelse(bh);
			if (unlikely(err))
				goto dispose_buffers;
		}
		return 0;
	}

	/* reached directly when @collect is NULL (err == 0) */
 dispose_buffers:
	while (!list_empty(listp)) {
		bh = list_entry(listp->next, struct buffer_head,
				b_assoc_buffers);
		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	return err;
}
1076 | |||
1077 | static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) | ||
1078 | { | ||
1079 | /* Remaining number of blocks within segment buffer */ | ||
1080 | return sci->sc_segbuf_nblocks - | ||
1081 | (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks); | ||
1082 | } | ||
1083 | |||
/*
 * nilfs_segctor_scan_file - collect dirty blocks of one file
 * @sci:    segment constructor information
 * @inode:  inode to scan
 * @sc_ops: collection callbacks (data/node/bmap) for this file class
 *
 * Collects dirty data buffers (unless the NODE stage was already
 * reached on a previous, interrupted pass), then dirty b-tree node
 * buffers, then dirty bmap buffers.  NILFS_CF_NODE in sc_stage.flags
 * records that data collection finished, so a retry after -E2BIG
 * resumes at the node phase.  Returns 0 on success, -E2BIG when the
 * segment ran out of space, or another negative error code.
 */
static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
				   struct inode *inode,
				   struct nilfs_sc_operations *sc_ops)
{
	LIST_HEAD(data_buffers);
	LIST_HEAD(node_buffers);
	int err;

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		size_t n, rest = nilfs_segctor_buffer_rest(sci);

		/* look up one more buffer than fits to detect overflow */
		n = nilfs_lookup_dirty_data_buffers(
			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
		if (n > rest) {
			err = nilfs_segctor_apply_buffers(
				sci, inode, &data_buffers,
				sc_ops->collect_data);
			BUG_ON(!err); /* always receive -E2BIG or true error */
			goto break_or_fail;
		}
	}
	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		err = nilfs_segctor_apply_buffers(
			sci, inode, &data_buffers, sc_ops->collect_data);
		if (unlikely(err)) {
			/* dispose node list */
			nilfs_segctor_apply_buffers(
				sci, inode, &node_buffers, NULL);
			goto break_or_fail;
		}
		sci->sc_stage.flags |= NILFS_CF_NODE;
	}
	/* Collect node */
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_node);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_bmap);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_segctor_end_finfo(sci, inode);
	sci->sc_stage.flags &= ~NILFS_CF_NODE;

 break_or_fail:
	return err;
}
1136 | |||
1137 | static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, | ||
1138 | struct inode *inode) | ||
1139 | { | ||
1140 | LIST_HEAD(data_buffers); | ||
1141 | size_t n, rest = nilfs_segctor_buffer_rest(sci); | ||
1142 | int err; | ||
1143 | |||
1144 | n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1, | ||
1145 | sci->sc_dsync_start, | ||
1146 | sci->sc_dsync_end); | ||
1147 | |||
1148 | err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers, | ||
1149 | nilfs_collect_file_data); | ||
1150 | if (!err) { | ||
1151 | nilfs_segctor_end_finfo(sci, inode); | ||
1152 | BUG_ON(n > rest); | ||
1153 | /* always receive -E2BIG or true error if n > rest */ | ||
1154 | } | ||
1155 | return err; | ||
1156 | } | ||
1157 | |||
/*
 * nilfs_segctor_collect_blocks - stage machine of the block collection
 * @sci:  segment constructor information
 * @mode: construction mode (SC_LSEG_SR, SC_LSEG_DSYNC, SC_FLUSH_FILE,
 *        SC_FLUSH_DAT)
 *
 * Runs the collection stages (GC inodes, dirty files, ifile, cpfile,
 * sufile, DAT, super root) in order, using deliberate switch
 * fall-through.  sc_stage records the position so that a pass
 * interrupted by -E2BIG (segment full) or an error can be resumed or
 * retried from where it stopped.  DSYNC and FLUSH_DAT modes jump
 * directly to their dedicated stage.  Returns 0 when the stage counter
 * reaches NILFS_ST_DONE, otherwise a negative error code (-E2BIG means
 * "extend segments and retry").
 */
static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct list_head *head;
	struct nilfs_inode_info *ii;
	int err = 0;

	switch (sci->sc_stage.scnt) {
	case NILFS_ST_INIT:
		/* Pre-processes */
		sci->sc_stage.flags = 0;

		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
			/* starting a fresh logical segment */
			sci->sc_nblk_inc = 0;
			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
			if (mode == SC_LSEG_DSYNC) {
				sci->sc_stage.scnt = NILFS_ST_DSYNC;
				goto dsync_mode;
			}
		}

		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_stage.gc_inode_ptr = NULL;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DAT;
			goto dat_stage;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_GC:
		if (nilfs_doing_gc()) {
			head = &sci->sc_gc_inodes;
			/* resume after the last inode done on a prior pass */
			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
						head, i_dirty);
			list_for_each_entry_continue(ii, head, i_dirty) {
				err = nilfs_segctor_scan_file(
					sci, &ii->vfs_inode,
					&nilfs_sc_file_ops);
				if (unlikely(err)) {
					/* remember where to resume */
					sci->sc_stage.gc_inode_ptr = list_entry(
						ii->i_dirty.prev,
						struct nilfs_inode_info,
						i_dirty);
					goto break_or_fail;
				}
				set_bit(NILFS_I_COLLECTED, &ii->i_state);
			}
			sci->sc_stage.gc_inode_ptr = NULL;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_FILE:
		head = &sci->sc_dirty_files;
		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
					i_dirty);
		list_for_each_entry_continue(ii, head, i_dirty) {
			clear_bit(NILFS_I_DIRTY, &ii->i_state);

			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
						      &nilfs_sc_file_ops);
			if (unlikely(err)) {
				/* remember where to resume */
				sci->sc_stage.dirty_file_ptr =
					list_entry(ii->i_dirty.prev,
						   struct nilfs_inode_info,
						   i_dirty);
				goto break_or_fail;
			}
			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
			/* XXX: required ? */
		}
		sci->sc_stage.dirty_file_ptr = NULL;
		if (mode == SC_FLUSH_FILE) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;
		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
		/* Fall through */
	case NILFS_ST_IFILE:
		err = nilfs_segctor_scan_file(sci, sbi->s_ifile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;
		/* Creating a checkpoint */
		err = nilfs_segctor_create_checkpoint(sci);
		if (unlikely(err))
			break;
		/* Fall through */
	case NILFS_ST_CPFILE:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_SUFILE:
		err = nilfs_segctor_prepare_free_segments(sci,
							  nilfs->ns_sufile);
		if (unlikely(err))
			break;
		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_DAT:
 dat_stage:
		err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs),
					      &nilfs_sc_dat_ops);
		if (unlikely(err))
			break;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;  /* Fall through */
	case NILFS_ST_SR:
		if (mode == SC_LSEG_SR) {
			/* Appending a super root */
			err = nilfs_segctor_add_super_root(sci);
			if (unlikely(err))
				break;
		}
		/* End of a logical segment */
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DSYNC:
 dsync_mode:
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
		ii = sci->sc_dsync_inode;
		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
			break;

		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
		if (unlikely(err))
			break;
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DONE:
		return 0;
	default:
		BUG();
	}

 break_or_fail:
	return err;
}
1306 | |||
1307 | static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) | ||
1308 | { | ||
1309 | struct buffer_head *bh_su; | ||
1310 | struct nilfs_segment_usage *raw_su; | ||
1311 | int err; | ||
1312 | |||
1313 | err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); | ||
1314 | if (unlikely(err)) | ||
1315 | return err; | ||
1316 | nilfs_mdt_mark_buffer_dirty(bh_su); | ||
1317 | nilfs_mdt_mark_dirty(sufile); | ||
1318 | nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); | ||
1319 | return 0; | ||
1320 | } | ||
1321 | |||
/*
 * nilfs_segctor_begin_construction - set up the first segment buffer
 * @sci:   segment constructor information
 * @nilfs: nilfs object
 *
 * Reuses (or allocates) the head segment buffer, maps it to the current
 * write position, and shifts to the next full segment when fewer than
 * NILFS_PSEG_MIN_BLOCKS remain.  If the write position is at the head
 * of a new full segment, the next segment is also allocated in the
 * sufile.  Any leftover segment buffers from a previous construction
 * are freed.  Returns 0 on success or a negative error code.
 */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
					    struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *n;
	__u64 nextnum;
	int err;

	if (list_empty(&sci->sc_segbufs)) {
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			return -ENOMEM;
		list_add(&segbuf->sb_list, &sci->sc_segbufs);
	} else
		segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

	nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset,
			 nilfs);

	if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
		/* too little room left: start at the next full segment */
		nilfs_shift_to_next_segment(nilfs);
		nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
	}
	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;

	/* pin the usage entry so it is written with this construction */
	err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum);
	if (unlikely(err))
		return err;

	if (nilfs->ns_segnum == nilfs->ns_nextnum) {
		/* Start from the head of a new full segment */
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
		if (unlikely(err))
			return err;
	} else
		nextnum = nilfs->ns_nextnum;

	segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);

	/* truncating segment buffers */
	list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
					  sb_list) {
		list_del_init(&segbuf->sb_list);
		nilfs_segbuf_free(segbuf);
	}
	return 0;
}
1369 | |||
/*
 * nilfs_segctor_extend_segments - append extra segment buffers
 * @sci:   segment constructor information
 * @nilfs: nilfs object
 * @nadd:  number of segments to add
 *
 * Called when a collection pass overflowed the available space
 * (-E2BIG).  Allocates @nadd new segment buffers, each mapped to the
 * successor segment of the previous one and given a freshly allocated
 * next-segment in the sufile.  On failure, every segment allocated so
 * far is returned to the sufile and the new buffers are freed.
 * Returns 0 on success or a negative error code.
 */
static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
					 struct the_nilfs *nilfs, int nadd)
{
	struct nilfs_segment_buffer *segbuf, *prev, *n;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 nextnextnum;
	LIST_HEAD(list);
	int err, ret, i;

	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	/*
	 * Since the segment specified with nextnum might be allocated during
	 * the previous construction, the buffer including its segusage may
	 * not be dirty. The following call ensures that the buffer is dirty
	 * and will pin the buffer on memory until the sufile is written.
	 */
	err = nilfs_touch_segusage(sufile, prev->sb_nextnum);
	if (unlikely(err))
		return err;

	for (i = 0; i < nadd; i++) {
		/* extend segment info */
		err = -ENOMEM;
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			goto failed;

		/* map this buffer to region of segment on-disk */
		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;

		/* allocate the next next full segment */
		err = nilfs_sufile_alloc(sufile, &nextnextnum);
		if (unlikely(err))
			goto failed_segbuf;

		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);

		list_add_tail(&segbuf->sb_list, &list);
		prev = segbuf;
	}
	/* commit: splice the new buffers onto the tail of sc_segbufs */
	list_splice(&list, sci->sc_segbufs.prev);
	return 0;

 failed_segbuf:
	nilfs_segbuf_free(segbuf);
 failed:
	/* roll back: release the segments allocated for the new buffers */
	list_for_each_entry_safe(segbuf, n, &list, sb_list) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
		list_del_init(&segbuf->sb_list);
		nilfs_segbuf_free(segbuf);
	}
	return err;
}
1426 | |||
/*
 * nilfs_segctor_free_incomplete_segments - clean up after a failed write
 * @sci:   segment constructor information
 * @nilfs: nilfs object
 *
 * Returns every provisionally-allocated next-segment to the sufile and
 * handles the segment that actually hit an I/O error:
 *  Case 1  - the first segment buffer failed:
 *    1a) it was appended to an existing segment: terminate that segment
 *        so recovery does not read past the failure point;
 *    1b) it was a new full segment: just mark the log discontinued.
 *  Case 2  - a later (extended) segment failed: mark it as an error
 *    segment in the sufile, unless it is the next-write segment.
 * Only the first failed segment is handled (done flag); the rest only
 * get their next-segments freed.
 */
static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
						   struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf;
	int ret, done = 0;

	segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
		ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	if (segbuf->sb_io_error) {
		/* Case 1: The first segment failed */
		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
			/* Case 1a: Partial segment appended into an existing
			   segment */
			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
						segbuf->sb_fseg_end);
		else /* Case 1b: New full segment */
			set_nilfs_discontinued(nilfs);
		done++;
	}

	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
		if (!done && segbuf->sb_io_error) {
			if (segbuf->sb_segnum != nilfs->ns_nextnum)
				/* Case 2: extended segment (!= next) failed */
				nilfs_sufile_set_error(nilfs->ns_sufile,
						       segbuf->sb_segnum);
			done++;
		}
	}
}
1462 | |||
/* Reset every segment buffer for reuse and detach the super root
   buffer reference held by the constructor. */
static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list)
		nilfs_segbuf_clear(segbuf);
	sci->sc_super_root = NULL;
}
1471 | |||
1472 | static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) | ||
1473 | { | ||
1474 | struct nilfs_segment_buffer *segbuf; | ||
1475 | |||
1476 | while (!list_empty(&sci->sc_segbufs)) { | ||
1477 | segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); | ||
1478 | list_del_init(&segbuf->sb_list); | ||
1479 | nilfs_segbuf_free(segbuf); | ||
1480 | } | ||
1481 | /* sci->sc_curseg = NULL; */ | ||
1482 | } | ||
1483 | |||
1484 | static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, | ||
1485 | struct the_nilfs *nilfs, int err) | ||
1486 | { | ||
1487 | if (unlikely(err)) { | ||
1488 | nilfs_segctor_free_incomplete_segments(sci, nilfs); | ||
1489 | nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); | ||
1490 | } | ||
1491 | nilfs_segctor_clear_segment_buffers(sci); | ||
1492 | } | ||
1493 | |||
1494 | static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, | ||
1495 | struct inode *sufile) | ||
1496 | { | ||
1497 | struct nilfs_segment_buffer *segbuf; | ||
1498 | struct buffer_head *bh_su; | ||
1499 | struct nilfs_segment_usage *raw_su; | ||
1500 | unsigned long live_blocks; | ||
1501 | int ret; | ||
1502 | |||
1503 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1504 | ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, | ||
1505 | &raw_su, &bh_su); | ||
1506 | WARN_ON(ret); /* always succeed because bh_su is dirty */ | ||
1507 | live_blocks = segbuf->sb_sum.nblocks + | ||
1508 | (segbuf->sb_pseg_start - segbuf->sb_fseg_start); | ||
1509 | raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); | ||
1510 | raw_su->su_nblocks = cpu_to_le32(live_blocks); | ||
1511 | nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, | ||
1512 | bh_su); | ||
1513 | } | ||
1514 | } | ||
1515 | |||
/*
 * nilfs_segctor_cancel_segusage - revert segment usage after a failure
 * @sci:    segment constructor information
 * @sufile: sufile inode
 *
 * Undoes nilfs_segctor_update_segusage(): the first segment keeps only
 * the blocks that preceded this partial segment, and every subsequent
 * (extended) segment is reset to zero live blocks.
 */
static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci,
					  struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	struct buffer_head *bh_su;
	struct nilfs_segment_usage *raw_su;
	int ret;

	/* first segment: restore the pre-existing block count */
	segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
	ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
					     &raw_su, &bh_su);
	WARN_ON(ret); /* always succeed because bh_su is dirty */
	raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start -
					 segbuf->sb_fseg_start);
	nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);

	/* extended segments held no live blocks before this construction */
	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
						     &raw_su, &bh_su);
		WARN_ON(ret); /* always succeed */
		raw_su->su_nblocks = 0;
		nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
					       bh_su);
	}
}
1541 | |||
/*
 * nilfs_segctor_truncate_segments - drop unused trailing segment buffers
 * @sci:    segment constructor information
 * @last:   last segment buffer actually used (kept; truncation starts
 *          after it)
 * @sufile: sufile inode
 *
 * Frees every segment buffer following @last, returning each one's
 * allocated next-segment to the sufile and shrinking the remaining
 * block budget accordingly.
 */
static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
					    struct nilfs_segment_buffer *last,
					    struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf = last, *n;
	int ret;

	list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
					  sb_list) {
		list_del_init(&segbuf->sb_list);
		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret);
		nilfs_segbuf_free(segbuf);
	}
}
1558 | |||
1559 | |||
/*
 * nilfs_segctor_collect - run the collection loop, extending as needed
 * @sci:   segment constructor information
 * @nilfs: nilfs object
 * @mode:  construction mode
 *
 * Repeatedly runs nilfs_segctor_collect_blocks(); when it returns
 * -E2BIG (segment filled up) in SC_LSEG_SR mode before the cpfile
 * stage, the pass is rolled back (tentative sufile frees cancelled,
 * buffers cleared), the segment buffers are extended by a doubling
 * number of segments (capped at SC_MAX_SEGDELTA), the stage snapshot
 * taken on entry is restored, and collection retries.  On success the
 * unused trailing segment buffers are truncated.  Returns 0 or a
 * negative error code.
 */
static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
				 struct the_nilfs *nilfs, int mode)
{
	struct nilfs_cstage prev_stage = sci->sc_stage;
	int err, nadd = 1;

	/* Collection retry loop */
	for (;;) {
		sci->sc_super_root = NULL;
		sci->sc_nblk_this_inc = 0;
		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

		err = nilfs_segctor_reset_segment_buffer(sci);
		if (unlikely(err))
			goto failed;

		err = nilfs_segctor_collect_blocks(sci, mode);
		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
		if (!err)
			break;

		if (unlikely(err != -E2BIG))
			goto failed;

		/* The current segment is filled up */
		if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
			break;

		/* roll back and retry with more segment buffers */
		nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
		nilfs_segctor_clear_segment_buffers(sci);

		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
		if (unlikely(err))
			return err;

		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
		sci->sc_stage = prev_stage;
	}
	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
	return 0;

 failed:
	return err;
}
1604 | |||
/*
 * nilfs_list_replace_buffer - swap a buffer's position on a payload list
 * @old_bh: buffer currently on the list (must be linked)
 * @new_bh: replacement buffer (must not be on any list)
 *
 * Takes over old_bh's b_assoc_buffers linkage for new_bh.  The buffer
 * reference held for old_bh is NOT dropped here.
 */
static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
				      struct buffer_head *new_bh)
{
	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));

	list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
	/* The caller must release old_bh */
}
1613 | |||
/*
 * nilfs_segctor_update_payload_blocknr - assign disk addresses to payload
 * @sci:    segment constructor information
 * @segbuf: segment buffer whose payload is being finalized
 * @mode:   construction mode (selects the binfo writer ops)
 *
 * Walks the payload buffers in write order, keeping a cursor (finfo,
 * nblocks, ndatablk) over the per-file records in the segment summary.
 * Each buffer is assigned its final block number via the bmap (which
 * may substitute a new buffer; the list entry is then replaced), and
 * its block info is written into the summary as a data or node entry
 * depending on the remaining data-block count of the current finfo.
 * Stops at the super root buffer, which carries no finfo.  Returns 0 on
 * success or a negative error from the bmap assignment.
 */
static int
nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
				     struct nilfs_segment_buffer *segbuf,
				     int mode)
{
	struct inode *inode = NULL;
	sector_t blocknr;
	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
	unsigned long nblocks = 0, ndatablk = 0;
	struct nilfs_sc_operations *sc_op = NULL;
	struct nilfs_segsum_pointer ssp;
	struct nilfs_finfo *finfo = NULL;
	union nilfs_binfo binfo;
	struct buffer_head *bh, *bh_org;
	ino_t ino = 0;
	int err = 0;

	if (!nfinfo)
		goto out;

	/* payload starts right after the summary blocks */
	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	ssp.offset = sizeof(struct nilfs_segment_summary);

	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		if (bh == sci->sc_super_root)
			break;
		if (!finfo) {
			/* start of a new per-file record */
			finfo =	nilfs_segctor_map_segsum_entry(
				sci, &ssp, sizeof(*finfo));
			ino = le64_to_cpu(finfo->fi_ino);
			nblocks = le32_to_cpu(finfo->fi_nblocks);
			ndatablk = le32_to_cpu(finfo->fi_ndatablk);

			if (buffer_nilfs_node(bh))
				inode = NILFS_BTNC_I(bh->b_page->mapping);
			else
				inode = NILFS_AS_I(bh->b_page->mapping);

			if (mode == SC_LSEG_DSYNC)
				sc_op = &nilfs_sc_dsync_ops;
			else if (ino == NILFS_DAT_INO)
				sc_op = &nilfs_sc_dat_ops;
			else /* file blocks */
				sc_op = &nilfs_sc_file_ops;
		}
		bh_org = bh;
		get_bh(bh_org);
		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
					&binfo);
		if (bh != bh_org)
			/* the bmap substituted a new buffer */
			nilfs_list_replace_buffer(bh_org, bh);
		brelse(bh_org);
		if (unlikely(err))
			goto failed_bmap;

		if (ndatablk > 0)
			sc_op->write_data_binfo(sci, &ssp, &binfo);
		else
			sc_op->write_node_binfo(sci, &ssp, &binfo);

		blocknr++;
		if (--nblocks == 0) {
			/* finished this file's blocks; next finfo follows */
			finfo = NULL;
			if (--nfinfo == 0)
				break;
		} else if (ndatablk > 0)
			ndatablk--;
	}
 out:
	return 0;

 failed_bmap:
	err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
	return err;
}
1690 | |||
1691 | static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) | ||
1692 | { | ||
1693 | struct nilfs_segment_buffer *segbuf; | ||
1694 | int err; | ||
1695 | |||
1696 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1697 | err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode); | ||
1698 | if (unlikely(err)) | ||
1699 | return err; | ||
1700 | nilfs_segbuf_fill_in_segsum(segbuf); | ||
1701 | } | ||
1702 | return 0; | ||
1703 | } | ||
1704 | |||
/*
 * nilfs_copy_replace_page_buffers - freeze a page's collected buffers by
 * copying them to a private clone page
 * @page: page whose collected buffers should be frozen for writeback
 * @out: list that collects the original (replaced) buffers
 *
 * Allocates a private page, copies the contents of every buffer that is
 * linked on a segment buffer list (b_assoc_buffers non-empty), lets the
 * clone buffers take over those list positions, and queues the originals
 * on @out so they can be released after I/O completes.  The clone page is
 * marked under writeback before it is unlocked.
 *
 * Returns 0 on success or -ENOMEM if the clone page cannot be allocated.
 */
static int
nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
{
	struct page *clone_page;
	struct buffer_head *bh, *head, *bh2;
	void *kaddr;

	bh = head = page_buffers(page);

	/* Clone page comes back locked with private buffers attached. */
	clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
	if (unlikely(!clone_page))
		return -ENOMEM;

	bh2 = page_buffers(clone_page);
	kaddr = kmap_atomic(page, KM_USER0);
	do {
		/* Only buffers queued for this construction are copied. */
		if (list_empty(&bh->b_assoc_buffers))
			continue;
		get_bh(bh2);
		page_cache_get(clone_page); /* for each bh */
		memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
		bh2->b_blocknr = bh->b_blocknr;
		/* The clone buffer takes the original's list slot ... */
		list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
		/* ... and the original is parked on @out for cleanup. */
		list_add_tail(&bh->b_assoc_buffers, out);
	} while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
	kunmap_atomic(kaddr, KM_USER0);

	if (!TestSetPageWriteback(clone_page))
		inc_zone_page_state(clone_page, NR_WRITEBACK);
	unlock_page(clone_page);

	return 0;
}
1738 | |||
1739 | static int nilfs_test_page_to_be_frozen(struct page *page) | ||
1740 | { | ||
1741 | struct address_space *mapping = page->mapping; | ||
1742 | |||
1743 | if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) | ||
1744 | return 0; | ||
1745 | |||
1746 | if (page_mapped(page)) { | ||
1747 | ClearPageChecked(page); | ||
1748 | return 1; | ||
1749 | } | ||
1750 | return PageChecked(page); | ||
1751 | } | ||
1752 | |||
/*
 * nilfs_begin_page_io - put a file page under writeback, freezing its
 * buffers when necessary
 * @page: page to start I/O on (may be NULL)
 * @out: list that collects buffers replaced by frozen copies
 *
 * Returns 0 on success or a negative error from the freeze path.
 */
static int nilfs_begin_page_io(struct page *page, struct list_head *out)
{
	int err = 0;

	/*
	 * Split b-tree node pages can reach here more than once; the
	 * writeback flag set by the first call makes later calls no-ops.
	 */
	if (!page || PageWriteback(page))
		return 0;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);

	if (nilfs_test_page_to_be_frozen(page))
		err = nilfs_copy_replace_page_buffers(page, out);

	return err;
}
1772 | |||
1773 | static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, | ||
1774 | struct page **failed_page) | ||
1775 | { | ||
1776 | struct nilfs_segment_buffer *segbuf; | ||
1777 | struct page *bd_page = NULL, *fs_page = NULL; | ||
1778 | struct list_head *list = &sci->sc_copied_buffers; | ||
1779 | int err; | ||
1780 | |||
1781 | *failed_page = NULL; | ||
1782 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1783 | struct buffer_head *bh; | ||
1784 | |||
1785 | list_for_each_entry(bh, &segbuf->sb_segsum_buffers, | ||
1786 | b_assoc_buffers) { | ||
1787 | if (bh->b_page != bd_page) { | ||
1788 | if (bd_page) { | ||
1789 | lock_page(bd_page); | ||
1790 | clear_page_dirty_for_io(bd_page); | ||
1791 | set_page_writeback(bd_page); | ||
1792 | unlock_page(bd_page); | ||
1793 | } | ||
1794 | bd_page = bh->b_page; | ||
1795 | } | ||
1796 | } | ||
1797 | |||
1798 | list_for_each_entry(bh, &segbuf->sb_payload_buffers, | ||
1799 | b_assoc_buffers) { | ||
1800 | if (bh == sci->sc_super_root) { | ||
1801 | if (bh->b_page != bd_page) { | ||
1802 | lock_page(bd_page); | ||
1803 | clear_page_dirty_for_io(bd_page); | ||
1804 | set_page_writeback(bd_page); | ||
1805 | unlock_page(bd_page); | ||
1806 | bd_page = bh->b_page; | ||
1807 | } | ||
1808 | break; | ||
1809 | } | ||
1810 | if (bh->b_page != fs_page) { | ||
1811 | err = nilfs_begin_page_io(fs_page, list); | ||
1812 | if (unlikely(err)) { | ||
1813 | *failed_page = fs_page; | ||
1814 | goto out; | ||
1815 | } | ||
1816 | fs_page = bh->b_page; | ||
1817 | } | ||
1818 | } | ||
1819 | } | ||
1820 | if (bd_page) { | ||
1821 | lock_page(bd_page); | ||
1822 | clear_page_dirty_for_io(bd_page); | ||
1823 | set_page_writeback(bd_page); | ||
1824 | unlock_page(bd_page); | ||
1825 | } | ||
1826 | err = nilfs_begin_page_io(fs_page, list); | ||
1827 | if (unlikely(err)) | ||
1828 | *failed_page = fs_page; | ||
1829 | out: | ||
1830 | return err; | ||
1831 | } | ||
1832 | |||
1833 | static int nilfs_segctor_write(struct nilfs_sc_info *sci, | ||
1834 | struct backing_dev_info *bdi) | ||
1835 | { | ||
1836 | struct nilfs_segment_buffer *segbuf; | ||
1837 | struct nilfs_write_info wi; | ||
1838 | int err, res; | ||
1839 | |||
1840 | wi.sb = sci->sc_super; | ||
1841 | wi.bh_sr = sci->sc_super_root; | ||
1842 | wi.bdi = bdi; | ||
1843 | |||
1844 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1845 | nilfs_segbuf_prepare_write(segbuf, &wi); | ||
1846 | err = nilfs_segbuf_write(segbuf, &wi); | ||
1847 | |||
1848 | res = nilfs_segbuf_wait(segbuf, &wi); | ||
1849 | err = unlikely(err) ? : res; | ||
1850 | if (unlikely(err)) | ||
1851 | return err; | ||
1852 | } | ||
1853 | return 0; | ||
1854 | } | ||
1855 | |||
1856 | static int nilfs_page_has_uncleared_buffer(struct page *page) | ||
1857 | { | ||
1858 | struct buffer_head *head, *bh; | ||
1859 | |||
1860 | head = bh = page_buffers(page); | ||
1861 | do { | ||
1862 | if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) | ||
1863 | return 1; | ||
1864 | bh = bh->b_this_page; | ||
1865 | } while (bh != head); | ||
1866 | return 0; | ||
1867 | } | ||
1868 | |||
1869 | static void __nilfs_end_page_io(struct page *page, int err) | ||
1870 | { | ||
1871 | if (!err) { | ||
1872 | if (!nilfs_page_buffers_clean(page)) | ||
1873 | __set_page_dirty_nobuffers(page); | ||
1874 | ClearPageError(page); | ||
1875 | } else { | ||
1876 | __set_page_dirty_nobuffers(page); | ||
1877 | SetPageError(page); | ||
1878 | } | ||
1879 | |||
1880 | if (buffer_nilfs_allocated(page_buffers(page))) { | ||
1881 | if (TestClearPageWriteback(page)) | ||
1882 | dec_zone_page_state(page, NR_WRITEBACK); | ||
1883 | } else | ||
1884 | end_page_writeback(page); | ||
1885 | } | ||
1886 | |||
/*
 * nilfs_end_page_io - finish writeback on a file or btnode page
 * @page: page to complete (NULL is silently ignored)
 * @err: I/O result propagated to __nilfs_end_page_io()
 */
static void nilfs_end_page_io(struct page *page, int err)
{
	if (!page)
		return;

	/*
	 * B-tree node pages can be split over several segments, so this
	 * function may run more than once per page; complete the page
	 * only after every buffer collected from it has been cleaned up.
	 */
	if (buffer_nilfs_node(page_buffers(page)) &&
	    nilfs_page_has_uncleared_buffer(page))
		return;

	__nilfs_end_page_io(page, err);
}
1902 | |||
/*
 * nilfs_clear_copied_buffers - release original buffers that were frozen
 * by nilfs_copy_replace_page_buffers()
 * @list: list of replaced buffers queued during prepare_write
 * @err: I/O result; on failure the buffers keep their dirty state so the
 *	blocks will be written again in a later construction
 */
static void nilfs_clear_copied_buffers(struct list_head *list, int err)
{
	struct buffer_head *bh, *head;
	struct page *page;

	while (!list_empty(list)) {
		bh = list_entry(list->next, struct buffer_head,
				b_assoc_buffers);
		page = bh->b_page;
		/* Pin the page; brelse() below may drop its last buffer ref */
		page_cache_get(page);
		head = bh = page_buffers(page);
		do {
			/* Process every queued buffer of this page at once */
			if (!list_empty(&bh->b_assoc_buffers)) {
				list_del_init(&bh->b_assoc_buffers);
				if (!err) {
					set_buffer_uptodate(bh);
					clear_buffer_dirty(bh);
					clear_buffer_nilfs_volatile(bh);
				}
				brelse(bh); /* for b_assoc_buffers */
			}
		} while ((bh = bh->b_this_page) != head);

		__nilfs_end_page_io(page, err);
		page_cache_release(page);
	}
}
1930 | |||
/*
 * nilfs_segctor_abort_write - roll back writeback state after a failed
 * prepare or write phase
 * @sci: segment constructor information
 * @failed_page: file page on which prepare_write stopped; cleanup does not
 *	proceed past it because later pages were never put under writeback
 * @err: error code propagated to the page/buffer cleanup
 *
 * Mirrors the page-walk of nilfs_segctor_prepare_write(): block-device
 * pages just get their writeback state ended, while file pages are
 * finished through nilfs_end_page_io() with the error set.
 */
static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
				      struct page *failed_page, int err)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			if (bh == sci->sc_super_root) {
				/* NOTE(review): assumes bd_page != NULL here,
				   i.e. summary pages precede the super
				   root — confirm. */
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, err);
				/* Pages after failed_page never started I/O */
				if (unlikely(fs_page == failed_page))
					goto done;
				fs_page = bh->b_page;
			}
		}
	}
	/* Finish the trailing pages that span past the last list entry. */
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, err);
 done:
	nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
}
1973 | |||
1974 | static void nilfs_set_next_segment(struct the_nilfs *nilfs, | ||
1975 | struct nilfs_segment_buffer *segbuf) | ||
1976 | { | ||
1977 | nilfs->ns_segnum = segbuf->sb_segnum; | ||
1978 | nilfs->ns_nextnum = segbuf->sb_nextnum; | ||
1979 | nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start | ||
1980 | + segbuf->sb_sum.nblocks; | ||
1981 | nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq; | ||
1982 | nilfs->ns_ctime = segbuf->sb_sum.ctime; | ||
1983 | } | ||
1984 | |||
/*
 * nilfs_segctor_complete_write - commit in-memory state after every segment
 * buffer was written successfully
 * @sci: segment constructor information
 *
 * Marks all written buffers clean and up to date, ends writeback on their
 * pages, updates the logical-segment flags, advances the write position,
 * and, when a super root was written, publishes the new checkpoint.
 */
static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int update_sr = (sci->sc_super_root != NULL);

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}
		/*
		 * We assume that the buffers which belong to the same page
		 * continue over the buffer list.
		 * Under this assumption, the last BHs of pages is
		 * identifiable by the discontinuity of bh->b_page
		 * (page != fs_page).
		 *
		 * For B-tree node blocks, however, this assumption is not
		 * guaranteed.  The cleanup code of B-tree node pages needs
		 * special care.
		 */
		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			clear_buffer_nilfs_volatile(bh);
			if (bh == sci->sc_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, 0);
				fs_page = bh->b_page;
			}
		}

		/* Track open/close of multi-segment logical segments. */
		if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) {
			if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) {
				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				sci->sc_lseg_stime = jiffies;
			}
			if (NILFS_SEG_LOGEND(&segbuf->sb_sum))
				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
		}
	}
	/*
	 * Since pages may continue over multiple segment buffers,
	 * end of the last page must be checked outside of the loop.
	 */
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, 0);

	nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);

	nilfs_drop_collected_inodes(&sci->sc_dirty_files);

	if (nilfs_doing_gc()) {
		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
		if (update_sr)
			nilfs_commit_gcdat_inode(nilfs);
	} else
		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;

	sci->sc_nblk_inc += sci->sc_nblk_this_inc;

	/* Advance the on-disk write position past what was written. */
	segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	nilfs_set_next_segment(nilfs, segbuf);

	if (update_sr) {
		/* A super root was written: publish the new checkpoint. */
		nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
				       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
		sbi->s_super->s_dirt = 1;

		clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
	} else
		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
}
2080 | |||
/*
 * nilfs_segctor_check_in_files - move dirty inodes into the constructor
 * @sci: segment constructor information
 * @sbi: nilfs super block information holding the dirty-file list
 *
 * Transfers every inode on sbi->s_dirty_files to sci->sc_dirty_files,
 * switching its state from QUEUED to BUSY and pinning its on-disk inode
 * block.  Because the block lookup must run without s_inode_lock, the list
 * walk is restarted from scratch each time the lock was dropped.
 *
 * Returns 0 on success or the error from the ifile block lookup.
 */
static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
					struct nilfs_sb_info *sbi)
{
	struct nilfs_inode_info *ii, *n;
	__u64 cno = sbi->s_nilfs->ns_cno;

	spin_lock(&sbi->s_inode_lock);
 retry:
	list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) {
		if (!ii->i_bh) {
			struct buffer_head *ibh;
			int err;

			/* Drop the lock for the block read; the list may
			   change meanwhile, hence the retry below. */
			spin_unlock(&sbi->s_inode_lock);
			err = nilfs_ifile_get_inode_block(
				sbi->s_ifile, ii->vfs_inode.i_ino, &ibh);
			if (unlikely(err)) {
				nilfs_warning(sbi->s_super, __func__,
					      "failed to get inode block.\n");
				return err;
			}
			nilfs_mdt_mark_buffer_dirty(ibh);
			nilfs_mdt_mark_dirty(sbi->s_ifile);
			spin_lock(&sbi->s_inode_lock);
			/* Another task may have installed i_bh already. */
			if (likely(!ii->i_bh))
				ii->i_bh = ibh;
			else
				brelse(ibh);
			goto retry;
		}
		ii->i_cno = cno;

		clear_bit(NILFS_I_QUEUED, &ii->i_state);
		set_bit(NILFS_I_BUSY, &ii->i_state);
		list_del(&ii->i_dirty);
		list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
	}
	spin_unlock(&sbi->s_inode_lock);

	NILFS_I(sbi->s_ifile)->i_cno = cno;

	return 0;
}
2124 | |||
/*
 * nilfs_segctor_check_out_files - hand written-out inodes back after a
 * construction pass
 * @sci: segment constructor information
 * @sbi: nilfs super block information
 *
 * Inodes that were fully written (UPDATED set, DIRTY clear) are unpinned
 * and parked on the transaction's garbage list for disposal; the rest stay
 * on sci->sc_dirty_files with their checkpoint number refreshed.
 */
static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
					  struct nilfs_sb_info *sbi)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_inode_info *ii, *n;
	__u64 cno = sbi->s_nilfs->ns_cno;

	spin_lock(&sbi->s_inode_lock);
	list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
		    test_bit(NILFS_I_DIRTY, &ii->i_state)) {
			/* The current checkpoint number (=nilfs->ns_cno) is
			   changed between check-in and check-out only if the
			   super root is written out.  So, we can update i_cno
			   for the inodes that remain in the dirty list. */
			ii->i_cno = cno;
			continue;
		}
		clear_bit(NILFS_I_BUSY, &ii->i_state);
		brelse(ii->i_bh);
		ii->i_bh = NULL;
		list_del(&ii->i_dirty);
		list_add_tail(&ii->i_dirty, &ti->ti_garbage);
	}
	spin_unlock(&sbi->s_inode_lock);
}
2151 | |||
2152 | /* | ||
2153 | * Main procedure of segment constructor | ||
2154 | */ | ||
2155 | static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) | ||
2156 | { | ||
2157 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
2158 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
2159 | struct page *failed_page; | ||
2160 | int err, has_sr = 0; | ||
2161 | |||
2162 | sci->sc_stage.scnt = NILFS_ST_INIT; | ||
2163 | |||
2164 | err = nilfs_segctor_check_in_files(sci, sbi); | ||
2165 | if (unlikely(err)) | ||
2166 | goto out; | ||
2167 | |||
2168 | if (nilfs_test_metadata_dirty(sbi)) | ||
2169 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); | ||
2170 | |||
2171 | if (nilfs_segctor_clean(sci)) | ||
2172 | goto out; | ||
2173 | |||
2174 | do { | ||
2175 | sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; | ||
2176 | |||
2177 | err = nilfs_segctor_begin_construction(sci, nilfs); | ||
2178 | if (unlikely(err)) | ||
2179 | goto out; | ||
2180 | |||
2181 | /* Update time stamp */ | ||
2182 | sci->sc_seg_ctime = get_seconds(); | ||
2183 | |||
2184 | err = nilfs_segctor_collect(sci, nilfs, mode); | ||
2185 | if (unlikely(err)) | ||
2186 | goto failed; | ||
2187 | |||
2188 | has_sr = (sci->sc_super_root != NULL); | ||
2189 | |||
2190 | /* Avoid empty segment */ | ||
2191 | if (sci->sc_stage.scnt == NILFS_ST_DONE && | ||
2192 | NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { | ||
2193 | nilfs_segctor_end_construction(sci, nilfs, 1); | ||
2194 | goto out; | ||
2195 | } | ||
2196 | |||
2197 | err = nilfs_segctor_assign(sci, mode); | ||
2198 | if (unlikely(err)) | ||
2199 | goto failed; | ||
2200 | |||
2201 | if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) | ||
2202 | nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); | ||
2203 | |||
2204 | if (has_sr) { | ||
2205 | err = nilfs_segctor_fill_in_checkpoint(sci); | ||
2206 | if (unlikely(err)) | ||
2207 | goto failed_to_make_up; | ||
2208 | |||
2209 | nilfs_segctor_fill_in_super_root(sci, nilfs); | ||
2210 | } | ||
2211 | nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); | ||
2212 | |||
2213 | /* Write partial segments */ | ||
2214 | err = nilfs_segctor_prepare_write(sci, &failed_page); | ||
2215 | if (unlikely(err)) | ||
2216 | goto failed_to_write; | ||
2217 | |||
2218 | nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); | ||
2219 | |||
2220 | err = nilfs_segctor_write(sci, nilfs->ns_bdi); | ||
2221 | if (unlikely(err)) | ||
2222 | goto failed_to_write; | ||
2223 | |||
2224 | nilfs_segctor_complete_write(sci); | ||
2225 | |||
2226 | /* Commit segments */ | ||
2227 | if (has_sr) { | ||
2228 | nilfs_segctor_commit_free_segments(sci); | ||
2229 | nilfs_segctor_clear_metadata_dirty(sci); | ||
2230 | } | ||
2231 | |||
2232 | nilfs_segctor_end_construction(sci, nilfs, 0); | ||
2233 | |||
2234 | } while (sci->sc_stage.scnt != NILFS_ST_DONE); | ||
2235 | |||
2236 | out: | ||
2237 | nilfs_segctor_destroy_segment_buffers(sci); | ||
2238 | nilfs_segctor_check_out_files(sci, sbi); | ||
2239 | return err; | ||
2240 | |||
2241 | failed_to_write: | ||
2242 | nilfs_segctor_abort_write(sci, failed_page, err); | ||
2243 | nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); | ||
2244 | |||
2245 | failed_to_make_up: | ||
2246 | if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) | ||
2247 | nilfs_redirty_inodes(&sci->sc_dirty_files); | ||
2248 | |||
2249 | failed: | ||
2250 | if (nilfs_doing_gc()) | ||
2251 | nilfs_redirty_inodes(&sci->sc_gc_inodes); | ||
2252 | nilfs_segctor_end_construction(sci, nilfs, err); | ||
2253 | goto out; | ||
2254 | } | ||
2255 | |||
2256 | /** | ||
2257 | * nilfs_secgtor_start_timer - set timer of background write | ||
2258 | * @sci: nilfs_sc_info | ||
2259 | * | ||
2260 | * If the timer has already been set, it ignores the new request. | ||
2261 | * This function MUST be called within a section locking the segment | ||
2262 | * semaphore. | ||
2263 | */ | ||
2264 | static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) | ||
2265 | { | ||
2266 | spin_lock(&sci->sc_state_lock); | ||
2267 | if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { | ||
2268 | sci->sc_timer->expires = jiffies + sci->sc_interval; | ||
2269 | add_timer(sci->sc_timer); | ||
2270 | sci->sc_state |= NILFS_SEGCTOR_COMMIT; | ||
2271 | } | ||
2272 | spin_unlock(&sci->sc_state_lock); | ||
2273 | } | ||
2274 | |||
2275 | static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) | ||
2276 | { | ||
2277 | spin_lock(&sci->sc_state_lock); | ||
2278 | if (!(sci->sc_flush_request & (1 << bn))) { | ||
2279 | unsigned long prev_req = sci->sc_flush_request; | ||
2280 | |||
2281 | sci->sc_flush_request |= (1 << bn); | ||
2282 | if (!prev_req) | ||
2283 | wake_up(&sci->sc_wait_daemon); | ||
2284 | } | ||
2285 | spin_unlock(&sci->sc_state_lock); | ||
2286 | } | ||
2287 | |||
2288 | /** | ||
2289 | * nilfs_flush_segment - trigger a segment construction for resource control | ||
2290 | * @sb: super block | ||
2291 | * @ino: inode number of the file to be flushed out. | ||
2292 | */ | ||
2293 | void nilfs_flush_segment(struct super_block *sb, ino_t ino) | ||
2294 | { | ||
2295 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
2296 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | ||
2297 | |||
2298 | if (!sci || nilfs_doing_construction()) | ||
2299 | return; | ||
2300 | nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); | ||
2301 | /* assign bit 0 to data files */ | ||
2302 | } | ||
2303 | |||
/*
 * nilfs_segctor_add_segments_to_be_freed - queue segments for cleaning
 * @sci: segment constructor information
 * @segnum: array of segment numbers to be freed
 * @nsegs: number of entries in @segnum
 *
 * Builds a segment-entry list for the given segment numbers, sanity-checks
 * each against the segment usage file (warning about segments that are not
 * in use), and splices the list onto sci->sc_cleaning_segments.  On any
 * failure the partially built list is disposed of and nothing is queued.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or an error from
 * reading the segment usage entry.
 */
int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
					   __u64 *segnum, size_t nsegs)
{
	struct nilfs_segment_entry *ent;
	struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
	struct inode *sufile = nilfs->ns_sufile;
	LIST_HEAD(list);
	__u64 *pnum;
	size_t i;
	int err;

	for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) {
		ent = nilfs_alloc_segment_entry(*pnum);
		if (unlikely(!ent)) {
			err = -ENOMEM;
			goto failed;
		}
		/* Add before opening so a failed open still frees it below */
		list_add_tail(&ent->list, &list);

		err = nilfs_open_segment_entry(ent, sufile);
		if (unlikely(err))
			goto failed;

		if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su)))
			printk(KERN_WARNING "NILFS: unused segment is "
			       "requested to be cleaned (segnum=%llu)\n",
			       (unsigned long long)ent->segnum);
		nilfs_close_segment_entry(ent, sufile);
	}
	/* All entries validated; queue them at the tail in one splice. */
	list_splice(&list, sci->sc_cleaning_segments.prev);
	return 0;

 failed:
	nilfs_dispose_segment_list(&list);
	return err;
}
2340 | |||
/*
 * nilfs_segctor_clear_segments_to_be_freed - drop all segments queued by
 * nilfs_segctor_add_segments_to_be_freed() without freeing them on disk
 * @sci: segment constructor information
 */
void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
{
	nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
}
2345 | |||
/*
 * Per-waiter record for threads sleeping in nilfs_segctor_sync() until
 * the daemon completes their requested construction.
 */
struct nilfs_segctor_wait_request {
	wait_queue_t wq;	/* entry on sci->sc_wait_request */
	__u32 seq;		/* request sequence number to wait for */
	int err;		/* construction result set by the waker */
	atomic_t done;		/* nonzero once the request is serviced */
};
2352 | |||
/*
 * nilfs_segctor_sync - request a construction and wait for its completion
 * @sci: segment constructor information
 *
 * Registers a wait request carrying a new sequence number, wakes the
 * segment constructor daemon, and sleeps interruptibly until
 * nilfs_segctor_wakeup() marks the request done.
 *
 * Returns the construction result, or -ERESTARTSYS if interrupted by a
 * signal before completion.
 */
static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
{
	struct nilfs_segctor_wait_request wait_req;
	int err = 0;

	spin_lock(&sci->sc_state_lock);
	init_wait(&wait_req.wq);
	wait_req.err = 0;
	atomic_set(&wait_req.done, 0);
	/* Allocate our sequence number under the state lock. */
	wait_req.seq = ++sci->sc_seq_request;
	spin_unlock(&sci->sc_state_lock);

	init_waitqueue_entry(&wait_req.wq, current);
	add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
	set_current_state(TASK_INTERRUPTIBLE);
	wake_up(&sci->sc_wait_daemon);

	/* Open-coded interruptible wait for wait_req.done. */
	for (;;) {
		if (atomic_read(&wait_req.done)) {
			err = wait_req.err;
			break;
		}
		if (!signal_pending(current)) {
			schedule();
			continue;
		}
		err = -ERESTARTSYS;
		break;
	}
	finish_wait(&sci->sc_wait_request, &wait_req.wq);
	return err;
}
2385 | |||
/*
 * nilfs_segctor_wakeup - complete wait requests covered by the current
 * done-sequence and wake their waiters
 * @sci: segment constructor information
 * @err: construction result delivered to the completed waiters
 *
 * Walks the sc_wait_request queue under its lock; every entry whose
 * sequence number is at or below sc_seq_done is marked done with @err and
 * its wake function is invoked directly.
 */
static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
{
	struct nilfs_segctor_wait_request *wrq, *n;
	unsigned long flags;

	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
				 wq.task_list) {
		if (!atomic_read(&wrq->done) &&
		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
			wrq->err = err;
			atomic_set(&wrq->done, 1);
		}
		if (atomic_read(&wrq->done)) {
			/* NOTE(review): the mode argument equals TASK_NORMAL
			   (wake both sleep states) — confirm intent. */
			wrq->wq.func(&wrq->wq,
				     TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
				     0, NULL);
		}
	}
	spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
}
2407 | |||
2408 | /** | ||
2409 | * nilfs_construct_segment - construct a logical segment | ||
2410 | * @sb: super block | ||
2411 | * | ||
2412 | * Return Value: On success, 0 is retured. On errors, one of the following | ||
2413 | * negative error code is returned. | ||
2414 | * | ||
2415 | * %-EROFS - Read only filesystem. | ||
2416 | * | ||
2417 | * %-EIO - I/O error | ||
2418 | * | ||
2419 | * %-ENOSPC - No space left on device (only in a panic state). | ||
2420 | * | ||
2421 | * %-ERESTARTSYS - Interrupted. | ||
2422 | * | ||
2423 | * %-ENOMEM - Insufficient memory available. | ||
2424 | */ | ||
2425 | int nilfs_construct_segment(struct super_block *sb) | ||
2426 | { | ||
2427 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
2428 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | ||
2429 | struct nilfs_transaction_info *ti; | ||
2430 | int err; | ||
2431 | |||
2432 | if (!sci) | ||
2433 | return -EROFS; | ||
2434 | |||
2435 | /* A call inside transactions causes a deadlock. */ | ||
2436 | BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); | ||
2437 | |||
2438 | err = nilfs_segctor_sync(sci); | ||
2439 | return err; | ||
2440 | } | ||
2441 | |||
2442 | /** | ||
2443 | * nilfs_construct_dsync_segment - construct a data-only logical segment | ||
2444 | * @sb: super block | ||
2445 | * @inode: inode whose data blocks should be written out | ||
2446 | * @start: start byte offset | ||
2447 | * @end: end byte offset (inclusive) | ||
2448 | * | ||
2449 | * Return Value: On success, 0 is retured. On errors, one of the following | ||
2450 | * negative error code is returned. | ||
2451 | * | ||
2452 | * %-EROFS - Read only filesystem. | ||
2453 | * | ||
2454 | * %-EIO - I/O error | ||
2455 | * | ||
2456 | * %-ENOSPC - No space left on device (only in a panic state). | ||
2457 | * | ||
2458 | * %-ERESTARTSYS - Interrupted. | ||
2459 | * | ||
2460 | * %-ENOMEM - Insufficient memory available. | ||
2461 | */ | ||
2462 | int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, | ||
2463 | loff_t start, loff_t end) | ||
2464 | { | ||
2465 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
2466 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | ||
2467 | struct nilfs_inode_info *ii; | ||
2468 | struct nilfs_transaction_info ti; | ||
2469 | int err = 0; | ||
2470 | |||
2471 | if (!sci) | ||
2472 | return -EROFS; | ||
2473 | |||
2474 | nilfs_transaction_lock(sbi, &ti, 0); | ||
2475 | |||
2476 | ii = NILFS_I(inode); | ||
2477 | if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || | ||
2478 | nilfs_test_opt(sbi, STRICT_ORDER) || | ||
2479 | test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || | ||
2480 | nilfs_discontinued(sbi->s_nilfs)) { | ||
2481 | nilfs_transaction_unlock(sbi); | ||
2482 | err = nilfs_segctor_sync(sci); | ||
2483 | return err; | ||
2484 | } | ||
2485 | |||
2486 | spin_lock(&sbi->s_inode_lock); | ||
2487 | if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && | ||
2488 | !test_bit(NILFS_I_BUSY, &ii->i_state)) { | ||
2489 | spin_unlock(&sbi->s_inode_lock); | ||
2490 | nilfs_transaction_unlock(sbi); | ||
2491 | return 0; | ||
2492 | } | ||
2493 | spin_unlock(&sbi->s_inode_lock); | ||
2494 | sci->sc_dsync_inode = ii; | ||
2495 | sci->sc_dsync_start = start; | ||
2496 | sci->sc_dsync_end = end; | ||
2497 | |||
2498 | err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); | ||
2499 | |||
2500 | nilfs_transaction_unlock(sbi); | ||
2501 | return err; | ||
2502 | } | ||
2503 | |||
/*
 * One accepted construction request, passed from nilfs_segctor_accept()
 * through nilfs_segctor_construct() to nilfs_segctor_notify().
 */
struct nilfs_segctor_req {
	int mode;		/* construction mode (SC_* constant) */
	__u32 seq_accepted;	/* sc_seq_request snapshot at accept time */
	int sc_err;  /* construction failure */
	int sb_err;  /* super block writeback failure */
};
2510 | |||
2511 | #define FLUSH_FILE_BIT (0x1) /* data file only */ | ||
2512 | #define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ | ||
2513 | |||
2514 | static void nilfs_segctor_accept(struct nilfs_sc_info *sci, | ||
2515 | struct nilfs_segctor_req *req) | ||
2516 | { | ||
2517 | req->sc_err = req->sb_err = 0; | ||
2518 | spin_lock(&sci->sc_state_lock); | ||
2519 | req->seq_accepted = sci->sc_seq_request; | ||
2520 | spin_unlock(&sci->sc_state_lock); | ||
2521 | |||
2522 | if (sci->sc_timer) | ||
2523 | del_timer_sync(sci->sc_timer); | ||
2524 | } | ||
2525 | |||
2526 | static void nilfs_segctor_notify(struct nilfs_sc_info *sci, | ||
2527 | struct nilfs_segctor_req *req) | ||
2528 | { | ||
2529 | /* Clear requests (even when the construction failed) */ | ||
2530 | spin_lock(&sci->sc_state_lock); | ||
2531 | |||
2532 | sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; | ||
2533 | |||
2534 | if (req->mode == SC_LSEG_SR) { | ||
2535 | sci->sc_seq_done = req->seq_accepted; | ||
2536 | nilfs_segctor_wakeup(sci, req->sc_err ? : req->sb_err); | ||
2537 | sci->sc_flush_request = 0; | ||
2538 | } else if (req->mode == SC_FLUSH_FILE) | ||
2539 | sci->sc_flush_request &= ~FLUSH_FILE_BIT; | ||
2540 | else if (req->mode == SC_FLUSH_DAT) | ||
2541 | sci->sc_flush_request &= ~FLUSH_DAT_BIT; | ||
2542 | |||
2543 | spin_unlock(&sci->sc_state_lock); | ||
2544 | } | ||
2545 | |||
/*
 * nilfs_segctor_construct - run one segment construction and finish it up
 * @sci: segment constructor info
 * @req: request context; req->sc_err and req->sb_err receive the results
 *
 * Returns the construction error code (0 on success).
 */
static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
				   struct nilfs_segctor_req *req)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err = 0;

	/* A discontinued log forces a full construction with super root */
	if (nilfs_discontinued(nilfs))
		req->mode = SC_LSEG_SR;
	/* nilfs_segctor_confirm() presumably detects the nothing-to-do
	   case; construct only when it returns false — TODO confirm */
	if (!nilfs_segctor_confirm(sci)) {
		err = nilfs_segctor_do_construct(sci, req->mode);
		req->sc_err = err;
	}
	if (likely(!err)) {
		/* Reset the dirty-block counter except for a DAT-only flush */
		if (req->mode != SC_FLUSH_DAT)
			atomic_set(&nilfs->ns_ndirtyblks, 0);
		/* Write back the super block when a super root was made
		   for a discontinued log */
		if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
		    nilfs_discontinued(nilfs)) {
			down_write(&nilfs->ns_sem);
			req->sb_err = nilfs_commit_super(sbi, 0);
			up_write(&nilfs->ns_sem);
		}
	}
	return err;
}
2571 | |||
/*
 * Timer callback: wake the segctord task whose task_struct pointer was
 * stashed in the timer's data field (see nilfs_segctor_thread()).
 */
static void nilfs_construction_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2577 | |||
2578 | static void | ||
2579 | nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) | ||
2580 | { | ||
2581 | struct nilfs_inode_info *ii, *n; | ||
2582 | |||
2583 | list_for_each_entry_safe(ii, n, head, i_dirty) { | ||
2584 | if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) | ||
2585 | continue; | ||
2586 | hlist_del_init(&ii->vfs_inode.i_hash); | ||
2587 | list_del_init(&ii->i_dirty); | ||
2588 | nilfs_clear_gcinode(&ii->vfs_inode); | ||
2589 | } | ||
2590 | } | ||
2591 | |||
/**
 * nilfs_clean_segments - run a construction cycle for garbage collection
 * @sb: super block instance
 * @argp: ioctl argument from userland, parsed by
 *        nilfs_ioctl_prepare_clean_segments()
 *
 * Returns 0 on success or a negative error code.
 */
int nilfs_clean_segments(struct super_block *sb, void __user *argp)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_transaction_info ti;
	struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
	int err;

	if (unlikely(!sci))
		return -EROFS;	/* no segment constructor attached */

	nilfs_transaction_lock(sbi, &ti, 1);

	err = nilfs_init_gcdat_inode(nilfs);
	if (unlikely(err))
		goto out_unlock;
	err = nilfs_ioctl_prepare_clean_segments(nilfs, argp);
	if (unlikely(err))
		goto out_unlock;

	/* Hand the prepared GC inodes over to the constructor's list */
	list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);

	/* Retry the construction until it succeeds, sleeping sc_interval
	   jiffies between attempts */
	for (;;) {
		nilfs_segctor_accept(sci, &req);
		err = nilfs_segctor_construct(sci, &req);
		nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
		nilfs_segctor_notify(sci, &req);

		if (likely(!err))
			break;

		nilfs_warning(sb, __func__,
			      "segment construction failed. (err=%d)", err);
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(sci->sc_interval);
	}

 out_unlock:
	nilfs_clear_gcdat_inode(nilfs);
	nilfs_transaction_unlock(sbi);
	return err;
}
2635 | |||
/*
 * nilfs_segctor_thread_construct - one construction pass run from the
 * segctord thread, wrapped in a transaction lock.
 * @sci: segment constructor info
 * @mode: construction mode (SC_* value)
 */
static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct nilfs_transaction_info ti;
	struct nilfs_segctor_req req = { .mode = mode };

	nilfs_transaction_lock(sbi, &ti, 0);

	nilfs_segctor_accept(sci, &req);
	nilfs_segctor_construct(sci, &req);
	nilfs_segctor_notify(sci, &req);

	/*
	 * Unclosed segment should be retried. We do this using sc_timer.
	 * Timeout of sc_timer will invoke complete construction which leads
	 * to close the current logical segment.
	 */
	if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
		nilfs_segctor_start_timer(sci);

	nilfs_transaction_unlock(sbi);
}
2658 | |||
2659 | static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) | ||
2660 | { | ||
2661 | int mode = 0; | ||
2662 | int err; | ||
2663 | |||
2664 | spin_lock(&sci->sc_state_lock); | ||
2665 | mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? | ||
2666 | SC_FLUSH_DAT : SC_FLUSH_FILE; | ||
2667 | spin_unlock(&sci->sc_state_lock); | ||
2668 | |||
2669 | if (mode) { | ||
2670 | err = nilfs_segctor_do_construct(sci, mode); | ||
2671 | |||
2672 | spin_lock(&sci->sc_state_lock); | ||
2673 | sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? | ||
2674 | ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT; | ||
2675 | spin_unlock(&sci->sc_state_lock); | ||
2676 | } | ||
2677 | clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); | ||
2678 | } | ||
2679 | |||
2680 | static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) | ||
2681 | { | ||
2682 | if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || | ||
2683 | time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { | ||
2684 | if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) | ||
2685 | return SC_FLUSH_FILE; | ||
2686 | else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) | ||
2687 | return SC_FLUSH_DAT; | ||
2688 | } | ||
2689 | return SC_LSEG_SR; | ||
2690 | } | ||
2691 | |||
2692 | /** | ||
2693 | * nilfs_segctor_thread - main loop of the segment constructor thread. | ||
2694 | * @arg: pointer to a struct nilfs_sc_info. | ||
2695 | * | ||
2696 | * nilfs_segctor_thread() initializes a timer and serves as a daemon | ||
2697 | * to execute segment constructions. | ||
2698 | */ | ||
2699 | static int nilfs_segctor_thread(void *arg) | ||
2700 | { | ||
2701 | struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; | ||
2702 | struct timer_list timer; | ||
2703 | int timeout = 0; | ||
2704 | |||
2705 | init_timer(&timer); | ||
2706 | timer.data = (unsigned long)current; | ||
2707 | timer.function = nilfs_construction_timeout; | ||
2708 | sci->sc_timer = &timer; | ||
2709 | |||
2710 | /* start sync. */ | ||
2711 | sci->sc_task = current; | ||
2712 | wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ | ||
2713 | printk(KERN_INFO | ||
2714 | "segctord starting. Construction interval = %lu seconds, " | ||
2715 | "CP frequency < %lu seconds\n", | ||
2716 | sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); | ||
2717 | |||
2718 | spin_lock(&sci->sc_state_lock); | ||
2719 | loop: | ||
2720 | for (;;) { | ||
2721 | int mode; | ||
2722 | |||
2723 | if (sci->sc_state & NILFS_SEGCTOR_QUIT) | ||
2724 | goto end_thread; | ||
2725 | |||
2726 | if (timeout || sci->sc_seq_request != sci->sc_seq_done) | ||
2727 | mode = SC_LSEG_SR; | ||
2728 | else if (!sci->sc_flush_request) | ||
2729 | break; | ||
2730 | else | ||
2731 | mode = nilfs_segctor_flush_mode(sci); | ||
2732 | |||
2733 | spin_unlock(&sci->sc_state_lock); | ||
2734 | nilfs_segctor_thread_construct(sci, mode); | ||
2735 | spin_lock(&sci->sc_state_lock); | ||
2736 | timeout = 0; | ||
2737 | } | ||
2738 | |||
2739 | |||
2740 | if (freezing(current)) { | ||
2741 | spin_unlock(&sci->sc_state_lock); | ||
2742 | refrigerator(); | ||
2743 | spin_lock(&sci->sc_state_lock); | ||
2744 | } else { | ||
2745 | DEFINE_WAIT(wait); | ||
2746 | int should_sleep = 1; | ||
2747 | |||
2748 | prepare_to_wait(&sci->sc_wait_daemon, &wait, | ||
2749 | TASK_INTERRUPTIBLE); | ||
2750 | |||
2751 | if (sci->sc_seq_request != sci->sc_seq_done) | ||
2752 | should_sleep = 0; | ||
2753 | else if (sci->sc_flush_request) | ||
2754 | should_sleep = 0; | ||
2755 | else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) | ||
2756 | should_sleep = time_before(jiffies, | ||
2757 | sci->sc_timer->expires); | ||
2758 | |||
2759 | if (should_sleep) { | ||
2760 | spin_unlock(&sci->sc_state_lock); | ||
2761 | schedule(); | ||
2762 | spin_lock(&sci->sc_state_lock); | ||
2763 | } | ||
2764 | finish_wait(&sci->sc_wait_daemon, &wait); | ||
2765 | timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && | ||
2766 | time_after_eq(jiffies, sci->sc_timer->expires)); | ||
2767 | } | ||
2768 | goto loop; | ||
2769 | |||
2770 | end_thread: | ||
2771 | spin_unlock(&sci->sc_state_lock); | ||
2772 | del_timer_sync(sci->sc_timer); | ||
2773 | sci->sc_timer = NULL; | ||
2774 | |||
2775 | /* end sync. */ | ||
2776 | sci->sc_task = NULL; | ||
2777 | wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ | ||
2778 | return 0; | ||
2779 | } | ||
2780 | |||
2781 | static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) | ||
2782 | { | ||
2783 | struct task_struct *t; | ||
2784 | |||
2785 | t = kthread_run(nilfs_segctor_thread, sci, "segctord"); | ||
2786 | if (IS_ERR(t)) { | ||
2787 | int err = PTR_ERR(t); | ||
2788 | |||
2789 | printk(KERN_ERR "NILFS: error %d creating segctord thread\n", | ||
2790 | err); | ||
2791 | return err; | ||
2792 | } | ||
2793 | wait_event(sci->sc_wait_task, sci->sc_task != NULL); | ||
2794 | return 0; | ||
2795 | } | ||
2796 | |||
/*
 * nilfs_segctor_kill_thread - request the segctord thread to quit and
 * wait until it has gone.
 *
 * Called with sci->sc_state_lock held (see nilfs_segctor_destroy());
 * the lock is dropped while waiting for the thread to exit and
 * reacquired before returning.
 */
static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
{
	sci->sc_state |= NILFS_SEGCTOR_QUIT;

	while (sci->sc_task) {
		wake_up(&sci->sc_wait_daemon);
		/* Let the thread run and exit; it clears sc_task and
		   signals sc_wait_task on its way out */
		spin_unlock(&sci->sc_state_lock);
		wait_event(sci->sc_wait_task, sci->sc_task == NULL);
		spin_lock(&sci->sc_state_lock);
	}
}
2808 | |||
/*
 * nilfs_segctor_init - mark all pre-existing requests as done and start
 * the segctord thread.
 */
static int nilfs_segctor_init(struct nilfs_sc_info *sci)
{
	sci->sc_seq_done = sci->sc_seq_request;

	return nilfs_segctor_start_thread(sci);
}
2815 | |||
2816 | /* | ||
2817 | * Setup & clean-up functions | ||
2818 | */ | ||
2819 | static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) | ||
2820 | { | ||
2821 | struct nilfs_sc_info *sci; | ||
2822 | |||
2823 | sci = kzalloc(sizeof(*sci), GFP_KERNEL); | ||
2824 | if (!sci) | ||
2825 | return NULL; | ||
2826 | |||
2827 | sci->sc_sbi = sbi; | ||
2828 | sci->sc_super = sbi->s_super; | ||
2829 | |||
2830 | init_waitqueue_head(&sci->sc_wait_request); | ||
2831 | init_waitqueue_head(&sci->sc_wait_daemon); | ||
2832 | init_waitqueue_head(&sci->sc_wait_task); | ||
2833 | spin_lock_init(&sci->sc_state_lock); | ||
2834 | INIT_LIST_HEAD(&sci->sc_dirty_files); | ||
2835 | INIT_LIST_HEAD(&sci->sc_segbufs); | ||
2836 | INIT_LIST_HEAD(&sci->sc_gc_inodes); | ||
2837 | INIT_LIST_HEAD(&sci->sc_cleaning_segments); | ||
2838 | INIT_LIST_HEAD(&sci->sc_copied_buffers); | ||
2839 | |||
2840 | sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; | ||
2841 | sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; | ||
2842 | sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; | ||
2843 | |||
2844 | if (sbi->s_interval) | ||
2845 | sci->sc_interval = sbi->s_interval; | ||
2846 | if (sbi->s_watermark) | ||
2847 | sci->sc_watermark = sbi->s_watermark; | ||
2848 | return sci; | ||
2849 | } | ||
2850 | |||
2851 | static void nilfs_segctor_write_out(struct nilfs_sc_info *sci) | ||
2852 | { | ||
2853 | int ret, retrycount = NILFS_SC_CLEANUP_RETRY; | ||
2854 | |||
2855 | /* The segctord thread was stopped and its timer was removed. | ||
2856 | But some tasks remain. */ | ||
2857 | do { | ||
2858 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
2859 | struct nilfs_transaction_info ti; | ||
2860 | struct nilfs_segctor_req req = { .mode = SC_LSEG_SR }; | ||
2861 | |||
2862 | nilfs_transaction_lock(sbi, &ti, 0); | ||
2863 | nilfs_segctor_accept(sci, &req); | ||
2864 | ret = nilfs_segctor_construct(sci, &req); | ||
2865 | nilfs_segctor_notify(sci, &req); | ||
2866 | nilfs_transaction_unlock(sbi); | ||
2867 | |||
2868 | } while (ret && retrycount-- > 0); | ||
2869 | } | ||
2870 | |||
2871 | /** | ||
2872 | * nilfs_segctor_destroy - destroy the segment constructor. | ||
2873 | * @sci: nilfs_sc_info | ||
2874 | * | ||
2875 | * nilfs_segctor_destroy() kills the segctord thread and frees | ||
2876 | * the nilfs_sc_info struct. | ||
2877 | * Caller must hold the segment semaphore. | ||
2878 | */ | ||
2879 | static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) | ||
2880 | { | ||
2881 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
2882 | int flag; | ||
2883 | |||
2884 | up_write(&sbi->s_nilfs->ns_segctor_sem); | ||
2885 | |||
2886 | spin_lock(&sci->sc_state_lock); | ||
2887 | nilfs_segctor_kill_thread(sci); | ||
2888 | flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request | ||
2889 | || sci->sc_seq_request != sci->sc_seq_done); | ||
2890 | spin_unlock(&sci->sc_state_lock); | ||
2891 | |||
2892 | if (flag || nilfs_segctor_confirm(sci)) | ||
2893 | nilfs_segctor_write_out(sci); | ||
2894 | |||
2895 | WARN_ON(!list_empty(&sci->sc_copied_buffers)); | ||
2896 | |||
2897 | if (!list_empty(&sci->sc_dirty_files)) { | ||
2898 | nilfs_warning(sbi->s_super, __func__, | ||
2899 | "dirty file(s) after the final construction\n"); | ||
2900 | nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); | ||
2901 | } | ||
2902 | |||
2903 | if (!list_empty(&sci->sc_cleaning_segments)) | ||
2904 | nilfs_dispose_segment_list(&sci->sc_cleaning_segments); | ||
2905 | |||
2906 | WARN_ON(!list_empty(&sci->sc_segbufs)); | ||
2907 | |||
2908 | down_write(&sbi->s_nilfs->ns_segctor_sem); | ||
2909 | |||
2910 | kfree(sci); | ||
2911 | } | ||
2912 | |||
2913 | /** | ||
2914 | * nilfs_attach_segment_constructor - attach a segment constructor | ||
2915 | * @sbi: nilfs_sb_info | ||
2916 | * | ||
2917 | * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, | ||
2918 | * initilizes it, and starts the segment constructor. | ||
2919 | * | ||
2920 | * Return Value: On success, 0 is returned. On error, one of the following | ||
2921 | * negative error code is returned. | ||
2922 | * | ||
2923 | * %-ENOMEM - Insufficient memory available. | ||
2924 | */ | ||
2925 | int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) | ||
2926 | { | ||
2927 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
2928 | int err; | ||
2929 | |||
2930 | /* Each field of nilfs_segctor is cleared through the initialization | ||
2931 | of super-block info */ | ||
2932 | sbi->s_sc_info = nilfs_segctor_new(sbi); | ||
2933 | if (!sbi->s_sc_info) | ||
2934 | return -ENOMEM; | ||
2935 | |||
2936 | nilfs_attach_writer(nilfs, sbi); | ||
2937 | err = nilfs_segctor_init(NILFS_SC(sbi)); | ||
2938 | if (err) { | ||
2939 | nilfs_detach_writer(nilfs, sbi); | ||
2940 | kfree(sbi->s_sc_info); | ||
2941 | sbi->s_sc_info = NULL; | ||
2942 | } | ||
2943 | return err; | ||
2944 | } | ||
2945 | |||
2946 | /** | ||
2947 | * nilfs_detach_segment_constructor - destroy the segment constructor | ||
2948 | * @sbi: nilfs_sb_info | ||
2949 | * | ||
2950 | * nilfs_detach_segment_constructor() kills the segment constructor daemon, | ||
2951 | * frees the struct nilfs_sc_info, and destroy the dirty file list. | ||
2952 | */ | ||
2953 | void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) | ||
2954 | { | ||
2955 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
2956 | LIST_HEAD(garbage_list); | ||
2957 | |||
2958 | down_write(&nilfs->ns_segctor_sem); | ||
2959 | if (NILFS_SC(sbi)) { | ||
2960 | nilfs_segctor_destroy(NILFS_SC(sbi)); | ||
2961 | sbi->s_sc_info = NULL; | ||
2962 | } | ||
2963 | |||
2964 | /* Force to free the list of dirty files */ | ||
2965 | spin_lock(&sbi->s_inode_lock); | ||
2966 | if (!list_empty(&sbi->s_dirty_files)) { | ||
2967 | list_splice_init(&sbi->s_dirty_files, &garbage_list); | ||
2968 | nilfs_warning(sbi->s_super, __func__, | ||
2969 | "Non empty dirty list after the last " | ||
2970 | "segment construction\n"); | ||
2971 | } | ||
2972 | spin_unlock(&sbi->s_inode_lock); | ||
2973 | up_write(&nilfs->ns_segctor_sem); | ||
2974 | |||
2975 | nilfs_dispose_list(sbi, &garbage_list, 1); | ||
2976 | nilfs_detach_writer(nilfs, sbi); | ||
2977 | } | ||
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h new file mode 100644 index 000000000000..a98fc1ed0bbb --- /dev/null +++ b/fs/nilfs2/segment.h | |||
@@ -0,0 +1,243 @@ | |||
1 | /* | ||
2 | * segment.h - NILFS Segment constructor prototypes and definitions | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | #ifndef _NILFS_SEGMENT_H | ||
24 | #define _NILFS_SEGMENT_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "sb.h" | ||
31 | |||
32 | /** | ||
33 | * struct nilfs_recovery_info - Recovery infomation | ||
34 | * @ri_need_recovery: Recovery status | ||
35 | * @ri_super_root: Block number of the last super root | ||
36 | * @ri_ri_cno: Number of the last checkpoint | ||
37 | * @ri_lsegs_start: Region for roll-forwarding (start block number) | ||
38 | * @ri_lsegs_end: Region for roll-forwarding (end block number) | ||
39 | * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start | ||
40 | * @ri_used_segments: List of segments to be mark active | ||
41 | * @ri_pseg_start: Block number of the last partial segment | ||
42 | * @ri_seq: Sequence number on the last partial segment | ||
43 | * @ri_segnum: Segment number on the last partial segment | ||
44 | * @ri_nextnum: Next segment number on the last partial segment | ||
45 | */ | ||
46 | struct nilfs_recovery_info { | ||
47 | int ri_need_recovery; | ||
48 | sector_t ri_super_root; | ||
49 | __u64 ri_cno; | ||
50 | |||
51 | sector_t ri_lsegs_start; | ||
52 | sector_t ri_lsegs_end; | ||
53 | u64 ri_lsegs_start_seq; | ||
54 | struct list_head ri_used_segments; | ||
55 | sector_t ri_pseg_start; | ||
56 | u64 ri_seq; | ||
57 | __u64 ri_segnum; | ||
58 | __u64 ri_nextnum; | ||
59 | }; | ||
60 | |||
61 | /* ri_need_recovery */ | ||
62 | #define NILFS_RECOVERY_SR_UPDATED 1 /* The super root was updated */ | ||
63 | #define NILFS_RECOVERY_ROLLFORWARD_DONE 2 /* Rollforward was carried out */ | ||
64 | |||
65 | /** | ||
66 | * struct nilfs_cstage - Context of collection stage | ||
67 | * @scnt: Stage count | ||
68 | * @flags: State flags | ||
69 | * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file | ||
70 | * @gc_inode_ptr: Pointer on the list of gc-inodes | ||
71 | */ | ||
72 | struct nilfs_cstage { | ||
73 | int scnt; | ||
74 | unsigned flags; | ||
75 | struct nilfs_inode_info *dirty_file_ptr; | ||
76 | struct nilfs_inode_info *gc_inode_ptr; | ||
77 | }; | ||
78 | |||
79 | struct nilfs_segment_buffer; | ||
80 | |||
81 | struct nilfs_segsum_pointer { | ||
82 | struct buffer_head *bh; | ||
83 | unsigned offset; /* offset in bytes */ | ||
84 | }; | ||
85 | |||
86 | /** | ||
87 | * struct nilfs_sc_info - Segment constructor information | ||
88 | * @sc_super: Back pointer to super_block struct | ||
89 | * @sc_sbi: Back pointer to nilfs_sb_info struct | ||
90 | * @sc_nblk_inc: Block count of current generation | ||
91 | * @sc_dirty_files: List of files to be written | ||
92 | * @sc_gc_inodes: List of GC inodes having blocks to be written | ||
93 | * @sc_cleaning_segments: List of segments to be freed through construction | ||
94 | * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data | ||
95 | * @sc_dsync_inode: inode whose data pages are written for a sync operation | ||
96 | * @sc_dsync_start: start byte offset of data pages | ||
97 | * @sc_dsync_end: end byte offset of data pages (inclusive) | ||
98 | * @sc_segbufs: List of segment buffers | ||
99 | * @sc_segbuf_nblocks: Number of available blocks in segment buffers. | ||
100 | * @sc_curseg: Current segment buffer | ||
101 | * @sc_super_root: Pointer to the super root buffer | ||
102 | * @sc_stage: Collection stage | ||
103 | * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary | ||
104 | * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary | ||
105 | * @sc_blk_cnt: Block count of a file | ||
106 | * @sc_datablk_cnt: Data block count of a file | ||
107 | * @sc_nblk_this_inc: Number of blocks included in the current logical segment | ||
108 | * @sc_seg_ctime: Creation time | ||
109 | * @sc_flags: Internal flags | ||
110 | * @sc_state_lock: spinlock for sc_state and so on | ||
111 | * @sc_state: Segctord state flags | ||
112 | * @sc_flush_request: inode bitmap of metadata files to be flushed | ||
113 | * @sc_wait_request: Client request queue | ||
114 | * @sc_wait_daemon: Daemon wait queue | ||
115 | * @sc_wait_task: Start/end wait queue to control segctord task | ||
116 | * @sc_seq_request: Request counter | ||
117 | * @sc_seq_done: Completion counter | ||
118 | * @sc_sync: Request of explicit sync operation | ||
119 | * @sc_interval: Timeout value of background construction | ||
120 | * @sc_mjcp_freq: Frequency of creating checkpoints | ||
121 | * @sc_lseg_stime: Start time of the latest logical segment | ||
122 | * @sc_watermark: Watermark for the number of dirty buffers | ||
123 | * @sc_timer: Timer for segctord | ||
124 | * @sc_task: current thread of segctord | ||
125 | */ | ||
126 | struct nilfs_sc_info { | ||
127 | struct super_block *sc_super; | ||
128 | struct nilfs_sb_info *sc_sbi; | ||
129 | |||
130 | unsigned long sc_nblk_inc; | ||
131 | |||
132 | struct list_head sc_dirty_files; | ||
133 | struct list_head sc_gc_inodes; | ||
134 | struct list_head sc_cleaning_segments; | ||
135 | struct list_head sc_copied_buffers; | ||
136 | |||
137 | struct nilfs_inode_info *sc_dsync_inode; | ||
138 | loff_t sc_dsync_start; | ||
139 | loff_t sc_dsync_end; | ||
140 | |||
141 | /* Segment buffers */ | ||
142 | struct list_head sc_segbufs; | ||
143 | unsigned long sc_segbuf_nblocks; | ||
144 | struct nilfs_segment_buffer *sc_curseg; | ||
145 | struct buffer_head *sc_super_root; | ||
146 | |||
147 | struct nilfs_cstage sc_stage; | ||
148 | |||
149 | struct nilfs_segsum_pointer sc_finfo_ptr; | ||
150 | struct nilfs_segsum_pointer sc_binfo_ptr; | ||
151 | unsigned long sc_blk_cnt; | ||
152 | unsigned long sc_datablk_cnt; | ||
153 | unsigned long sc_nblk_this_inc; | ||
154 | time_t sc_seg_ctime; | ||
155 | |||
156 | unsigned long sc_flags; | ||
157 | |||
158 | spinlock_t sc_state_lock; | ||
159 | unsigned long sc_state; | ||
160 | unsigned long sc_flush_request; | ||
161 | |||
162 | wait_queue_head_t sc_wait_request; | ||
163 | wait_queue_head_t sc_wait_daemon; | ||
164 | wait_queue_head_t sc_wait_task; | ||
165 | |||
166 | __u32 sc_seq_request; | ||
167 | __u32 sc_seq_done; | ||
168 | |||
169 | int sc_sync; | ||
170 | unsigned long sc_interval; | ||
171 | unsigned long sc_mjcp_freq; | ||
172 | unsigned long sc_lseg_stime; /* in 1/HZ seconds */ | ||
173 | unsigned long sc_watermark; | ||
174 | |||
175 | struct timer_list *sc_timer; | ||
176 | struct task_struct *sc_task; | ||
177 | }; | ||
178 | |||
/* sc_flags: bit numbers for nilfs_sc_info->sc_flags (test_bit/set_bit) */
enum {
	NILFS_SC_DIRTY,		/* One or more dirty meta-data blocks exist */
	NILFS_SC_UNCLOSED,	/* Logical segment is not closed */
	NILFS_SC_SUPER_ROOT,	/* The latest segment has a super root */
	NILFS_SC_PRIOR_FLUSH,	/* Requesting immediate flush without making a
				   checkpoint */
	NILFS_SC_HAVE_DELTA,	/* Next checkpoint will have update of files
				   other than DAT, cpfile, sufile, or files
				   moved by GC */
};

/* sc_state: flag bits in nilfs_sc_info->sc_state */
#define NILFS_SEGCTOR_QUIT	    0x0001  /* segctord is being destroyed */
#define NILFS_SEGCTOR_COMMIT	    0x0004  /* committed transaction exists */

/*
 * Constant parameters
 */
#define NILFS_SC_CLEANUP_RETRY	    3  /* Retry count of construction when
					  destroying segctord */

/*
 * Default values of timeout, in seconds.
 */
#define NILFS_SC_DEFAULT_TIMEOUT    5   /* Timeout value of dirty blocks.
					   It triggers construction of a
					   logical segment with a super root */
#define NILFS_SC_DEFAULT_SR_FREQ    30  /* Maximum frequency of super root
					   creation */

/*
 * The default threshold amount of data, in block counts.
 */
#define NILFS_SC_DEFAULT_WATERMARK  3600
214 | |||
215 | |||
216 | /* segment.c */ | ||
217 | extern int nilfs_init_transaction_cache(void); | ||
218 | extern void nilfs_destroy_transaction_cache(void); | ||
219 | extern void nilfs_relax_pressure_in_lock(struct super_block *); | ||
220 | |||
221 | extern int nilfs_construct_segment(struct super_block *); | ||
222 | extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, | ||
223 | loff_t, loff_t); | ||
224 | extern void nilfs_flush_segment(struct super_block *, ino_t); | ||
225 | extern int nilfs_clean_segments(struct super_block *, void __user *); | ||
226 | |||
227 | extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, | ||
228 | __u64 *, size_t); | ||
229 | extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); | ||
230 | |||
231 | extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); | ||
232 | extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); | ||
233 | |||
234 | /* recovery.c */ | ||
235 | extern int nilfs_read_super_root_block(struct super_block *, sector_t, | ||
236 | struct buffer_head **, int); | ||
237 | extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, | ||
238 | struct nilfs_recovery_info *); | ||
239 | extern int nilfs_recover_logical_segments(struct the_nilfs *, | ||
240 | struct nilfs_sb_info *, | ||
241 | struct nilfs_recovery_info *); | ||
242 | |||
243 | #endif /* _NILFS_SEGMENT_H */ | ||
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c new file mode 100644 index 000000000000..c774cf397e2f --- /dev/null +++ b/fs/nilfs2/sufile.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /* | ||
2 | * sufile.c - NILFS segment usage file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/fs.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "mdt.h" | ||
30 | #include "sufile.h" | ||
31 | |||
32 | |||
/* Number of segment usage entries stored in each sufile block. */
static inline unsigned long
nilfs_sufile_segment_usages_per_block(const struct inode *sufile)
{
	return NILFS_MDT(sufile)->mi_entries_per_block;
}
38 | |||
39 | static unsigned long | ||
40 | nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) | ||
41 | { | ||
42 | __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; | ||
43 | do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); | ||
44 | return (unsigned long)t; | ||
45 | } | ||
46 | |||
47 | static unsigned long | ||
48 | nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum) | ||
49 | { | ||
50 | __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; | ||
51 | return do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); | ||
52 | } | ||
53 | |||
54 | static unsigned long | ||
55 | nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, | ||
56 | __u64 max) | ||
57 | { | ||
58 | return min_t(unsigned long, | ||
59 | nilfs_sufile_segment_usages_per_block(sufile) - | ||
60 | nilfs_sufile_get_offset(sufile, curr), | ||
61 | max - curr + 1); | ||
62 | } | ||
63 | |||
/*
 * The sufile header sits at the start of its buffer; translate the
 * mapped page address @kaddr to the header's in-page position.
 */
static inline struct nilfs_sufile_header *
nilfs_sufile_block_get_header(const struct inode *sufile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	return kaddr + bh_offset(bh);
}
71 | |||
72 | static struct nilfs_segment_usage * | ||
73 | nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, | ||
74 | struct buffer_head *bh, void *kaddr) | ||
75 | { | ||
76 | return kaddr + bh_offset(bh) + | ||
77 | nilfs_sufile_get_offset(sufile, segnum) * | ||
78 | NILFS_MDT(sufile)->mi_entry_size; | ||
79 | } | ||
80 | |||
/* Read the sufile header block (block 0); create flag is 0, so the
 * block is never created here. */
static inline int nilfs_sufile_get_header_block(struct inode *sufile,
						struct buffer_head **bhp)
{
	return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp);
}
86 | |||
87 | static inline int | ||
88 | nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, | ||
89 | int create, struct buffer_head **bhp) | ||
90 | { | ||
91 | return nilfs_mdt_get_block(sufile, | ||
92 | nilfs_sufile_get_blkoff(sufile, segnum), | ||
93 | create, NULL, bhp); | ||
94 | } | ||
95 | |||
/**
 * nilfs_sufile_alloc - allocate a segment
 * @sufile: inode of segment usage file
 * @segnump: pointer to segment number
 *
 * Description: nilfs_sufile_alloc() allocates a clean segment, marking it
 * dirty and updating the clean/dirty counters and the last-allocation
 * cursor in the sufile header.
 *
 * Return Value: On success, 0 is returned and the segment number of the
 * allocated segment is stored in the place pointed by @segnump. On error, one
 * of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOSPC - No clean segment left.
 */
int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
{
	struct buffer_head *header_bh, *su_bh;
	struct the_nilfs *nilfs;
	struct nilfs_sufile_header *header;
	struct nilfs_segment_usage *su;
	size_t susz = NILFS_MDT(sufile)->mi_entry_size;
	__u64 segnum, maxsegnum, last_alloc;
	void *kaddr;
	unsigned long nsegments, ncleansegs, nsus;
	int ret, i, j;

	down_write(&NILFS_MDT(sufile)->mi_sem);

	nilfs = NILFS_MDT(sufile)->mi_nilfs;

	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
	if (ret < 0)
		goto out_sem;
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
	ncleansegs = le64_to_cpu(header->sh_ncleansegs);
	last_alloc = le64_to_cpu(header->sh_last_alloc);
	kunmap_atomic(kaddr, KM_USER0);

	/*
	 * Scan for a clean segment starting just after the previous
	 * allocation, wrapping around to segment 0 at the end; the scan
	 * stops once every segment has been examined.
	 */
	nsegments = nilfs_sufile_get_nsegments(sufile);
	segnum = last_alloc + 1;
	maxsegnum = nsegments - 1;
	for (i = 0; i < nsegments; i += nsus) {
		if (segnum >= nsegments) {
			/* wrap around */
			segnum = 0;
			maxsegnum = last_alloc;
		}
		ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
							   &su_bh);
		if (ret < 0)
			goto out_header;
		kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
		su = nilfs_sufile_block_get_segment_usage(
			sufile, segnum, su_bh, kaddr);

		/* examine all usage entries of this block in one mapping */
		nsus = nilfs_sufile_segment_usages_in_block(
			sufile, segnum, maxsegnum);
		for (j = 0; j < nsus; j++, su = (void *)su + susz, segnum++) {
			if (!nilfs_segment_usage_clean(su))
				continue;
			/* found a clean segment */
			nilfs_segment_usage_set_dirty(su);
			kunmap_atomic(kaddr, KM_USER0);

			/* account the transition in the header block;
			 * only one atomic mapping is held at a time */
			kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
			header = nilfs_sufile_block_get_header(
				sufile, header_bh, kaddr);
			le64_add_cpu(&header->sh_ncleansegs, -1);
			le64_add_cpu(&header->sh_ndirtysegs, 1);
			header->sh_last_alloc = cpu_to_le64(segnum);
			kunmap_atomic(kaddr, KM_USER0);

			nilfs_mdt_mark_buffer_dirty(header_bh);
			nilfs_mdt_mark_buffer_dirty(su_bh);
			nilfs_mdt_mark_dirty(sufile);
			brelse(su_bh);
			*segnump = segnum;
			goto out_header;
		}

		kunmap_atomic(kaddr, KM_USER0);
		brelse(su_bh);
	}

	/* no segments left */
	ret = -ENOSPC;

out_header:
	brelse(header_bh);

out_sem:
	up_write(&NILFS_MDT(sufile)->mi_sem);
	return ret;
}
194 | |||
/**
 * nilfs_sufile_cancel_free - undo the deallocation of a segment
 * @sufile: inode of segment usage file
 * @segnum: segment number
 *
 * Description: nilfs_sufile_cancel_free() turns the usage entry of the
 * clean segment @segnum back to the dirty state and adjusts the
 * clean/dirty counters in the sufile header accordingly.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
{
	struct buffer_head *header_bh, *su_bh;
	struct the_nilfs *nilfs;
	struct nilfs_sufile_header *header;
	struct nilfs_segment_usage *su;
	void *kaddr;
	int ret;

	down_write(&NILFS_MDT(sufile)->mi_sem);

	nilfs = NILFS_MDT(sufile)->mi_nilfs;

	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
	if (ret < 0)
		goto out_sem;

	ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
	if (ret < 0)
		goto out_header;

	kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
	su = nilfs_sufile_block_get_segment_usage(
		sufile, segnum, su_bh, kaddr);
	if (unlikely(!nilfs_segment_usage_clean(su))) {
		printk(KERN_WARNING "%s: segment %llu must be clean\n",
		       __func__, (unsigned long long)segnum);
		kunmap_atomic(kaddr, KM_USER0);
		/* warn-and-skip: ret is still 0 here, so the unexpected
		 * state is treated as a no-op success */
		goto out_su_bh;
	}
	nilfs_segment_usage_set_dirty(su);
	kunmap_atomic(kaddr, KM_USER0);

	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
	le64_add_cpu(&header->sh_ncleansegs, -1);
	le64_add_cpu(&header->sh_ndirtysegs, 1);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(header_bh);
	nilfs_mdt_mark_buffer_dirty(su_bh);
	nilfs_mdt_mark_dirty(sufile);

out_su_bh:
	brelse(su_bh);
out_header:
	brelse(header_bh);
out_sem:
	up_write(&NILFS_MDT(sufile)->mi_sem);
	return ret;
}
260 | |||
/**
 * nilfs_sufile_freev - free segments
 * @sufile: inode of segment usage file
 * @segnum: array of segment numbers
 * @nsegs: number of segments
 *
 * Description: nilfs_sufile_freev() frees segments specified by @segnum and
 * @nsegs, which must have been returned by a previous call to
 * nilfs_sufile_alloc().
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
#define NILFS_SUFILE_FREEV_PREALLOC 16
int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs)
{
	struct buffer_head *header_bh, **su_bh,
		*su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC];
	struct the_nilfs *nilfs;
	struct nilfs_sufile_header *header;
	struct nilfs_segment_usage *su;
	void *kaddr;
	int ret, i;

	down_write(&NILFS_MDT(sufile)->mi_sem);

	nilfs = NILFS_MDT(sufile)->mi_nilfs;

	/* prepare resources: small batches use the on-stack array and
	 * avoid the allocation entirely */
	if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC)
		su_bh = su_bh_prealloc;
	else {
		su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS);
		if (su_bh == NULL) {
			ret = -ENOMEM;
			goto out_sem;
		}
	}

	/* fetch every needed block before mutating anything, so a read
	 * failure leaves all usage entries untouched */
	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
	if (ret < 0)
		goto out_su_bh;
	for (i = 0; i < nsegs; i++) {
		ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i],
							   0, &su_bh[i]);
		if (ret < 0)
			goto out_bh;
	}

	/* free segments */
	for (i = 0; i < nsegs; i++) {
		kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0);
		su = nilfs_sufile_block_get_segment_usage(
			sufile, segnum[i], su_bh[i], kaddr);
		WARN_ON(nilfs_segment_usage_error(su));
		nilfs_segment_usage_set_clean(su);
		kunmap_atomic(kaddr, KM_USER0);
		nilfs_mdt_mark_buffer_dirty(su_bh[i]);
	}
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
	le64_add_cpu(&header->sh_ncleansegs, nsegs);
	le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs);
	kunmap_atomic(kaddr, KM_USER0);
	nilfs_mdt_mark_buffer_dirty(header_bh);
	nilfs_mdt_mark_dirty(sufile);

out_bh:
	/* on failure at index i only buffers 0..i-1 were acquired;
	 * on success i == nsegs and all of them are released */
	for (i--; i >= 0; i--)
		brelse(su_bh[i]);
	brelse(header_bh);

out_su_bh:
	if (su_bh != su_bh_prealloc)
		kfree(su_bh);

out_sem:
	up_write(&NILFS_MDT(sufile)->mi_sem);
	return ret;
}
345 | |||
/**
 * nilfs_sufile_free - free a single segment
 * @sufile: inode of segment usage file
 * @segnum: segment number to be freed
 *
 * Description: single-segment convenience wrapper around
 * nilfs_sufile_freev(); the same return values apply.
 */
int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
{
	return nilfs_sufile_freev(sufile, &segnum, 1);
}
355 | |||
/**
 * nilfs_sufile_get_segment_usage - get a segment usage
 * @sufile: inode of segment usage file
 * @segnum: segment number
 * @sup: pointer to segment usage
 * @bhp: pointer to buffer head
 *
 * Description: nilfs_sufile_get_segment_usage() acquires the segment usage
 * specified by @segnum.  The buffer is returned with its page kmap()ped;
 * the caller must release both with nilfs_sufile_put_segment_usage().
 *
 * Return Value: On success, 0 is returned, and the segment usage and the
 * buffer head of the buffer on which the segment usage is located are stored
 * in the place pointed by @sup and @bhp, respectively. On error, one of the
 * following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-EINVAL - Invalid segment usage number.
 */
int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum,
				   struct nilfs_segment_usage **sup,
				   struct buffer_head **bhp)
{
	struct buffer_head *bh;
	struct nilfs_segment_usage *su;
	void *kaddr;
	int ret;

	/* segnum is 0 origin */
	if (segnum >= nilfs_sufile_get_nsegments(sufile))
		return -EINVAL;
	down_write(&NILFS_MDT(sufile)->mi_sem);
	/* create flag is 1: the block is created if it does not exist */
	ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh);
	if (ret < 0)
		goto out_sem;
	/* sleeping kmap (not kmap_atomic): the mapping outlives this
	 * call and is undone by nilfs_sufile_put_segment_usage() */
	kaddr = kmap(bh->b_page);
	su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
	if (nilfs_segment_usage_error(su)) {
		kunmap(bh->b_page);
		brelse(bh);
		ret = -EINVAL;
		goto out_sem;
	}

	if (sup != NULL)
		*sup = su;
	*bhp = bh;

out_sem:
	up_write(&NILFS_MDT(sufile)->mi_sem);
	return ret;
}
410 | |||
/**
 * nilfs_sufile_put_segment_usage - put a segment usage
 * @sufile: inode of segment usage file
 * @segnum: segment number
 * @bh: buffer head
 *
 * Description: nilfs_sufile_put_segment_usage() releases the segment usage
 * specified by @segnum. @bh must be the buffer head which have been returned
 * by a previous call to nilfs_sufile_get_segment_usage() with @segnum.
 * It undoes the kmap() taken there and drops the buffer reference.
 */
void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum,
				    struct buffer_head *bh)
{
	kunmap(bh->b_page);
	brelse(bh);
}
427 | |||
/**
 * nilfs_sufile_get_stat - get segment usage statistics
 * @sufile: inode of segment usage file
 * @sustat: pointer to a structure of segment usage statistics
 *
 * Description: nilfs_sufile_get_stat() returns information about segment
 * usage.
 *
 * Return Value: On success, 0 is returned, and segment usage information is
 * stored in the place pointed by @sustat. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
{
	struct buffer_head *header_bh;
	struct nilfs_sufile_header *header;
	struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs;
	void *kaddr;
	int ret;

	down_read(&NILFS_MDT(sufile)->mi_sem);

	ret = nilfs_sufile_get_header_block(sufile, &header_bh);
	if (ret < 0)
		goto out_sem;

	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
	sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
	sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
	sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
	sustat->ss_ctime = nilfs->ns_ctime;
	sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime;
	/* ns_prot_seq is guarded by its own spinlock */
	spin_lock(&nilfs->ns_last_segment_lock);
	sustat->ss_prot_seq = nilfs->ns_prot_seq;
	spin_unlock(&nilfs->ns_last_segment_lock);
	kunmap_atomic(kaddr, KM_USER0);
	brelse(header_bh);

out_sem:
	up_read(&NILFS_MDT(sufile)->mi_sem);
	return ret;
}
475 | |||
476 | /** | ||
477 | * nilfs_sufile_get_ncleansegs - get the number of clean segments | ||
478 | * @sufile: inode of segment usage file | ||
479 | * @nsegsp: pointer to the number of clean segments | ||
480 | * | ||
481 | * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean | ||
482 | * segments. | ||
483 | * | ||
484 | * Return Value: On success, 0 is returned and the number of clean segments is | ||
485 | * stored in the place pointed by @nsegsp. On error, one of the following | ||
486 | * negative error codes is returned. | ||
487 | * | ||
488 | * %-EIO - I/O error. | ||
489 | * | ||
490 | * %-ENOMEM - Insufficient amount of memory available. | ||
491 | */ | ||
492 | int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) | ||
493 | { | ||
494 | struct nilfs_sustat sustat; | ||
495 | int ret; | ||
496 | |||
497 | ret = nilfs_sufile_get_stat(sufile, &sustat); | ||
498 | if (ret == 0) | ||
499 | *nsegsp = sustat.ss_ncleansegs; | ||
500 | return ret; | ||
501 | } | ||
502 | |||
503 | /** | ||
504 | * nilfs_sufile_set_error - mark a segment as erroneous | ||
505 | * @sufile: inode of segment usage file | ||
506 | * @segnum: segment number | ||
507 | * | ||
508 | * Description: nilfs_sufile_set_error() marks the segment specified by | ||
509 | * @segnum as erroneous. The error segment will never be used again. | ||
510 | * | ||
511 | * Return Value: On success, 0 is returned. On error, one of the following | ||
512 | * negative error codes is returned. | ||
513 | * | ||
514 | * %-EIO - I/O error. | ||
515 | * | ||
516 | * %-ENOMEM - Insufficient amount of memory available. | ||
517 | * | ||
518 | * %-EINVAL - Invalid segment usage number. | ||
519 | */ | ||
520 | int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) | ||
521 | { | ||
522 | struct buffer_head *header_bh, *su_bh; | ||
523 | struct nilfs_segment_usage *su; | ||
524 | struct nilfs_sufile_header *header; | ||
525 | void *kaddr; | ||
526 | int ret; | ||
527 | |||
528 | if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { | ||
529 | printk(KERN_WARNING "%s: invalid segment number: %llu\n", | ||
530 | __func__, (unsigned long long)segnum); | ||
531 | return -EINVAL; | ||
532 | } | ||
533 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
534 | |||
535 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
536 | if (ret < 0) | ||
537 | goto out_sem; | ||
538 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); | ||
539 | if (ret < 0) | ||
540 | goto out_header; | ||
541 | |||
542 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | ||
543 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); | ||
544 | if (nilfs_segment_usage_error(su)) { | ||
545 | kunmap_atomic(kaddr, KM_USER0); | ||
546 | brelse(su_bh); | ||
547 | goto out_header; | ||
548 | } | ||
549 | |||
550 | nilfs_segment_usage_set_error(su); | ||
551 | kunmap_atomic(kaddr, KM_USER0); | ||
552 | brelse(su_bh); | ||
553 | |||
554 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
555 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
556 | le64_add_cpu(&header->sh_ndirtysegs, -1); | ||
557 | kunmap_atomic(kaddr, KM_USER0); | ||
558 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
559 | nilfs_mdt_mark_buffer_dirty(su_bh); | ||
560 | nilfs_mdt_mark_dirty(sufile); | ||
561 | brelse(su_bh); | ||
562 | |||
563 | out_header: | ||
564 | brelse(header_bh); | ||
565 | |||
566 | out_sem: | ||
567 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | /** | ||
572 | * nilfs_sufile_get_suinfo - | ||
573 | * @sufile: inode of segment usage file | ||
574 | * @segnum: segment number to start looking | ||
575 | * @si: array of suinfo | ||
576 | * @nsi: size of suinfo array | ||
577 | * | ||
578 | * Description: | ||
579 | * | ||
580 | * Return Value: On success, 0 is returned and .... On error, one of the | ||
581 | * following negative error codes is returned. | ||
582 | * | ||
583 | * %-EIO - I/O error. | ||
584 | * | ||
585 | * %-ENOMEM - Insufficient amount of memory available. | ||
586 | */ | ||
587 | ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, | ||
588 | struct nilfs_suinfo *si, size_t nsi) | ||
589 | { | ||
590 | struct buffer_head *su_bh; | ||
591 | struct nilfs_segment_usage *su; | ||
592 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; | ||
593 | struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
594 | void *kaddr; | ||
595 | unsigned long nsegs, segusages_per_block; | ||
596 | ssize_t n; | ||
597 | int ret, i, j; | ||
598 | |||
599 | down_read(&NILFS_MDT(sufile)->mi_sem); | ||
600 | |||
601 | segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); | ||
602 | nsegs = min_t(unsigned long, | ||
603 | nilfs_sufile_get_nsegments(sufile) - segnum, | ||
604 | nsi); | ||
605 | for (i = 0; i < nsegs; i += n, segnum += n) { | ||
606 | n = min_t(unsigned long, | ||
607 | segusages_per_block - | ||
608 | nilfs_sufile_get_offset(sufile, segnum), | ||
609 | nsegs - i); | ||
610 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, | ||
611 | &su_bh); | ||
612 | if (ret < 0) { | ||
613 | if (ret != -ENOENT) | ||
614 | goto out; | ||
615 | /* hole */ | ||
616 | memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); | ||
617 | continue; | ||
618 | } | ||
619 | |||
620 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | ||
621 | su = nilfs_sufile_block_get_segment_usage( | ||
622 | sufile, segnum, su_bh, kaddr); | ||
623 | for (j = 0; j < n; j++, su = (void *)su + susz) { | ||
624 | si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); | ||
625 | si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); | ||
626 | si[i + j].sui_flags = le32_to_cpu(su->su_flags) & | ||
627 | ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); | ||
628 | if (nilfs_segment_is_active(nilfs, segnum + i + j)) | ||
629 | si[i + j].sui_flags |= | ||
630 | (1UL << NILFS_SEGMENT_USAGE_ACTIVE); | ||
631 | } | ||
632 | kunmap_atomic(kaddr, KM_USER0); | ||
633 | brelse(su_bh); | ||
634 | } | ||
635 | ret = nsegs; | ||
636 | |||
637 | out: | ||
638 | up_read(&NILFS_MDT(sufile)->mi_sem); | ||
639 | return ret; | ||
640 | } | ||
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h new file mode 100644 index 000000000000..d595f33a768d --- /dev/null +++ b/fs/nilfs2/sufile.h | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * sufile.h - NILFS segment usage file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_SUFILE_H | ||
24 | #define _NILFS_SUFILE_H | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "mdt.h" | ||
30 | |||
31 | #define NILFS_SUFILE_GFP NILFS_MDT_GFP | ||
32 | |||
/* Total number of segments in the file system, taken from the_nilfs. */
static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
{
	return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments;
}
37 | |||
38 | int nilfs_sufile_alloc(struct inode *, __u64 *); | ||
39 | int nilfs_sufile_cancel_free(struct inode *, __u64); | ||
40 | int nilfs_sufile_freev(struct inode *, __u64 *, size_t); | ||
41 | int nilfs_sufile_free(struct inode *, __u64); | ||
42 | int nilfs_sufile_get_segment_usage(struct inode *, __u64, | ||
43 | struct nilfs_segment_usage **, | ||
44 | struct buffer_head **); | ||
45 | void nilfs_sufile_put_segment_usage(struct inode *, __u64, | ||
46 | struct buffer_head *); | ||
47 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); | ||
48 | int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); | ||
49 | int nilfs_sufile_set_error(struct inode *, __u64); | ||
50 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, | ||
51 | size_t); | ||
52 | |||
53 | |||
54 | #endif /* _NILFS_SUFILE_H */ | ||
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c new file mode 100644 index 000000000000..e117e1ea9bff --- /dev/null +++ b/fs/nilfs2/super.c | |||
@@ -0,0 +1,1323 @@ | |||
1 | /* | ||
2 | * super.c - NILFS module and super block management. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | /* | ||
23 | * linux/fs/ext2/super.c | ||
24 | * | ||
25 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
26 | * Remy Card (card@masi.ibp.fr) | ||
27 | * Laboratoire MASI - Institut Blaise Pascal | ||
28 | * Universite Pierre et Marie Curie (Paris VI) | ||
29 | * | ||
30 | * from | ||
31 | * | ||
32 | * linux/fs/minix/inode.c | ||
33 | * | ||
34 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
35 | * | ||
36 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
37 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
38 | */ | ||
39 | |||
40 | #include <linux/module.h> | ||
41 | #include <linux/string.h> | ||
42 | #include <linux/slab.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/blkdev.h> | ||
45 | #include <linux/parser.h> | ||
46 | #include <linux/random.h> | ||
47 | #include <linux/crc32.h> | ||
48 | #include <linux/smp_lock.h> | ||
49 | #include <linux/vfs.h> | ||
50 | #include <linux/writeback.h> | ||
51 | #include <linux/kobject.h> | ||
52 | #include <linux/exportfs.h> | ||
53 | #include "nilfs.h" | ||
54 | #include "mdt.h" | ||
55 | #include "alloc.h" | ||
56 | #include "page.h" | ||
57 | #include "cpfile.h" | ||
58 | #include "ifile.h" | ||
59 | #include "dat.h" | ||
60 | #include "segment.h" | ||
61 | #include "segbuf.h" | ||
62 | |||
63 | MODULE_AUTHOR("NTT Corp."); | ||
64 | MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " | ||
65 | "(NILFS)"); | ||
66 | MODULE_VERSION(NILFS_VERSION); | ||
67 | MODULE_LICENSE("GPL"); | ||
68 | |||
69 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | ||
70 | static int test_exclusive_mount(struct file_system_type *fs_type, | ||
71 | struct block_device *bdev, int flags); | ||
72 | |||
73 | /** | ||
74 | * nilfs_error() - report failure condition on a filesystem | ||
75 | * | ||
76 | * nilfs_error() sets an ERROR_FS flag on the superblock as well as | ||
77 | * reporting an error message. It should be called when NILFS detects | ||
78 | * incoherences or defects of meta data on disk. As for sustainable | ||
79 | * errors such as a single-shot I/O error, nilfs_warning() or the printk() | ||
80 | * function should be used instead. | ||
81 | * | ||
82 | * The segment constructor must not call this function because it can | ||
83 | * kill itself. | ||
84 | */ | ||
void nilfs_error(struct super_block *sb, const char *function,
		 const char *fmt, ...)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	va_list args;

	va_start(args, fmt);
	printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);

	if (!(sb->s_flags & MS_RDONLY)) {
		struct the_nilfs *nilfs = sbi->s_nilfs;

		/* Stop writing unless the user chose to continue on errors. */
		if (!nilfs_test_opt(sbi, ERRORS_CONT))
			nilfs_detach_segment_constructor(sbi);

		/*
		 * Record the error in the on-disk super block exactly once;
		 * ns_sem serializes updates of ns_mount_state and ns_sbp[].
		 */
		down_write(&nilfs->ns_sem);
		if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
			nilfs->ns_mount_state |= NILFS_ERROR_FS;
			nilfs->ns_sbp[0]->s_state |=
				cpu_to_le16(NILFS_ERROR_FS);
			nilfs_commit_super(sbi, 1);
		}
		up_write(&nilfs->ns_sem);

		if (nilfs_test_opt(sbi, ERRORS_RO)) {
			printk(KERN_CRIT "Remounting filesystem read-only\n");
			sb->s_flags |= MS_RDONLY;
		}
	}

	/* errors=panic: give up entirely after logging and flagging. */
	if (nilfs_test_opt(sbi, ERRORS_PANIC))
		panic("NILFS (device %s): panic forced after error\n",
		      sb->s_id);
}
122 | |||
/**
 * nilfs_warning - report a non-fatal condition on a filesystem
 * @sb: super block instance
 * @function: name of the reporting function
 * @fmt: printk()-style format string
 *
 * Unlike nilfs_error(), this only logs a message; it does not touch the
 * mount state or the on-disk super block.
 */
void nilfs_warning(struct super_block *sb, const char *function,
		   const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	printk(KERN_WARNING "NILFS warning (device %s): %s: ",
	       sb->s_id, function);
	vprintk(fmt, args);
	printk("\n");
	va_end(args);
}
135 | |||
136 | static struct kmem_cache *nilfs_inode_cachep; | ||
137 | |||
138 | struct inode *nilfs_alloc_inode(struct super_block *sb) | ||
139 | { | ||
140 | struct nilfs_inode_info *ii; | ||
141 | |||
142 | ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS); | ||
143 | if (!ii) | ||
144 | return NULL; | ||
145 | ii->i_bh = NULL; | ||
146 | ii->i_state = 0; | ||
147 | ii->vfs_inode.i_version = 1; | ||
148 | nilfs_btnode_cache_init(&ii->i_btnode_cache); | ||
149 | return &ii->vfs_inode; | ||
150 | } | ||
151 | |||
152 | void nilfs_destroy_inode(struct inode *inode) | ||
153 | { | ||
154 | kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); | ||
155 | } | ||
156 | |||
/*
 * Slab constructor: runs once when an object first enters the cache,
 * so only fields that must stay valid across alloc/free cycles are
 * initialized here.
 */
static void init_once(void *obj)
{
	struct nilfs_inode_info *ii = obj;

	INIT_LIST_HEAD(&ii->i_dirty);
#ifdef CONFIG_NILFS_XATTR
	init_rwsem(&ii->xattr_sem);
#endif
	nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
	/* i_bmap always points at the union embedded in the same object. */
	ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
	inode_init_once(&ii->vfs_inode);
}
169 | |||
170 | static int nilfs_init_inode_cache(void) | ||
171 | { | ||
172 | nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", | ||
173 | sizeof(struct nilfs_inode_info), | ||
174 | 0, SLAB_RECLAIM_ACCOUNT, | ||
175 | init_once); | ||
176 | |||
177 | return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0; | ||
178 | } | ||
179 | |||
/* Tear down the slab cache created by nilfs_init_inode_cache(). */
static inline void nilfs_destroy_inode_cache(void)
{
	kmem_cache_destroy(nilfs_inode_cachep);
}
184 | |||
/*
 * Release per-inode resources when the VFS evicts an inode.
 * Drops cached POSIX ACLs (if configured), the raw inode buffer,
 * the bmap state, and the btnode page cache.
 */
static void nilfs_clear_inode(struct inode *inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

#ifdef CONFIG_NILFS_POSIX_ACL
	/* NILFS_ACL_NOT_CACHED is a sentinel, not a refcounted ACL. */
	if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
		posix_acl_release(ii->i_acl);
		ii->i_acl = NILFS_ACL_NOT_CACHED;
	}
	if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
		posix_acl_release(ii->i_default_acl);
		ii->i_default_acl = NILFS_ACL_NOT_CACHED;
	}
#endif
	/*
	 * Free resources allocated in nilfs_read_inode(), here.
	 */
	BUG_ON(!list_empty(&ii->i_dirty));
	brelse(ii->i_bh);
	ii->i_bh = NULL;

	if (test_bit(NILFS_I_BMAP, &ii->i_state))
		nilfs_bmap_clear(ii->i_bmap);

	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
}
211 | |||
/*
 * Write the primary super block (and optionally its backup) to disk.
 * On a barrier failure (-EOPNOTSUPP) it retries once without barriers;
 * on -EIO of the primary it falls back to the backup copy and retries.
 * Caller must hold nilfs->ns_sem (see nilfs_commit_super()).
 */
static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err;
	int barrier_done = 0;

	if (nilfs_test_opt(sbi, BARRIER)) {
		set_buffer_ordered(nilfs->ns_sbh[0]);
		barrier_done = 1;
	}
 retry:
	set_buffer_dirty(nilfs->ns_sbh[0]);
	err = sync_dirty_buffer(nilfs->ns_sbh[0]);
	if (err == -EOPNOTSUPP && barrier_done) {
		/* Device rejected the ordered write: disable barriers. */
		nilfs_warning(sbi->s_super, __func__,
			      "barrier-based sync failed. "
			      "disabling barriers\n");
		nilfs_clear_opt(sbi, BARRIER);
		barrier_done = 0;
		clear_buffer_ordered(nilfs->ns_sbh[0]);
		goto retry;
	}
	if (unlikely(err)) {
		printk(KERN_ERR
		       "NILFS: unable to write superblock (err=%d)\n", err);
		if (err == -EIO && nilfs->ns_sbh[1]) {
			nilfs_fall_back_super_block(nilfs);
			goto retry;
		}
	} else {
		struct nilfs_super_block *sbp = nilfs->ns_sbp[0];

		/*
		 * The latest segment becomes trailable from the position
		 * written in superblock.
		 */
		clear_nilfs_discontinued(nilfs);

		/* update GC protection for recent segments */
		if (nilfs->ns_sbh[1]) {
			/*
			 * With a backup super block present, only advance
			 * ns_prot_seq after the backup was also written.
			 */
			sbp = NULL;
			if (dupsb) {
				set_buffer_dirty(nilfs->ns_sbh[1]);
				if (!sync_dirty_buffer(nilfs->ns_sbh[1]))
					sbp = nilfs->ns_sbp[1];
			}
		}
		if (sbp) {
			spin_lock(&nilfs->ns_last_segment_lock);
			nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
			spin_unlock(&nilfs->ns_last_segment_lock);
		}
	}

	return err;
}
268 | |||
/**
 * nilfs_commit_super - update the in-memory super block and write it out
 * @sbi: nilfs_sb_info of the mount
 * @dupsb: nonzero to also refresh and write the backup super block
 *
 * Refreshes the last-segment pointers, free block count, write time and
 * CRC of the primary super block, then flushes via nilfs_sync_super().
 * The caller must hold nilfs->ns_sem.  Returns 0 or a negative errno.
 */
int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	sector_t nfreeblocks;
	time_t t;
	int err;

	/* nilfs->sem must be locked by the caller. */
	if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) {
		/* Primary copy is damaged; promote the backup if sane. */
		if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC)
			nilfs_swap_super_block(nilfs);
		else {
			printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
			       sbi->s_super->s_id);
			return -EIO;
		}
	}
	err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
	if (unlikely(err)) {
		printk(KERN_ERR "NILFS: failed to count free blocks\n");
		return err;
	}
	spin_lock(&nilfs->ns_last_segment_lock);
	sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
	sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
	sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
	spin_unlock(&nilfs->ns_last_segment_lock);

	t = get_seconds();
	nilfs->ns_sbwtime[0] = t;
	sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks);
	sbp[0]->s_wtime = cpu_to_le64(t);
	/* Zero the checksum field before computing the CRC over the block. */
	sbp[0]->s_sum = 0;
	sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
					     (unsigned char *)sbp[0],
					     nilfs->ns_sbsize));
	if (dupsb && sbp[1]) {
		memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
		nilfs->ns_sbwtime[1] = t;
	}
	sbi->s_super->s_dirt = 0;
	return nilfs_sync_super(sbi, dupsb);
}
313 | |||
/*
 * Unmount hook: stop the segment constructor, write back the final
 * mount state for RW mounts, detach the checkpoint, and free sbi.
 */
static void nilfs_put_super(struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct the_nilfs *nilfs = sbi->s_nilfs;

	nilfs_detach_segment_constructor(sbi);

	if (!(sb->s_flags & MS_RDONLY)) {
		/* Restore the VALID_FS state bits on a clean RW unmount. */
		down_write(&nilfs->ns_sem);
		nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
		nilfs_commit_super(sbi, 1);
		up_write(&nilfs->ns_sem);
	}

	nilfs_detach_checkpoint(sbi);
	put_nilfs(sbi->s_nilfs);
	sbi->s_super = NULL;
	sb->s_fs_info = NULL;
	kfree(sbi);
}
334 | |||
335 | /** | ||
336 | * nilfs_write_super - write super block(s) of NILFS | ||
337 | * @sb: super_block | ||
338 | * | ||
339 | * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and | ||
340 | * clears s_dirt. This function is called in the section protected by | ||
341 | * lock_super(). | ||
342 | * | ||
343 | * The s_dirt flag is managed by each filesystem and we protect it by ns_sem | ||
344 | * of the struct the_nilfs. Lock order must be as follows: | ||
345 | * | ||
346 | * 1. lock_super() | ||
347 | * 2. down_write(&nilfs->ns_sem) | ||
348 | * | ||
349 | * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer | ||
350 | * of the super block (nilfs->ns_sbp[]). | ||
351 | * | ||
352 | * In most cases, VFS functions call lock_super() before calling these | ||
353 | * methods. So we must be careful not to bring on deadlocks when using | ||
354 | * lock_super(); see generic_shutdown_super(), write_super(), and so on. | ||
355 | * | ||
356 | * Note that order of lock_kernel() and lock_super() depends on contexts | ||
357 | * of VFS. We should also note that lock_kernel() can be used in its | ||
358 | * protective section and only the outermost one has an effect. | ||
359 | */ | ||
static void nilfs_write_super(struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct the_nilfs *nilfs = sbi->s_nilfs;

	down_write(&nilfs->ns_sem);
	if (!(sb->s_flags & MS_RDONLY)) {
		struct nilfs_super_block **sbp = nilfs->ns_sbp;
		u64 t = get_seconds();
		int dupsb;

		/*
		 * Skip the write when the log is continuous and the last
		 * super block write was less than NILFS_SB_FREQ ago;
		 * s_dirt is deliberately left set in that case.
		 */
		if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] &&
		    t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) {
			up_write(&nilfs->ns_sem);
			return;
		}
		/* Refresh the backup copy only every NILFS_ALTSB_FREQ. */
		dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
		nilfs_commit_super(sbi, dupsb);
	}
	sb->s_dirt = 0;
	up_write(&nilfs->ns_sem);
}
382 | |||
/*
 * sync_fs hook: build a new log segment for a synchronous request;
 * asynchronous requests are a no-op here.
 */
static int nilfs_sync_fs(struct super_block *sb, int wait)
{
	if (!wait)
		return 0;
	/* This function is called when super block should be written back */
	return nilfs_construct_segment(sb);
}
392 | |||
/**
 * nilfs_attach_checkpoint - attach a mount to checkpoint @cno
 * @sbi: nilfs_sb_info of the mount
 * @cno: checkpoint number to attach to
 *
 * Registers @sbi on the nilfs super list, creates the ifile metadata
 * inode, and loads its on-disk state from checkpoint @cno.  On failure
 * the ifile is destroyed and @sbi is unlinked again.
 * Returns 0, -ENOMEM, -EINVAL (bad checkpoint), or another negative errno.
 */
int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_checkpoint *raw_cp;
	struct buffer_head *bh_cp;
	int err;

	down_write(&nilfs->ns_sem);
	list_add(&sbi->s_list, &nilfs->ns_supers);
	up_write(&nilfs->ns_sem);

	sbi->s_ifile = nilfs_mdt_new(
		nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
	if (!sbi->s_ifile)
		return -ENOMEM;

	err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size);
	if (unlikely(err))
		goto failed;

	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
					  &bh_cp);
	if (unlikely(err)) {
		/* Normalize "no such checkpoint" to -EINVAL for callers. */
		if (err == -ENOENT || err == -EINVAL) {
			printk(KERN_ERR
			       "NILFS: Invalid checkpoint "
			       "(checkpoint number=%llu)\n",
			       (unsigned long long)cno);
			err = -EINVAL;
		}
		goto failed;
	}
	err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode);
	if (unlikely(err))
		goto failed_bh;
	atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count));
	atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count));

	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
	return 0;

 failed_bh:
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
 failed:
	nilfs_mdt_destroy(sbi->s_ifile);
	sbi->s_ifile = NULL;

	down_write(&nilfs->ns_sem);
	list_del_init(&sbi->s_list);
	up_write(&nilfs->ns_sem);

	return err;
}
446 | |||
/*
 * Undo nilfs_attach_checkpoint(): destroy the ifile and remove @sbi
 * from the nilfs super list under ns_sem.
 */
void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;

	nilfs_mdt_clear(sbi->s_ifile);
	nilfs_mdt_destroy(sbi->s_ifile);
	sbi->s_ifile = NULL;
	down_write(&nilfs->ns_sem);
	list_del_init(&sbi->s_list);
	up_write(&nilfs->ns_sem);
}
458 | |||
/*
 * If the filesystem was mounted in an unchecked state, set the
 * VALID_FS flag and commit it to disk so recovery is not redone
 * on the next mount.  Returns the nilfs_commit_super() result.
 */
static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err = 0;

	down_write(&nilfs->ns_sem);
	if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
		nilfs->ns_mount_state |= NILFS_VALID_FS;
		err = nilfs_commit_super(sbi, 1);
		if (likely(!err))
			printk(KERN_INFO "NILFS: recovery complete.\n");
	}
	up_write(&nilfs->ns_sem);
	return err;
}
474 | |||
/*
 * statfs hook: fill @buf with block/inode usage figures.
 * f_bavail subtracts the reserved segments; f_ffree is not tracked yet.
 */
static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	unsigned long long blocks;
	unsigned long overhead;
	unsigned long nrsvblocks;
	sector_t nfreeblocks;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err;

	/*
	 * Compute all of the segment blocks
	 *
	 * The blocks before first segment and after last segment
	 * are excluded.
	 */
	blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments
		- nilfs->ns_first_data_block;
	nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment;

	/*
	 * Compute the overhead
	 *
	 * When distributing meta data blocks outside segment structure,
	 * We must count them as the overhead.
	 */
	overhead = 0;

	err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
	if (unlikely(err))
		return err;

	buf->f_type = NILFS_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = blocks - overhead;
	buf->f_bfree = nfreeblocks;
	buf->f_bavail = (buf->f_bfree >= nrsvblocks) ?
		(buf->f_bfree - nrsvblocks) : 0;
	buf->f_files = atomic_read(&sbi->s_inodes_count);
	buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
	buf->f_namelen = NILFS_NAME_LEN;
	return 0;
}
519 | |||
/* VFS super block operations; commented entries are intentionally unset. */
static struct super_operations nilfs_sops = {
	.alloc_inode    = nilfs_alloc_inode,
	.destroy_inode  = nilfs_destroy_inode,
	.dirty_inode    = nilfs_dirty_inode,
	/* .write_inode    = nilfs_write_inode, */
	/* .put_inode      = nilfs_put_inode, */
	/* .drop_inode	  = nilfs_drop_inode, */
	.delete_inode   = nilfs_delete_inode,
	.put_super      = nilfs_put_super,
	.write_super    = nilfs_write_super,
	.sync_fs        = nilfs_sync_fs,
	/* .write_super_lockfs */
	/* .unlockfs */
	.statfs         = nilfs_statfs,
	.remount_fs     = nilfs_remount,
	.clear_inode    = nilfs_clear_inode,
	/* .umount_begin */
	/* .show_options */
};
539 | |||
540 | static struct inode * | ||
541 | nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) | ||
542 | { | ||
543 | struct inode *inode; | ||
544 | |||
545 | if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && | ||
546 | ino != NILFS_SKETCH_INO) | ||
547 | return ERR_PTR(-ESTALE); | ||
548 | |||
549 | inode = nilfs_iget(sb, ino); | ||
550 | if (IS_ERR(inode)) | ||
551 | return ERR_CAST(inode); | ||
552 | if (generation && inode->i_generation != generation) { | ||
553 | iput(inode); | ||
554 | return ERR_PTR(-ESTALE); | ||
555 | } | ||
556 | |||
557 | return inode; | ||
558 | } | ||
559 | |||
/* Decode an NFS file handle into a dentry via the generic helper. */
static struct dentry *
nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
		   int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    nilfs_nfs_get_inode);
}
567 | |||
/* Decode the parent directory embedded in an NFS file handle. */
static struct dentry *
nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len,
		   int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    nilfs_nfs_get_inode);
}
575 | |||
/* Export operations enabling NFS export of a nilfs mount. */
static struct export_operations nilfs_export_ops = {
	.fh_to_dentry = nilfs_fh_to_dentry,
	.fh_to_parent = nilfs_fh_to_parent,
	.get_parent = nilfs_get_parent,
};
581 | |||
/* Token identifiers for the mount options parsed in parse_options(). */
enum {
	Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_barrier, Opt_snapshot, Opt_order,
	Opt_err,
};
587 | |||
/* Pattern table consumed by match_token(); Opt_err is the catch-all. */
static match_table_t tokens = {
	{Opt_err_cont, "errors=continue"},
	{Opt_err_panic, "errors=panic"},
	{Opt_err_ro, "errors=remount-ro"},
	{Opt_barrier, "barrier=%s"},
	{Opt_snapshot, "cp=%u"},
	{Opt_order, "order=%s"},
	{Opt_err, NULL}
};
597 | |||
598 | static int match_bool(substring_t *s, int *result) | ||
599 | { | ||
600 | int len = s->to - s->from; | ||
601 | |||
602 | if (strncmp(s->from, "on", len) == 0) | ||
603 | *result = 1; | ||
604 | else if (strncmp(s->from, "off", len) == 0) | ||
605 | *result = 0; | ||
606 | else | ||
607 | return 1; | ||
608 | return 0; | ||
609 | } | ||
610 | |||
/**
 * parse_options - parse the mount option string
 * @options: comma-separated option string (may be NULL)
 * @sb: super block whose nilfs_sb_info receives the parsed settings
 *
 * Returns 1 on success, 0 on any unrecognized or invalid option
 * (note: inverted compared to the usual 0-on-success convention).
 */
static int parse_options(char *options, struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	char *p;
	substring_t args[MAX_OPT_ARGS];
	int option;

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_barrier:
			if (match_bool(&args[0], &option))
				return 0;
			if (option)
				nilfs_set_opt(sbi, BARRIER);
			else
				nilfs_clear_opt(sbi, BARRIER);
			break;
		case Opt_order:
			/* args[0] spans to the end of p, so it is
			   NUL-terminated and safe for strcmp(). */
			if (strcmp(args[0].from, "relaxed") == 0)
				/* Ordered data semantics */
				nilfs_clear_opt(sbi, STRICT_ORDER);
			else if (strcmp(args[0].from, "strict") == 0)
				/* Strict in-order semantics */
				nilfs_set_opt(sbi, STRICT_ORDER);
			else
				return 0;
			break;
		case Opt_err_panic:
			nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC);
			break;
		case Opt_err_ro:
			nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO);
			break;
		case Opt_err_cont:
			nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT);
			break;
		case Opt_snapshot:
			/* Snapshot mounts require a positive checkpoint
			   number and a read-only mount request. */
			if (match_int(&args[0], &option) || option <= 0)
				return 0;
			if (!(sb->s_flags & MS_RDONLY))
				return 0;
			sbi->s_snapshot_cno = option;
			nilfs_set_opt(sbi, SNAPSHOT);
			break;
		default:
			printk(KERN_ERR
			       "NILFS: Unrecognized mount option \"%s\"\n", p);
			return 0;
		}
	}
	return 1;
}
671 | |||
/*
 * Install the default mount options (continue on errors, barriers on).
 * @sbp is currently unused but kept for future on-disk defaults.
 */
static inline void
nilfs_set_default_options(struct nilfs_sb_info *sbi,
			  struct nilfs_super_block *sbp)
{
	sbi->s_mount_opt =
		NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER;
}
679 | |||
/*
 * Record a successful RW mount in the super block: warn about unchecked
 * or erroneous filesystems, bump the mount count, clear VALID_FS, and
 * commit.  The caller must hold nilfs->ns_sem.
 */
static int nilfs_setup_super(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_super_block *sbp = nilfs->ns_sbp[0];
	int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count);
	int mnt_count = le16_to_cpu(sbp->s_mnt_count);

	/* nilfs->sem must be locked by the caller. */
	if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
		printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
	} else if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
		printk(KERN_WARNING
		       "NILFS warning: mounting fs with errors\n");
#if 0
	} else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) {
		printk(KERN_WARNING
		       "NILFS warning: maximal mount count reached\n");
#endif
	}
	if (!max_mnt_count)
		sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);

	sbp->s_mnt_count = cpu_to_le16(mnt_count + 1);
	/* Clearing VALID_FS marks the fs dirty until a clean unmount. */
	sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS);
	sbp->s_mtime = cpu_to_le64(get_seconds());
	return nilfs_commit_super(sbi, 1);
}
707 | |||
/**
 * nilfs_read_super_block - read a super block image from the device
 * @sb: super block instance (supplies the block device)
 * @pos: byte offset of the super block on the device
 * @blocksize: block size used for the read
 * @pbh: output; the buffer head holding the block (caller must brelse)
 *
 * Returns a pointer into (*pbh)->b_data at the super block offset,
 * or NULL if the block could not be read.
 */
struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
						 u64 pos, int blocksize,
						 struct buffer_head **pbh)
{
	unsigned long long sb_index = pos;
	unsigned long offset;

	/* do_div() divides sb_index in place and yields the remainder. */
	offset = do_div(sb_index, blocksize);
	*pbh = sb_bread(sb, sb_index);
	if (!*pbh)
		return NULL;
	return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset);
}
721 | |||
/**
 * nilfs_store_magic_and_option - import super block fields and mount options
 * @sb: super block instance being set up
 * @sbp: on-disk super block image
 * @data: raw mount option string
 *
 * Copies the magic number and tunables from @sbp into @sb/@sbi, applies
 * default options, then parses @data.  Returns 0 or -EINVAL on a bad
 * option string.
 */
int nilfs_store_magic_and_option(struct super_block *sb,
				 struct nilfs_super_block *sbp,
				 char *data)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);

	sb->s_magic = le16_to_cpu(sbp->s_magic);

	/* FS independent flags */
#ifdef NILFS_ATIME_DISABLE
	sb->s_flags |= MS_NOATIME;
#endif

	nilfs_set_default_options(sbi, sbp);

	sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid);
	sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid);
	sbi->s_interval = le32_to_cpu(sbp->s_c_interval);
	sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max);

	return !parse_options(data, sb) ? -EINVAL : 0 ;
}
744 | |||
745 | /** | ||
746 | * nilfs_fill_super() - initialize a super block instance | ||
747 | * @sb: super_block | ||
748 | * @data: mount options | ||
749 | * @silent: silent mode flag | ||
750 | * @nilfs: the_nilfs struct | ||
751 | * | ||
752 | * This function is called exclusively by bd_mount_mutex. | ||
753 | * So, the recovery process is protected from other simultaneous mounts. | ||
754 | */ | ||
static int
nilfs_fill_super(struct super_block *sb, void *data, int silent,
		 struct the_nilfs *nilfs)
{
	struct nilfs_sb_info *sbi;
	struct inode *root;
	__u64 cno;
	int err;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	sb->s_fs_info = sbi;

	/* Take a reference on the shared the_nilfs object. */
	get_nilfs(nilfs);
	sbi->s_nilfs = nilfs;
	sbi->s_super = sb;

	err = init_nilfs(nilfs, sbi, (char *)data);
	if (err)
		goto failed_sbi;

	spin_lock_init(&sbi->s_inode_lock);
	INIT_LIST_HEAD(&sbi->s_dirty_files);
	INIT_LIST_HEAD(&sbi->s_list);

	/*
	 * Following initialization is overlapped because
	 * nilfs_sb_info structure has been cleared at the beginning.
	 * But we reserve them to keep our interest and make ready
	 * for the future change.
	 */
	get_random_bytes(&sbi->s_next_generation,
			 sizeof(sbi->s_next_generation));
	spin_lock_init(&sbi->s_next_gen_lock);

	sb->s_op = &nilfs_sops;
	sb->s_export_op = &nilfs_export_ops;
	sb->s_root = NULL;
	sb->s_time_gran = 1;

	/* First mount of this device: run log recovery if necessary. */
	if (!nilfs_loaded(nilfs)) {
		err = load_nilfs(nilfs, sbi);
		if (err)
			goto failed_sbi;
	}
	cno = nilfs_last_cno(nilfs);

	if (sb->s_flags & MS_RDONLY) {
		if (nilfs_test_opt(sbi, SNAPSHOT)) {
			/* A cp= mount must name a checkpoint that was
			   turned into a snapshot. */
			err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile,
						       sbi->s_snapshot_cno);
			if (err < 0)
				goto failed_sbi;
			if (!err) {
				printk(KERN_ERR
				       "NILFS: The specified checkpoint is "
				       "not a snapshot "
				       "(checkpoint number=%llu).\n",
				       (unsigned long long)sbi->s_snapshot_cno);
				err = -EINVAL;
				goto failed_sbi;
			}
			cno = sbi->s_snapshot_cno;
		} else
			/* Read-only mount */
			sbi->s_snapshot_cno = cno;
	}

	err = nilfs_attach_checkpoint(sbi, cno);
	if (err) {
		printk(KERN_ERR "NILFS: error loading a checkpoint"
		       " (checkpoint number=%llu).\n", (unsigned long long)cno);
		goto failed_sbi;
	}

	/* Writable mounts need the segment constructor thread. */
	if (!(sb->s_flags & MS_RDONLY)) {
		err = nilfs_attach_segment_constructor(sbi);
		if (err)
			goto failed_checkpoint;
	}

	root = nilfs_iget(sb, NILFS_ROOT_INO);
	if (IS_ERR(root)) {
		printk(KERN_ERR "NILFS: get root inode failed\n");
		err = PTR_ERR(root);
		goto failed_segctor;
	}
	/* Sanity-check the root inode before trusting the tree. */
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		iput(root);
		printk(KERN_ERR "NILFS: corrupt root inode.\n");
		err = -EINVAL;
		goto failed_segctor;
	}
	sb->s_root = d_alloc_root(root);
	if (!sb->s_root) {
		iput(root);
		printk(KERN_ERR "NILFS: get root dentry failed\n");
		err = -ENOMEM;
		goto failed_segctor;
	}

	if (!(sb->s_flags & MS_RDONLY)) {
		down_write(&nilfs->ns_sem);
		nilfs_setup_super(sbi);
		up_write(&nilfs->ns_sem);
	}

	err = nilfs_mark_recovery_complete(sbi);
	if (unlikely(err)) {
		printk(KERN_ERR "NILFS: recovery failed.\n");
		goto failed_root;
	}

	return 0;

	/* Unwind in reverse order of acquisition. */
 failed_root:
	dput(sb->s_root);
	sb->s_root = NULL;

 failed_segctor:
	nilfs_detach_segment_constructor(sbi);

 failed_checkpoint:
	nilfs_detach_checkpoint(sbi);

 failed_sbi:
	put_nilfs(nilfs);
	sb->s_fs_info = NULL;
	kfree(sbi);
	return err;
}
888 | |||
889 | static int nilfs_remount(struct super_block *sb, int *flags, char *data) | ||
890 | { | ||
891 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
892 | struct nilfs_super_block *sbp; | ||
893 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
894 | unsigned long old_sb_flags; | ||
895 | struct nilfs_mount_options old_opts; | ||
896 | int err; | ||
897 | |||
898 | old_sb_flags = sb->s_flags; | ||
899 | old_opts.mount_opt = sbi->s_mount_opt; | ||
900 | old_opts.snapshot_cno = sbi->s_snapshot_cno; | ||
901 | |||
902 | if (!parse_options(data, sb)) { | ||
903 | err = -EINVAL; | ||
904 | goto restore_opts; | ||
905 | } | ||
906 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL); | ||
907 | |||
908 | if ((*flags & MS_RDONLY) && | ||
909 | sbi->s_snapshot_cno != old_opts.snapshot_cno) { | ||
910 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
911 | "remount to a different snapshot. \n", | ||
912 | sb->s_id); | ||
913 | err = -EINVAL; | ||
914 | goto restore_opts; | ||
915 | } | ||
916 | |||
917 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | ||
918 | goto out; | ||
919 | if (*flags & MS_RDONLY) { | ||
920 | /* Shutting down the segment constructor */ | ||
921 | nilfs_detach_segment_constructor(sbi); | ||
922 | sb->s_flags |= MS_RDONLY; | ||
923 | |||
924 | sbi->s_snapshot_cno = nilfs_last_cno(nilfs); | ||
925 | /* nilfs_set_opt(sbi, SNAPSHOT); */ | ||
926 | |||
927 | /* | ||
928 | * Remounting a valid RW partition RDONLY, so set | ||
929 | * the RDONLY flag and then mark the partition as valid again. | ||
930 | */ | ||
931 | down_write(&nilfs->ns_sem); | ||
932 | sbp = nilfs->ns_sbp[0]; | ||
933 | if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && | ||
934 | (nilfs->ns_mount_state & NILFS_VALID_FS)) | ||
935 | sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); | ||
936 | sbp->s_mtime = cpu_to_le64(get_seconds()); | ||
937 | nilfs_commit_super(sbi, 1); | ||
938 | up_write(&nilfs->ns_sem); | ||
939 | } else { | ||
940 | /* | ||
941 | * Mounting a RDONLY partition read-write, so reread and | ||
942 | * store the current valid flag. (It may have been changed | ||
943 | * by fsck since we originally mounted the partition.) | ||
944 | */ | ||
945 | down(&sb->s_bdev->bd_mount_sem); | ||
946 | /* Check existing RW-mount */ | ||
947 | if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { | ||
948 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
949 | "remount because a RW-mount exists.\n", | ||
950 | sb->s_id); | ||
951 | err = -EBUSY; | ||
952 | goto rw_remount_failed; | ||
953 | } | ||
954 | if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { | ||
955 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
956 | "remount because the current RO-mount is not " | ||
957 | "the latest one.\n", | ||
958 | sb->s_id); | ||
959 | err = -EINVAL; | ||
960 | goto rw_remount_failed; | ||
961 | } | ||
962 | sb->s_flags &= ~MS_RDONLY; | ||
963 | nilfs_clear_opt(sbi, SNAPSHOT); | ||
964 | sbi->s_snapshot_cno = 0; | ||
965 | |||
966 | err = nilfs_attach_segment_constructor(sbi); | ||
967 | if (err) | ||
968 | goto rw_remount_failed; | ||
969 | |||
970 | down_write(&nilfs->ns_sem); | ||
971 | nilfs_setup_super(sbi); | ||
972 | up_write(&nilfs->ns_sem); | ||
973 | |||
974 | up(&sb->s_bdev->bd_mount_sem); | ||
975 | } | ||
976 | out: | ||
977 | return 0; | ||
978 | |||
979 | rw_remount_failed: | ||
980 | up(&sb->s_bdev->bd_mount_sem); | ||
981 | restore_opts: | ||
982 | sb->s_flags = old_sb_flags; | ||
983 | sbi->s_mount_opt = old_opts.mount_opt; | ||
984 | sbi->s_snapshot_cno = old_opts.snapshot_cno; | ||
985 | return err; | ||
986 | } | ||
987 | |||
/*
 * nilfs_super_data - per-mount identification data passed to sget()
 * callbacks to locate or create the right super_block instance.
 */
struct nilfs_super_data {
	struct block_device *bdev;	/* device being mounted */
	__u64 cno;			/* requested checkpoint number (0 = latest) */
	int flags;			/* mount flags (MS_* bits) */
};
993 | |||
/**
 * nilfs_identify - pre-read mount options needed to identify mount instance
 * @data: mount options
 * @sd: nilfs_super_data
 *
 * Scans the comma-separated option string only for the "snapshot" option
 * and, when present and valid, stores the requested checkpoint number in
 * @sd->cno.  Other options are ignored here; full parsing happens later.
 * The separators consumed by strsep() are written back so the string can
 * be parsed again afterwards.
 *
 * Return: 0 on success, non-zero if an invalid option was found.
 */
static int nilfs_identify(char *data, struct nilfs_super_data *sd)
{
	char *p, *options = data;
	substring_t args[MAX_OPT_ARGS];
	int option, token;
	int ret = 0;

	do {
		p = strsep(&options, ",");
		if (p != NULL && *p) {
			token = match_token(p, tokens, args);
			if (token == Opt_snapshot) {
				/* snapshot mounts must be read-only */
				if (!(sd->flags & MS_RDONLY))
					ret++;
				else {
					ret = match_int(&args[0], &option);
					if (!ret) {
						if (option > 0)
							sd->cno = option;
						else
							ret++;
					}
				}
			}
			if (ret)
				printk(KERN_ERR
				       "NILFS: invalid mount option: %s\n", p);
		}
		if (!options)
			break;
		BUG_ON(options == data);
		/* restore the ',' that strsep() replaced with '\0' */
		*(options - 1) = ',';
	} while (!ret);
	return ret;
}
1034 | |||
1035 | static int nilfs_set_bdev_super(struct super_block *s, void *data) | ||
1036 | { | ||
1037 | struct nilfs_super_data *sd = data; | ||
1038 | |||
1039 | s->s_bdev = sd->bdev; | ||
1040 | s->s_dev = s->s_bdev->bd_dev; | ||
1041 | return 0; | ||
1042 | } | ||
1043 | |||
1044 | static int nilfs_test_bdev_super(struct super_block *s, void *data) | ||
1045 | { | ||
1046 | struct nilfs_super_data *sd = data; | ||
1047 | |||
1048 | return s->s_bdev == sd->bdev; | ||
1049 | } | ||
1050 | |||
/*
 * sget() "test" callback used in phase-2 of nilfs_get_sb(): match an
 * existing super_block on the same device that is either the unique
 * R/W mount or a read-only mount of the same checkpoint (sd->cno).
 */
static int nilfs_test_bdev_super2(struct super_block *s, void *data)
{
	struct nilfs_super_data *sd = data;
	int ret;

	if (s->s_bdev != sd->bdev)
		return 0;

	if (!((s->s_flags | sd->flags) & MS_RDONLY))
		return 1; /* Reuse an old R/W-mode super_block */

	if (s->s_flags & sd->flags & MS_RDONLY) {
		/* both RO: match only when the checkpoint numbers agree */
		if (down_read_trylock(&s->s_umount)) {
			ret = s->s_root &&
				(sd->cno == NILFS_SB(s)->s_snapshot_cno);
			up_read(&s->s_umount);
			/*
			 * This path is locked with sb_lock by sget().
			 * So, drop_super() causes deadlock.
			 */
			return ret;
		}
	}
	return 0;
}
1076 | |||
/*
 * nilfs_get_sb - mount entry point for the nilfs2 filesystem type
 *
 * Opens the block device exclusively and runs a two-phase sget() scheme:
 * phase-1 finds any existing mount instance on the device so that its
 * shared the_nilfs object can be reused; phase-2 looks up or allocates
 * the super_block matching the requested mode/snapshot.  bd_mount_sem
 * serializes this whole sequence against other mounts of the device.
 */
static int
nilfs_get_sb(struct file_system_type *fs_type, int flags,
	     const char *dev_name, void *data, struct vfsmount *mnt)
{
	struct nilfs_super_data sd;
	struct super_block *s, *s2;
	struct the_nilfs *nilfs = NULL;
	int err, need_to_close = 1;

	sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
	if (IS_ERR(sd.bdev))
		return PTR_ERR(sd.bdev);

	/*
	 * To get mount instance using sget() vfs-routine, NILFS needs
	 * much more information than normal filesystems to identify mount
	 * instance. For snapshot mounts, not only a mount type (ro-mount
	 * or rw-mount) but also a checkpoint number is required.
	 * The results are passed in sget() using nilfs_super_data.
	 */
	sd.cno = 0;
	sd.flags = flags;
	if (nilfs_identify((char *)data, &sd)) {
		err = -EINVAL;
		goto failed;
	}

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	down(&sd.bdev->bd_mount_sem);
	if (!sd.cno &&
	    (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
		err = (err < 0) ? : -EBUSY;
		goto failed_unlock;
	}

	/*
	 * Phase-1: search any existent instance and get the_nilfs
	 */
	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
	if (IS_ERR(s))
		goto error_s;

	if (!s->s_root) {
		/* no prior mount on this device: allocate the shared object */
		err = -ENOMEM;
		nilfs = alloc_nilfs(sd.bdev);
		if (!nilfs)
			goto cancel_new;
	} else {
		struct nilfs_sb_info *sbi = NILFS_SB(s);

		/*
		 * s_umount protects super_block from unmount process;
		 * It covers pointers of nilfs_sb_info and the_nilfs.
		 */
		nilfs = sbi->s_nilfs;
		get_nilfs(nilfs);
		up_write(&s->s_umount);

		/*
		 * Phase-2: search specified snapshot or R/W mode super_block
		 */
		if (!sd.cno)
			/* trying to get the latest checkpoint. */
			sd.cno = nilfs_last_cno(nilfs);

		s2 = sget(fs_type, nilfs_test_bdev_super2,
			  nilfs_set_bdev_super, &sd);
		deactivate_super(s);
		/*
		 * Although deactivate_super() invokes close_bdev_exclusive() at
		 * kill_block_super(). Here, s is an existent mount; we need
		 * one more close_bdev_exclusive() call.
		 */
		s = s2;
		if (IS_ERR(s))
			goto error_s;
	}

	if (!s->s_root) {
		/* a brand-new super_block came back from sget(): fill it in */
		char b[BDEVNAME_SIZE];

		s->s_flags = flags;
		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(sd.bdev));

		err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs);
		if (err)
			goto cancel_new;

		s->s_flags |= MS_ACTIVE;
		need_to_close = 0;	/* the new sb now owns the bdev ref */
	} else if (!(s->s_flags & MS_RDONLY)) {
		/*
		 * NOTE(review): err is set to -EBUSY here but never
		 * returned — the function still returns 0 below.  This
		 * branch looks unreachable via nilfs_test_bdev_super2
		 * (an R/W match is only returned for an R/W request);
		 * verify before relying on it.
		 */
		err = -EBUSY;
	}

	up(&sd.bdev->bd_mount_sem);
	put_nilfs(nilfs);
	if (need_to_close)
		close_bdev_exclusive(sd.bdev, flags);
	simple_set_mnt(mnt, s);
	return 0;

 error_s:
	up(&sd.bdev->bd_mount_sem);
	if (nilfs)
		put_nilfs(nilfs);
	close_bdev_exclusive(sd.bdev, flags);
	return PTR_ERR(s);

 failed_unlock:
	up(&sd.bdev->bd_mount_sem);
 failed:
	close_bdev_exclusive(sd.bdev, flags);

	return err;

 cancel_new:
	/* Abandoning the newly allocated superblock */
	up(&sd.bdev->bd_mount_sem);
	if (nilfs)
		put_nilfs(nilfs);
	up_write(&s->s_umount);
	deactivate_super(s);
	/*
	 * deactivate_super() invokes close_bdev_exclusive().
	 * We must finish all post-cleaning before this call;
	 * put_nilfs() and unlocking bd_mount_sem need the block device.
	 */
	return err;
}
1211 | |||
/*
 * sget() "test" callback used by test_exclusive_mount(): match a
 * super_block on the same device whose RO/RW mode equals sd->flags,
 * while skipping read-only snapshot mounts (those may coexist with
 * any other mount and are never "exclusive").
 */
static int nilfs_test_bdev_super3(struct super_block *s, void *data)
{
	struct nilfs_super_data *sd = data;
	int ret;

	if (s->s_bdev != sd->bdev)
		return 0;
	if (down_read_trylock(&s->s_umount)) {
		ret = (s->s_flags & MS_RDONLY) && s->s_root &&
			nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
		up_read(&s->s_umount);
		if (ret)
			return 0; /* ignore snapshot mounts */
	}
	return !((sd->flags ^ s->s_flags) & MS_RDONLY);
}
1228 | |||
/*
 * sget() "set" callback that always fails: test_exclusive_mount() only
 * wants to look up existing super_blocks, never create one, so any
 * attempt to instantiate a new super_block is aborted with -EFAULT.
 */
static int __false_bdev_super(struct super_block *s, void *data)
{
#if 0 /* XXX: workaround for lock debug. This is not good idea */
	up_write(&s->s_umount);
#endif
	return -EFAULT;
}
1236 | |||
/**
 * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
 * @fs_type: filesystem type
 * @bdev: block device
 * @flags: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
 *
 * This function must be called within a section protected by bd_mount_sem.
 *
 * Return: 1 if a matching mount exists, 0 if none was found, or a
 * negative error code propagated from sget().
 */
static int test_exclusive_mount(struct file_system_type *fs_type,
				struct block_device *bdev, int flags)
{
	struct super_block *s;
	struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };

	s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
	if (IS_ERR(s)) {
		/* -EFAULT comes from __false_bdev_super: nothing matched */
		if (PTR_ERR(s) != -EFAULT)
			return PTR_ERR(s);
		return 0; /* Not found */
	}
	up_write(&s->s_umount);
	deactivate_super(s);
	return 1; /* Found */
}
1262 | |||
/* Registration record for the "nilfs2" filesystem; needs a block device. */
struct file_system_type nilfs_fs_type = {
	.owner    = THIS_MODULE,
	.name     = "nilfs2",
	.get_sb   = nilfs_get_sb,
	.kill_sb  = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV,
};
1270 | |||
1271 | static int __init init_nilfs_fs(void) | ||
1272 | { | ||
1273 | int err; | ||
1274 | |||
1275 | err = nilfs_init_inode_cache(); | ||
1276 | if (err) | ||
1277 | goto failed; | ||
1278 | |||
1279 | err = nilfs_init_transaction_cache(); | ||
1280 | if (err) | ||
1281 | goto failed_inode_cache; | ||
1282 | |||
1283 | err = nilfs_init_segbuf_cache(); | ||
1284 | if (err) | ||
1285 | goto failed_transaction_cache; | ||
1286 | |||
1287 | err = nilfs_btree_path_cache_init(); | ||
1288 | if (err) | ||
1289 | goto failed_segbuf_cache; | ||
1290 | |||
1291 | err = register_filesystem(&nilfs_fs_type); | ||
1292 | if (err) | ||
1293 | goto failed_btree_path_cache; | ||
1294 | |||
1295 | return 0; | ||
1296 | |||
1297 | failed_btree_path_cache: | ||
1298 | nilfs_btree_path_cache_destroy(); | ||
1299 | |||
1300 | failed_segbuf_cache: | ||
1301 | nilfs_destroy_segbuf_cache(); | ||
1302 | |||
1303 | failed_transaction_cache: | ||
1304 | nilfs_destroy_transaction_cache(); | ||
1305 | |||
1306 | failed_inode_cache: | ||
1307 | nilfs_destroy_inode_cache(); | ||
1308 | |||
1309 | failed: | ||
1310 | return err; | ||
1311 | } | ||
1312 | |||
1313 | static void __exit exit_nilfs_fs(void) | ||
1314 | { | ||
1315 | nilfs_destroy_segbuf_cache(); | ||
1316 | nilfs_destroy_transaction_cache(); | ||
1317 | nilfs_destroy_inode_cache(); | ||
1318 | nilfs_btree_path_cache_destroy(); | ||
1319 | unregister_filesystem(&nilfs_fs_type); | ||
1320 | } | ||
1321 | |||
1322 | module_init(init_nilfs_fs) | ||
1323 | module_exit(exit_nilfs_fs) | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c new file mode 100644 index 000000000000..33400cf0bbe2 --- /dev/null +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -0,0 +1,637 @@ | |||
1 | /* | ||
2 | * the_nilfs.c - the_nilfs shared structure. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/blkdev.h> | ||
27 | #include <linux/backing-dev.h> | ||
28 | #include <linux/crc32.h> | ||
29 | #include "nilfs.h" | ||
30 | #include "segment.h" | ||
31 | #include "alloc.h" | ||
32 | #include "cpfile.h" | ||
33 | #include "sufile.h" | ||
34 | #include "dat.h" | ||
35 | #include "seglist.h" | ||
36 | #include "segbuf.h" | ||
37 | |||
/**
 * nilfs_set_last_segment - record the position of the latest log
 * @nilfs: the_nilfs structure
 * @start_blocknr: start block number stored in ns_last_pseg
 * @seq: sequence number stored in ns_last_seq
 * @cno: checkpoint number stored in ns_last_cno
 *
 * All three values are updated together under ns_last_segment_lock so
 * that readers taking the same spinlock always see a consistent triplet.
 */
void nilfs_set_last_segment(struct the_nilfs *nilfs,
			    sector_t start_blocknr, u64 seq, __u64 cno)
{
	spin_lock(&nilfs->ns_last_segment_lock);
	nilfs->ns_last_pseg = start_blocknr;
	nilfs->ns_last_seq = seq;
	nilfs->ns_last_cno = cno;
	spin_unlock(&nilfs->ns_last_segment_lock);
}
47 | |||
/**
 * alloc_nilfs - allocate the_nilfs structure
 * @bdev: block device to which the_nilfs is related
 *
 * alloc_nilfs() allocates memory for the_nilfs and
 * initializes its reference count and locks.
 *
 * Return Value: On success, pointer to the_nilfs is returned.
 * On error, NULL is returned.
 */
struct the_nilfs *alloc_nilfs(struct block_device *bdev)
{
	struct the_nilfs *nilfs;

	/* kzalloc: every field not set below starts out zero/NULL */
	nilfs = kzalloc(sizeof(*nilfs), GFP_KERNEL);
	if (!nilfs)
		return NULL;

	nilfs->ns_bdev = bdev;
	atomic_set(&nilfs->ns_count, 1);	/* the caller holds the first ref */
	/*
	 * NOTE(review): -1 appears to mean "no writer attached yet" —
	 * confirm against the segment constructor attach/detach code.
	 */
	atomic_set(&nilfs->ns_writer_refcount, -1);
	atomic_set(&nilfs->ns_ndirtyblks, 0);
	init_rwsem(&nilfs->ns_sem);
	mutex_init(&nilfs->ns_writer_mutex);
	INIT_LIST_HEAD(&nilfs->ns_supers);
	spin_lock_init(&nilfs->ns_last_segment_lock);
	nilfs->ns_gc_inodes_h = NULL;
	init_rwsem(&nilfs->ns_segctor_sem);

	return nilfs;
}
79 | |||
/**
 * put_nilfs - release a reference to the_nilfs
 * @nilfs: the_nilfs structure to be released
 *
 * put_nilfs() decrements a reference counter of the_nilfs.
 * If the reference count reaches zero, the_nilfs is freed.
 */
void put_nilfs(struct the_nilfs *nilfs)
{
	if (!atomic_dec_and_test(&nilfs->ns_count))
		return;
	/*
	 * Increment of ns_count never occur below because the caller
	 * of get_nilfs() holds at least one reference to the_nilfs.
	 * Thus its exclusion control is not required here.
	 */
	might_sleep();
	if (nilfs_loaded(nilfs)) {
		/* tear down the metadata files set up by load_nilfs() */
		nilfs_mdt_clear(nilfs->ns_sufile);
		nilfs_mdt_destroy(nilfs->ns_sufile);
		nilfs_mdt_clear(nilfs->ns_cpfile);
		nilfs_mdt_destroy(nilfs->ns_cpfile);
		nilfs_mdt_clear(nilfs->ns_dat);
		nilfs_mdt_destroy(nilfs->ns_dat);
		/* XXX: how and when to clear nilfs->ns_gc_dat? */
		nilfs_mdt_destroy(nilfs->ns_gc_dat);
	}
	if (nilfs_init(nilfs)) {
		/* release the GC page cache and both super block buffers */
		nilfs_destroy_gccache(nilfs);
		brelse(nilfs->ns_sbh[0]);
		brelse(nilfs->ns_sbh[1]);
	}
	kfree(nilfs);
}
114 | |||
/*
 * nilfs_load_super_root - read the super root block and instantiate the
 * metadata files (DAT, GC-DAT shadow, cpfile, sufile) it describes.
 * @nilfs: the_nilfs structure
 * @sbi: nilfs_sb_info of the mount doing the load
 * @sr_block: disk block number of the super root
 *
 * Note the unusual label layout: the SUCCESS path also exits through the
 * "failed" label, which only releases the super root buffer and returns
 * err (0 on success).  The error labels below it fall through so that
 * each one destroys everything created up to that point.
 *
 * Return: 0 on success or a negative error code.
 */
static int nilfs_load_super_root(struct the_nilfs *nilfs,
				 struct nilfs_sb_info *sbi, sector_t sr_block)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *raw_sr;
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	unsigned dat_entry_size, segment_usage_size, checkpoint_size;
	unsigned inode_size;
	int err;

	err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1);
	if (unlikely(err))
		return err;

	/* per-file entry sizes come from the super block, under ns_sem */
	down_read(&nilfs->ns_sem);
	dat_entry_size = le16_to_cpu(sbp[0]->s_dat_entry_size);
	checkpoint_size = le16_to_cpu(sbp[0]->s_checkpoint_size);
	segment_usage_size = le16_to_cpu(sbp[0]->s_segment_usage_size);
	up_read(&nilfs->ns_sem);

	inode_size = nilfs->ns_inode_size;

	err = -ENOMEM;
	nilfs->ns_dat = nilfs_mdt_new(
		nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
	if (unlikely(!nilfs->ns_dat))
		goto failed;

	nilfs->ns_gc_dat = nilfs_mdt_new(
		nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
	if (unlikely(!nilfs->ns_gc_dat))
		goto failed_dat;

	nilfs->ns_cpfile = nilfs_mdt_new(
		nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP);
	if (unlikely(!nilfs->ns_cpfile))
		goto failed_gc_dat;

	nilfs->ns_sufile = nilfs_mdt_new(
		nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP);
	if (unlikely(!nilfs->ns_sufile))
		goto failed_cpfile;

	err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size);
	if (unlikely(err))
		goto failed_sufile;

	err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size);
	if (unlikely(err))
		goto failed_sufile;

	/* ns_gc_dat shadows ns_dat for garbage collection */
	nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
	nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
				 sizeof(struct nilfs_cpfile_header));
	nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size,
				 sizeof(struct nilfs_sufile_header));

	err = nilfs_mdt_read_inode_direct(
		nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size));
	if (unlikely(err))
		goto failed_sufile;

	err = nilfs_mdt_read_inode_direct(
		nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size));
	if (unlikely(err))
		goto failed_sufile;

	err = nilfs_mdt_read_inode_direct(
		nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size));
	if (unlikely(err))
		goto failed_sufile;

	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
	nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime);

	/* shared exit: success (err == 0) and errors both pass through here */
 failed:
	brelse(bh_sr);
	return err;

	/* error unwinding: each label falls through to the previous stage */
 failed_sufile:
	nilfs_mdt_destroy(nilfs->ns_sufile);

 failed_cpfile:
	nilfs_mdt_destroy(nilfs->ns_cpfile);

 failed_gc_dat:
	nilfs_mdt_destroy(nilfs->ns_gc_dat);

 failed_dat:
	nilfs_mdt_destroy(nilfs->ns_dat);
	goto failed;
}
207 | |||
/* Initialize a recovery descriptor: zero it and set up its segment list. */
static void nilfs_init_recovery_info(struct nilfs_recovery_info *ri)
{
	memset(ri, 0, sizeof(*ri));
	INIT_LIST_HEAD(&ri->ri_used_segments);
}
213 | |||
/* Release the segments accumulated on the recovery descriptor's list. */
static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
{
	nilfs_dispose_segment_list(&ri->ri_used_segments);
}
218 | |||
219 | /** | ||
220 | * load_nilfs - load and recover the nilfs | ||
221 | * @nilfs: the_nilfs structure to be released | ||
222 | * @sbi: nilfs_sb_info used to recover past segment | ||
223 | * | ||
224 | * load_nilfs() searches and load the latest super root, | ||
225 | * attaches the last segment, and does recovery if needed. | ||
226 | * The caller must call this exclusively for simultaneous mounts. | ||
227 | */ | ||
228 | int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | ||
229 | { | ||
230 | struct nilfs_recovery_info ri; | ||
231 | unsigned int s_flags = sbi->s_super->s_flags; | ||
232 | int really_read_only = bdev_read_only(nilfs->ns_bdev); | ||
233 | unsigned valid_fs; | ||
234 | int err = 0; | ||
235 | |||
236 | nilfs_init_recovery_info(&ri); | ||
237 | |||
238 | down_write(&nilfs->ns_sem); | ||
239 | valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); | ||
240 | up_write(&nilfs->ns_sem); | ||
241 | |||
242 | if (!valid_fs && (s_flags & MS_RDONLY)) { | ||
243 | printk(KERN_INFO "NILFS: INFO: recovery " | ||
244 | "required for readonly filesystem.\n"); | ||
245 | if (really_read_only) { | ||
246 | printk(KERN_ERR "NILFS: write access " | ||
247 | "unavailable, cannot proceed.\n"); | ||
248 | err = -EROFS; | ||
249 | goto failed; | ||
250 | } | ||
251 | printk(KERN_INFO "NILFS: write access will " | ||
252 | "be enabled during recovery.\n"); | ||
253 | sbi->s_super->s_flags &= ~MS_RDONLY; | ||
254 | } | ||
255 | |||
256 | err = nilfs_search_super_root(nilfs, sbi, &ri); | ||
257 | if (unlikely(err)) { | ||
258 | printk(KERN_ERR "NILFS: error searching super root.\n"); | ||
259 | goto failed; | ||
260 | } | ||
261 | |||
262 | err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); | ||
263 | if (unlikely(err)) { | ||
264 | printk(KERN_ERR "NILFS: error loading super root.\n"); | ||
265 | goto failed; | ||
266 | } | ||
267 | |||
268 | if (!valid_fs) { | ||
269 | err = nilfs_recover_logical_segments(nilfs, sbi, &ri); | ||
270 | if (unlikely(err)) { | ||
271 | nilfs_mdt_destroy(nilfs->ns_cpfile); | ||
272 | nilfs_mdt_destroy(nilfs->ns_sufile); | ||
273 | nilfs_mdt_destroy(nilfs->ns_dat); | ||
274 | goto failed; | ||
275 | } | ||
276 | if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) | ||
277 | sbi->s_super->s_dirt = 1; | ||
278 | } | ||
279 | |||
280 | set_nilfs_loaded(nilfs); | ||
281 | |||
282 | failed: | ||
283 | nilfs_clear_recovery_info(&ri); | ||
284 | sbi->s_super->s_flags = s_flags; | ||
285 | return err; | ||
286 | } | ||
287 | |||
288 | static unsigned long long nilfs_max_size(unsigned int blkbits) | ||
289 | { | ||
290 | unsigned int max_bits; | ||
291 | unsigned long long res = MAX_LFS_FILESIZE; /* page cache limit */ | ||
292 | |||
293 | max_bits = blkbits + NILFS_BMAP_KEY_BIT; /* bmap size limit */ | ||
294 | if (max_bits < 64) | ||
295 | res = min_t(unsigned long long, res, (1ULL << max_bits) - 1); | ||
296 | return res; | ||
297 | } | ||
298 | |||
/*
 * nilfs_store_disk_layout - validate a super block and cache its layout
 * parameters in the_nilfs.
 * @nilfs: the_nilfs structure
 * @sbp: super block to read from
 *
 * Return: 0 on success, -EINVAL if the revision or geometry is invalid.
 */
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
				   struct nilfs_super_block *sbp)
{
	if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) {
		printk(KERN_ERR "NILFS: revision mismatch "
		       "(superblock rev.=%d.%d, current rev.=%d.%d). "
		       "Please check the version of mkfs.nilfs.\n",
		       le32_to_cpu(sbp->s_rev_level),
		       le16_to_cpu(sbp->s_minor_rev_level),
		       NILFS_CURRENT_REV, NILFS_MINOR_REV);
		return -EINVAL;
	}
	nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes);
	if (nilfs->ns_sbsize > BLOCK_SIZE)
		return -EINVAL;

	nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size);
	nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);

	nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
	if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
		printk(KERN_ERR "NILFS: too short segment. \n");
		return -EINVAL;
	}

	nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
	nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
	nilfs->ns_r_segments_percentage =
		le32_to_cpu(sbp->s_r_segments_percentage);
	/* reserved segments: at least NILFS_MIN_NRSVSEGS, else percentage */
	nilfs->ns_nrsvsegs =
		max_t(unsigned long, NILFS_MIN_NRSVSEGS,
		      DIV_ROUND_UP(nilfs->ns_nsegments *
				   nilfs->ns_r_segments_percentage, 100));
	nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
	return 0;
}
335 | |||
/*
 * nilfs_valid_sb - verify the magic number and CRC of a super block
 * @sbp: super block candidate (may be NULL)
 *
 * The on-disk checksum covers the first s_bytes of the super block with
 * the 4-byte s_sum field treated as zero; the static all-zero "sum"
 * array below is substituted for that field when recomputing the CRC.
 *
 * Return: nonzero if @sbp looks like a valid nilfs super block.
 */
static int nilfs_valid_sb(struct nilfs_super_block *sbp)
{
	static unsigned char sum[4];	/* stays all-zero; never written */
	const int sumoff = offsetof(struct nilfs_super_block, s_sum);
	size_t bytes;
	u32 crc;

	if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
		return 0;
	bytes = le16_to_cpu(sbp->s_bytes);
	if (bytes > BLOCK_SIZE)
		return 0;
	crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
		       sumoff);
	crc = crc32_le(crc, sum, 4);
	crc = crc32_le(crc, (unsigned char *)sbp + sumoff + 4,
		       bytes - sumoff - 4);
	return crc == le32_to_cpu(sbp->s_sum);
}
355 | |||
356 | static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) | ||
357 | { | ||
358 | return offset < ((le64_to_cpu(sbp->s_nsegments) * | ||
359 | le32_to_cpu(sbp->s_blocks_per_segment)) << | ||
360 | (le32_to_cpu(sbp->s_log_block_size) + 10)); | ||
361 | } | ||
362 | |||
363 | static void nilfs_release_super_block(struct the_nilfs *nilfs) | ||
364 | { | ||
365 | int i; | ||
366 | |||
367 | for (i = 0; i < 2; i++) { | ||
368 | if (nilfs->ns_sbp[i]) { | ||
369 | brelse(nilfs->ns_sbh[i]); | ||
370 | nilfs->ns_sbh[i] = NULL; | ||
371 | nilfs->ns_sbp[i] = NULL; | ||
372 | } | ||
373 | } | ||
374 | } | ||
375 | |||
/*
 * nilfs_fall_back_super_block - discard the primary super block copy and
 * promote the spare (secondary) one into slot 0.  Used when the primary
 * copy turns out to be unusable.
 */
void nilfs_fall_back_super_block(struct the_nilfs *nilfs)
{
	/* release the old primary buffer before overwriting its slot */
	brelse(nilfs->ns_sbh[0]);
	nilfs->ns_sbh[0] = nilfs->ns_sbh[1];
	nilfs->ns_sbp[0] = nilfs->ns_sbp[1];
	nilfs->ns_sbh[1] = NULL;
	nilfs->ns_sbp[1] = NULL;
}
384 | |||
385 | void nilfs_swap_super_block(struct the_nilfs *nilfs) | ||
386 | { | ||
387 | struct buffer_head *tsbh = nilfs->ns_sbh[0]; | ||
388 | struct nilfs_super_block *tsbp = nilfs->ns_sbp[0]; | ||
389 | |||
390 | nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; | ||
391 | nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; | ||
392 | nilfs->ns_sbh[1] = tsbh; | ||
393 | nilfs->ns_sbp[1] = tsbp; | ||
394 | } | ||
395 | |||
/*
 * nilfs_load_super_block - read both super block copies and select one
 * @nilfs: the_nilfs structure
 * @sb: VFS super block (used for device I/O and messages)
 * @blocksize: block size to read with
 * @sbpp: output; set to the selected (slot-0) super block
 *
 * Reads the primary copy and the spare copy near the end of the device,
 * CRC-validates both, and leaves the newer valid one in slot 0 (swapping
 * if needed).  A spare whose recorded geometry says the spare offset
 * lies inside the filesystem area is discarded.
 *
 * Return: 0 on success, -EIO if neither copy could be read, -EINVAL if
 * no valid copy was found.
 */
static int nilfs_load_super_block(struct the_nilfs *nilfs,
				  struct super_block *sb, int blocksize,
				  struct nilfs_super_block **sbpp)
{
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	struct buffer_head **sbh = nilfs->ns_sbh;
	u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
	int valid[2], swp = 0;

	sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
					&sbh[0]);
	sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);

	if (!sbp[0]) {
		if (!sbp[1]) {
			printk(KERN_ERR "NILFS: unable to read superblock\n");
			return -EIO;
		}
		printk(KERN_WARNING
		       "NILFS warning: unable to read primary superblock\n");
	} else if (!sbp[1])
		printk(KERN_WARNING
		       "NILFS warning: unable to read secondary superblock\n");

	valid[0] = nilfs_valid_sb(sbp[0]);
	valid[1] = nilfs_valid_sb(sbp[1]);
	/* swp: prefer the spare if it is the only valid copy or the newer one */
	swp = valid[1] &&
		(!valid[0] ||
		 le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime));

	if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) {
		/* the spare location is inconsistent with the geometry: drop it */
		brelse(sbh[1]);
		sbh[1] = NULL;
		sbp[1] = NULL;
		swp = 0;
	}
	if (!valid[swp]) {
		nilfs_release_super_block(nilfs);
		printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n",
		       sb->s_id);
		return -EINVAL;
	}

	if (swp) {
		printk(KERN_WARNING "NILFS warning: broken superblock. "
		       "using spare superblock.\n");
		nilfs_swap_super_block(nilfs);
	}

	nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime);
	nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0;
	/*
	 * NOTE(review): the index below is 1 only when the secondary copy
	 * is valid and was NOT swapped into slot 0, and 0 otherwise —
	 * verify that this is the intended source of ns_prot_seq.
	 */
	nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
	*sbpp = sbp[0];
	return 0;
}
451 | |||
452 | /** | ||
453 | * init_nilfs - initialize a NILFS instance. | ||
454 | * @nilfs: the_nilfs structure | ||
455 | * @sbi: nilfs_sb_info | ||
456 | * @sb: super block | ||
457 | * @data: mount options | ||
458 | * | ||
459 | * init_nilfs() performs common initialization per block device (e.g. | ||
460 | * reading the super block, getting disk layout information, initializing | ||
461 | * shared fields in the_nilfs). It takes on some portion of the jobs | ||
462 | * typically done by a fill_super() routine. This division arises from | ||
463 | * the nature that multiple NILFS instances may be simultaneously | ||
464 | * mounted on a device. | ||
465 | * For multiple mounts on the same device, only the first mount | ||
466 | * invokes these tasks. | ||
467 | * | ||
468 | * Return Value: On success, 0 is returned. On error, a negative error | ||
469 | * code is returned. | ||
470 | */ | ||
471 | int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | ||
472 | { | ||
473 | struct super_block *sb = sbi->s_super; | ||
474 | struct nilfs_super_block *sbp; | ||
475 | struct backing_dev_info *bdi; | ||
476 | int blocksize; | ||
477 | int err; | ||
478 | |||
479 | down_write(&nilfs->ns_sem); | ||
480 | if (nilfs_init(nilfs)) { | ||
481 | /* Load values from existing the_nilfs */ | ||
482 | sbp = nilfs->ns_sbp[0]; | ||
483 | err = nilfs_store_magic_and_option(sb, sbp, data); | ||
484 | if (err) | ||
485 | goto out; | ||
486 | |||
487 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | ||
488 | if (sb->s_blocksize != blocksize && | ||
489 | !sb_set_blocksize(sb, blocksize)) { | ||
490 | printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", | ||
491 | blocksize); | ||
492 | err = -EINVAL; | ||
493 | } | ||
494 | sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); | ||
495 | goto out; | ||
496 | } | ||
497 | |||
498 | blocksize = sb_min_blocksize(sb, BLOCK_SIZE); | ||
499 | if (!blocksize) { | ||
500 | printk(KERN_ERR "NILFS: unable to set blocksize\n"); | ||
501 | err = -EINVAL; | ||
502 | goto out; | ||
503 | } | ||
504 | err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); | ||
505 | if (err) | ||
506 | goto out; | ||
507 | |||
508 | err = nilfs_store_magic_and_option(sb, sbp, data); | ||
509 | if (err) | ||
510 | goto failed_sbh; | ||
511 | |||
512 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | ||
513 | if (sb->s_blocksize != blocksize) { | ||
514 | int hw_blocksize = bdev_hardsect_size(sb->s_bdev); | ||
515 | |||
516 | if (blocksize < hw_blocksize) { | ||
517 | printk(KERN_ERR | ||
518 | "NILFS: blocksize %d too small for device " | ||
519 | "(sector-size = %d).\n", | ||
520 | blocksize, hw_blocksize); | ||
521 | err = -EINVAL; | ||
522 | goto failed_sbh; | ||
523 | } | ||
524 | nilfs_release_super_block(nilfs); | ||
525 | sb_set_blocksize(sb, blocksize); | ||
526 | |||
527 | err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); | ||
528 | if (err) | ||
529 | goto out; | ||
530 | /* not failed_sbh; sbh is released automatically | ||
531 | when reloading fails. */ | ||
532 | } | ||
533 | nilfs->ns_blocksize_bits = sb->s_blocksize_bits; | ||
534 | |||
535 | err = nilfs_store_disk_layout(nilfs, sbp); | ||
536 | if (err) | ||
537 | goto failed_sbh; | ||
538 | |||
539 | sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); | ||
540 | |||
541 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); | ||
542 | |||
543 | bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; | ||
544 | if (!bdi) | ||
545 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; | ||
546 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; | ||
547 | |||
548 | /* Finding last segment */ | ||
549 | nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); | ||
550 | nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); | ||
551 | nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); | ||
552 | |||
553 | nilfs->ns_seg_seq = nilfs->ns_last_seq; | ||
554 | nilfs->ns_segnum = | ||
555 | nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); | ||
556 | nilfs->ns_cno = nilfs->ns_last_cno + 1; | ||
557 | if (nilfs->ns_segnum >= nilfs->ns_nsegments) { | ||
558 | printk(KERN_ERR "NILFS invalid last segment number.\n"); | ||
559 | err = -EINVAL; | ||
560 | goto failed_sbh; | ||
561 | } | ||
562 | /* Dummy values */ | ||
563 | nilfs->ns_free_segments_count = | ||
564 | nilfs->ns_nsegments - (nilfs->ns_segnum + 1); | ||
565 | |||
566 | /* Initialize gcinode cache */ | ||
567 | err = nilfs_init_gccache(nilfs); | ||
568 | if (err) | ||
569 | goto failed_sbh; | ||
570 | |||
571 | set_nilfs_init(nilfs); | ||
572 | err = 0; | ||
573 | out: | ||
574 | up_write(&nilfs->ns_sem); | ||
575 | return err; | ||
576 | |||
577 | failed_sbh: | ||
578 | nilfs_release_super_block(nilfs); | ||
579 | goto out; | ||
580 | } | ||
581 | |||
582 | int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) | ||
583 | { | ||
584 | struct inode *dat = nilfs_dat_inode(nilfs); | ||
585 | unsigned long ncleansegs; | ||
586 | int err; | ||
587 | |||
588 | down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
589 | err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); | ||
590 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
591 | if (likely(!err)) | ||
592 | *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; | ||
593 | return err; | ||
594 | } | ||
595 | |||
596 | int nilfs_near_disk_full(struct the_nilfs *nilfs) | ||
597 | { | ||
598 | struct inode *sufile = nilfs->ns_sufile; | ||
599 | unsigned long ncleansegs, nincsegs; | ||
600 | int ret; | ||
601 | |||
602 | ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); | ||
603 | if (likely(!ret)) { | ||
604 | nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / | ||
605 | nilfs->ns_blocks_per_segment + 1; | ||
606 | if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) | ||
607 | ret++; | ||
608 | } | ||
609 | return ret; | ||
610 | } | ||
611 | |||
612 | int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, | ||
613 | int snapshot_mount) | ||
614 | { | ||
615 | struct nilfs_sb_info *sbi; | ||
616 | int ret = 0; | ||
617 | |||
618 | down_read(&nilfs->ns_sem); | ||
619 | if (cno == 0 || cno > nilfs->ns_cno) | ||
620 | goto out_unlock; | ||
621 | |||
622 | list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { | ||
623 | if (sbi->s_snapshot_cno == cno && | ||
624 | (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { | ||
625 | /* exclude read-only mounts */ | ||
626 | ret++; | ||
627 | break; | ||
628 | } | ||
629 | } | ||
630 | /* for protecting recent checkpoints */ | ||
631 | if (cno >= nilfs_last_cno(nilfs)) | ||
632 | ret++; | ||
633 | |||
634 | out_unlock: | ||
635 | up_read(&nilfs->ns_sem); | ||
636 | return ret; | ||
637 | } | ||
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h new file mode 100644 index 000000000000..30fe58778d05 --- /dev/null +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * the_nilfs.h - the_nilfs shared structure. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef _THE_NILFS_H | ||
25 | #define _THE_NILFS_H | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/blkdev.h> | ||
31 | #include <linux/backing-dev.h> | ||
32 | #include "sb.h" | ||
33 | |||
/* the_nilfs struct */
/* State bits kept in the_nilfs->ns_flags; accessed via THE_NILFS_FNS(). */
enum {
	THE_NILFS_INIT = 0,     /* Information from super_block is set */
	THE_NILFS_LOADED,       /* Roll-back/roll-forward has done and
				   the latest checkpoint was loaded */
	THE_NILFS_DISCONTINUED,	/* 'next' pointer chain has broken */
};
41 | |||
/**
 * struct the_nilfs - struct to supervise multiple nilfs mount points
 * @ns_flags: flags
 * @ns_count: reference count
 * @ns_bdev: block device
 * @ns_bdi: backing dev info
 * @ns_writer: back pointer to writable nilfs_sb_info
 * @ns_sem: semaphore for shared states
 * @ns_writer_mutex: mutex protecting ns_writer attach/detach
 * @ns_writer_refcount: number of referrers on ns_writer
 * @ns_sbh: buffer heads of on-disk super blocks
 * @ns_sbp: pointers to super block data
 * @ns_sbwtime: previous write time of super blocks
 * @ns_sbsize: size of valid data in super block
 * @ns_mount_state: file system state stored in the super block
 * @ns_supers: list of nilfs super block structs
 * @ns_seg_seq: segment sequence counter
 * @ns_segnum: index number of the latest full segment.
 * @ns_nextnum: index number of the full segment index to be used next
 * @ns_pseg_offset: offset of next partial segment in the current full segment
 * @ns_cno: next checkpoint number
 * @ns_ctime: write time of the last segment
 * @ns_nongc_ctime: write time of the last segment not for cleaner operation
 * @ns_ndirtyblks: Number of dirty data blocks
 * @ns_last_segment_lock: lock protecting fields for the latest segment
 * @ns_last_pseg: start block number of the latest segment
 * @ns_last_seq: sequence value of the latest segment
 * @ns_last_cno: checkpoint number of the latest segment
 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
 * @ns_free_segments_count: counter of free segments
 * @ns_segctor_sem: segment constructor semaphore
 * @ns_dat: DAT file inode
 * @ns_cpfile: checkpoint file inode
 * @ns_sufile: segusage file inode
 * @ns_gc_dat: shadow inode of the DAT file inode for GC
 * @ns_gc_inodes: dummy inodes to keep live blocks
 * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks
 * @ns_blocksize_bits: bit length of block size
 * @ns_nsegments: number of segments in filesystem
 * @ns_blocks_per_segment: number of blocks per segment
 * @ns_r_segments_percentage: reserved segments percentage
 * @ns_nrsvsegs: number of reserved segments
 * @ns_first_data_block: block number of first data block
 * @ns_inode_size: size of on-disk inode
 * @ns_first_ino: first not-special inode number
 * @ns_crc_seed: seed value of CRC32 calculation
 */
struct the_nilfs {
	unsigned long		ns_flags;
	atomic_t		ns_count;

	struct block_device    *ns_bdev;
	struct backing_dev_info *ns_bdi;
	struct nilfs_sb_info   *ns_writer;
	struct rw_semaphore	ns_sem;
	struct mutex		ns_writer_mutex;
	atomic_t		ns_writer_refcount;

	/*
	 * used for
	 * - loading the latest checkpoint exclusively.
	 * - allocating a new full segment.
	 * - protecting s_dirt in the super_block struct
	 *   (see nilfs_write_super) and the following fields.
	 */
	struct buffer_head     *ns_sbh[2];
	struct nilfs_super_block *ns_sbp[2];
	time_t			ns_sbwtime[2];
	unsigned		ns_sbsize;
	unsigned		ns_mount_state;
	struct list_head	ns_supers;

	/*
	 * Following fields are dedicated to a writable FS-instance.
	 * Except for the period seeking checkpoint, code outside the segment
	 * constructor must lock a segment semaphore while accessing these
	 * fields.
	 * The writable FS-instance is sole during a lifetime of the_nilfs.
	 */
	u64			ns_seg_seq;
	__u64			ns_segnum;
	__u64			ns_nextnum;
	unsigned long		ns_pseg_offset;
	__u64			ns_cno;
	time_t			ns_ctime;
	time_t			ns_nongc_ctime;
	atomic_t		ns_ndirtyblks;

	/*
	 * The following fields hold information on the latest partial segment
	 * written to disk with a super root.  These fields are protected by
	 * ns_last_segment_lock.
	 */
	spinlock_t		ns_last_segment_lock;
	sector_t		ns_last_pseg;
	u64			ns_last_seq;
	__u64			ns_last_cno;
	u64			ns_prot_seq;
	unsigned long		ns_free_segments_count;

	struct rw_semaphore	ns_segctor_sem;

	/*
	 * Following fields are lock free except for the period before
	 * the_nilfs is initialized.
	 */
	struct inode	       *ns_dat;
	struct inode	       *ns_cpfile;
	struct inode	       *ns_sufile;
	struct inode	       *ns_gc_dat;

	/* GC inode list and hash table head */
	struct list_head	ns_gc_inodes;
	struct hlist_head      *ns_gc_inodes_h;

	/* Disk layout information (static) */
	unsigned int		ns_blocksize_bits;
	unsigned long		ns_nsegments;
	unsigned long		ns_blocks_per_segment;
	unsigned long		ns_r_segments_percentage;
	unsigned long		ns_nrsvsegs;
	unsigned long		ns_first_data_block;
	int			ns_inode_size;
	int			ns_first_ino;
	u32			ns_crc_seed;
};
167 | |||
168 | #define NILFS_GCINODE_HASH_BITS 8 | ||
169 | #define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS) | ||
170 | |||
/*
 * THE_NILFS_FNS(bit, name) generates three static inline accessors for a
 * THE_NILFS_<bit> flag in the_nilfs->ns_flags:
 *   set_nilfs_<name>(), clear_nilfs_<name>() and nilfs_<name>() (test).
 * They delegate to the kernel bitops set_bit/clear_bit/test_bit.
 */
#define THE_NILFS_FNS(bit, name)					\
static inline void set_nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	set_bit(THE_NILFS_##bit, &(nilfs)->ns_flags);			\
}									\
static inline void clear_nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags);			\
}									\
static inline int nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags);		\
}

THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(LOADED, loaded)
THE_NILFS_FNS(DISCONTINUED, discontinued)
188 | |||
189 | /* Minimum interval of periodical update of superblocks (in seconds) */ | ||
190 | #define NILFS_SB_FREQ 10 | ||
191 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ | ||
192 | |||
193 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); | ||
194 | struct the_nilfs *alloc_nilfs(struct block_device *); | ||
195 | void put_nilfs(struct the_nilfs *); | ||
196 | int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); | ||
197 | int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); | ||
198 | int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); | ||
199 | int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); | ||
200 | int nilfs_near_disk_full(struct the_nilfs *); | ||
201 | void nilfs_fall_back_super_block(struct the_nilfs *); | ||
202 | void nilfs_swap_super_block(struct the_nilfs *); | ||
203 | |||
204 | |||
/* Take an additional reference on @nilfs (released with put_nilfs()). */
static inline void get_nilfs(struct the_nilfs *nilfs)
{
	/* Caller must have at least one reference of the_nilfs. */
	atomic_inc(&nilfs->ns_count);
}
210 | |||
/*
 * Pin and return the writable FS-instance (may be NULL).
 * NOTE(review): atomic_inc_and_test() fires when the counter becomes 0,
 * which implies ns_writer_refcount is biased to start at -1 so that only
 * the first referrer takes ns_writer_mutex — confirm against the counter
 * initialization in alloc_nilfs().
 */
static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
{
	if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
		mutex_lock(&nilfs->ns_writer_mutex);
	return nilfs->ns_writer;
}
217 | |||
/*
 * Drop a reference taken by nilfs_get_writer().  When the counter goes
 * negative again (last referrer gone), ns_writer_mutex is released.
 */
static inline void nilfs_put_writer(struct the_nilfs *nilfs)
{
	if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
		mutex_unlock(&nilfs->ns_writer_mutex);
}
223 | |||
/* Register @sbi as the writable FS-instance under ns_writer_mutex. */
static inline void
nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
	mutex_lock(&nilfs->ns_writer_mutex);
	nilfs->ns_writer = sbi;
	mutex_unlock(&nilfs->ns_writer_mutex);
}
231 | |||
/*
 * Unregister @sbi as the writable FS-instance.  The compare guards
 * against clearing a writer that was already replaced by another sbi.
 */
static inline void
nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
	mutex_lock(&nilfs->ns_writer_mutex);
	if (sbi == nilfs->ns_writer)
		nilfs->ns_writer = NULL;
	mutex_unlock(&nilfs->ns_writer_mutex);
}
240 | |||
241 | static inline void | ||
242 | nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, | ||
243 | sector_t *seg_start, sector_t *seg_end) | ||
244 | { | ||
245 | *seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum; | ||
246 | *seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1; | ||
247 | if (segnum == 0) | ||
248 | *seg_start = nilfs->ns_first_data_block; | ||
249 | } | ||
250 | |||
251 | static inline sector_t | ||
252 | nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum) | ||
253 | { | ||
254 | return (segnum == 0) ? nilfs->ns_first_data_block : | ||
255 | (sector_t)nilfs->ns_blocks_per_segment * segnum; | ||
256 | } | ||
257 | |||
/*
 * nilfs_get_segnum_of_block - map a disk block number to its segment number
 * sector_div() divides @segnum in place (64-bit safe) and yields the
 * remainder, which is ignored here.
 */
static inline __u64
nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
{
	sector_t segnum = blocknr;

	sector_div(segnum, nilfs->ns_blocks_per_segment);
	return segnum;
}
266 | |||
/*
 * Mark the current full segment as exhausted by pushing ns_pseg_offset
 * past its last block, so the next write moves to a new segment.
 */
static inline void
nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
			sector_t seg_end)
{
	/* terminate the current full segment (used in case of I/O-error) */
	nilfs->ns_pseg_offset = seg_end - seg_start + 1;
}
274 | |||
/* Advance the write position to the next full segment (ns_nextnum). */
static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
{
	/* move forward with a full segment */
	nilfs->ns_segnum = nilfs->ns_nextnum;
	nilfs->ns_pseg_offset = 0;
	nilfs->ns_seg_seq++;
}
282 | |||
/*
 * Read ns_last_cno consistently by taking a snapshot under
 * ns_last_segment_lock (the lock guarding the latest-segment fields).
 */
static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
{
	__u64 cno;

	spin_lock(&nilfs->ns_last_segment_lock);
	cno = nilfs->ns_last_cno;
	spin_unlock(&nilfs->ns_last_segment_lock);
	return cno;
}
292 | |||
293 | static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n) | ||
294 | { | ||
295 | return n == nilfs->ns_segnum || n == nilfs->ns_nextnum; | ||
296 | } | ||
297 | |||
298 | #endif /* _THE_NILFS_H */ | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a5887df2cd8a..8672b9536039 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1926,7 +1926,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1926 | out->f_path.dentry->d_name.len, | 1926 | out->f_path.dentry->d_name.len, |
1927 | out->f_path.dentry->d_name.name); | 1927 | out->f_path.dentry->d_name.name); |
1928 | 1928 | ||
1929 | inode_double_lock(inode, pipe->inode); | 1929 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
1930 | 1930 | ||
1931 | ret = ocfs2_rw_lock(inode, 1); | 1931 | ret = ocfs2_rw_lock(inode, 1); |
1932 | if (ret < 0) { | 1932 | if (ret < 0) { |
@@ -1941,12 +1941,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1941 | goto out_unlock; | 1941 | goto out_unlock; |
1942 | } | 1942 | } |
1943 | 1943 | ||
1944 | if (pipe->inode) | ||
1945 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1944 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); | 1946 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); |
1947 | if (pipe->inode) | ||
1948 | mutex_unlock(&pipe->inode->i_mutex); | ||
1945 | 1949 | ||
1946 | out_unlock: | 1950 | out_unlock: |
1947 | ocfs2_rw_unlock(inode, 1); | 1951 | ocfs2_rw_unlock(inode, 1); |
1948 | out: | 1952 | out: |
1949 | inode_double_unlock(inode, pipe->inode); | 1953 | mutex_unlock(&inode->i_mutex); |
1950 | 1954 | ||
1951 | mlog_exit(ret); | 1955 | mlog_exit(ret); |
1952 | return ret; | 1956 | return ret; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b0ae0be4801f..39e4ad4f59f4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -204,6 +204,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
204 | struct file *file = vma->vm_file; | 204 | struct file *file = vma->vm_file; |
205 | int flags = vma->vm_flags; | 205 | int flags = vma->vm_flags; |
206 | unsigned long ino = 0; | 206 | unsigned long ino = 0; |
207 | unsigned long long pgoff = 0; | ||
207 | dev_t dev = 0; | 208 | dev_t dev = 0; |
208 | int len; | 209 | int len; |
209 | 210 | ||
@@ -211,6 +212,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
211 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 212 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
212 | dev = inode->i_sb->s_dev; | 213 | dev = inode->i_sb->s_dev; |
213 | ino = inode->i_ino; | 214 | ino = inode->i_ino; |
215 | pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; | ||
214 | } | 216 | } |
215 | 217 | ||
216 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", | 218 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
@@ -220,7 +222,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
220 | flags & VM_WRITE ? 'w' : '-', | 222 | flags & VM_WRITE ? 'w' : '-', |
221 | flags & VM_EXEC ? 'x' : '-', | 223 | flags & VM_EXEC ? 'x' : '-', |
222 | flags & VM_MAYSHARE ? 's' : 'p', | 224 | flags & VM_MAYSHARE ? 's' : 'p', |
223 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, | 225 | pgoff, |
224 | MAJOR(dev), MINOR(dev), ino, &len); | 226 | MAJOR(dev), MINOR(dev), ino, &len); |
225 | 227 | ||
226 | /* | 228 | /* |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 863464d5519c..64a72e2e7650 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -126,6 +126,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
126 | struct file *file; | 126 | struct file *file; |
127 | dev_t dev = 0; | 127 | dev_t dev = 0; |
128 | int flags, len; | 128 | int flags, len; |
129 | unsigned long long pgoff = 0; | ||
129 | 130 | ||
130 | flags = vma->vm_flags; | 131 | flags = vma->vm_flags; |
131 | file = vma->vm_file; | 132 | file = vma->vm_file; |
@@ -134,6 +135,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
134 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 135 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
135 | dev = inode->i_sb->s_dev; | 136 | dev = inode->i_sb->s_dev; |
136 | ino = inode->i_ino; | 137 | ino = inode->i_ino; |
138 | pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; | ||
137 | } | 139 | } |
138 | 140 | ||
139 | seq_printf(m, | 141 | seq_printf(m, |
@@ -144,7 +146,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
144 | flags & VM_WRITE ? 'w' : '-', | 146 | flags & VM_WRITE ? 'w' : '-', |
145 | flags & VM_EXEC ? 'x' : '-', | 147 | flags & VM_EXEC ? 'x' : '-', |
146 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | 148 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
147 | (unsigned long long) vma->vm_pgoff << PAGE_SHIFT, | 149 | pgoff, |
148 | MAJOR(dev), MINOR(dev), ino, &len); | 150 | MAJOR(dev), MINOR(dev), ino, &len); |
149 | 151 | ||
150 | if (file) { | 152 | if (file) { |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a404fb88e456..3a6b193d8444 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -221,22 +221,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) | |||
221 | save_mount_options(sb, data); | 221 | save_mount_options(sb, data); |
222 | 222 | ||
223 | fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); | 223 | fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); |
224 | sb->s_fs_info = fsi; | ||
224 | if (!fsi) { | 225 | if (!fsi) { |
225 | err = -ENOMEM; | 226 | err = -ENOMEM; |
226 | goto fail; | 227 | goto fail; |
227 | } | 228 | } |
228 | sb->s_fs_info = fsi; | ||
229 | 229 | ||
230 | err = ramfs_parse_options(data, &fsi->mount_opts); | 230 | err = ramfs_parse_options(data, &fsi->mount_opts); |
231 | if (err) | 231 | if (err) |
232 | goto fail; | 232 | goto fail; |
233 | 233 | ||
234 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 234 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
235 | sb->s_blocksize = PAGE_CACHE_SIZE; | 235 | sb->s_blocksize = PAGE_CACHE_SIZE; |
236 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 236 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
237 | sb->s_magic = RAMFS_MAGIC; | 237 | sb->s_magic = RAMFS_MAGIC; |
238 | sb->s_op = &ramfs_ops; | 238 | sb->s_op = &ramfs_ops; |
239 | sb->s_time_gran = 1; | 239 | sb->s_time_gran = 1; |
240 | |||
240 | inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); | 241 | inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); |
241 | if (!inode) { | 242 | if (!inode) { |
242 | err = -ENOMEM; | 243 | err = -ENOMEM; |
@@ -244,14 +245,16 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) | |||
244 | } | 245 | } |
245 | 246 | ||
246 | root = d_alloc_root(inode); | 247 | root = d_alloc_root(inode); |
248 | sb->s_root = root; | ||
247 | if (!root) { | 249 | if (!root) { |
248 | err = -ENOMEM; | 250 | err = -ENOMEM; |
249 | goto fail; | 251 | goto fail; |
250 | } | 252 | } |
251 | sb->s_root = root; | 253 | |
252 | return 0; | 254 | return 0; |
253 | fail: | 255 | fail: |
254 | kfree(fsi); | 256 | kfree(fsi); |
257 | sb->s_fs_info = NULL; | ||
255 | iput(inode); | 258 | iput(inode); |
256 | return err; | 259 | return err; |
257 | } | 260 | } |
diff --git a/fs/romfs/Kconfig b/fs/romfs/Kconfig index 1a17020f9faf..ce2d6bcc6266 100644 --- a/fs/romfs/Kconfig +++ b/fs/romfs/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config ROMFS_FS | 1 | config ROMFS_FS |
2 | tristate "ROM file system support" | 2 | tristate "ROM file system support" |
3 | depends on BLOCK | 3 | depends on BLOCK || MTD |
4 | ---help--- | 4 | ---help--- |
5 | This is a very small read-only file system mainly intended for | 5 | This is a very small read-only file system mainly intended for |
6 | initial ram disks of installation disks, but it could be used for | 6 | initial ram disks of installation disks, but it could be used for |
@@ -14,3 +14,49 @@ config ROMFS_FS | |||
14 | 14 | ||
15 | If you don't know whether you need it, then you don't need it: | 15 | If you don't know whether you need it, then you don't need it: |
16 | answer N. | 16 | answer N. |
17 | |||
18 | # | ||
19 | # Select the backing stores to be supported | ||
20 | # | ||
21 | choice | ||
22 | prompt "RomFS backing stores" | ||
23 | depends on ROMFS_FS | ||
24 | default ROMFS_BACKED_BY_BLOCK | ||
25 | help | ||
26 | Select the backing stores to be supported. | ||
27 | |||
28 | config ROMFS_BACKED_BY_BLOCK | ||
29 | bool "Block device-backed ROM file system support" | ||
30 | depends on BLOCK | ||
31 | help | ||
32 | This permits ROMFS to use block devices buffered through the page | ||
33 | cache as the medium from which to retrieve data. It does not allow | ||
34 | direct mapping of the medium. | ||
35 | |||
36 | If unsure, answer Y. | ||
37 | |||
38 | config ROMFS_BACKED_BY_MTD | ||
39 | bool "MTD-backed ROM file system support" | ||
40 | depends on MTD=y || (ROMFS_FS=m && MTD) | ||
41 | help | ||
42 | This permits ROMFS to use MTD based devices directly, without the | ||
43 | intercession of the block layer (which may have been disabled). It | ||
44 | also allows direct mapping of MTD devices through romfs files under | ||
45 | NOMMU conditions if the underlying device is directly addressable by | ||
46 | the CPU. | ||
47 | |||
48 | If unsure, answer Y. | ||
49 | |||
50 | config ROMFS_BACKED_BY_BOTH | ||
51 | bool "Both the above" | ||
52 | depends on BLOCK && (MTD=y || (ROMFS_FS=m && MTD)) | ||
53 | endchoice | ||
54 | |||
55 | |||
56 | config ROMFS_ON_BLOCK | ||
57 | bool | ||
58 | default y if ROMFS_BACKED_BY_BLOCK || ROMFS_BACKED_BY_BOTH | ||
59 | |||
60 | config ROMFS_ON_MTD | ||
61 | bool | ||
62 | default y if ROMFS_BACKED_BY_MTD || ROMFS_BACKED_BY_BOTH | ||
diff --git a/fs/romfs/Makefile b/fs/romfs/Makefile index c95b21cf49a3..420beb7d495c 100644 --- a/fs/romfs/Makefile +++ b/fs/romfs/Makefile | |||
@@ -1,7 +1,12 @@ | |||
1 | # | 1 | # |
2 | # Makefile for the linux romfs filesystem routines. | 2 | # Makefile for the linux RomFS filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_ROMFS_FS) += romfs.o | 5 | obj-$(CONFIG_ROMFS_FS) += romfs.o |
6 | 6 | ||
7 | romfs-objs := inode.o | 7 | romfs-y := storage.o super.o |
8 | |||
9 | ifneq ($(CONFIG_MMU),y) | ||
10 | romfs-$(CONFIG_ROMFS_ON_MTD) += mmap-nommu.o | ||
11 | endif | ||
12 | |||
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c deleted file mode 100644 index 98a232f7196b..000000000000 --- a/fs/romfs/inode.c +++ /dev/null | |||
@@ -1,665 +0,0 @@ | |||
1 | /* | ||
2 | * ROMFS file system, Linux implementation | ||
3 | * | ||
4 | * Copyright (C) 1997-1999 Janos Farkas <chexum@shadow.banki.hu> | ||
5 | * | ||
6 | * Using parts of the minix filesystem | ||
7 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
8 | * | ||
9 | * and parts of the affs filesystem additionally | ||
10 | * Copyright (C) 1993 Ray Burr | ||
11 | * Copyright (C) 1996 Hans-Joachim Widmaier | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or | ||
14 | * modify it under the terms of the GNU General Public License | ||
15 | * as published by the Free Software Foundation; either version | ||
16 | * 2 of the License, or (at your option) any later version. | ||
17 | * | ||
18 | * Changes | ||
19 | * Changed for 2.1.19 modules | ||
20 | * Jan 1997 Initial release | ||
21 | * Jun 1997 2.1.43+ changes | ||
22 | * Proper page locking in readpage | ||
23 | * Changed to work with 2.1.45+ fs | ||
24 | * Jul 1997 Fixed follow_link | ||
25 | * 2.1.47 | ||
26 | * lookup shouldn't return -ENOENT | ||
27 | * from Horst von Brand: | ||
28 | * fail on wrong checksum | ||
29 | * double unlock_super was possible | ||
30 | * correct namelen for statfs | ||
31 | * spotted by Bill Hawes: | ||
32 | * readlink shouldn't iput() | ||
33 | * Jun 1998 2.1.106 from Avery Pennarun: glibc scandir() | ||
34 | * exposed a problem in readdir | ||
35 | * 2.1.107 code-freeze spellchecker run | ||
36 | * Aug 1998 2.1.118+ VFS changes | ||
37 | * Sep 1998 2.1.122 another VFS change (follow_link) | ||
38 | * Apr 1999 2.2.7 no more EBADF checking in | ||
39 | * lookup/readdir, use ERR_PTR | ||
40 | * Jun 1999 2.3.6 d_alloc_root use changed | ||
41 | * 2.3.9 clean up usage of ENOENT/negative | ||
42 | * dentries in lookup | ||
43 | * clean up page flags setting | ||
44 | * (error, uptodate, locking) in | ||
45 | * in readpage | ||
46 | * use init_special_inode for | ||
47 | * fifos/sockets (and streamline) in | ||
48 | * read_inode, fix _ops table order | ||
49 | * Aug 1999 2.3.16 __initfunc() => __init change | ||
50 | * Oct 1999 2.3.24 page->owner hack obsoleted | ||
51 | * Nov 1999 2.3.27 2.3.25+ page->offset => index change | ||
52 | */ | ||
53 | |||
54 | /* todo: | ||
55 | * - see Documentation/filesystems/romfs.txt | ||
56 | * - use allocated, not stack memory for file names? | ||
57 | * - considering write access... | ||
58 | * - network (tftp) files? | ||
59 | * - merge back some _op tables | ||
60 | */ | ||
61 | |||
62 | /* | ||
63 | * Sorry about some optimizations and for some goto's. I just wanted | ||
64 | * to squeeze some more bytes out of this code.. :) | ||
65 | */ | ||
66 | |||
67 | #include <linux/module.h> | ||
68 | #include <linux/types.h> | ||
69 | #include <linux/errno.h> | ||
70 | #include <linux/slab.h> | ||
71 | #include <linux/romfs_fs.h> | ||
72 | #include <linux/fs.h> | ||
73 | #include <linux/init.h> | ||
74 | #include <linux/pagemap.h> | ||
75 | #include <linux/smp_lock.h> | ||
76 | #include <linux/buffer_head.h> | ||
77 | #include <linux/vfs.h> | ||
78 | |||
79 | #include <asm/uaccess.h> | ||
80 | |||
81 | struct romfs_inode_info { | ||
82 | unsigned long i_metasize; /* size of non-data area */ | ||
83 | unsigned long i_dataoffset; /* from the start of fs */ | ||
84 | struct inode vfs_inode; | ||
85 | }; | ||
86 | |||
87 | static struct inode *romfs_iget(struct super_block *, unsigned long); | ||
88 | |||
89 | /* instead of private superblock data */ | ||
90 | static inline unsigned long romfs_maxsize(struct super_block *sb) | ||
91 | { | ||
92 | return (unsigned long)sb->s_fs_info; | ||
93 | } | ||
94 | |||
95 | static inline struct romfs_inode_info *ROMFS_I(struct inode *inode) | ||
96 | { | ||
97 | return container_of(inode, struct romfs_inode_info, vfs_inode); | ||
98 | } | ||
99 | |||
100 | static __u32 | ||
101 | romfs_checksum(void *data, int size) | ||
102 | { | ||
103 | __u32 sum; | ||
104 | __be32 *ptr; | ||
105 | |||
106 | sum = 0; ptr = data; | ||
107 | size>>=2; | ||
108 | while (size>0) { | ||
109 | sum += be32_to_cpu(*ptr++); | ||
110 | size--; | ||
111 | } | ||
112 | return sum; | ||
113 | } | ||
114 | |||
115 | static const struct super_operations romfs_ops; | ||
116 | |||
117 | static int romfs_fill_super(struct super_block *s, void *data, int silent) | ||
118 | { | ||
119 | struct buffer_head *bh; | ||
120 | struct romfs_super_block *rsb; | ||
121 | struct inode *root; | ||
122 | int sz, ret = -EINVAL; | ||
123 | |||
124 | /* I would parse the options here, but there are none.. :) */ | ||
125 | |||
126 | sb_set_blocksize(s, ROMBSIZE); | ||
127 | s->s_maxbytes = 0xFFFFFFFF; | ||
128 | |||
129 | bh = sb_bread(s, 0); | ||
130 | if (!bh) { | ||
131 | /* XXX merge with other printk? */ | ||
132 | printk ("romfs: unable to read superblock\n"); | ||
133 | goto outnobh; | ||
134 | } | ||
135 | |||
136 | rsb = (struct romfs_super_block *)bh->b_data; | ||
137 | sz = be32_to_cpu(rsb->size); | ||
138 | if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 | ||
139 | || sz < ROMFH_SIZE) { | ||
140 | if (!silent) | ||
141 | printk ("VFS: Can't find a romfs filesystem on dev " | ||
142 | "%s.\n", s->s_id); | ||
143 | goto out; | ||
144 | } | ||
145 | if (romfs_checksum(rsb, min_t(int, sz, 512))) { | ||
146 | printk ("romfs: bad initial checksum on dev " | ||
147 | "%s.\n", s->s_id); | ||
148 | goto out; | ||
149 | } | ||
150 | |||
151 | s->s_magic = ROMFS_MAGIC; | ||
152 | s->s_fs_info = (void *)(long)sz; | ||
153 | |||
154 | s->s_flags |= MS_RDONLY; | ||
155 | |||
156 | /* Find the start of the fs */ | ||
157 | sz = (ROMFH_SIZE + | ||
158 | strnlen(rsb->name, ROMFS_MAXFN) + 1 + ROMFH_PAD) | ||
159 | & ROMFH_MASK; | ||
160 | |||
161 | s->s_op = &romfs_ops; | ||
162 | root = romfs_iget(s, sz); | ||
163 | if (IS_ERR(root)) { | ||
164 | ret = PTR_ERR(root); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | ret = -ENOMEM; | ||
169 | s->s_root = d_alloc_root(root); | ||
170 | if (!s->s_root) | ||
171 | goto outiput; | ||
172 | |||
173 | brelse(bh); | ||
174 | return 0; | ||
175 | |||
176 | outiput: | ||
177 | iput(root); | ||
178 | out: | ||
179 | brelse(bh); | ||
180 | outnobh: | ||
181 | return ret; | ||
182 | } | ||
183 | |||
184 | /* That's simple too. */ | ||
185 | |||
186 | static int | ||
187 | romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
188 | { | ||
189 | buf->f_type = ROMFS_MAGIC; | ||
190 | buf->f_bsize = ROMBSIZE; | ||
191 | buf->f_bfree = buf->f_bavail = buf->f_ffree; | ||
192 | buf->f_blocks = (romfs_maxsize(dentry->d_sb)+ROMBSIZE-1)>>ROMBSBITS; | ||
193 | buf->f_namelen = ROMFS_MAXFN; | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | /* some helper routines */ | ||
198 | |||
199 | static int | ||
200 | romfs_strnlen(struct inode *i, unsigned long offset, unsigned long count) | ||
201 | { | ||
202 | struct buffer_head *bh; | ||
203 | unsigned long avail, maxsize, res; | ||
204 | |||
205 | maxsize = romfs_maxsize(i->i_sb); | ||
206 | if (offset >= maxsize) | ||
207 | return -1; | ||
208 | |||
209 | /* strnlen is almost always valid */ | ||
210 | if (count > maxsize || offset+count > maxsize) | ||
211 | count = maxsize-offset; | ||
212 | |||
213 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
214 | if (!bh) | ||
215 | return -1; /* error */ | ||
216 | |||
217 | avail = ROMBSIZE - (offset & ROMBMASK); | ||
218 | maxsize = min_t(unsigned long, count, avail); | ||
219 | res = strnlen(((char *)bh->b_data)+(offset&ROMBMASK), maxsize); | ||
220 | brelse(bh); | ||
221 | |||
222 | if (res < maxsize) | ||
223 | return res; /* found all of it */ | ||
224 | |||
225 | while (res < count) { | ||
226 | offset += maxsize; | ||
227 | |||
228 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
229 | if (!bh) | ||
230 | return -1; | ||
231 | maxsize = min_t(unsigned long, count - res, ROMBSIZE); | ||
232 | avail = strnlen(bh->b_data, maxsize); | ||
233 | res += avail; | ||
234 | brelse(bh); | ||
235 | if (avail < maxsize) | ||
236 | return res; | ||
237 | } | ||
238 | return res; | ||
239 | } | ||
240 | |||
241 | static int | ||
242 | romfs_copyfrom(struct inode *i, void *dest, unsigned long offset, unsigned long count) | ||
243 | { | ||
244 | struct buffer_head *bh; | ||
245 | unsigned long avail, maxsize, res; | ||
246 | |||
247 | maxsize = romfs_maxsize(i->i_sb); | ||
248 | if (offset >= maxsize || count > maxsize || offset+count>maxsize) | ||
249 | return -1; | ||
250 | |||
251 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
252 | if (!bh) | ||
253 | return -1; /* error */ | ||
254 | |||
255 | avail = ROMBSIZE - (offset & ROMBMASK); | ||
256 | maxsize = min_t(unsigned long, count, avail); | ||
257 | memcpy(dest, ((char *)bh->b_data) + (offset & ROMBMASK), maxsize); | ||
258 | brelse(bh); | ||
259 | |||
260 | res = maxsize; /* all of it */ | ||
261 | |||
262 | while (res < count) { | ||
263 | offset += maxsize; | ||
264 | dest += maxsize; | ||
265 | |||
266 | bh = sb_bread(i->i_sb, offset>>ROMBSBITS); | ||
267 | if (!bh) | ||
268 | return -1; | ||
269 | maxsize = min_t(unsigned long, count - res, ROMBSIZE); | ||
270 | memcpy(dest, bh->b_data, maxsize); | ||
271 | brelse(bh); | ||
272 | res += maxsize; | ||
273 | } | ||
274 | return res; | ||
275 | } | ||
276 | |||
277 | static unsigned char romfs_dtype_table[] = { | ||
278 | DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_SOCK, DT_FIFO | ||
279 | }; | ||
280 | |||
281 | static int | ||
282 | romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
283 | { | ||
284 | struct inode *i = filp->f_path.dentry->d_inode; | ||
285 | struct romfs_inode ri; | ||
286 | unsigned long offset, maxoff; | ||
287 | int j, ino, nextfh; | ||
288 | int stored = 0; | ||
289 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | ||
290 | |||
291 | lock_kernel(); | ||
292 | |||
293 | maxoff = romfs_maxsize(i->i_sb); | ||
294 | |||
295 | offset = filp->f_pos; | ||
296 | if (!offset) { | ||
297 | offset = i->i_ino & ROMFH_MASK; | ||
298 | if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0) | ||
299 | goto out; | ||
300 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
301 | } | ||
302 | |||
303 | /* Not really failsafe, but we are read-only... */ | ||
304 | for(;;) { | ||
305 | if (!offset || offset >= maxoff) { | ||
306 | offset = maxoff; | ||
307 | filp->f_pos = offset; | ||
308 | goto out; | ||
309 | } | ||
310 | filp->f_pos = offset; | ||
311 | |||
312 | /* Fetch inode info */ | ||
313 | if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0) | ||
314 | goto out; | ||
315 | |||
316 | j = romfs_strnlen(i, offset+ROMFH_SIZE, sizeof(fsname)-1); | ||
317 | if (j < 0) | ||
318 | goto out; | ||
319 | |||
320 | fsname[j]=0; | ||
321 | romfs_copyfrom(i, fsname, offset+ROMFH_SIZE, j); | ||
322 | |||
323 | ino = offset; | ||
324 | nextfh = be32_to_cpu(ri.next); | ||
325 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) | ||
326 | ino = be32_to_cpu(ri.spec); | ||
327 | if (filldir(dirent, fsname, j, offset, ino, | ||
328 | romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0) { | ||
329 | goto out; | ||
330 | } | ||
331 | stored++; | ||
332 | offset = nextfh & ROMFH_MASK; | ||
333 | } | ||
334 | out: | ||
335 | unlock_kernel(); | ||
336 | return stored; | ||
337 | } | ||
338 | |||
339 | static struct dentry * | ||
340 | romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
341 | { | ||
342 | unsigned long offset, maxoff; | ||
343 | long res; | ||
344 | int fslen; | ||
345 | struct inode *inode = NULL; | ||
346 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | ||
347 | struct romfs_inode ri; | ||
348 | const char *name; /* got from dentry */ | ||
349 | int len; | ||
350 | |||
351 | res = -EACCES; /* placeholder for "no data here" */ | ||
352 | offset = dir->i_ino & ROMFH_MASK; | ||
353 | lock_kernel(); | ||
354 | if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) | ||
355 | goto error; | ||
356 | |||
357 | maxoff = romfs_maxsize(dir->i_sb); | ||
358 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
359 | |||
360 | /* OK, now find the file whose name is in "dentry" in the | ||
361 | * directory specified by "dir". */ | ||
362 | |||
363 | name = dentry->d_name.name; | ||
364 | len = dentry->d_name.len; | ||
365 | |||
366 | for(;;) { | ||
367 | if (!offset || offset >= maxoff) | ||
368 | goto success; /* negative success */ | ||
369 | if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0) | ||
370 | goto error; | ||
371 | |||
372 | /* try to match the first 16 bytes of name */ | ||
373 | fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, ROMFH_SIZE); | ||
374 | if (len < ROMFH_SIZE) { | ||
375 | if (len == fslen) { | ||
376 | /* both are shorter, and same size */ | ||
377 | romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1); | ||
378 | if (strncmp (name, fsname, len) == 0) | ||
379 | break; | ||
380 | } | ||
381 | } else if (fslen >= ROMFH_SIZE) { | ||
382 | /* both are longer; XXX optimize max size */ | ||
383 | fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, sizeof(fsname)-1); | ||
384 | if (len == fslen) { | ||
385 | romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1); | ||
386 | if (strncmp(name, fsname, len) == 0) | ||
387 | break; | ||
388 | } | ||
389 | } | ||
390 | /* next entry */ | ||
391 | offset = be32_to_cpu(ri.next) & ROMFH_MASK; | ||
392 | } | ||
393 | |||
394 | /* Hard link handling */ | ||
395 | if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) | ||
396 | offset = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
397 | |||
398 | inode = romfs_iget(dir->i_sb, offset); | ||
399 | if (IS_ERR(inode)) { | ||
400 | res = PTR_ERR(inode); | ||
401 | goto error; | ||
402 | } | ||
403 | |||
404 | success: | ||
405 | d_add(dentry, inode); | ||
406 | res = 0; | ||
407 | error: | ||
408 | unlock_kernel(); | ||
409 | return ERR_PTR(res); | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * Ok, we do readpage, to be able to execute programs. Unfortunately, | ||
414 | * we can't use bmap, since we may have looser alignments. | ||
415 | */ | ||
416 | |||
417 | static int | ||
418 | romfs_readpage(struct file *file, struct page * page) | ||
419 | { | ||
420 | struct inode *inode = page->mapping->host; | ||
421 | loff_t offset, size; | ||
422 | unsigned long filled; | ||
423 | void *buf; | ||
424 | int result = -EIO; | ||
425 | |||
426 | page_cache_get(page); | ||
427 | lock_kernel(); | ||
428 | buf = kmap(page); | ||
429 | if (!buf) | ||
430 | goto err_out; | ||
431 | |||
432 | /* 32 bit warning -- but not for us :) */ | ||
433 | offset = page_offset(page); | ||
434 | size = i_size_read(inode); | ||
435 | filled = 0; | ||
436 | result = 0; | ||
437 | if (offset < size) { | ||
438 | unsigned long readlen; | ||
439 | |||
440 | size -= offset; | ||
441 | readlen = size > PAGE_SIZE ? PAGE_SIZE : size; | ||
442 | |||
443 | filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen); | ||
444 | |||
445 | if (filled != readlen) { | ||
446 | SetPageError(page); | ||
447 | filled = 0; | ||
448 | result = -EIO; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | if (filled < PAGE_SIZE) | ||
453 | memset(buf + filled, 0, PAGE_SIZE-filled); | ||
454 | |||
455 | if (!result) | ||
456 | SetPageUptodate(page); | ||
457 | flush_dcache_page(page); | ||
458 | |||
459 | unlock_page(page); | ||
460 | |||
461 | kunmap(page); | ||
462 | err_out: | ||
463 | page_cache_release(page); | ||
464 | unlock_kernel(); | ||
465 | |||
466 | return result; | ||
467 | } | ||
468 | |||
469 | /* Mapping from our types to the kernel */ | ||
470 | |||
471 | static const struct address_space_operations romfs_aops = { | ||
472 | .readpage = romfs_readpage | ||
473 | }; | ||
474 | |||
475 | static const struct file_operations romfs_dir_operations = { | ||
476 | .read = generic_read_dir, | ||
477 | .readdir = romfs_readdir, | ||
478 | }; | ||
479 | |||
480 | static const struct inode_operations romfs_dir_inode_operations = { | ||
481 | .lookup = romfs_lookup, | ||
482 | }; | ||
483 | |||
484 | static mode_t romfs_modemap[] = | ||
485 | { | ||
486 | 0, S_IFDIR+0644, S_IFREG+0644, S_IFLNK+0777, | ||
487 | S_IFBLK+0600, S_IFCHR+0600, S_IFSOCK+0644, S_IFIFO+0644 | ||
488 | }; | ||
489 | |||
490 | static struct inode * | ||
491 | romfs_iget(struct super_block *sb, unsigned long ino) | ||
492 | { | ||
493 | int nextfh, ret; | ||
494 | struct romfs_inode ri; | ||
495 | struct inode *i; | ||
496 | |||
497 | ino &= ROMFH_MASK; | ||
498 | i = iget_locked(sb, ino); | ||
499 | if (!i) | ||
500 | return ERR_PTR(-ENOMEM); | ||
501 | if (!(i->i_state & I_NEW)) | ||
502 | return i; | ||
503 | |||
504 | i->i_mode = 0; | ||
505 | |||
506 | /* Loop for finding the real hard link */ | ||
507 | for(;;) { | ||
508 | if (romfs_copyfrom(i, &ri, ino, ROMFH_SIZE) <= 0) { | ||
509 | printk(KERN_ERR "romfs: read error for inode 0x%lx\n", | ||
510 | ino); | ||
511 | iget_failed(i); | ||
512 | return ERR_PTR(-EIO); | ||
513 | } | ||
514 | /* XXX: do romfs_checksum here too (with name) */ | ||
515 | |||
516 | nextfh = be32_to_cpu(ri.next); | ||
517 | if ((nextfh & ROMFH_TYPE) != ROMFH_HRD) | ||
518 | break; | ||
519 | |||
520 | ino = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
521 | } | ||
522 | |||
523 | i->i_nlink = 1; /* Hard to decide.. */ | ||
524 | i->i_size = be32_to_cpu(ri.size); | ||
525 | i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; | ||
526 | i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; | ||
527 | |||
528 | /* Precalculate the data offset */ | ||
529 | ret = romfs_strnlen(i, ino + ROMFH_SIZE, ROMFS_MAXFN); | ||
530 | if (ret >= 0) | ||
531 | ino = (ROMFH_SIZE + ret + 1 + ROMFH_PAD) & ROMFH_MASK; | ||
532 | else | ||
533 | ino = 0; | ||
534 | |||
535 | ROMFS_I(i)->i_metasize = ino; | ||
536 | ROMFS_I(i)->i_dataoffset = ino+(i->i_ino&ROMFH_MASK); | ||
537 | |||
538 | /* Compute permissions */ | ||
539 | ino = romfs_modemap[nextfh & ROMFH_TYPE]; | ||
540 | /* only "normal" files have ops */ | ||
541 | switch (nextfh & ROMFH_TYPE) { | ||
542 | case 1: | ||
543 | i->i_size = ROMFS_I(i)->i_metasize; | ||
544 | i->i_op = &romfs_dir_inode_operations; | ||
545 | i->i_fop = &romfs_dir_operations; | ||
546 | if (nextfh & ROMFH_EXEC) | ||
547 | ino |= S_IXUGO; | ||
548 | i->i_mode = ino; | ||
549 | break; | ||
550 | case 2: | ||
551 | i->i_fop = &generic_ro_fops; | ||
552 | i->i_data.a_ops = &romfs_aops; | ||
553 | if (nextfh & ROMFH_EXEC) | ||
554 | ino |= S_IXUGO; | ||
555 | i->i_mode = ino; | ||
556 | break; | ||
557 | case 3: | ||
558 | i->i_op = &page_symlink_inode_operations; | ||
559 | i->i_data.a_ops = &romfs_aops; | ||
560 | i->i_mode = ino | S_IRWXUGO; | ||
561 | break; | ||
562 | default: | ||
563 | /* depending on MBZ for sock/fifos */ | ||
564 | nextfh = be32_to_cpu(ri.spec); | ||
565 | init_special_inode(i, ino, | ||
566 | MKDEV(nextfh>>16,nextfh&0xffff)); | ||
567 | } | ||
568 | unlock_new_inode(i); | ||
569 | return i; | ||
570 | } | ||
571 | |||
572 | static struct kmem_cache * romfs_inode_cachep; | ||
573 | |||
574 | static struct inode *romfs_alloc_inode(struct super_block *sb) | ||
575 | { | ||
576 | struct romfs_inode_info *ei; | ||
577 | ei = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); | ||
578 | if (!ei) | ||
579 | return NULL; | ||
580 | return &ei->vfs_inode; | ||
581 | } | ||
582 | |||
583 | static void romfs_destroy_inode(struct inode *inode) | ||
584 | { | ||
585 | kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); | ||
586 | } | ||
587 | |||
588 | static void init_once(void *foo) | ||
589 | { | ||
590 | struct romfs_inode_info *ei = foo; | ||
591 | |||
592 | inode_init_once(&ei->vfs_inode); | ||
593 | } | ||
594 | |||
595 | static int init_inodecache(void) | ||
596 | { | ||
597 | romfs_inode_cachep = kmem_cache_create("romfs_inode_cache", | ||
598 | sizeof(struct romfs_inode_info), | ||
599 | 0, (SLAB_RECLAIM_ACCOUNT| | ||
600 | SLAB_MEM_SPREAD), | ||
601 | init_once); | ||
602 | if (romfs_inode_cachep == NULL) | ||
603 | return -ENOMEM; | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | static void destroy_inodecache(void) | ||
608 | { | ||
609 | kmem_cache_destroy(romfs_inode_cachep); | ||
610 | } | ||
611 | |||
612 | static int romfs_remount(struct super_block *sb, int *flags, char *data) | ||
613 | { | ||
614 | *flags |= MS_RDONLY; | ||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static const struct super_operations romfs_ops = { | ||
619 | .alloc_inode = romfs_alloc_inode, | ||
620 | .destroy_inode = romfs_destroy_inode, | ||
621 | .statfs = romfs_statfs, | ||
622 | .remount_fs = romfs_remount, | ||
623 | }; | ||
624 | |||
625 | static int romfs_get_sb(struct file_system_type *fs_type, | ||
626 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
627 | { | ||
628 | return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super, | ||
629 | mnt); | ||
630 | } | ||
631 | |||
632 | static struct file_system_type romfs_fs_type = { | ||
633 | .owner = THIS_MODULE, | ||
634 | .name = "romfs", | ||
635 | .get_sb = romfs_get_sb, | ||
636 | .kill_sb = kill_block_super, | ||
637 | .fs_flags = FS_REQUIRES_DEV, | ||
638 | }; | ||
639 | |||
640 | static int __init init_romfs_fs(void) | ||
641 | { | ||
642 | int err = init_inodecache(); | ||
643 | if (err) | ||
644 | goto out1; | ||
645 | err = register_filesystem(&romfs_fs_type); | ||
646 | if (err) | ||
647 | goto out; | ||
648 | return 0; | ||
649 | out: | ||
650 | destroy_inodecache(); | ||
651 | out1: | ||
652 | return err; | ||
653 | } | ||
654 | |||
655 | static void __exit exit_romfs_fs(void) | ||
656 | { | ||
657 | unregister_filesystem(&romfs_fs_type); | ||
658 | destroy_inodecache(); | ||
659 | } | ||
660 | |||
661 | /* Yes, works even as a module... :) */ | ||
662 | |||
663 | module_init(init_romfs_fs) | ||
664 | module_exit(exit_romfs_fs) | ||
665 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h new file mode 100644 index 000000000000..06044a9dc62d --- /dev/null +++ b/fs/romfs/internal.h | |||
@@ -0,0 +1,47 @@ | |||
1 | /* RomFS internal definitions | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/romfs_fs.h> | ||
13 | |||
14 | struct romfs_inode_info { | ||
15 | struct inode vfs_inode; | ||
16 | unsigned long i_metasize; /* size of non-data area */ | ||
17 | unsigned long i_dataoffset; /* from the start of fs */ | ||
18 | }; | ||
19 | |||
20 | static inline size_t romfs_maxsize(struct super_block *sb) | ||
21 | { | ||
22 | return (size_t) (unsigned long) sb->s_fs_info; | ||
23 | } | ||
24 | |||
25 | static inline struct romfs_inode_info *ROMFS_I(struct inode *inode) | ||
26 | { | ||
27 | return container_of(inode, struct romfs_inode_info, vfs_inode); | ||
28 | } | ||
29 | |||
30 | /* | ||
31 | * mmap-nommu.c | ||
32 | */ | ||
33 | #if !defined(CONFIG_MMU) && defined(CONFIG_ROMFS_ON_MTD) | ||
34 | extern const struct file_operations romfs_ro_fops; | ||
35 | #else | ||
36 | #define romfs_ro_fops generic_ro_fops | ||
37 | #endif | ||
38 | |||
39 | /* | ||
40 | * storage.c | ||
41 | */ | ||
42 | extern int romfs_dev_read(struct super_block *sb, unsigned long pos, | ||
43 | void *buf, size_t buflen); | ||
44 | extern ssize_t romfs_dev_strnlen(struct super_block *sb, | ||
45 | unsigned long pos, size_t maxlen); | ||
46 | extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | ||
47 | const char *str, size_t size); | ||
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c new file mode 100644 index 000000000000..f0511e816967 --- /dev/null +++ b/fs/romfs/mmap-nommu.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* NOMMU mmap support for RomFS on MTD devices | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mm.h> | ||
13 | #include <linux/mtd/super.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | /* | ||
17 | * try to determine where a shared mapping can be made | ||
18 | * - only supported for NOMMU at the moment (MMU can't doesn't copy private | ||
19 | * mappings) | ||
20 | * - attempts to map through to the underlying MTD device | ||
21 | */ | ||
22 | static unsigned long romfs_get_unmapped_area(struct file *file, | ||
23 | unsigned long addr, | ||
24 | unsigned long len, | ||
25 | unsigned long pgoff, | ||
26 | unsigned long flags) | ||
27 | { | ||
28 | struct inode *inode = file->f_mapping->host; | ||
29 | struct mtd_info *mtd = inode->i_sb->s_mtd; | ||
30 | unsigned long isize, offset; | ||
31 | |||
32 | if (!mtd) | ||
33 | goto cant_map_directly; | ||
34 | |||
35 | isize = i_size_read(inode); | ||
36 | offset = pgoff << PAGE_SHIFT; | ||
37 | if (offset > isize || len > isize || offset > isize - len) | ||
38 | return (unsigned long) -EINVAL; | ||
39 | |||
40 | /* we need to call down to the MTD layer to do the actual mapping */ | ||
41 | if (mtd->get_unmapped_area) { | ||
42 | if (addr != 0) | ||
43 | return (unsigned long) -EINVAL; | ||
44 | |||
45 | if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT)) | ||
46 | return (unsigned long) -EINVAL; | ||
47 | |||
48 | offset += ROMFS_I(inode)->i_dataoffset; | ||
49 | if (offset > mtd->size - len) | ||
50 | return (unsigned long) -EINVAL; | ||
51 | |||
52 | return mtd->get_unmapped_area(mtd, len, offset, flags); | ||
53 | } | ||
54 | |||
55 | cant_map_directly: | ||
56 | return (unsigned long) -ENOSYS; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * permit a R/O mapping to be made directly through onto an MTD device if | ||
61 | * possible | ||
62 | */ | ||
63 | static int romfs_mmap(struct file *file, struct vm_area_struct *vma) | ||
64 | { | ||
65 | return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -ENOSYS; | ||
66 | } | ||
67 | |||
68 | const struct file_operations romfs_ro_fops = { | ||
69 | .llseek = generic_file_llseek, | ||
70 | .read = do_sync_read, | ||
71 | .aio_read = generic_file_aio_read, | ||
72 | .splice_read = generic_file_splice_read, | ||
73 | .mmap = romfs_mmap, | ||
74 | .get_unmapped_area = romfs_get_unmapped_area, | ||
75 | }; | ||
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c new file mode 100644 index 000000000000..7e3e1e12a081 --- /dev/null +++ b/fs/romfs/storage.c | |||
@@ -0,0 +1,261 @@ | |||
1 | /* RomFS storage access routines | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/mtd/super.h> | ||
14 | #include <linux/buffer_head.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | #if !defined(CONFIG_ROMFS_ON_MTD) && !defined(CONFIG_ROMFS_ON_BLOCK) | ||
18 | #error no ROMFS backing store interface configured | ||
19 | #endif | ||
20 | |||
21 | #ifdef CONFIG_ROMFS_ON_MTD | ||
22 | #define ROMFS_MTD_READ(sb, ...) ((sb)->s_mtd->read((sb)->s_mtd, ##__VA_ARGS__)) | ||
23 | |||
24 | /* | ||
25 | * read data from an romfs image on an MTD device | ||
26 | */ | ||
27 | static int romfs_mtd_read(struct super_block *sb, unsigned long pos, | ||
28 | void *buf, size_t buflen) | ||
29 | { | ||
30 | size_t rlen; | ||
31 | int ret; | ||
32 | |||
33 | ret = ROMFS_MTD_READ(sb, pos, buflen, &rlen, buf); | ||
34 | return (ret < 0 || rlen != buflen) ? -EIO : 0; | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * determine the length of a string in a romfs image on an MTD device | ||
39 | */ | ||
40 | static ssize_t romfs_mtd_strnlen(struct super_block *sb, | ||
41 | unsigned long pos, size_t maxlen) | ||
42 | { | ||
43 | ssize_t n = 0; | ||
44 | size_t segment; | ||
45 | u_char buf[16], *p; | ||
46 | size_t len; | ||
47 | int ret; | ||
48 | |||
49 | /* scan the string up to 16 bytes at a time */ | ||
50 | while (maxlen > 0) { | ||
51 | segment = min_t(size_t, maxlen, 16); | ||
52 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); | ||
53 | if (ret < 0) | ||
54 | return ret; | ||
55 | p = memchr(buf, 0, len); | ||
56 | if (p) | ||
57 | return n + (p - buf); | ||
58 | maxlen -= len; | ||
59 | pos += len; | ||
60 | n += len; | ||
61 | } | ||
62 | |||
63 | return n; | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * compare a string to one in a romfs image on MTD | ||
68 | * - return 1 if matched, 0 if differ, -ve if error | ||
69 | */ | ||
70 | static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, | ||
71 | const char *str, size_t size) | ||
72 | { | ||
73 | u_char buf[16]; | ||
74 | size_t len, segment; | ||
75 | int ret; | ||
76 | |||
77 | /* scan the string up to 16 bytes at a time */ | ||
78 | while (size > 0) { | ||
79 | segment = min_t(size_t, size, 16); | ||
80 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); | ||
81 | if (ret < 0) | ||
82 | return ret; | ||
83 | if (memcmp(buf, str, len) != 0) | ||
84 | return 0; | ||
85 | size -= len; | ||
86 | pos += len; | ||
87 | str += len; | ||
88 | } | ||
89 | |||
90 | return 1; | ||
91 | } | ||
92 | #endif /* CONFIG_ROMFS_ON_MTD */ | ||
93 | |||
94 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
95 | /* | ||
96 | * read data from an romfs image on a block device | ||
97 | */ | ||
98 | static int romfs_blk_read(struct super_block *sb, unsigned long pos, | ||
99 | void *buf, size_t buflen) | ||
100 | { | ||
101 | struct buffer_head *bh; | ||
102 | unsigned long offset; | ||
103 | size_t segment; | ||
104 | |||
105 | /* copy the string up to blocksize bytes at a time */ | ||
106 | while (buflen > 0) { | ||
107 | offset = pos & (ROMBSIZE - 1); | ||
108 | segment = min_t(size_t, buflen, ROMBSIZE - offset); | ||
109 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
110 | if (!bh) | ||
111 | return -EIO; | ||
112 | memcpy(buf, bh->b_data + offset, segment); | ||
113 | brelse(bh); | ||
114 | buflen -= segment; | ||
115 | pos += segment; | ||
116 | } | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * determine the length of a string in romfs on a block device | ||
123 | */ | ||
124 | static ssize_t romfs_blk_strnlen(struct super_block *sb, | ||
125 | unsigned long pos, size_t limit) | ||
126 | { | ||
127 | struct buffer_head *bh; | ||
128 | unsigned long offset; | ||
129 | ssize_t n = 0; | ||
130 | size_t segment; | ||
131 | u_char *buf, *p; | ||
132 | |||
133 | /* scan the string up to blocksize bytes at a time */ | ||
134 | while (limit > 0) { | ||
135 | offset = pos & (ROMBSIZE - 1); | ||
136 | segment = min_t(size_t, limit, ROMBSIZE - offset); | ||
137 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
138 | if (!bh) | ||
139 | return -EIO; | ||
140 | buf = bh->b_data + offset; | ||
141 | p = memchr(buf, 0, segment); | ||
142 | brelse(bh); | ||
143 | if (p) | ||
144 | return n + (p - buf); | ||
145 | limit -= segment; | ||
146 | pos += segment; | ||
147 | n += segment; | ||
148 | } | ||
149 | |||
150 | return n; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * compare a string to one in a romfs image on a block device | ||
155 | * - return 1 if matched, 0 if differ, -ve if error | ||
156 | */ | ||
157 | static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, | ||
158 | const char *str, size_t size) | ||
159 | { | ||
160 | struct buffer_head *bh; | ||
161 | unsigned long offset; | ||
162 | size_t segment; | ||
163 | bool x; | ||
164 | |||
165 | /* scan the string up to 16 bytes at a time */ | ||
166 | while (size > 0) { | ||
167 | offset = pos & (ROMBSIZE - 1); | ||
168 | segment = min_t(size_t, size, ROMBSIZE - offset); | ||
169 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
170 | if (!bh) | ||
171 | return -EIO; | ||
172 | x = (memcmp(bh->b_data + offset, str, segment) != 0); | ||
173 | brelse(bh); | ||
174 | if (x) | ||
175 | return 0; | ||
176 | size -= segment; | ||
177 | pos += segment; | ||
178 | str += segment; | ||
179 | } | ||
180 | |||
181 | return 1; | ||
182 | } | ||
183 | #endif /* CONFIG_ROMFS_ON_BLOCK */ | ||
184 | |||
185 | /* | ||
186 | * read data from the romfs image | ||
187 | */ | ||
188 | int romfs_dev_read(struct super_block *sb, unsigned long pos, | ||
189 | void *buf, size_t buflen) | ||
190 | { | ||
191 | size_t limit; | ||
192 | |||
193 | limit = romfs_maxsize(sb); | ||
194 | if (pos >= limit) | ||
195 | return -EIO; | ||
196 | if (buflen > limit - pos) | ||
197 | buflen = limit - pos; | ||
198 | |||
199 | #ifdef CONFIG_ROMFS_ON_MTD | ||
200 | if (sb->s_mtd) | ||
201 | return romfs_mtd_read(sb, pos, buf, buflen); | ||
202 | #endif | ||
203 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
204 | if (sb->s_bdev) | ||
205 | return romfs_blk_read(sb, pos, buf, buflen); | ||
206 | #endif | ||
207 | return -EIO; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * determine the length of a string in romfs | ||
212 | */ | ||
213 | ssize_t romfs_dev_strnlen(struct super_block *sb, | ||
214 | unsigned long pos, size_t maxlen) | ||
215 | { | ||
216 | size_t limit; | ||
217 | |||
218 | limit = romfs_maxsize(sb); | ||
219 | if (pos >= limit) | ||
220 | return -EIO; | ||
221 | if (maxlen > limit - pos) | ||
222 | maxlen = limit - pos; | ||
223 | |||
224 | #ifdef CONFIG_ROMFS_ON_MTD | ||
225 | if (sb->s_mtd) | ||
226 | return romfs_mtd_strnlen(sb, pos, limit); | ||
227 | #endif | ||
228 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
229 | if (sb->s_bdev) | ||
230 | return romfs_blk_strnlen(sb, pos, limit); | ||
231 | #endif | ||
232 | return -EIO; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * compare a string to one in romfs | ||
237 | * - return 1 if matched, 0 if differ, -ve if error | ||
238 | */ | ||
239 | int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | ||
240 | const char *str, size_t size) | ||
241 | { | ||
242 | size_t limit; | ||
243 | |||
244 | limit = romfs_maxsize(sb); | ||
245 | if (pos >= limit) | ||
246 | return -EIO; | ||
247 | if (size > ROMFS_MAXFN) | ||
248 | return -ENAMETOOLONG; | ||
249 | if (size > limit - pos) | ||
250 | return -EIO; | ||
251 | |||
252 | #ifdef CONFIG_ROMFS_ON_MTD | ||
253 | if (sb->s_mtd) | ||
254 | return romfs_mtd_strncmp(sb, pos, str, size); | ||
255 | #endif | ||
256 | #ifdef CONFIG_ROMFS_ON_BLOCK | ||
257 | if (sb->s_bdev) | ||
258 | return romfs_blk_strncmp(sb, pos, str, size); | ||
259 | #endif | ||
260 | return -EIO; | ||
261 | } | ||
diff --git a/fs/romfs/super.c b/fs/romfs/super.c new file mode 100644 index 000000000000..10ca7d984a8b --- /dev/null +++ b/fs/romfs/super.c | |||
@@ -0,0 +1,653 @@ | |||
1 | /* Block- or MTD-based romfs | ||
2 | * | ||
3 | * Copyright © 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * Derived from: ROMFS file system, Linux implementation | ||
7 | * | ||
8 | * Copyright © 1997-1999 Janos Farkas <chexum@shadow.banki.hu> | ||
9 | * | ||
10 | * Using parts of the minix filesystem | ||
11 | * Copyright © 1991, 1992 Linus Torvalds | ||
12 | * | ||
13 | * and parts of the affs filesystem additionally | ||
14 | * Copyright © 1993 Ray Burr | ||
15 | * Copyright © 1996 Hans-Joachim Widmaier | ||
16 | * | ||
17 | * Changes | ||
18 | * Changed for 2.1.19 modules | ||
19 | * Jan 1997 Initial release | ||
20 | * Jun 1997 2.1.43+ changes | ||
21 | * Proper page locking in readpage | ||
22 | * Changed to work with 2.1.45+ fs | ||
23 | * Jul 1997 Fixed follow_link | ||
24 | * 2.1.47 | ||
25 | * lookup shouldn't return -ENOENT | ||
26 | * from Horst von Brand: | ||
27 | * fail on wrong checksum | ||
28 | * double unlock_super was possible | ||
29 | * correct namelen for statfs | ||
30 | * spotted by Bill Hawes: | ||
31 | * readlink shouldn't iput() | ||
32 | * Jun 1998 2.1.106 from Avery Pennarun: glibc scandir() | ||
33 | * exposed a problem in readdir | ||
34 | * 2.1.107 code-freeze spellchecker run | ||
35 | * Aug 1998 2.1.118+ VFS changes | ||
36 | * Sep 1998 2.1.122 another VFS change (follow_link) | ||
37 | * Apr 1999 2.2.7 no more EBADF checking in | ||
38 | * lookup/readdir, use ERR_PTR | ||
39 | * Jun 1999 2.3.6 d_alloc_root use changed | ||
40 | * 2.3.9 clean up usage of ENOENT/negative | ||
41 | * dentries in lookup | ||
42 | * clean up page flags setting | ||
43 | * (error, uptodate, locking) in | ||
44 | * in readpage | ||
45 | * use init_special_inode for | ||
46 | * fifos/sockets (and streamline) in | ||
47 | * read_inode, fix _ops table order | ||
48 | * Aug 1999 2.3.16 __initfunc() => __init change | ||
49 | * Oct 1999 2.3.24 page->owner hack obsoleted | ||
50 | * Nov 1999 2.3.27 2.3.25+ page->offset => index change | ||
51 | * | ||
52 | * | ||
53 | * This program is free software; you can redistribute it and/or | ||
54 | * modify it under the terms of the GNU General Public Licence | ||
55 | * as published by the Free Software Foundation; either version | ||
56 | * 2 of the Licence, or (at your option) any later version. | ||
57 | */ | ||
58 | |||
59 | #include <linux/module.h> | ||
60 | #include <linux/string.h> | ||
61 | #include <linux/fs.h> | ||
62 | #include <linux/time.h> | ||
63 | #include <linux/slab.h> | ||
64 | #include <linux/init.h> | ||
65 | #include <linux/blkdev.h> | ||
66 | #include <linux/parser.h> | ||
67 | #include <linux/mount.h> | ||
68 | #include <linux/namei.h> | ||
69 | #include <linux/statfs.h> | ||
70 | #include <linux/mtd/super.h> | ||
71 | #include <linux/ctype.h> | ||
72 | #include <linux/highmem.h> | ||
73 | #include <linux/pagemap.h> | ||
74 | #include <linux/uaccess.h> | ||
75 | #include "internal.h" | ||
76 | |||
/* slab cache for romfs_inode_info objects */
static struct kmem_cache *romfs_inode_cachep;

/* map the ROMFH_TYPE field of an on-media inode to a VFS mode; entry 0 is a
 * hard link, which takes on the mode of the file it eventually points at */
static const umode_t romfs_modemap[8] = {
	0,			/* hard link */
	S_IFDIR  | 0644,	/* directory */
	S_IFREG  | 0644,	/* regular file */
	S_IFLNK  | 0777,	/* symlink */
	S_IFBLK  | 0600,	/* blockdev */
	S_IFCHR  | 0600,	/* chardev */
	S_IFSOCK | 0644,	/* socket */
	S_IFIFO  | 0644		/* FIFO */
};

/* same indexing as romfs_modemap, but yielding readdir d_type codes */
static const unsigned char romfs_dtype_table[] = {
	DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_SOCK, DT_FIFO
};

/* forward declaration: romfs_lookup() and romfs_iget() are mutually needed */
static struct inode *romfs_iget(struct super_block *sb, unsigned long pos);
95 | |||
/*
 * read a page worth of data from the image
 */
static int romfs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	loff_t offset, size;
	unsigned long fillsize, pos;
	void *buf;
	int ret;

	buf = kmap(page);
	if (!buf)
		return -ENOMEM;	/* NOTE(review): kmap() does not fail in
				 * practice; if it ever did, the page would be
				 * returned still locked */

	/* 32 bit warning -- but not for us :) */
	offset = page_offset(page);
	size = i_size_read(inode);
	fillsize = 0;
	ret = 0;
	if (offset < size) {
		/* how many file bytes fall within this page */
		size -= offset;
		fillsize = size > PAGE_SIZE ? PAGE_SIZE : size;

		/* file data begins i_dataoffset bytes into the image */
		pos = ROMFS_I(inode)->i_dataoffset + offset;

		ret = romfs_dev_read(inode->i_sb, pos, buf, fillsize);
		if (ret < 0) {
			SetPageError(page);
			fillsize = 0;
			ret = -EIO;
		}
	}

	/* zero the tail of the page past EOF (or the whole page on error) */
	if (fillsize < PAGE_SIZE)
		memset(buf + fillsize, 0, PAGE_SIZE - fillsize);
	if (ret == 0)
		SetPageUptodate(page);

	flush_dcache_page(page);
	kunmap(page);
	unlock_page(page);
	return ret;
}
140 | |||
/* romfs is read-only, so ->readpage is the only address-space op needed */
static const struct address_space_operations romfs_aops = {
	.readpage = romfs_readpage
};
144 | |||
/*
 * read the entries from a directory
 */
static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode *i = filp->f_dentry->d_inode;
	struct romfs_inode ri;
	unsigned long offset, maxoff;
	int j, ino, nextfh;
	int stored = 0;
	char fsname[ROMFS_MAXFN];	/* XXX dynamic? */
	int ret;

	maxoff = romfs_maxsize(i->i_sb);

	offset = filp->f_pos;
	if (!offset) {
		/* first call: read this directory's own header and start at
		 * the first entry it points to through ri.spec */
		offset = i->i_ino & ROMFH_MASK;
		ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE);
		if (ret < 0)
			goto out;
		offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
	}

	/* Not really failsafe, but we are read-only... */
	for (;;) {
		if (!offset || offset >= maxoff) {
			/* end of the entry chain: park f_pos at image end so
			 * the next call also terminates immediately */
			offset = maxoff;
			filp->f_pos = offset;
			goto out;
		}
		filp->f_pos = offset;

		/* Fetch inode info */
		ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE);
		if (ret < 0)
			goto out;

		/* the entry name is stored immediately after its header */
		j = romfs_dev_strnlen(i->i_sb, offset + ROMFH_SIZE,
				      sizeof(fsname) - 1);
		if (j < 0)
			goto out;

		ret = romfs_dev_read(i->i_sb, offset + ROMFH_SIZE, fsname, j);
		if (ret < 0)
			goto out;
		fsname[j] = '\0';

		/* a hard link reports its target's position as the inode no. */
		ino = offset;
		nextfh = be32_to_cpu(ri.next);
		if ((nextfh & ROMFH_TYPE) == ROMFH_HRD)
			ino = be32_to_cpu(ri.spec);
		if (filldir(dirent, fsname, j, offset, ino,
			    romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0)
			goto out;	/* caller's buffer is full */

		stored++;
		offset = nextfh & ROMFH_MASK;
	}

out:
	return stored;
}
208 | |||
/*
 * look up an entry in a directory
 */
static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
				   struct nameidata *nd)
{
	unsigned long offset, maxoff;
	struct inode *inode;
	struct romfs_inode ri;
	const char *name;		/* got from dentry */
	int len, ret;

	/* read this directory's own header to locate its first entry */
	offset = dir->i_ino & ROMFH_MASK;
	ret = romfs_dev_read(dir->i_sb, offset, &ri, ROMFH_SIZE);
	if (ret < 0)
		goto error;

	/* search all the file entries in the list starting from the one
	 * pointed to by the directory's special data */
	maxoff = romfs_maxsize(dir->i_sb);
	offset = be32_to_cpu(ri.spec) & ROMFH_MASK;

	name = dentry->d_name.name;
	len = dentry->d_name.len;

	for (;;) {
		if (!offset || offset >= maxoff)
			goto out0;	/* end of chain: no such entry */

		ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri));
		if (ret < 0)
			goto error;

		/* compare the entry's name, stored just after its header,
		 * against the name being looked up */
		ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name,
					len);
		if (ret < 0)
			goto error;
		if (ret == 1)
			break;

		/* next entry */
		offset = be32_to_cpu(ri.next) & ROMFH_MASK;
	}

	/* Hard link handling */
	if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
		offset = be32_to_cpu(ri.spec) & ROMFH_MASK;

	inode = romfs_iget(dir->i_sb, offset);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto error;
	}
	goto outi;

	/*
	 * it's a bit funky, _lookup needs to return an error code
	 * (negative) or a NULL, both as a dentry.  ENOENT should not
	 * be returned, instead we need to create a negative dentry by
	 * d_add(dentry, NULL); and return 0 as no error.
	 * (Although as I see, it only matters on writable file
	 * systems).
	 */
out0:
	inode = NULL;
outi:
	d_add(dentry, inode);
	ret = 0;
error:
	return ERR_PTR(ret);	/* ERR_PTR(0) == NULL on the success paths */
}
281 | |||
/* directory file operations: read-only, so just readdir plus the generic
 * read stub for directories */
static const struct file_operations romfs_dir_operations = {
	.read = generic_read_dir,
	.readdir = romfs_readdir,
};
286 | |||
287 | static struct inode_operations romfs_dir_inode_operations = { | ||
288 | .lookup = romfs_lookup, | ||
289 | }; | ||
290 | |||
291 | /* | ||
292 | * get a romfs inode based on its position in the image (which doubles as the | ||
293 | * inode number) | ||
294 | */ | ||
295 | static struct inode *romfs_iget(struct super_block *sb, unsigned long pos) | ||
296 | { | ||
297 | struct romfs_inode_info *inode; | ||
298 | struct romfs_inode ri; | ||
299 | struct inode *i; | ||
300 | unsigned long nlen; | ||
301 | unsigned nextfh, ret; | ||
302 | umode_t mode; | ||
303 | |||
304 | /* we might have to traverse a chain of "hard link" file entries to get | ||
305 | * to the actual file */ | ||
306 | for (;;) { | ||
307 | ret = romfs_dev_read(sb, pos, &ri, sizeof(ri)); | ||
308 | if (ret < 0) | ||
309 | goto error; | ||
310 | |||
311 | /* XXX: do romfs_checksum here too (with name) */ | ||
312 | |||
313 | nextfh = be32_to_cpu(ri.next); | ||
314 | if ((nextfh & ROMFH_TYPE) != ROMFH_HRD) | ||
315 | break; | ||
316 | |||
317 | pos = be32_to_cpu(ri.spec) & ROMFH_MASK; | ||
318 | } | ||
319 | |||
320 | /* determine the length of the filename */ | ||
321 | nlen = romfs_dev_strnlen(sb, pos + ROMFH_SIZE, ROMFS_MAXFN); | ||
322 | if (IS_ERR_VALUE(nlen)) | ||
323 | goto eio; | ||
324 | |||
325 | /* get an inode for this image position */ | ||
326 | i = iget_locked(sb, pos); | ||
327 | if (!i) | ||
328 | return ERR_PTR(-ENOMEM); | ||
329 | |||
330 | if (!(i->i_state & I_NEW)) | ||
331 | return i; | ||
332 | |||
333 | /* precalculate the data offset */ | ||
334 | inode = ROMFS_I(i); | ||
335 | inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK; | ||
336 | inode->i_dataoffset = pos + inode->i_metasize; | ||
337 | |||
338 | i->i_nlink = 1; /* Hard to decide.. */ | ||
339 | i->i_size = be32_to_cpu(ri.size); | ||
340 | i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; | ||
341 | i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; | ||
342 | |||
343 | /* set up mode and ops */ | ||
344 | mode = romfs_modemap[nextfh & ROMFH_TYPE]; | ||
345 | |||
346 | switch (nextfh & ROMFH_TYPE) { | ||
347 | case ROMFH_DIR: | ||
348 | i->i_size = ROMFS_I(i)->i_metasize; | ||
349 | i->i_op = &romfs_dir_inode_operations; | ||
350 | i->i_fop = &romfs_dir_operations; | ||
351 | if (nextfh & ROMFH_EXEC) | ||
352 | mode |= S_IXUGO; | ||
353 | break; | ||
354 | case ROMFH_REG: | ||
355 | i->i_fop = &romfs_ro_fops; | ||
356 | i->i_data.a_ops = &romfs_aops; | ||
357 | if (i->i_sb->s_mtd) | ||
358 | i->i_data.backing_dev_info = | ||
359 | i->i_sb->s_mtd->backing_dev_info; | ||
360 | if (nextfh & ROMFH_EXEC) | ||
361 | mode |= S_IXUGO; | ||
362 | break; | ||
363 | case ROMFH_SYM: | ||
364 | i->i_op = &page_symlink_inode_operations; | ||
365 | i->i_data.a_ops = &romfs_aops; | ||
366 | mode |= S_IRWXUGO; | ||
367 | break; | ||
368 | default: | ||
369 | /* depending on MBZ for sock/fifos */ | ||
370 | nextfh = be32_to_cpu(ri.spec); | ||
371 | init_special_inode(i, mode, MKDEV(nextfh >> 16, | ||
372 | nextfh & 0xffff)); | ||
373 | break; | ||
374 | } | ||
375 | |||
376 | i->i_mode = mode; | ||
377 | |||
378 | unlock_new_inode(i); | ||
379 | return i; | ||
380 | |||
381 | eio: | ||
382 | ret = -EIO; | ||
383 | error: | ||
384 | printk(KERN_ERR "ROMFS: read error for inode 0x%lx\n", pos); | ||
385 | return ERR_PTR(ret); | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * allocate a new inode | ||
390 | */ | ||
391 | static struct inode *romfs_alloc_inode(struct super_block *sb) | ||
392 | { | ||
393 | struct romfs_inode_info *inode; | ||
394 | inode = kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); | ||
395 | return inode ? &inode->vfs_inode : NULL; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * return a spent inode to the slab cache | ||
400 | */ | ||
401 | static void romfs_destroy_inode(struct inode *inode) | ||
402 | { | ||
403 | kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * get filesystem statistics | ||
408 | */ | ||
409 | static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
410 | { | ||
411 | struct super_block *sb = dentry->d_sb; | ||
412 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
413 | |||
414 | buf->f_type = ROMFS_MAGIC; | ||
415 | buf->f_namelen = ROMFS_MAXFN; | ||
416 | buf->f_bsize = ROMBSIZE; | ||
417 | buf->f_bfree = buf->f_bavail = buf->f_ffree; | ||
418 | buf->f_blocks = | ||
419 | (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; | ||
420 | buf->f_fsid.val[0] = (u32)id; | ||
421 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
422 | return 0; | ||
423 | } | ||
424 | |||
/*
 * remounting must involve read-only
 */
static int romfs_remount(struct super_block *sb, int *flags, char *data)
{
	/* force MS_RDONLY whatever the caller asked for; mount options in
	 * 'data' are ignored as romfs has none */
	*flags |= MS_RDONLY;
	return 0;
}
433 | |||
/* read-only filesystem: no write_inode, put_super, sync_fs etc. needed */
static const struct super_operations romfs_super_ops = {
	.alloc_inode = romfs_alloc_inode,
	.destroy_inode = romfs_destroy_inode,
	.statfs = romfs_statfs,
	.remount_fs = romfs_remount,
};
440 | |||
441 | /* | ||
442 | * checksum check on part of a romfs filesystem | ||
443 | */ | ||
444 | static __u32 romfs_checksum(const void *data, int size) | ||
445 | { | ||
446 | const __be32 *ptr = data; | ||
447 | __u32 sum; | ||
448 | |||
449 | sum = 0; | ||
450 | size >>= 2; | ||
451 | while (size > 0) { | ||
452 | sum += be32_to_cpu(*ptr++); | ||
453 | size--; | ||
454 | } | ||
455 | return sum; | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * fill in the superblock | ||
460 | */ | ||
461 | static int romfs_fill_super(struct super_block *sb, void *data, int silent) | ||
462 | { | ||
463 | struct romfs_super_block *rsb; | ||
464 | struct inode *root; | ||
465 | unsigned long pos, img_size; | ||
466 | const char *storage; | ||
467 | size_t len; | ||
468 | int ret; | ||
469 | |||
470 | #ifdef CONFIG_BLOCK | ||
471 | if (!sb->s_mtd) { | ||
472 | sb_set_blocksize(sb, ROMBSIZE); | ||
473 | } else { | ||
474 | sb->s_blocksize = ROMBSIZE; | ||
475 | sb->s_blocksize_bits = blksize_bits(ROMBSIZE); | ||
476 | } | ||
477 | #endif | ||
478 | |||
479 | sb->s_maxbytes = 0xFFFFFFFF; | ||
480 | sb->s_magic = ROMFS_MAGIC; | ||
481 | sb->s_flags |= MS_RDONLY | MS_NOATIME; | ||
482 | sb->s_op = &romfs_super_ops; | ||
483 | |||
484 | /* read the image superblock and check it */ | ||
485 | rsb = kmalloc(512, GFP_KERNEL); | ||
486 | if (!rsb) | ||
487 | return -ENOMEM; | ||
488 | |||
489 | sb->s_fs_info = (void *) 512; | ||
490 | ret = romfs_dev_read(sb, 0, rsb, 512); | ||
491 | if (ret < 0) | ||
492 | goto error_rsb; | ||
493 | |||
494 | img_size = be32_to_cpu(rsb->size); | ||
495 | |||
496 | if (sb->s_mtd && img_size > sb->s_mtd->size) | ||
497 | goto error_rsb_inval; | ||
498 | |||
499 | sb->s_fs_info = (void *) img_size; | ||
500 | |||
501 | if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1 || | ||
502 | img_size < ROMFH_SIZE) { | ||
503 | if (!silent) | ||
504 | printk(KERN_WARNING "VFS:" | ||
505 | " Can't find a romfs filesystem on dev %s.\n", | ||
506 | sb->s_id); | ||
507 | goto error_rsb_inval; | ||
508 | } | ||
509 | |||
510 | if (romfs_checksum(rsb, min_t(size_t, img_size, 512))) { | ||
511 | printk(KERN_ERR "ROMFS: bad initial checksum on dev %s.\n", | ||
512 | sb->s_id); | ||
513 | goto error_rsb_inval; | ||
514 | } | ||
515 | |||
516 | storage = sb->s_mtd ? "MTD" : "the block layer"; | ||
517 | |||
518 | len = strnlen(rsb->name, ROMFS_MAXFN); | ||
519 | if (!silent) | ||
520 | printk(KERN_NOTICE "ROMFS: Mounting image '%*.*s' through %s\n", | ||
521 | (unsigned) len, (unsigned) len, rsb->name, storage); | ||
522 | |||
523 | kfree(rsb); | ||
524 | rsb = NULL; | ||
525 | |||
526 | /* find the root directory */ | ||
527 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; | ||
528 | |||
529 | root = romfs_iget(sb, pos); | ||
530 | if (!root) | ||
531 | goto error; | ||
532 | |||
533 | sb->s_root = d_alloc_root(root); | ||
534 | if (!sb->s_root) | ||
535 | goto error_i; | ||
536 | |||
537 | return 0; | ||
538 | |||
539 | error_i: | ||
540 | iput(root); | ||
541 | error: | ||
542 | return -EINVAL; | ||
543 | error_rsb_inval: | ||
544 | ret = -EINVAL; | ||
545 | error_rsb: | ||
546 | return ret; | ||
547 | } | ||
548 | |||
/*
 * get a superblock for mounting
 */
static int romfs_get_sb(struct file_system_type *fs_type,
			int flags, const char *dev_name,
			void *data, struct vfsmount *mnt)
{
	int ret = -EINVAL;

#ifdef CONFIG_ROMFS_ON_MTD
	/* try interpreting dev_name as an MTD device first */
	ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super,
			 mnt);
#endif
#ifdef CONFIG_ROMFS_ON_BLOCK
	/* -EINVAL here means dev_name wasn't an MTD device (or MTD support
	 * is compiled out), so fall back to the block layer */
	if (ret == -EINVAL)
		ret = get_sb_bdev(fs_type, flags, dev_name, data,
				  romfs_fill_super, mnt);
#endif
	return ret;
}
569 | |||
/*
 * destroy a romfs superblock in the appropriate manner
 */
static void romfs_kill_sb(struct super_block *sb)
{
#ifdef CONFIG_ROMFS_ON_MTD
	/* MTD-backed mounts must be torn down through the MTD helper */
	if (sb->s_mtd) {
		kill_mtd_super(sb);
		return;
	}
#endif
#ifdef CONFIG_ROMFS_ON_BLOCK
	/* block-backed mounts go through the generic block teardown */
	if (sb->s_bdev) {
		kill_block_super(sb);
		return;
	}
#endif
}
588 | |||
/* filesystem type registration; FS_REQUIRES_DEV because romfs always needs
 * a backing device (MTD or block) */
static struct file_system_type romfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "romfs",
	.get_sb = romfs_get_sb,
	.kill_sb = romfs_kill_sb,
	.fs_flags = FS_REQUIRES_DEV,
};
596 | |||
597 | /* | ||
598 | * inode storage initialiser | ||
599 | */ | ||
600 | static void romfs_i_init_once(void *_inode) | ||
601 | { | ||
602 | struct romfs_inode_info *inode = _inode; | ||
603 | |||
604 | inode_init_once(&inode->vfs_inode); | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * romfs module initialisation | ||
609 | */ | ||
610 | static int __init init_romfs_fs(void) | ||
611 | { | ||
612 | int ret; | ||
613 | |||
614 | printk(KERN_INFO "ROMFS MTD (C) 2007 Red Hat, Inc.\n"); | ||
615 | |||
616 | romfs_inode_cachep = | ||
617 | kmem_cache_create("romfs_i", | ||
618 | sizeof(struct romfs_inode_info), 0, | ||
619 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
620 | romfs_i_init_once); | ||
621 | |||
622 | if (!romfs_inode_cachep) { | ||
623 | printk(KERN_ERR | ||
624 | "ROMFS error: Failed to initialise inode cache\n"); | ||
625 | return -ENOMEM; | ||
626 | } | ||
627 | ret = register_filesystem(&romfs_fs_type); | ||
628 | if (ret) { | ||
629 | printk(KERN_ERR "ROMFS error: Failed to register filesystem\n"); | ||
630 | goto error_register; | ||
631 | } | ||
632 | return 0; | ||
633 | |||
634 | error_register: | ||
635 | kmem_cache_destroy(romfs_inode_cachep); | ||
636 | return ret; | ||
637 | } | ||
638 | |||
/*
 * romfs module removal
 */
static void __exit exit_romfs_fs(void)
{
	/* deregister first so no new mounts can start... */
	unregister_filesystem(&romfs_fs_type);
	/* ...then it is safe to tear down the inode cache */
	kmem_cache_destroy(romfs_inode_cachep);
}
647 | |||
/* module entry/exit points and metadata */
module_init(init_romfs_fs);
module_exit(exit_romfs_fs);

MODULE_DESCRIPTION("Direct-MTD Capable RomFS");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL"); /* Actually dual-licensed, but it doesn't matter for */
diff --git a/fs/splice.c b/fs/splice.c index dd727d43e5b7..c18aa7e03e2b 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -737,10 +737,19 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
737 | * ->write_end. Most of the time, these expect i_mutex to | 737 | * ->write_end. Most of the time, these expect i_mutex to |
738 | * be held. Since this may result in an ABBA deadlock with | 738 | * be held. Since this may result in an ABBA deadlock with |
739 | * pipe->inode, we have to order lock acquiry here. | 739 | * pipe->inode, we have to order lock acquiry here. |
740 | * | ||
741 | * Outer lock must be inode->i_mutex, as pipe_wait() will | ||
742 | * release and reacquire pipe->inode->i_mutex, AND inode must | ||
743 | * never be a pipe. | ||
740 | */ | 744 | */ |
741 | inode_double_lock(inode, pipe->inode); | 745 | WARN_ON(S_ISFIFO(inode->i_mode)); |
746 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
747 | if (pipe->inode) | ||
748 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
742 | ret = __splice_from_pipe(pipe, &sd, actor); | 749 | ret = __splice_from_pipe(pipe, &sd, actor); |
743 | inode_double_unlock(inode, pipe->inode); | 750 | if (pipe->inode) |
751 | mutex_unlock(&pipe->inode->i_mutex); | ||
752 | mutex_unlock(&inode->i_mutex); | ||
744 | 753 | ||
745 | return ret; | 754 | return ret; |
746 | } | 755 | } |
@@ -831,11 +840,17 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
831 | }; | 840 | }; |
832 | ssize_t ret; | 841 | ssize_t ret; |
833 | 842 | ||
834 | inode_double_lock(inode, pipe->inode); | 843 | WARN_ON(S_ISFIFO(inode->i_mode)); |
844 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
835 | ret = file_remove_suid(out); | 845 | ret = file_remove_suid(out); |
836 | if (likely(!ret)) | 846 | if (likely(!ret)) { |
847 | if (pipe->inode) | ||
848 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
837 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | 849 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); |
838 | inode_double_unlock(inode, pipe->inode); | 850 | if (pipe->inode) |
851 | mutex_unlock(&pipe->inode->i_mutex); | ||
852 | } | ||
853 | mutex_unlock(&inode->i_mutex); | ||
839 | if (ret > 0) { | 854 | if (ret > 0) { |
840 | unsigned long nr_pages; | 855 | unsigned long nr_pages; |
841 | 856 | ||
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 69e971d5ddc1..2b1b8fe5e037 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/dcache.h> | 40 | #include <linux/dcache.h> |
41 | #include <linux/exportfs.h> | 41 | #include <linux/exportfs.h> |
42 | #include <linux/zlib.h> | 42 | #include <linux/zlib.h> |
43 | #include <linux/slab.h> | ||
43 | 44 | ||
44 | #include "squashfs_fs.h" | 45 | #include "squashfs_fs.h" |
45 | #include "squashfs_fs_sb.h" | 46 | #include "squashfs_fs_sb.h" |
diff --git a/fs/super.c b/fs/super.c index 77cb4ec919b9..786fe7d72790 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -771,6 +771,46 @@ void kill_litter_super(struct super_block *sb) | |||
771 | 771 | ||
772 | EXPORT_SYMBOL(kill_litter_super); | 772 | EXPORT_SYMBOL(kill_litter_super); |
773 | 773 | ||
774 | static int ns_test_super(struct super_block *sb, void *data) | ||
775 | { | ||
776 | return sb->s_fs_info == data; | ||
777 | } | ||
778 | |||
779 | static int ns_set_super(struct super_block *sb, void *data) | ||
780 | { | ||
781 | sb->s_fs_info = data; | ||
782 | return set_anon_super(sb, NULL); | ||
783 | } | ||
784 | |||
785 | int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, | ||
786 | int (*fill_super)(struct super_block *, void *, int), | ||
787 | struct vfsmount *mnt) | ||
788 | { | ||
789 | struct super_block *sb; | ||
790 | |||
791 | sb = sget(fs_type, ns_test_super, ns_set_super, data); | ||
792 | if (IS_ERR(sb)) | ||
793 | return PTR_ERR(sb); | ||
794 | |||
795 | if (!sb->s_root) { | ||
796 | int err; | ||
797 | sb->s_flags = flags; | ||
798 | err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); | ||
799 | if (err) { | ||
800 | up_write(&sb->s_umount); | ||
801 | deactivate_super(sb); | ||
802 | return err; | ||
803 | } | ||
804 | |||
805 | sb->s_flags |= MS_ACTIVE; | ||
806 | } | ||
807 | |||
808 | simple_set_mnt(mnt, sb); | ||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | EXPORT_SYMBOL(get_sb_ns); | ||
813 | |||
774 | #ifdef CONFIG_BLOCK | 814 | #ifdef CONFIG_BLOCK |
775 | static int set_bdev_super(struct super_block *s, void *data) | 815 | static int set_bdev_super(struct super_block *s, void *data) |
776 | { | 816 | { |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index f393620890ee..af1914462f02 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | /** | 196 | /** |
197 | * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. | 197 | * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. |
198 | * @c: UBIFS file-system description object | 198 | * @c: UBIFS file-system description object |
199 | * | 199 | * |
200 | * This function calculates and returns the number of eraseblocks which should | 200 | * This function calculates and returns the number of LEBs which should be kept |
201 | * be kept for index usage. | 201 | * for index usage. |
202 | */ | 202 | */ |
203 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | 203 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c) |
204 | { | 204 | { |
205 | int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; | 205 | int idx_lebs; |
206 | long long idx_size; | 206 | long long idx_size; |
207 | 207 | ||
208 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | 208 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; |
209 | |||
210 | /* And make sure we have thrice the index size of space reserved */ | 209 | /* And make sure we have thrice the index size of space reserved */ |
211 | idx_size = idx_size + (idx_size << 1); | 210 | idx_size += idx_size << 1; |
212 | |||
213 | /* | 211 | /* |
214 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' | 212 | * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' |
215 | * pair, nor similarly the two variables for the new index size, so we | 213 | * pair, nor similarly the two variables for the new index size, so we |
216 | * have to do this costly 64-bit division on fast-path. | 214 | * have to do this costly 64-bit division on fast-path. |
217 | */ | 215 | */ |
218 | idx_size += eff_leb_size - 1; | 216 | idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); |
219 | idx_lebs = div_u64(idx_size, eff_leb_size); | ||
220 | /* | 217 | /* |
221 | * The index head is not available for the in-the-gaps method, so add an | 218 | * The index head is not available for the in-the-gaps method, so add an |
222 | * extra LEB to compensate. | 219 | * extra LEB to compensate. |
@@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c) | |||
310 | * do_budget_space - reserve flash space for index and data growth. | 307 | * do_budget_space - reserve flash space for index and data growth. |
311 | * @c: UBIFS file-system description object | 308 | * @c: UBIFS file-system description object |
312 | * | 309 | * |
313 | * This function makes sure UBIFS has enough free eraseblocks for index growth | 310 | * This function makes sure UBIFS has enough free LEBs for index growth and |
314 | * and data. | 311 | * data. |
315 | * | 312 | * |
316 | * When budgeting index space, UBIFS reserves thrice as many LEBs as the index | 313 | * When budgeting index space, UBIFS reserves thrice as many LEBs as the index |
317 | * would take if it was consolidated and written to the flash. This guarantees | 314 | * would take if it was consolidated and written to the flash. This guarantees |
318 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always | 315 | * that the "in-the-gaps" commit method always succeeds and UBIFS will always |
319 | * be able to commit dirty index. So this function basically adds amount of | 316 | * be able to commit dirty index. So this function basically adds amount of |
320 | * budgeted index space to the size of the current index, multiplies this by 3, | 317 | * budgeted index space to the size of the current index, multiplies this by 3, |
321 | * and makes sure this does not exceed the amount of free eraseblocks. | 318 | * and makes sure this does not exceed the amount of free LEBs. |
322 | * | 319 | * |
323 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: | 320 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: |
324 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might | 321 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might |
325 | * be large, because UBIFS does not do any index consolidation as long as | 322 | * be large, because UBIFS does not do any index consolidation as long as |
326 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs | 323 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs |
327 | * will contain a lot of dirt. | 324 | * will contain a lot of dirt. |
328 | * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be | 325 | * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, |
329 | * consolidated to take up to @c->min_idx_lebs LEBs. | 326 | * the index may be consolidated to take up to @c->min_idx_lebs LEBs. |
330 | * | 327 | * |
331 | * This function returns zero in case of success, and %-ENOSPC in case of | 328 | * This function returns zero in case of success, and %-ENOSPC in case of |
332 | * failure. | 329 | * failure. |
@@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) | |||
695 | * This function calculates amount of free space to report to user-space. | 692 | * This function calculates amount of free space to report to user-space. |
696 | * | 693 | * |
697 | * Because UBIFS may introduce substantial overhead (the index, node headers, | 694 | * Because UBIFS may introduce substantial overhead (the index, node headers, |
698 | * alignment, wastage at the end of eraseblocks, etc), it cannot report real | 695 | * alignment, wastage at the end of LEBs, etc), it cannot report real amount of |
699 | * amount of free flash space it has (well, because not all dirty space is | 696 | * free flash space it has (well, because not all dirty space is reclaimable, |
700 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, | 697 | * UBIFS does not actually know the real amount). If UBIFS did so, it would |
701 | * it would bread user expectations about what free space is. Users seem to | 698 | * bread user expectations about what free space is. Users seem to accustomed |
702 | * accustomed to assume that if the file-system reports N bytes of free space, | 699 | * to assume that if the file-system reports N bytes of free space, they would |
703 | * they would be able to fit a file of N bytes to the FS. This almost works for | 700 | * be able to fit a file of N bytes to the FS. This almost works for |
704 | * traditional file-systems, because they have way less overhead than UBIFS. | 701 | * traditional file-systems, because they have way less overhead than UBIFS. |
705 | * So, to keep users happy, UBIFS tries to take the overhead into account. | 702 | * So, to keep users happy, UBIFS tries to take the overhead into account. |
706 | */ | 703 | */ |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index e975bd82f38b..ce2cd8343618 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
479 | "bad or corrupted node)"); | 479 | "bad or corrupted node)"); |
480 | else { | 480 | else { |
481 | for (i = 0; i < nlen && dent->name[i]; i++) | 481 | for (i = 0; i < nlen && dent->name[i]; i++) |
482 | printk("%c", dent->name[i]); | 482 | printk(KERN_CONT "%c", dent->name[i]); |
483 | } | 483 | } |
484 | printk("\n"); | 484 | printk(KERN_CONT "\n"); |
485 | 485 | ||
486 | break; | 486 | break; |
487 | } | 487 | } |
@@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr) | |||
1214 | 1214 | ||
1215 | /* | 1215 | /* |
1216 | * Make sure the last key in our znode is less or | 1216 | * Make sure the last key in our znode is less or |
1217 | * equivalent than the the key in zbranch which goes | 1217 | * equivalent than the key in the zbranch which goes |
1218 | * after our pointing zbranch. | 1218 | * after our pointing zbranch. |
1219 | */ | 1219 | */ |
1220 | cmp = keys_cmp(c, max, | 1220 | cmp = keys_cmp(c, max, |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 0ff89fe71e51..6d34dc7e33e1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
430 | struct ubifs_inode *ui = ubifs_inode(inode); | 430 | struct ubifs_inode *ui = ubifs_inode(inode); |
431 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 431 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
432 | int uninitialized_var(err), appending = !!(pos + len > inode->i_size); | 432 | int uninitialized_var(err), appending = !!(pos + len > inode->i_size); |
433 | int skipped_read = 0; | ||
433 | struct page *page; | 434 | struct page *page; |
434 | 435 | ||
435 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); | 436 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); |
@@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
444 | 445 | ||
445 | if (!PageUptodate(page)) { | 446 | if (!PageUptodate(page)) { |
446 | /* The page is not loaded from the flash */ | 447 | /* The page is not loaded from the flash */ |
447 | if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) | 448 | if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { |
448 | /* | 449 | /* |
449 | * We change whole page so no need to load it. But we | 450 | * We change whole page so no need to load it. But we |
450 | * have to set the @PG_checked flag to make the further | 451 | * have to set the @PG_checked flag to make the further |
@@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
453 | * the media. | 454 | * the media. |
454 | */ | 455 | */ |
455 | SetPageChecked(page); | 456 | SetPageChecked(page); |
456 | else { | 457 | skipped_read = 1; |
458 | } else { | ||
457 | err = do_readpage(page); | 459 | err = do_readpage(page); |
458 | if (err) { | 460 | if (err) { |
459 | unlock_page(page); | 461 | unlock_page(page); |
@@ -470,6 +472,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
470 | if (unlikely(err)) { | 472 | if (unlikely(err)) { |
471 | ubifs_assert(err == -ENOSPC); | 473 | ubifs_assert(err == -ENOSPC); |
472 | /* | 474 | /* |
475 | * If we skipped reading the page because we were going to | ||
476 | * write all of it, then it is not up to date. | ||
477 | */ | ||
478 | if (skipped_read) { | ||
479 | ClearPageChecked(page); | ||
480 | ClearPageUptodate(page); | ||
481 | } | ||
482 | /* | ||
473 | * Budgeting failed which means it would have to force | 483 | * Budgeting failed which means it would have to force |
474 | * write-back but didn't, because we set the @fast flag in the | 484 | * write-back but didn't, because we set the @fast flag in the |
475 | * request. Write-back cannot be done now, while we have the | 485 | * request. Write-back cannot be done now, while we have the |
@@ -949,7 +959,7 @@ static int do_writepage(struct page *page, int len) | |||
949 | * whole index and correct all inode sizes, which is long an unacceptable. | 959 | * whole index and correct all inode sizes, which is long an unacceptable. |
950 | * | 960 | * |
951 | * To prevent situations like this, UBIFS writes pages back only if they are | 961 | * To prevent situations like this, UBIFS writes pages back only if they are |
952 | * within last synchronized inode size, i.e. the the size which has been | 962 | * within the last synchronized inode size, i.e. the size which has been |
953 | * written to the flash media last time. Otherwise, UBIFS forces inode | 963 | * written to the flash media last time. Otherwise, UBIFS forces inode |
954 | * write-back, thus making sure the on-flash inode contains current inode size, | 964 | * write-back, thus making sure the on-flash inode contains current inode size, |
955 | * and then keeps writing pages back. | 965 | * and then keeps writing pages back. |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 717d79c97c5e..1d54383d1269 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, | |||
478 | * ubifs_find_free_space - find a data LEB with free space. | 478 | * ubifs_find_free_space - find a data LEB with free space. |
479 | * @c: the UBIFS file-system description object | 479 | * @c: the UBIFS file-system description object |
480 | * @min_space: minimum amount of required free space | 480 | * @min_space: minimum amount of required free space |
481 | * @free: contains amount of free space in the LEB on exit | 481 | * @offs: contains offset of where free space starts on exit |
482 | * @squeeze: whether to try to find space in a non-empty LEB first | 482 | * @squeeze: whether to try to find space in a non-empty LEB first |
483 | * | 483 | * |
484 | * This function looks for an LEB with at least @min_space bytes of free space. | 484 | * This function looks for an LEB with at least @min_space bytes of free space. |
@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c, | |||
490 | * failed to find a LEB with @min_space bytes of free space and other a negative | 490 | * failed to find a LEB with @min_space bytes of free space and other a negative |
491 | * error codes in case of failure. | 491 | * error codes in case of failure. |
492 | */ | 492 | */ |
493 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | 493 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, |
494 | int squeeze) | 494 | int squeeze) |
495 | { | 495 | { |
496 | const struct ubifs_lprops *lprops; | 496 | const struct ubifs_lprops *lprops; |
@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
558 | spin_unlock(&c->space_lock); | 558 | spin_unlock(&c->space_lock); |
559 | } | 559 | } |
560 | 560 | ||
561 | *free = lprops->free; | 561 | *offs = c->leb_size - lprops->free; |
562 | ubifs_release_lprops(c); | 562 | ubifs_release_lprops(c); |
563 | 563 | ||
564 | if (*free == c->leb_size) { | 564 | if (*offs == 0) { |
565 | /* | 565 | /* |
566 | * Ensure that empty LEBs have been unmapped. They may not have | 566 | * Ensure that empty LEBs have been unmapped. They may not have |
567 | * been, for example, because of an unclean unmount. Also | 567 | * been, for example, because of an unclean unmount. Also |
@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
573 | return err; | 573 | return err; |
574 | } | 574 | } |
575 | 575 | ||
576 | dbg_find("found LEB %d, free %d", lnum, *free); | 576 | dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); |
577 | ubifs_assert(*free >= min_space); | 577 | ubifs_assert(*offs <= c->leb_size - min_space); |
578 | return lnum; | 578 | return lnum; |
579 | 579 | ||
580 | out: | 580 | out: |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index a711d33b3d3e..f0f5f15d384e 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -47,7 +47,7 @@ | |||
47 | * have to waste large pieces of free space at the end of LEB B, because nodes | 47 | * have to waste large pieces of free space at the end of LEB B, because nodes |
48 | * from LEB A would not fit. And the worst situation is when all nodes are of | 48 | * from LEB A would not fit. And the worst situation is when all nodes are of |
49 | * maximum size. So dark watermark is the amount of free + dirty space in LEB | 49 | * maximum size. So dark watermark is the amount of free + dirty space in LEB |
50 | * which are guaranteed to be reclaimable. If LEB has less space, the GC migh | 50 | * which are guaranteed to be reclaimable. If LEB has less space, the GC might |
51 | * be unable to reclaim it. So, LEBs with free + dirty greater than dark | 51 | * be unable to reclaim it. So, LEBs with free + dirty greater than dark |
52 | * watermark are "good" LEBs from GC's point of few. The other LEBs are not so | 52 | * watermark are "good" LEBs from GC's point of few. The other LEBs are not so |
53 | * good, and GC takes extra care when moving them. | 53 | * good, and GC takes extra care when moving them. |
@@ -57,14 +57,6 @@ | |||
57 | #include "ubifs.h" | 57 | #include "ubifs.h" |
58 | 58 | ||
59 | /* | 59 | /* |
60 | * GC tries to optimize the way it fit nodes to available space, and it sorts | ||
61 | * nodes a little. The below constants are watermarks which define "large", | ||
62 | * "medium", and "small" nodes. | ||
63 | */ | ||
64 | #define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) | ||
65 | #define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ | ||
66 | |||
67 | /* | ||
68 | * GC may need to move more than one LEB to make progress. The below constants | 60 | * GC may need to move more than one LEB to make progress. The below constants |
69 | * define "soft" and "hard" limits on the number of LEBs the garbage collector | 61 | * define "soft" and "hard" limits on the number of LEBs the garbage collector |
70 | * may move. | 62 | * may move. |
@@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c) | |||
116 | } | 108 | } |
117 | 109 | ||
118 | /** | 110 | /** |
119 | * joinup - bring data nodes for an inode together. | 111 | * list_sort - sort a list. |
120 | * @c: UBIFS file-system description object | 112 | * @priv: private data, passed to @cmp |
121 | * @sleb: describes scanned LEB | 113 | * @head: the list to sort |
122 | * @inum: inode number | 114 | * @cmp: the elements comparison function |
123 | * @blk: block number | ||
124 | * @data: list to which to add data nodes | ||
125 | * | 115 | * |
126 | * This function looks at the first few nodes in the scanned LEB @sleb and adds | 116 | * This function has been implemented by Mark J Roberts <mjr@znex.org>. It |
127 | * them to @data if they are data nodes from @inum and have a larger block | 117 | * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted |
128 | * number than @blk. This function returns %0 on success and a negative error | 118 | * in ascending order. |
129 | * code on failure. | 119 | * |
120 | * The comparison function @cmp is supposed to return a negative value if @a is | ||
121 | * than @b, and a positive value if @a is greater than @b. If @a and @b are | ||
122 | * equivalent, then it does not matter what this function returns. | ||
130 | */ | 123 | */ |
131 | static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, | 124 | static void list_sort(void *priv, struct list_head *head, |
132 | unsigned int blk, struct list_head *data) | 125 | int (*cmp)(void *priv, struct list_head *a, |
126 | struct list_head *b)) | ||
133 | { | 127 | { |
134 | int err, cnt = 6, lnum = sleb->lnum, offs; | 128 | struct list_head *p, *q, *e, *list, *tail, *oldhead; |
135 | struct ubifs_scan_node *snod, *tmp; | 129 | int insize, nmerges, psize, qsize, i; |
136 | union ubifs_key *key; | 130 | |
131 | if (list_empty(head)) | ||
132 | return; | ||
133 | |||
134 | list = head->next; | ||
135 | list_del(head); | ||
136 | insize = 1; | ||
137 | for (;;) { | ||
138 | p = oldhead = list; | ||
139 | list = tail = NULL; | ||
140 | nmerges = 0; | ||
141 | |||
142 | while (p) { | ||
143 | nmerges++; | ||
144 | q = p; | ||
145 | psize = 0; | ||
146 | for (i = 0; i < insize; i++) { | ||
147 | psize++; | ||
148 | q = q->next == oldhead ? NULL : q->next; | ||
149 | if (!q) | ||
150 | break; | ||
151 | } | ||
137 | 152 | ||
138 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | 153 | qsize = insize; |
139 | key = &snod->key; | 154 | while (psize > 0 || (qsize > 0 && q)) { |
140 | if (key_inum(c, key) == inum && | 155 | if (!psize) { |
141 | key_type(c, key) == UBIFS_DATA_KEY && | 156 | e = q; |
142 | key_block(c, key) > blk) { | 157 | q = q->next; |
143 | offs = snod->offs; | 158 | qsize--; |
144 | err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); | 159 | if (q == oldhead) |
145 | if (err < 0) | 160 | q = NULL; |
146 | return err; | 161 | } else if (!qsize || !q) { |
147 | list_del(&snod->list); | 162 | e = p; |
148 | if (err) { | 163 | p = p->next; |
149 | list_add_tail(&snod->list, data); | 164 | psize--; |
150 | blk = key_block(c, key); | 165 | if (p == oldhead) |
151 | } else | 166 | p = NULL; |
152 | kfree(snod); | 167 | } else if (cmp(priv, p, q) <= 0) { |
153 | cnt = 6; | 168 | e = p; |
154 | } else if (--cnt == 0) | 169 | p = p->next; |
170 | psize--; | ||
171 | if (p == oldhead) | ||
172 | p = NULL; | ||
173 | } else { | ||
174 | e = q; | ||
175 | q = q->next; | ||
176 | qsize--; | ||
177 | if (q == oldhead) | ||
178 | q = NULL; | ||
179 | } | ||
180 | if (tail) | ||
181 | tail->next = e; | ||
182 | else | ||
183 | list = e; | ||
184 | e->prev = tail; | ||
185 | tail = e; | ||
186 | } | ||
187 | p = q; | ||
188 | } | ||
189 | |||
190 | tail->next = list; | ||
191 | list->prev = tail; | ||
192 | |||
193 | if (nmerges <= 1) | ||
155 | break; | 194 | break; |
195 | |||
196 | insize *= 2; | ||
156 | } | 197 | } |
157 | return 0; | 198 | |
199 | head->next = list; | ||
200 | head->prev = list->prev; | ||
201 | list->prev->next = head; | ||
202 | list->prev = head; | ||
158 | } | 203 | } |
159 | 204 | ||
160 | /** | 205 | /** |
161 | * move_nodes - move nodes. | 206 | * data_nodes_cmp - compare 2 data nodes. |
207 | * @priv: UBIFS file-system description object | ||
208 | * @a: first data node | ||
209 | * @a: second data node | ||
210 | * | ||
211 | * This function compares data nodes @a and @b. Returns %1 if @a has greater | ||
212 | * inode or block number, and %-1 otherwise. | ||
213 | */ | ||
214 | int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
215 | { | ||
216 | ino_t inuma, inumb; | ||
217 | struct ubifs_info *c = priv; | ||
218 | struct ubifs_scan_node *sa, *sb; | ||
219 | |||
220 | cond_resched(); | ||
221 | sa = list_entry(a, struct ubifs_scan_node, list); | ||
222 | sb = list_entry(b, struct ubifs_scan_node, list); | ||
223 | ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); | ||
224 | ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); | ||
225 | |||
226 | inuma = key_inum(c, &sa->key); | ||
227 | inumb = key_inum(c, &sb->key); | ||
228 | |||
229 | if (inuma == inumb) { | ||
230 | unsigned int blka = key_block(c, &sa->key); | ||
231 | unsigned int blkb = key_block(c, &sb->key); | ||
232 | |||
233 | if (blka <= blkb) | ||
234 | return -1; | ||
235 | } else if (inuma <= inumb) | ||
236 | return -1; | ||
237 | |||
238 | return 1; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * nondata_nodes_cmp - compare 2 non-data nodes. | ||
243 | * @priv: UBIFS file-system description object | ||
244 | * @a: first node | ||
245 | * @a: second node | ||
246 | * | ||
247 | * This function compares nodes @a and @b. It makes sure that inode nodes go | ||
248 | * first and sorted by length in descending order. Directory entry nodes go | ||
249 | * after inode nodes and are sorted in ascending hash valuer order. | ||
250 | */ | ||
251 | int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
252 | { | ||
253 | int typea, typeb; | ||
254 | ino_t inuma, inumb; | ||
255 | struct ubifs_info *c = priv; | ||
256 | struct ubifs_scan_node *sa, *sb; | ||
257 | |||
258 | cond_resched(); | ||
259 | sa = list_entry(a, struct ubifs_scan_node, list); | ||
260 | sb = list_entry(b, struct ubifs_scan_node, list); | ||
261 | typea = key_type(c, &sa->key); | ||
262 | typeb = key_type(c, &sb->key); | ||
263 | ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); | ||
264 | |||
265 | /* Inodes go before directory entries */ | ||
266 | if (typea == UBIFS_INO_KEY) { | ||
267 | if (typeb == UBIFS_INO_KEY) | ||
268 | return sb->len - sa->len; | ||
269 | return -1; | ||
270 | } | ||
271 | if (typeb == UBIFS_INO_KEY) | ||
272 | return 1; | ||
273 | |||
274 | ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); | ||
275 | inuma = key_inum(c, &sa->key); | ||
276 | inumb = key_inum(c, &sb->key); | ||
277 | |||
278 | if (inuma == inumb) { | ||
279 | uint32_t hasha = key_hash(c, &sa->key); | ||
280 | uint32_t hashb = key_hash(c, &sb->key); | ||
281 | |||
282 | if (hasha <= hashb) | ||
283 | return -1; | ||
284 | } else if (inuma <= inumb) | ||
285 | return -1; | ||
286 | |||
287 | return 1; | ||
288 | } | ||
289 | |||
290 | /** | ||
291 | * sort_nodes - sort nodes for GC. | ||
162 | * @c: UBIFS file-system description object | 292 | * @c: UBIFS file-system description object |
163 | * @sleb: describes nodes to move | 293 | * @sleb: describes nodes to sort and contains the result on exit |
294 | * @nondata: contains non-data nodes on exit | ||
295 | * @min: minimum node size is returned here | ||
164 | * | 296 | * |
165 | * This function moves valid nodes from data LEB described by @sleb to the GC | 297 | * This function sorts the list of inodes to garbage collect. First of all, it |
166 | * journal head. The obsolete nodes are dropped. | 298 | * kills obsolete nodes and separates data and non-data nodes to the |
299 | * @sleb->nodes and @nondata lists correspondingly. | ||
300 | * | ||
301 | * Data nodes are then sorted in block number order - this is important for | ||
302 | * bulk-read; data nodes with lower inode number go before data nodes with | ||
303 | * higher inode number, and data nodes with lower block number go before data | ||
304 | * nodes with higher block number; | ||
167 | * | 305 | * |
168 | * When moving nodes we have to deal with classical bin-packing problem: the | 306 | * Non-data nodes are sorted as follows. |
169 | * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", | 307 | * o First go inode nodes - they are sorted in descending length order. |
170 | * where the nodes in the @sleb->nodes list are the elements which should be | 308 | * o Then go directory entry nodes - they are sorted in hash order, which |
171 | * fit optimally to the bins. This function uses the "first fit decreasing" | 309 | * should supposedly optimize 'readdir()'. Direntry nodes with lower parent |
172 | * strategy, although it does not really sort the nodes but just split them on | 310 | * inode number go before direntry nodes with higher parent inode number, |
173 | * 3 classes - large, medium, and small, so they are roughly sorted. | 311 | * and direntry nodes with lower name hash values go before direntry nodes |
312 | * with higher name hash values. | ||
174 | * | 313 | * |
175 | * This function returns zero in case of success, %-EAGAIN if commit is | 314 | * This function returns zero in case of success and a negative error code in |
176 | * required, and other negative error codes in case of other failures. | 315 | * case of failure. |
177 | */ | 316 | */ |
178 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | 317 | static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, |
318 | struct list_head *nondata, int *min) | ||
179 | { | 319 | { |
180 | struct ubifs_scan_node *snod, *tmp; | 320 | struct ubifs_scan_node *snod, *tmp; |
181 | struct list_head data, large, medium, small; | ||
182 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | ||
183 | int avail, err, min = INT_MAX; | ||
184 | unsigned int blk = 0; | ||
185 | ino_t inum = 0; | ||
186 | 321 | ||
187 | INIT_LIST_HEAD(&data); | 322 | *min = INT_MAX; |
188 | INIT_LIST_HEAD(&large); | ||
189 | INIT_LIST_HEAD(&medium); | ||
190 | INIT_LIST_HEAD(&small); | ||
191 | 323 | ||
192 | while (!list_empty(&sleb->nodes)) { | 324 | /* Separate data nodes and non-data nodes */ |
193 | struct list_head *lst = sleb->nodes.next; | 325 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { |
194 | 326 | int err; | |
195 | snod = list_entry(lst, struct ubifs_scan_node, list); | ||
196 | 327 | ||
197 | ubifs_assert(snod->type != UBIFS_IDX_NODE); | 328 | ubifs_assert(snod->type != UBIFS_IDX_NODE); |
198 | ubifs_assert(snod->type != UBIFS_REF_NODE); | 329 | ubifs_assert(snod->type != UBIFS_REF_NODE); |
@@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
201 | err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, | 332 | err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, |
202 | snod->offs, 0); | 333 | snod->offs, 0); |
203 | if (err < 0) | 334 | if (err < 0) |
204 | goto out; | 335 | return err; |
205 | 336 | ||
206 | list_del(lst); | ||
207 | if (!err) { | 337 | if (!err) { |
208 | /* The node is obsolete, remove it from the list */ | 338 | /* The node is obsolete, remove it from the list */ |
339 | list_del(&snod->list); | ||
209 | kfree(snod); | 340 | kfree(snod); |
210 | continue; | 341 | continue; |
211 | } | 342 | } |
212 | 343 | ||
213 | /* | 344 | if (snod->len < *min) |
214 | * Sort the list of nodes so that data nodes go first, large | 345 | *min = snod->len; |
215 | * nodes go second, and small nodes go last. | 346 | |
216 | */ | 347 | if (key_type(c, &snod->key) != UBIFS_DATA_KEY) |
217 | if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { | 348 | list_move_tail(&snod->list, nondata); |
218 | if (inum != key_inum(c, &snod->key)) { | ||
219 | if (inum) { | ||
220 | /* | ||
221 | * Try to move data nodes from the same | ||
222 | * inode together. | ||
223 | */ | ||
224 | err = joinup(c, sleb, inum, blk, &data); | ||
225 | if (err) | ||
226 | goto out; | ||
227 | } | ||
228 | inum = key_inum(c, &snod->key); | ||
229 | blk = key_block(c, &snod->key); | ||
230 | } | ||
231 | list_add_tail(lst, &data); | ||
232 | } else if (snod->len > MEDIUM_NODE_WM) | ||
233 | list_add_tail(lst, &large); | ||
234 | else if (snod->len > SMALL_NODE_WM) | ||
235 | list_add_tail(lst, &medium); | ||
236 | else | ||
237 | list_add_tail(lst, &small); | ||
238 | |||
239 | /* And find the smallest node */ | ||
240 | if (snod->len < min) | ||
241 | min = snod->len; | ||
242 | } | 349 | } |
243 | 350 | ||
244 | /* | 351 | /* Sort data and non-data nodes */ |
245 | * Join the tree lists so that we'd have one roughly sorted list | 352 | list_sort(c, &sleb->nodes, &data_nodes_cmp); |
246 | * ('large' will be the head of the joined list). | 353 | list_sort(c, nondata, &nondata_nodes_cmp); |
247 | */ | 354 | return 0; |
248 | list_splice(&data, &large); | 355 | } |
249 | list_splice(&medium, large.prev); | 356 | |
250 | list_splice(&small, large.prev); | 357 | /** |
358 | * move_node - move a node. | ||
359 | * @c: UBIFS file-system description object | ||
360 | * @sleb: describes the LEB to move nodes from | ||
361 | * @snod: the mode to move | ||
362 | * @wbuf: write-buffer to move node to | ||
363 | * | ||
364 | * This function moves node @snod to @wbuf, changes TNC correspondingly, and | ||
365 | * destroys @snod. Returns zero in case of success and a negative error code in | ||
366 | * case of failure. | ||
367 | */ | ||
368 | static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | ||
369 | struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) | ||
370 | { | ||
371 | int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; | ||
372 | |||
373 | cond_resched(); | ||
374 | err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); | ||
375 | if (err) | ||
376 | return err; | ||
377 | |||
378 | err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, | ||
379 | snod->offs, new_lnum, new_offs, | ||
380 | snod->len); | ||
381 | list_del(&snod->list); | ||
382 | kfree(snod); | ||
383 | return err; | ||
384 | } | ||
385 | |||
386 | /** | ||
387 | * move_nodes - move nodes. | ||
388 | * @c: UBIFS file-system description object | ||
389 | * @sleb: describes the LEB to move nodes from | ||
390 | * | ||
391 | * This function moves valid nodes from data LEB described by @sleb to the GC | ||
392 | * journal head. This function returns zero in case of success, %-EAGAIN if | ||
393 | * commit is required, and other negative error codes in case of other | ||
394 | * failures. | ||
395 | */ | ||
396 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | ||
397 | { | ||
398 | int err, min; | ||
399 | LIST_HEAD(nondata); | ||
400 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | ||
251 | 401 | ||
252 | if (wbuf->lnum == -1) { | 402 | if (wbuf->lnum == -1) { |
253 | /* | 403 | /* |
@@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
256 | */ | 406 | */ |
257 | err = switch_gc_head(c); | 407 | err = switch_gc_head(c); |
258 | if (err) | 408 | if (err) |
259 | goto out; | 409 | return err; |
260 | } | 410 | } |
261 | 411 | ||
412 | err = sort_nodes(c, sleb, &nondata, &min); | ||
413 | if (err) | ||
414 | goto out; | ||
415 | |||
262 | /* Write nodes to their new location. Use the first-fit strategy */ | 416 | /* Write nodes to their new location. Use the first-fit strategy */ |
263 | while (1) { | 417 | while (1) { |
264 | avail = c->leb_size - wbuf->offs - wbuf->used; | 418 | int avail; |
265 | list_for_each_entry_safe(snod, tmp, &large, list) { | 419 | struct ubifs_scan_node *snod, *tmp; |
266 | int new_lnum, new_offs; | 420 | |
421 | /* Move data nodes */ | ||
422 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | ||
423 | avail = c->leb_size - wbuf->offs - wbuf->used; | ||
424 | if (snod->len > avail) | ||
425 | /* | ||
426 | * Do not skip data nodes in order to optimize | ||
427 | * bulk-read. | ||
428 | */ | ||
429 | break; | ||
430 | |||
431 | err = move_node(c, sleb, snod, wbuf); | ||
432 | if (err) | ||
433 | goto out; | ||
434 | } | ||
267 | 435 | ||
436 | /* Move non-data nodes */ | ||
437 | list_for_each_entry_safe(snod, tmp, &nondata, list) { | ||
438 | avail = c->leb_size - wbuf->offs - wbuf->used; | ||
268 | if (avail < min) | 439 | if (avail < min) |
269 | break; | 440 | break; |
270 | 441 | ||
271 | if (snod->len > avail) | 442 | if (snod->len > avail) { |
272 | /* This node does not fit */ | 443 | /* |
444 | * Keep going only if this is an inode with | ||
445 | * some data. Otherwise stop and switch the GC | ||
446 | * head. IOW, we assume that data-less inode | ||
447 | * nodes and direntry nodes are roughly of the | ||
448 | * same size. | ||
449 | */ | ||
450 | if (key_type(c, &snod->key) == UBIFS_DENT_KEY || | ||
451 | snod->len == UBIFS_INO_NODE_SZ) | ||
452 | break; | ||
273 | continue; | 453 | continue; |
454 | } | ||
274 | 455 | ||
275 | cond_resched(); | 456 | err = move_node(c, sleb, snod, wbuf); |
276 | |||
277 | new_lnum = wbuf->lnum; | ||
278 | new_offs = wbuf->offs + wbuf->used; | ||
279 | err = ubifs_wbuf_write_nolock(wbuf, snod->node, | ||
280 | snod->len); | ||
281 | if (err) | 457 | if (err) |
282 | goto out; | 458 | goto out; |
283 | err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, | ||
284 | snod->offs, new_lnum, new_offs, | ||
285 | snod->len); | ||
286 | if (err) | ||
287 | goto out; | ||
288 | |||
289 | avail = c->leb_size - wbuf->offs - wbuf->used; | ||
290 | list_del(&snod->list); | ||
291 | kfree(snod); | ||
292 | } | 459 | } |
293 | 460 | ||
294 | if (list_empty(&large)) | 461 | if (list_empty(&sleb->nodes) && list_empty(&nondata)) |
295 | break; | 462 | break; |
296 | 463 | ||
297 | /* | 464 | /* |
@@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
306 | return 0; | 473 | return 0; |
307 | 474 | ||
308 | out: | 475 | out: |
309 | list_for_each_entry_safe(snod, tmp, &large, list) { | 476 | list_splice_tail(&nondata, &sleb->nodes); |
310 | list_del(&snod->list); | ||
311 | kfree(snod); | ||
312 | } | ||
313 | return err; | 477 | return err; |
314 | } | 478 | } |
315 | 479 | ||
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index a11ca0958a23..64b5f3a309f5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun) | |||
114 | */ | 114 | */ |
115 | static int reserve_space(struct ubifs_info *c, int jhead, int len) | 115 | static int reserve_space(struct ubifs_info *c, int jhead, int len) |
116 | { | 116 | { |
117 | int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; | 117 | int err = 0, err1, retries = 0, avail, lnum, offs, squeeze; |
118 | struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; | 118 | struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; |
119 | 119 | ||
120 | /* | 120 | /* |
@@ -139,10 +139,9 @@ again: | |||
139 | * Write buffer wasn't seek'ed or there is no enough space - look for an | 139 | * Write buffer wasn't seek'ed or there is no enough space - look for an |
140 | * LEB with some empty space. | 140 | * LEB with some empty space. |
141 | */ | 141 | */ |
142 | lnum = ubifs_find_free_space(c, len, &free, squeeze); | 142 | lnum = ubifs_find_free_space(c, len, &offs, squeeze); |
143 | if (lnum >= 0) { | 143 | if (lnum >= 0) { |
144 | /* Found an LEB, add it to the journal head */ | 144 | /* Found an LEB, add it to the journal head */ |
145 | offs = c->leb_size - free; | ||
146 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); | 145 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); |
147 | if (err) | 146 | if (err) |
148 | goto out_return; | 147 | goto out_return; |
@@ -1366,7 +1365,7 @@ out_ro: | |||
1366 | * @host: host inode | 1365 | * @host: host inode |
1367 | * | 1366 | * |
1368 | * This function writes the updated version of an extended attribute inode and | 1367 | * This function writes the updated version of an extended attribute inode and |
1369 | * the host inode tho the journal (to the base head). The host inode is written | 1368 | * the host inode to the journal (to the base head). The host inode is written |
1370 | * after the extended attribute inode in order to guarantee that the extended | 1369 | * after the extended attribute inode in order to guarantee that the extended |
1371 | * attribute will be flushed when the inode is synchronized by 'fsync()' and | 1370 | * attribute will be flushed when the inode is synchronized by 'fsync()' and |
1372 | * consequently, the write-buffer is synchronized. This function returns zero | 1371 | * consequently, the write-buffer is synchronized. This function returns zero |
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index efb3430a2581..5fa27ea031ba 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h | |||
@@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) | |||
381 | * @c: UBIFS file-system description object | 381 | * @c: UBIFS file-system description object |
382 | * @key: the key to get hash from | 382 | * @key: the key to get hash from |
383 | */ | 383 | */ |
384 | static inline int key_hash(const struct ubifs_info *c, | 384 | static inline uint32_t key_hash(const struct ubifs_info *c, |
385 | const union ubifs_key *key) | 385 | const union ubifs_key *key) |
386 | { | 386 | { |
387 | return key->u32[1] & UBIFS_S_KEY_HASH_MASK; | 387 | return key->u32[1] & UBIFS_S_KEY_HASH_MASK; |
388 | } | 388 | } |
@@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c, | |||
392 | * @c: UBIFS file-system description object | 392 | * @c: UBIFS file-system description object |
393 | * @k: the key to get hash from | 393 | * @k: the key to get hash from |
394 | */ | 394 | */ |
395 | static inline int key_hash_flash(const struct ubifs_info *c, const void *k) | 395 | static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) |
396 | { | 396 | { |
397 | const union ubifs_key *key = k; | 397 | const union ubifs_key *key = k; |
398 | 398 | ||
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 3e0aa7367556..56e33772a1ee 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
@@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) | |||
239 | } | 239 | } |
240 | 240 | ||
241 | /* | 241 | /* |
242 | * Make sure the the amount of space in buds will not exceed | 242 | * Make sure the amount of space in buds will not exceed the |
243 | * 'c->max_bud_bytes' limit, because we want to guarantee mount time | 243 | * 'c->max_bud_bytes' limit, because we want to guarantee mount time |
244 | * limits. | 244 | * limits. |
245 | * | 245 | * |
@@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c) | |||
367 | bud->jhead, c->leb_size - bud->start, | 367 | bud->jhead, c->leb_size - bud->start, |
368 | c->cmt_bud_bytes); | 368 | c->cmt_bud_bytes); |
369 | rb_erase(p1, &c->buds); | 369 | rb_erase(p1, &c->buds); |
370 | list_del(&bud->list); | ||
371 | /* | 370 | /* |
372 | * If the commit does not finish, the recovery will need | 371 | * If the commit does not finish, the recovery will need |
373 | * to replay the journal, in which case the old buds | 372 | * to replay the journal, in which case the old buds |
@@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c) | |||
375 | * commit i.e. do not allow them to be garbage | 374 | * commit i.e. do not allow them to be garbage |
376 | * collected. | 375 | * collected. |
377 | */ | 376 | */ |
378 | list_add(&bud->list, &c->old_buds); | 377 | list_move(&bud->list, &c->old_buds); |
379 | } | 378 | } |
380 | } | 379 | } |
381 | spin_unlock(&c->buds_lock); | 380 | spin_unlock(&c->buds_lock); |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 3216a1f277f8..8cbfb8248025 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
@@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
229 | while (offs + len > c->leb_size) { | 229 | while (offs + len > c->leb_size) { |
230 | alen = ALIGN(offs, c->min_io_size); | 230 | alen = ALIGN(offs, c->min_io_size); |
231 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 231 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
232 | dbg_chk_lpt_sz(c, 2, alen - offs); | 232 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
233 | err = alloc_lpt_leb(c, &lnum); | 233 | err = alloc_lpt_leb(c, &lnum); |
234 | if (err) | 234 | if (err) |
235 | goto no_space; | 235 | goto no_space; |
@@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
272 | if (offs + c->lsave_sz > c->leb_size) { | 272 | if (offs + c->lsave_sz > c->leb_size) { |
273 | alen = ALIGN(offs, c->min_io_size); | 273 | alen = ALIGN(offs, c->min_io_size); |
274 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 274 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
275 | dbg_chk_lpt_sz(c, 2, alen - offs); | 275 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
276 | err = alloc_lpt_leb(c, &lnum); | 276 | err = alloc_lpt_leb(c, &lnum); |
277 | if (err) | 277 | if (err) |
278 | goto no_space; | 278 | goto no_space; |
@@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
292 | if (offs + c->ltab_sz > c->leb_size) { | 292 | if (offs + c->ltab_sz > c->leb_size) { |
293 | alen = ALIGN(offs, c->min_io_size); | 293 | alen = ALIGN(offs, c->min_io_size); |
294 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 294 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
295 | dbg_chk_lpt_sz(c, 2, alen - offs); | 295 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
296 | err = alloc_lpt_leb(c, &lnum); | 296 | err = alloc_lpt_leb(c, &lnum); |
297 | if (err) | 297 | if (err) |
298 | goto no_space; | 298 | goto no_space; |
@@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c) | |||
416 | alen, UBI_SHORTTERM); | 416 | alen, UBI_SHORTTERM); |
417 | if (err) | 417 | if (err) |
418 | return err; | 418 | return err; |
419 | dbg_chk_lpt_sz(c, 4, alen - wlen); | ||
420 | } | 419 | } |
421 | dbg_chk_lpt_sz(c, 2, 0); | 420 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
422 | err = realloc_lpt_leb(c, &lnum); | 421 | err = realloc_lpt_leb(c, &lnum); |
423 | if (err) | 422 | if (err) |
424 | goto no_space; | 423 | goto no_space; |
425 | offs = 0; | 424 | offs = from = 0; |
426 | from = 0; | ||
427 | ubifs_assert(lnum >= c->lpt_first && | 425 | ubifs_assert(lnum >= c->lpt_first && |
428 | lnum <= c->lpt_last); | 426 | lnum <= c->lpt_last); |
429 | err = ubifs_leb_unmap(c, lnum); | 427 | err = ubifs_leb_unmap(c, lnum); |
@@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c) | |||
477 | UBI_SHORTTERM); | 475 | UBI_SHORTTERM); |
478 | if (err) | 476 | if (err) |
479 | return err; | 477 | return err; |
480 | dbg_chk_lpt_sz(c, 2, alen - wlen); | 478 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
481 | err = realloc_lpt_leb(c, &lnum); | 479 | err = realloc_lpt_leb(c, &lnum); |
482 | if (err) | 480 | if (err) |
483 | goto no_space; | 481 | goto no_space; |
484 | offs = 0; | 482 | offs = from = 0; |
485 | ubifs_assert(lnum >= c->lpt_first && | 483 | ubifs_assert(lnum >= c->lpt_first && |
486 | lnum <= c->lpt_last); | 484 | lnum <= c->lpt_last); |
487 | err = ubifs_leb_unmap(c, lnum); | 485 | err = ubifs_leb_unmap(c, lnum); |
@@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c) | |||
504 | UBI_SHORTTERM); | 502 | UBI_SHORTTERM); |
505 | if (err) | 503 | if (err) |
506 | return err; | 504 | return err; |
507 | dbg_chk_lpt_sz(c, 2, alen - wlen); | 505 | dbg_chk_lpt_sz(c, 2, c->leb_size - offs); |
508 | err = realloc_lpt_leb(c, &lnum); | 506 | err = realloc_lpt_leb(c, &lnum); |
509 | if (err) | 507 | if (err) |
510 | goto no_space; | 508 | goto no_space; |
511 | offs = 0; | 509 | offs = from = 0; |
512 | ubifs_assert(lnum >= c->lpt_first && | 510 | ubifs_assert(lnum >= c->lpt_first && |
513 | lnum <= c->lpt_last); | 511 | lnum <= c->lpt_last); |
514 | err = ubifs_leb_unmap(c, lnum); | 512 | err = ubifs_leb_unmap(c, lnum); |
@@ -1756,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) | |||
1756 | /** | 1754 | /** |
1757 | * dbg_chk_lpt_sz - check LPT does not write more than LPT size. | 1755 | * dbg_chk_lpt_sz - check LPT does not write more than LPT size. |
1758 | * @c: the UBIFS file-system description object | 1756 | * @c: the UBIFS file-system description object |
1759 | * @action: action | 1757 | * @action: what to do |
1760 | * @len: length written | 1758 | * @len: length written |
1761 | * | 1759 | * |
1762 | * This function returns %0 on success and a negative error code on failure. | 1760 | * This function returns %0 on success and a negative error code on failure. |
1761 | * The @action argument may be one of: | ||
1762 | * o %0 - LPT debugging checking starts, initialize debugging variables; | ||
1763 | * o %1 - wrote an LPT node, increase LPT size by @len bytes; | ||
1764 | * o %2 - switched to a different LEB and wasted @len bytes; | ||
1765 | * o %3 - check that we've written the right number of bytes. | ||
1766 | * o %4 - wasted @len bytes; | ||
1763 | */ | 1767 | */ |
1764 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) | 1768 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) |
1765 | { | 1769 | { |
@@ -1917,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) | |||
1917 | lnum, offs); | 1921 | lnum, offs); |
1918 | err = ubifs_unpack_nnode(c, buf, &nnode); | 1922 | err = ubifs_unpack_nnode(c, buf, &nnode); |
1919 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { | 1923 | for (i = 0; i < UBIFS_LPT_FANOUT; i++) { |
1920 | printk("%d:%d", nnode.nbranch[i].lnum, | 1924 | printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, |
1921 | nnode.nbranch[i].offs); | 1925 | nnode.nbranch[i].offs); |
1922 | if (i != UBIFS_LPT_FANOUT - 1) | 1926 | if (i != UBIFS_LPT_FANOUT - 1) |
1923 | printk(", "); | 1927 | printk(KERN_CONT ", "); |
1924 | } | 1928 | } |
1925 | printk("\n"); | 1929 | printk(KERN_CONT "\n"); |
1926 | break; | 1930 | break; |
1927 | } | 1931 | } |
1928 | case UBIFS_LPT_LTAB: | 1932 | case UBIFS_LPT_LTAB: |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 90acac603e63..10662975d2ef 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, | |||
425 | * @lnum: LEB number of the LEB from which @buf was read | 425 | * @lnum: LEB number of the LEB from which @buf was read |
426 | * @offs: offset from which @buf was read | 426 | * @offs: offset from which @buf was read |
427 | * | 427 | * |
428 | * This function scans @buf for more nodes and returns %0 is a node is found and | 428 | * This function ensures that the corrupted node at @offs is the last thing |
429 | * %1 if no more nodes are found. | 429 | * written to a LEB. This function returns %1 if more data is not found and |
430 | * %0 if more data is found. | ||
430 | */ | 431 | */ |
431 | static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, | 432 | static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, |
432 | int lnum, int offs) | 433 | int lnum, int offs) |
433 | { | 434 | { |
434 | int skip, next_offs = 0; | 435 | struct ubifs_ch *ch = buf; |
436 | int skip, dlen = le32_to_cpu(ch->len); | ||
435 | 437 | ||
436 | if (len > UBIFS_DATA_NODE_SZ) { | 438 | /* Check for empty space after the corrupt node's common header */ |
437 | struct ubifs_ch *ch = buf; | 439 | skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; |
438 | int dlen = le32_to_cpu(ch->len); | 440 | if (is_empty(buf + skip, len - skip)) |
439 | 441 | return 1; | |
440 | if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && | 442 | /* |
441 | dlen <= UBIFS_MAX_DATA_NODE_SZ) | 443 | * The area after the common header size is not empty, so the common |
442 | /* The corrupt node looks like a data node */ | 444 | * header must be intact. Check it. |
443 | next_offs = ALIGN(offs + dlen, 8); | 445 | */ |
444 | } | 446 | if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { |
445 | 447 | dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); | |
446 | if (c->min_io_size == 1) | 448 | return 0; |
447 | skip = 8; | ||
448 | else | ||
449 | skip = ALIGN(offs + 1, c->min_io_size) - offs; | ||
450 | |||
451 | offs += skip; | ||
452 | buf += skip; | ||
453 | len -= skip; | ||
454 | while (len > 8) { | ||
455 | struct ubifs_ch *ch = buf; | ||
456 | uint32_t magic = le32_to_cpu(ch->magic); | ||
457 | int ret; | ||
458 | |||
459 | if (magic == UBIFS_NODE_MAGIC) { | ||
460 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); | ||
461 | if (ret == SCANNED_A_NODE || ret > 0) { | ||
462 | /* | ||
463 | * There is a small chance this is just data in | ||
464 | * a data node, so check that possibility. e.g. | ||
465 | * this is part of a file that itself contains | ||
466 | * a UBIFS image. | ||
467 | */ | ||
468 | if (next_offs && offs + le32_to_cpu(ch->len) <= | ||
469 | next_offs) | ||
470 | continue; | ||
471 | dbg_rcvry("unexpected node at %d:%d", lnum, | ||
472 | offs); | ||
473 | return 0; | ||
474 | } | ||
475 | } | ||
476 | offs += 8; | ||
477 | buf += 8; | ||
478 | len -= 8; | ||
479 | } | 449 | } |
480 | return 1; | 450 | /* Now we know the corrupt node's length we can skip over it */ |
451 | skip = ALIGN(offs + dlen, c->min_io_size) - offs; | ||
452 | /* After which there should be empty space */ | ||
453 | if (is_empty(buf + skip, len - skip)) | ||
454 | return 1; | ||
455 | dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip); | ||
456 | return 0; | ||
481 | } | 457 | } |
482 | 458 | ||
483 | /** | 459 | /** |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ce42a7b0ca5a..11cc80125a49 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | |||
143 | dirty -= c->leb_size - lp->free; | 143 | dirty -= c->leb_size - lp->free; |
144 | /* | 144 | /* |
145 | * If the replay order was perfect the dirty space would now be | 145 | * If the replay order was perfect the dirty space would now be |
146 | * zero. The order is not perfect because the the journal heads | 146 | * zero. The order is not perfect because the journal heads |
147 | * race with each other. This is not a problem but is does mean | 147 | * race with each other. This is not a problem but is does mean |
148 | * that the dirty space may temporarily exceed c->leb_size | 148 | * that the dirty space may temporarily exceed c->leb_size |
149 | * during the replay. | 149 | * during the replay. |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index e070c643d1bb..57085e43320f 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
@@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c) | |||
193 | if (tmp64 > DEFAULT_MAX_RP_SIZE) | 193 | if (tmp64 > DEFAULT_MAX_RP_SIZE) |
194 | tmp64 = DEFAULT_MAX_RP_SIZE; | 194 | tmp64 = DEFAULT_MAX_RP_SIZE; |
195 | sup->rp_size = cpu_to_le64(tmp64); | 195 | sup->rp_size = cpu_to_le64(tmp64); |
196 | sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); | ||
196 | 197 | ||
197 | err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); | 198 | err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); |
198 | kfree(sup); | 199 | kfree(sup); |
@@ -532,17 +533,39 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
532 | if (IS_ERR(sup)) | 533 | if (IS_ERR(sup)) |
533 | return PTR_ERR(sup); | 534 | return PTR_ERR(sup); |
534 | 535 | ||
536 | c->fmt_version = le32_to_cpu(sup->fmt_version); | ||
537 | c->ro_compat_version = le32_to_cpu(sup->ro_compat_version); | ||
538 | |||
535 | /* | 539 | /* |
536 | * The software supports all previous versions but not future versions, | 540 | * The software supports all previous versions but not future versions, |
537 | * due to the unavailability of time-travelling equipment. | 541 | * due to the unavailability of time-travelling equipment. |
538 | */ | 542 | */ |
539 | c->fmt_version = le32_to_cpu(sup->fmt_version); | ||
540 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { | 543 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { |
541 | ubifs_err("on-flash format version is %d, but software only " | 544 | struct super_block *sb = c->vfs_sb; |
542 | "supports up to version %d", c->fmt_version, | 545 | int mounting_ro = sb->s_flags & MS_RDONLY; |
543 | UBIFS_FORMAT_VERSION); | 546 | |
544 | err = -EINVAL; | 547 | ubifs_assert(!c->ro_media || mounting_ro); |
545 | goto out; | 548 | if (!mounting_ro || |
549 | c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { | ||
550 | ubifs_err("on-flash format version is w%d/r%d, but " | ||
551 | "software only supports up to version " | ||
552 | "w%d/r%d", c->fmt_version, | ||
553 | c->ro_compat_version, UBIFS_FORMAT_VERSION, | ||
554 | UBIFS_RO_COMPAT_VERSION); | ||
555 | if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { | ||
556 | ubifs_msg("only R/O mounting is possible"); | ||
557 | err = -EROFS; | ||
558 | } else | ||
559 | err = -EINVAL; | ||
560 | goto out; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * The FS is mounted R/O, and the media format is | ||
565 | * R/O-compatible with the UBIFS implementation, so we can | ||
566 | * mount. | ||
567 | */ | ||
568 | c->rw_incompat = 1; | ||
546 | } | 569 | } |
547 | 570 | ||
548 | if (c->fmt_version < 3) { | 571 | if (c->fmt_version < 3) { |
@@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
623 | c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; | 646 | c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; |
624 | c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; | 647 | c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; |
625 | c->main_first = c->leb_cnt - c->main_lebs; | 648 | c->main_first = c->leb_cnt - c->main_lebs; |
626 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | ||
627 | 649 | ||
628 | err = validate_sb(c, sup); | 650 | err = validate_sb(c, sup); |
629 | out: | 651 | out: |
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index e7bab52a1410..02feb59cefca 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention) | |||
206 | * Move this one to the end of the list to provide some | 206 | * Move this one to the end of the list to provide some |
207 | * fairness. | 207 | * fairness. |
208 | */ | 208 | */ |
209 | list_del(&c->infos_list); | 209 | list_move_tail(&c->infos_list, &ubifs_infos); |
210 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
211 | mutex_unlock(&c->umount_mutex); | 210 | mutex_unlock(&c->umount_mutex); |
212 | if (freed >= nr) | 211 | if (freed >= nr) |
213 | break; | 212 | break; |
@@ -263,8 +262,7 @@ static int kick_a_thread(void) | |||
263 | } | 262 | } |
264 | 263 | ||
265 | if (i == 1) { | 264 | if (i == 1) { |
266 | list_del(&c->infos_list); | 265 | list_move_tail(&c->infos_list, &ubifs_infos); |
267 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
268 | spin_unlock(&ubifs_infos_lock); | 266 | spin_unlock(&ubifs_infos_lock); |
269 | 267 | ||
270 | ubifs_request_bg_commit(c); | 268 | ubifs_request_bg_commit(c); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c5c98355459a..faa44f90608a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -421,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
421 | seq_printf(s, ",no_chk_data_crc"); | 421 | seq_printf(s, ",no_chk_data_crc"); |
422 | 422 | ||
423 | if (c->mount_opts.override_compr) { | 423 | if (c->mount_opts.override_compr) { |
424 | seq_printf(s, ",compr="); | 424 | seq_printf(s, ",compr=%s", |
425 | seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); | 425 | ubifs_compr_name(c->mount_opts.compr_type)); |
426 | } | 426 | } |
427 | 427 | ||
428 | return 0; | 428 | return 0; |
@@ -700,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c) | |||
700 | if (err) | 700 | if (err) |
701 | return err; | 701 | return err; |
702 | 702 | ||
703 | /* Initialize effective LEB size used in budgeting calculations */ | ||
704 | c->idx_leb_size = c->leb_size - c->max_idx_node_sz; | ||
703 | return 0; | 705 | return 0; |
704 | } | 706 | } |
705 | 707 | ||
@@ -716,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c) | |||
716 | long long tmp64; | 718 | long long tmp64; |
717 | 719 | ||
718 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 720 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
721 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | ||
719 | 722 | ||
720 | /* | 723 | /* |
721 | * Calculate total amount of FS blocks. This number is not used | 724 | * Calculate total amount of FS blocks. This number is not used |
@@ -1201,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1201 | goto out_cbuf; | 1204 | goto out_cbuf; |
1202 | 1205 | ||
1203 | /* Create background thread */ | 1206 | /* Create background thread */ |
1204 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1207 | c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); |
1205 | if (IS_ERR(c->bgt)) { | 1208 | if (IS_ERR(c->bgt)) { |
1206 | err = PTR_ERR(c->bgt); | 1209 | err = PTR_ERR(c->bgt); |
1207 | c->bgt = NULL; | 1210 | c->bgt = NULL; |
@@ -1318,11 +1321,15 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1318 | else { | 1321 | else { |
1319 | c->need_recovery = 0; | 1322 | c->need_recovery = 0; |
1320 | ubifs_msg("recovery completed"); | 1323 | ubifs_msg("recovery completed"); |
1321 | /* GC LEB has to be empty and taken at this point */ | 1324 | /* |
1322 | ubifs_assert(c->lst.taken_empty_lebs == 1); | 1325 | * GC LEB has to be empty and taken at this point. But |
1326 | * the journal head LEBs may also be accounted as | ||
1327 | * "empty taken" if they are empty. | ||
1328 | */ | ||
1329 | ubifs_assert(c->lst.taken_empty_lebs > 0); | ||
1323 | } | 1330 | } |
1324 | } else | 1331 | } else |
1325 | ubifs_assert(c->lst.taken_empty_lebs == 1); | 1332 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1326 | 1333 | ||
1327 | err = dbg_check_filesystem(c); | 1334 | err = dbg_check_filesystem(c); |
1328 | if (err) | 1335 | if (err) |
@@ -1344,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1344 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1351 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
1345 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " | 1352 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " |
1346 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); | 1353 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); |
1347 | ubifs_msg("media format: %d (latest is %d)", | 1354 | ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", |
1348 | c->fmt_version, UBIFS_FORMAT_VERSION); | 1355 | c->fmt_version, c->ro_compat_version, |
1356 | UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); | ||
1349 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | 1357 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); |
1350 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", | 1358 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", |
1351 | c->report_rp_size, c->report_rp_size >> 10); | 1359 | c->report_rp_size, c->report_rp_size >> 10); |
@@ -1485,6 +1493,15 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1485 | { | 1493 | { |
1486 | int err, lnum; | 1494 | int err, lnum; |
1487 | 1495 | ||
1496 | if (c->rw_incompat) { | ||
1497 | ubifs_err("the file-system is not R/W-compatible"); | ||
1498 | ubifs_msg("on-flash format version is w%d/r%d, but software " | ||
1499 | "only supports up to version w%d/r%d", c->fmt_version, | ||
1500 | c->ro_compat_version, UBIFS_FORMAT_VERSION, | ||
1501 | UBIFS_RO_COMPAT_VERSION); | ||
1502 | return -EROFS; | ||
1503 | } | ||
1504 | |||
1488 | mutex_lock(&c->umount_mutex); | 1505 | mutex_lock(&c->umount_mutex); |
1489 | dbg_save_space_info(c); | 1506 | dbg_save_space_info(c); |
1490 | c->remounting_rw = 1; | 1507 | c->remounting_rw = 1; |
@@ -1554,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1554 | ubifs_create_buds_lists(c); | 1571 | ubifs_create_buds_lists(c); |
1555 | 1572 | ||
1556 | /* Create background thread */ | 1573 | /* Create background thread */ |
1557 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1574 | c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); |
1558 | if (IS_ERR(c->bgt)) { | 1575 | if (IS_ERR(c->bgt)) { |
1559 | err = PTR_ERR(c->bgt); | 1576 | err = PTR_ERR(c->bgt); |
1560 | c->bgt = NULL; | 1577 | c->bgt = NULL; |
@@ -1775,7 +1792,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1775 | c->bu.buf = NULL; | 1792 | c->bu.buf = NULL; |
1776 | } | 1793 | } |
1777 | 1794 | ||
1778 | ubifs_assert(c->lst.taken_empty_lebs == 1); | 1795 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1779 | return 0; | 1796 | return 0; |
1780 | } | 1797 | } |
1781 | 1798 | ||
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index fa28a84c6a1b..f249f7b0d656 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -1252,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | |||
1252 | * splitting in the middle of the colliding sequence. Also, when | 1252 | * splitting in the middle of the colliding sequence. Also, when |
1253 | * removing the leftmost key, we would have to correct the key of the | 1253 | * removing the leftmost key, we would have to correct the key of the |
1254 | * parent node, which would introduce additional complications. Namely, | 1254 | * parent node, which would introduce additional complications. Namely, |
1255 | * if we changed the the leftmost key of the parent znode, the garbage | 1255 | * if we changed the leftmost key of the parent znode, the garbage |
1256 | * collector would be unable to find it (GC is doing this when GC'ing | 1256 | * collector would be unable to find it (GC is doing this when GC'ing |
1257 | * indexing LEBs). Although we already have an additional RB-tree where | 1257 | * indexing LEBs). Although we already have an additional RB-tree where |
1258 | * we save such changed znodes (see 'ins_clr_old_idx_znode()') until | 1258 | * we save such changed znodes (see 'ins_clr_old_idx_znode()') until |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index b25fc36cf72f..3eee07e0c495 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -36,9 +36,31 @@ | |||
36 | /* UBIFS node magic number (must not have the padding byte first or last) */ | 36 | /* UBIFS node magic number (must not have the padding byte first or last) */ |
37 | #define UBIFS_NODE_MAGIC 0x06101831 | 37 | #define UBIFS_NODE_MAGIC 0x06101831 |
38 | 38 | ||
39 | /* UBIFS on-flash format version */ | 39 | /* |
40 | * UBIFS on-flash format version. This version is increased when the on-flash | ||
41 | * format is changing. If this happens, UBIFS is will support older versions as | ||
42 | * well. But older UBIFS code will not support newer formats. Format changes | ||
43 | * will be rare and only when absolutely necessary, e.g. to fix a bug or to add | ||
44 | * a new feature. | ||
45 | * | ||
46 | * UBIFS went into mainline kernel with format version 4. The older formats | ||
47 | * were development formats. | ||
48 | */ | ||
40 | #define UBIFS_FORMAT_VERSION 4 | 49 | #define UBIFS_FORMAT_VERSION 4 |
41 | 50 | ||
51 | /* | ||
52 | * Read-only compatibility version. If the UBIFS format is changed, older UBIFS | ||
53 | * implementations will not be able to mount newer formats in read-write mode. | ||
54 | * However, depending on the change, it may be possible to mount newer formats | ||
55 | * in R/O mode. This is indicated by the R/O compatibility version which is | ||
56 | * stored in the super-block. | ||
57 | * | ||
58 | * This is needed to support boot-loaders which only need R/O mounting. With | ||
59 | * this flag it is possible to do UBIFS format changes without a need to update | ||
60 | * boot-loaders. | ||
61 | */ | ||
62 | #define UBIFS_RO_COMPAT_VERSION 0 | ||
63 | |||
42 | /* Minimum logical eraseblock size in bytes */ | 64 | /* Minimum logical eraseblock size in bytes */ |
43 | #define UBIFS_MIN_LEB_SZ (15*1024) | 65 | #define UBIFS_MIN_LEB_SZ (15*1024) |
44 | 66 | ||
@@ -53,7 +75,7 @@ | |||
53 | 75 | ||
54 | /* | 76 | /* |
55 | * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes | 77 | * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes |
56 | * shorter than uncompressed data length, UBIFS preferes to leave this data | 78 | * shorter than uncompressed data length, UBIFS prefers to leave this data |
57 | * node uncompress, because it'll be read faster. | 79 | * node uncompress, because it'll be read faster. |
58 | */ | 80 | */ |
59 | #define UBIFS_MIN_COMPRESS_DIFF 64 | 81 | #define UBIFS_MIN_COMPRESS_DIFF 64 |
@@ -586,6 +608,7 @@ struct ubifs_pad_node { | |||
586 | * @padding2: reserved for future, zeroes | 608 | * @padding2: reserved for future, zeroes |
587 | * @time_gran: time granularity in nanoseconds | 609 | * @time_gran: time granularity in nanoseconds |
588 | * @uuid: UUID generated when the file system image was created | 610 | * @uuid: UUID generated when the file system image was created |
611 | * @ro_compat_version: UBIFS R/O compatibility version | ||
589 | */ | 612 | */ |
590 | struct ubifs_sb_node { | 613 | struct ubifs_sb_node { |
591 | struct ubifs_ch ch; | 614 | struct ubifs_ch ch; |
@@ -612,7 +635,8 @@ struct ubifs_sb_node { | |||
612 | __le64 rp_size; | 635 | __le64 rp_size; |
613 | __le32 time_gran; | 636 | __le32 time_gran; |
614 | __u8 uuid[16]; | 637 | __u8 uuid[16]; |
615 | __u8 padding2[3972]; | 638 | __le32 ro_compat_version; |
639 | __u8 padding2[3968]; | ||
616 | } __attribute__ ((packed)); | 640 | } __attribute__ ((packed)); |
617 | 641 | ||
618 | /** | 642 | /** |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 039a68bee29a..0a8341e14088 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -934,6 +934,7 @@ struct ubifs_debug_info; | |||
934 | * by @commit_sem | 934 | * by @commit_sem |
935 | * @cnt_lock: protects @highest_inum and @max_sqnum counters | 935 | * @cnt_lock: protects @highest_inum and @max_sqnum counters |
936 | * @fmt_version: UBIFS on-flash format version | 936 | * @fmt_version: UBIFS on-flash format version |
937 | * @ro_compat_version: R/O compatibility version | ||
937 | * @uuid: UUID from super block | 938 | * @uuid: UUID from super block |
938 | * | 939 | * |
939 | * @lhead_lnum: log head logical eraseblock number | 940 | * @lhead_lnum: log head logical eraseblock number |
@@ -966,6 +967,7 @@ struct ubifs_debug_info; | |||
966 | * recovery) | 967 | * recovery) |
967 | * @bulk_read: enable bulk-reads | 968 | * @bulk_read: enable bulk-reads |
968 | * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) | 969 | * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) |
970 | * @rw_incompat: the media is not R/W compatible | ||
969 | * | 971 | * |
970 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and | 972 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and |
971 | * @calc_idx_sz | 973 | * @calc_idx_sz |
@@ -1015,6 +1017,8 @@ struct ubifs_debug_info; | |||
1015 | * @min_io_shift: number of bits in @min_io_size minus one | 1017 | * @min_io_shift: number of bits in @min_io_size minus one |
1016 | * @leb_size: logical eraseblock size in bytes | 1018 | * @leb_size: logical eraseblock size in bytes |
1017 | * @half_leb_size: half LEB size | 1019 | * @half_leb_size: half LEB size |
1020 | * @idx_leb_size: how many bytes of an LEB are effectively available when it is | ||
1021 | * used to store indexing nodes (@leb_size - @max_idx_node_sz) | ||
1018 | * @leb_cnt: count of logical eraseblocks | 1022 | * @leb_cnt: count of logical eraseblocks |
1019 | * @max_leb_cnt: maximum count of logical eraseblocks | 1023 | * @max_leb_cnt: maximum count of logical eraseblocks |
1020 | * @old_leb_cnt: count of logical eraseblocks before re-size | 1024 | * @old_leb_cnt: count of logical eraseblocks before re-size |
@@ -1132,8 +1136,8 @@ struct ubifs_debug_info; | |||
1132 | * previous commit start | 1136 | * previous commit start |
1133 | * @uncat_list: list of un-categorized LEBs | 1137 | * @uncat_list: list of un-categorized LEBs |
1134 | * @empty_list: list of empty LEBs | 1138 | * @empty_list: list of empty LEBs |
1135 | * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) | 1139 | * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) |
1136 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) | 1140 | * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) |
1137 | * @freeable_cnt: number of freeable LEBs in @freeable_list | 1141 | * @freeable_cnt: number of freeable LEBs in @freeable_list |
1138 | * | 1142 | * |
1139 | * @ltab_lnum: LEB number of LPT's own lprops table | 1143 | * @ltab_lnum: LEB number of LPT's own lprops table |
@@ -1177,6 +1181,7 @@ struct ubifs_info { | |||
1177 | unsigned long long cmt_no; | 1181 | unsigned long long cmt_no; |
1178 | spinlock_t cnt_lock; | 1182 | spinlock_t cnt_lock; |
1179 | int fmt_version; | 1183 | int fmt_version; |
1184 | int ro_compat_version; | ||
1180 | unsigned char uuid[16]; | 1185 | unsigned char uuid[16]; |
1181 | 1186 | ||
1182 | int lhead_lnum; | 1187 | int lhead_lnum; |
@@ -1205,6 +1210,7 @@ struct ubifs_info { | |||
1205 | unsigned int no_chk_data_crc:1; | 1210 | unsigned int no_chk_data_crc:1; |
1206 | unsigned int bulk_read:1; | 1211 | unsigned int bulk_read:1; |
1207 | unsigned int default_compr:2; | 1212 | unsigned int default_compr:2; |
1213 | unsigned int rw_incompat:1; | ||
1208 | 1214 | ||
1209 | struct mutex tnc_mutex; | 1215 | struct mutex tnc_mutex; |
1210 | struct ubifs_zbranch zroot; | 1216 | struct ubifs_zbranch zroot; |
@@ -1253,6 +1259,7 @@ struct ubifs_info { | |||
1253 | int min_io_shift; | 1259 | int min_io_shift; |
1254 | int leb_size; | 1260 | int leb_size; |
1255 | int half_leb_size; | 1261 | int half_leb_size; |
1262 | int idx_leb_size; | ||
1256 | int leb_cnt; | 1263 | int leb_cnt; |
1257 | int max_leb_cnt; | 1264 | int max_leb_cnt; |
1258 | int old_leb_cnt; | 1265 | int old_leb_cnt; |
@@ -1500,7 +1507,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free); | |||
1500 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1507 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1501 | 1508 | ||
1502 | /* find.c */ | 1509 | /* find.c */ |
1503 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | 1510 | int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, |
1504 | int squeeze); | 1511 | int squeeze); |
1505 | int ubifs_find_free_leb_for_idx(struct ubifs_info *c); | 1512 | int ubifs_find_free_leb_for_idx(struct ubifs_info *c); |
1506 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 1513 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |