diff options
Diffstat (limited to 'fs')
59 files changed, 19459 insertions, 30 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 86b203fc3c56..9f7270f36b2a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -175,9 +175,34 @@ source "fs/qnx4/Kconfig" | |||
175 | source "fs/romfs/Kconfig" | 175 | source "fs/romfs/Kconfig" |
176 | source "fs/sysv/Kconfig" | 176 | source "fs/sysv/Kconfig" |
177 | source "fs/ufs/Kconfig" | 177 | source "fs/ufs/Kconfig" |
178 | |||
179 | source "fs/exofs/Kconfig" | 178 | source "fs/exofs/Kconfig" |
180 | 179 | ||
180 | config NILFS2_FS | ||
181 | tristate "NILFS2 file system support (EXPERIMENTAL)" | ||
182 | depends on BLOCK && EXPERIMENTAL | ||
183 | select CRC32 | ||
184 | help | ||
185 | NILFS2 is a log-structured file system (LFS) supporting continuous | ||
186 | snapshotting. In addition to versioning capability of the entire | ||
187 | file system, users can even restore files mistakenly overwritten or | ||
188 | destroyed just a few seconds ago. Since this file system can keep | ||
189 | consistency like conventional LFS, it achieves quick recovery after | ||
190 | system crashes. | ||
191 | |||
192 | NILFS2 creates a number of checkpoints every few seconds or per | ||
193 | synchronous write basis (unless there is no change). Users can | ||
194 | select significant versions among continuously created checkpoints, | ||
195 | and can change them into snapshots which will be preserved for long | ||
196 | periods until they are changed back to checkpoints. Each | ||
197 | snapshot is mountable as a read-only file system concurrently with | ||
198 | its writable mount, and this feature is convenient for online backup. | ||
199 | |||
200 | Some features including atime, extended attributes, and POSIX ACLs, | ||
201 | are not supported yet. | ||
202 | |||
203 | To compile this file system support as a module, choose M here: the | ||
204 | module will be called nilfs2. If unsure, say N. | ||
205 | |||
181 | endif # MISC_FILESYSTEMS | 206 | endif # MISC_FILESYSTEMS |
182 | 207 | ||
183 | menuconfig NETWORK_FILESYSTEMS | 208 | menuconfig NETWORK_FILESYSTEMS |
diff --git a/fs/Makefile b/fs/Makefile index 70b2aed87133..af6d04700d9c 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -114,6 +114,7 @@ obj-$(CONFIG_JFS_FS) += jfs/ | |||
114 | obj-$(CONFIG_XFS_FS) += xfs/ | 114 | obj-$(CONFIG_XFS_FS) += xfs/ |
115 | obj-$(CONFIG_9P_FS) += 9p/ | 115 | obj-$(CONFIG_9P_FS) += 9p/ |
116 | obj-$(CONFIG_AFS_FS) += afs/ | 116 | obj-$(CONFIG_AFS_FS) += afs/ |
117 | obj-$(CONFIG_NILFS2_FS) += nilfs2/ | ||
117 | obj-$(CONFIG_BEFS_FS) += befs/ | 118 | obj-$(CONFIG_BEFS_FS) += befs/ |
118 | obj-$(CONFIG_HOSTFS) += hostfs/ | 119 | obj-$(CONFIG_HOSTFS) += hostfs/ |
119 | obj-$(CONFIG_HPPFS) += hppfs/ | 120 | obj-$(CONFIG_HPPFS) += hppfs/ |
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c index 49f189423063..7ad36506c256 100644 --- a/fs/afs/netdevices.c +++ b/fs/afs/netdevices.c | |||
@@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen) | |||
20 | struct net_device *dev; | 20 | struct net_device *dev; |
21 | int ret = -ENODEV; | 21 | int ret = -ENODEV; |
22 | 22 | ||
23 | if (maclen != ETH_ALEN) | 23 | BUG_ON(maclen != ETH_ALEN); |
24 | BUG(); | ||
25 | 24 | ||
26 | rtnl_lock(); | 25 | rtnl_lock(); |
27 | dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); | 26 | dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); |
diff --git a/fs/befs/super.c b/fs/befs/super.c index 41f2b4d0093e..ca40f828f64d 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c | |||
@@ -8,6 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <asm/page.h> /* for PAGE_SIZE */ | ||
11 | 12 | ||
12 | #include "befs.h" | 13 | #include "befs.h" |
13 | #include "super.h" | 14 | #include "super.h" |
diff --git a/fs/buffer.c b/fs/buffer.c index 6e35762b6169..13edf7ad3ff1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1596,6 +1596,16 @@ EXPORT_SYMBOL(unmap_underlying_metadata); | |||
1596 | * locked buffer. This only can happen if someone has written the buffer | 1596 | * locked buffer. This only can happen if someone has written the buffer |
1597 | * directly, with submit_bh(). At the address_space level PageWriteback | 1597 | * directly, with submit_bh(). At the address_space level PageWriteback |
1598 | * prevents this contention from occurring. | 1598 | * prevents this contention from occurring. |
1599 | * | ||
1600 | * If block_write_full_page() is called with wbc->sync_mode == | ||
1601 | * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this | ||
1602 | * causes the writes to be flagged as synchronous writes, but the | ||
1603 | * block device queue will NOT be unplugged, since usually many pages | ||
1604 | * will be pushed to the out before the higher-level caller actually | ||
1605 | * waits for the writes to be completed. The various wait functions, | ||
1606 | * such as wait_on_writeback_range() will ultimately call sync_page() | ||
1607 | * which will ultimately call blk_run_backing_dev(), which will end up | ||
1608 | * unplugging the device queue. | ||
1599 | */ | 1609 | */ |
1600 | static int __block_write_full_page(struct inode *inode, struct page *page, | 1610 | static int __block_write_full_page(struct inode *inode, struct page *page, |
1601 | get_block_t *get_block, struct writeback_control *wbc) | 1611 | get_block_t *get_block, struct writeback_control *wbc) |
@@ -1606,7 +1616,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1606 | struct buffer_head *bh, *head; | 1616 | struct buffer_head *bh, *head; |
1607 | const unsigned blocksize = 1 << inode->i_blkbits; | 1617 | const unsigned blocksize = 1 << inode->i_blkbits; |
1608 | int nr_underway = 0; | 1618 | int nr_underway = 0; |
1609 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | 1619 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? |
1620 | WRITE_SYNC_PLUG : WRITE); | ||
1610 | 1621 | ||
1611 | BUG_ON(!PageLocked(page)); | 1622 | BUG_ON(!PageLocked(page)); |
1612 | 1623 | ||
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index 8e0cfe44b0fc..fb3c1a21b135 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -28,6 +28,25 @@ config EXT3_FS | |||
28 | To compile this file system support as a module, choose M here: the | 28 | To compile this file system support as a module, choose M here: the |
29 | module will be called ext3. | 29 | module will be called ext3. |
30 | 30 | ||
31 | config EXT3_DEFAULTS_TO_ORDERED | ||
32 | bool "Default to 'data=ordered' in ext3 (legacy option)" | ||
33 | depends on EXT3_FS | ||
34 | help | ||
35 | If a filesystem does not explicitly specify a data ordering | ||
36 | mode, and the journal capability allowed it, ext3 used to | ||
37 | historically default to 'data=ordered'. | ||
38 | |||
39 | That was a rather unfortunate choice, because it leads to all | ||
40 | kinds of latency problems, and the 'data=writeback' mode is more | ||
41 | appropriate these days. | ||
42 | |||
43 | You should probably always answer 'n' here, and if you really | ||
44 | want to use 'data=ordered' mode, set it in the filesystem itself | ||
45 | with 'tune2fs -o journal_data_ordered'. | ||
46 | |||
47 | But if you really want to enable the legacy default, you can do | ||
48 | so by answering 'y' to this question. | ||
49 | |||
31 | config EXT3_FS_XATTR | 50 | config EXT3_FS_XATTR |
32 | bool "Ext3 extended attributes" | 51 | bool "Ext3 extended attributes" |
33 | depends on EXT3_FS | 52 | depends on EXT3_FS |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 466a332e0bd1..fcfa24361856 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1521,12 +1521,16 @@ static int ext3_ordered_writepage(struct page *page, | |||
1521 | if (!page_has_buffers(page)) { | 1521 | if (!page_has_buffers(page)) { |
1522 | create_empty_buffers(page, inode->i_sb->s_blocksize, | 1522 | create_empty_buffers(page, inode->i_sb->s_blocksize, |
1523 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1523 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
1524 | } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | 1524 | page_bufs = page_buffers(page); |
1525 | /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ | 1525 | } else { |
1526 | return block_write_full_page(page, NULL, wbc); | 1526 | page_bufs = page_buffers(page); |
1527 | if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE, | ||
1528 | NULL, buffer_unmapped)) { | ||
1529 | /* Provide NULL get_block() to catch bugs if buffers | ||
1530 | * weren't really mapped */ | ||
1531 | return block_write_full_page(page, NULL, wbc); | ||
1532 | } | ||
1527 | } | 1533 | } |
1528 | page_bufs = page_buffers(page); | ||
1529 | |||
1530 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1534 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1531 | 1535 | ||
1532 | if (IS_ERR(handle)) { | 1536 | if (IS_ERR(handle)) { |
@@ -1581,6 +1585,15 @@ static int ext3_writeback_writepage(struct page *page, | |||
1581 | if (ext3_journal_current_handle()) | 1585 | if (ext3_journal_current_handle()) |
1582 | goto out_fail; | 1586 | goto out_fail; |
1583 | 1587 | ||
1588 | if (page_has_buffers(page)) { | ||
1589 | if (!walk_page_buffers(NULL, page_buffers(page), 0, | ||
1590 | PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { | ||
1591 | /* Provide NULL get_block() to catch bugs if buffers | ||
1592 | * weren't really mapped */ | ||
1593 | return block_write_full_page(page, NULL, wbc); | ||
1594 | } | ||
1595 | } | ||
1596 | |||
1584 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); | 1597 | handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); |
1585 | if (IS_ERR(handle)) { | 1598 | if (IS_ERR(handle)) { |
1586 | ret = PTR_ERR(handle); | 1599 | ret = PTR_ERR(handle); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 9e5b8e387e1e..599dbfe504c3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -44,6 +44,12 @@ | |||
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "namei.h" | 45 | #include "namei.h" |
46 | 46 | ||
47 | #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED | ||
48 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA | ||
49 | #else | ||
50 | #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA | ||
51 | #endif | ||
52 | |||
47 | static int ext3_load_journal(struct super_block *, struct ext3_super_block *, | 53 | static int ext3_load_journal(struct super_block *, struct ext3_super_block *, |
48 | unsigned long journal_devnum); | 54 | unsigned long journal_devnum); |
49 | static int ext3_create_journal(struct super_block *, struct ext3_super_block *, | 55 | static int ext3_create_journal(struct super_block *, struct ext3_super_block *, |
@@ -1919,7 +1925,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1919 | cope, else JOURNAL_DATA */ | 1925 | cope, else JOURNAL_DATA */ |
1920 | if (journal_check_available_features | 1926 | if (journal_check_available_features |
1921 | (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) | 1927 | (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) |
1922 | set_opt(sbi->s_mount_opt, ORDERED_DATA); | 1928 | set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); |
1923 | else | 1929 | else |
1924 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); | 1930 | set_opt(sbi->s_mount_opt, JOURNAL_DATA); |
1925 | break; | 1931 | break; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3523b895eb4b..5a97bcfe03e5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -516,8 +516,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
516 | goto out_unlock; | 516 | goto out_unlock; |
517 | 517 | ||
518 | ret = nfs_updatepage(filp, page, 0, pagelen); | 518 | ret = nfs_updatepage(filp, page, 0, pagelen); |
519 | if (ret == 0) | ||
520 | ret = pagelen; | ||
521 | out_unlock: | 519 | out_unlock: |
522 | unlock_page(page); | 520 | unlock_page(page); |
523 | if (ret) | 521 | if (ret) |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 82eaadbff408..6717200923fe 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1228,7 +1228,6 @@ static int nfs_parse_mount_options(char *raw, | |||
1228 | goto out_nomem; | 1228 | goto out_nomem; |
1229 | token = match_token(string, | 1229 | token = match_token(string, |
1230 | nfs_xprt_protocol_tokens, args); | 1230 | nfs_xprt_protocol_tokens, args); |
1231 | kfree(string); | ||
1232 | 1231 | ||
1233 | switch (token) { | 1232 | switch (token) { |
1234 | case Opt_xprt_udp: | 1233 | case Opt_xprt_udp: |
@@ -1258,6 +1257,7 @@ static int nfs_parse_mount_options(char *raw, | |||
1258 | goto out_nomem; | 1257 | goto out_nomem; |
1259 | token = match_token(string, | 1258 | token = match_token(string, |
1260 | nfs_xprt_protocol_tokens, args); | 1259 | nfs_xprt_protocol_tokens, args); |
1260 | kfree(string); | ||
1261 | 1261 | ||
1262 | switch (token) { | 1262 | switch (token) { |
1263 | case Opt_xprt_udp: | 1263 | case Opt_xprt_udp: |
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile new file mode 100644 index 000000000000..df3e62c1ddc5 --- /dev/null +++ b/fs/nilfs2/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | obj-$(CONFIG_NILFS2_FS) += nilfs2.o | ||
2 | nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ | ||
3 | btnode.o bmap.o btree.o direct.o dat.o recovery.o \ | ||
4 | the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ | ||
5 | ifile.o alloc.o gcinode.o ioctl.o gcdat.o | ||
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c new file mode 100644 index 000000000000..d69e6ae59251 --- /dev/null +++ b/fs/nilfs2/alloc.c | |||
@@ -0,0 +1,504 @@ | |||
1 | /* | ||
2 | * alloc.c - NILFS dat/inode allocator | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Original code was written by Koji Sato <koji@osrg.net>. | ||
21 | * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>, | ||
22 | * Amagai Yoshiji <amagai@osrg.net>. | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/bitops.h> | ||
29 | #include "mdt.h" | ||
30 | #include "alloc.h" | ||
31 | |||
32 | |||
33 | static inline unsigned long | ||
34 | nilfs_palloc_groups_per_desc_block(const struct inode *inode) | ||
35 | { | ||
36 | return (1UL << inode->i_blkbits) / | ||
37 | sizeof(struct nilfs_palloc_group_desc); | ||
38 | } | ||
39 | |||
40 | static inline unsigned long | ||
41 | nilfs_palloc_groups_count(const struct inode *inode) | ||
42 | { | ||
43 | return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); | ||
44 | } | ||
45 | |||
46 | int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) | ||
47 | { | ||
48 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
49 | |||
50 | mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS); | ||
51 | if (!mi->mi_bgl) | ||
52 | return -ENOMEM; | ||
53 | |||
54 | bgl_lock_init(mi->mi_bgl); | ||
55 | |||
56 | nilfs_mdt_set_entry_size(inode, entry_size, 0); | ||
57 | |||
58 | mi->mi_blocks_per_group = | ||
59 | DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode), | ||
60 | mi->mi_entries_per_block) + 1; | ||
61 | /* Number of blocks in a group including entry blocks and | ||
62 | a bitmap block */ | ||
63 | mi->mi_blocks_per_desc_block = | ||
64 | nilfs_palloc_groups_per_desc_block(inode) * | ||
65 | mi->mi_blocks_per_group + 1; | ||
66 | /* Number of blocks per descriptor including the | ||
67 | descriptor block */ | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, | ||
72 | unsigned long *offset) | ||
73 | { | ||
74 | __u64 group = nr; | ||
75 | |||
76 | *offset = do_div(group, nilfs_palloc_entries_per_group(inode)); | ||
77 | return group; | ||
78 | } | ||
79 | |||
80 | static unsigned long | ||
81 | nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) | ||
82 | { | ||
83 | unsigned long desc_block = | ||
84 | group / nilfs_palloc_groups_per_desc_block(inode); | ||
85 | return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; | ||
86 | } | ||
87 | |||
88 | static unsigned long | ||
89 | nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) | ||
90 | { | ||
91 | unsigned long desc_offset = | ||
92 | group % nilfs_palloc_groups_per_desc_block(inode); | ||
93 | return nilfs_palloc_desc_blkoff(inode, group) + 1 + | ||
94 | desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; | ||
95 | } | ||
96 | |||
97 | static unsigned long | ||
98 | nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, | ||
99 | const struct nilfs_palloc_group_desc *desc) | ||
100 | { | ||
101 | unsigned long nfree; | ||
102 | |||
103 | spin_lock(nilfs_mdt_bgl_lock(inode, group)); | ||
104 | nfree = le32_to_cpu(desc->pg_nfrees); | ||
105 | spin_unlock(nilfs_mdt_bgl_lock(inode, group)); | ||
106 | return nfree; | ||
107 | } | ||
108 | |||
109 | static void | ||
110 | nilfs_palloc_group_desc_add_entries(struct inode *inode, | ||
111 | unsigned long group, | ||
112 | struct nilfs_palloc_group_desc *desc, | ||
113 | u32 n) | ||
114 | { | ||
115 | spin_lock(nilfs_mdt_bgl_lock(inode, group)); | ||
116 | le32_add_cpu(&desc->pg_nfrees, n); | ||
117 | spin_unlock(nilfs_mdt_bgl_lock(inode, group)); | ||
118 | } | ||
119 | |||
120 | static unsigned long | ||
121 | nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) | ||
122 | { | ||
123 | unsigned long group, group_offset; | ||
124 | |||
125 | group = nilfs_palloc_group(inode, nr, &group_offset); | ||
126 | |||
127 | return nilfs_palloc_bitmap_blkoff(inode, group) + 1 + | ||
128 | group_offset / NILFS_MDT(inode)->mi_entries_per_block; | ||
129 | } | ||
130 | |||
131 | static void nilfs_palloc_desc_block_init(struct inode *inode, | ||
132 | struct buffer_head *bh, void *kaddr) | ||
133 | { | ||
134 | struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh); | ||
135 | unsigned long n = nilfs_palloc_groups_per_desc_block(inode); | ||
136 | __le32 nfrees; | ||
137 | |||
138 | nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode)); | ||
139 | while (n-- > 0) { | ||
140 | desc->pg_nfrees = nfrees; | ||
141 | desc++; | ||
142 | } | ||
143 | } | ||
144 | |||
145 | static int nilfs_palloc_get_desc_block(struct inode *inode, | ||
146 | unsigned long group, | ||
147 | int create, struct buffer_head **bhp) | ||
148 | { | ||
149 | return nilfs_mdt_get_block(inode, | ||
150 | nilfs_palloc_desc_blkoff(inode, group), | ||
151 | create, nilfs_palloc_desc_block_init, bhp); | ||
152 | } | ||
153 | |||
154 | static int nilfs_palloc_get_bitmap_block(struct inode *inode, | ||
155 | unsigned long group, | ||
156 | int create, struct buffer_head **bhp) | ||
157 | { | ||
158 | return nilfs_mdt_get_block(inode, | ||
159 | nilfs_palloc_bitmap_blkoff(inode, group), | ||
160 | create, NULL, bhp); | ||
161 | } | ||
162 | |||
163 | int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, | ||
164 | int create, struct buffer_head **bhp) | ||
165 | { | ||
166 | return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), | ||
167 | create, NULL, bhp); | ||
168 | } | ||
169 | |||
170 | static struct nilfs_palloc_group_desc * | ||
171 | nilfs_palloc_block_get_group_desc(const struct inode *inode, | ||
172 | unsigned long group, | ||
173 | const struct buffer_head *bh, void *kaddr) | ||
174 | { | ||
175 | return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) + | ||
176 | group % nilfs_palloc_groups_per_desc_block(inode); | ||
177 | } | ||
178 | |||
179 | static unsigned char * | ||
180 | nilfs_palloc_block_get_bitmap(const struct inode *inode, | ||
181 | const struct buffer_head *bh, void *kaddr) | ||
182 | { | ||
183 | return (unsigned char *)(kaddr + bh_offset(bh)); | ||
184 | } | ||
185 | |||
186 | void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, | ||
187 | const struct buffer_head *bh, void *kaddr) | ||
188 | { | ||
189 | unsigned long entry_offset, group_offset; | ||
190 | |||
191 | nilfs_palloc_group(inode, nr, &group_offset); | ||
192 | entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block; | ||
193 | |||
194 | return kaddr + bh_offset(bh) + | ||
195 | entry_offset * NILFS_MDT(inode)->mi_entry_size; | ||
196 | } | ||
197 | |||
198 | static int nilfs_palloc_find_available_slot(struct inode *inode, | ||
199 | unsigned long group, | ||
200 | unsigned long target, | ||
201 | unsigned char *bitmap, | ||
202 | int bsize) /* size in bits */ | ||
203 | { | ||
204 | int curr, pos, end, i; | ||
205 | |||
206 | if (target > 0) { | ||
207 | end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); | ||
208 | if (end > bsize) | ||
209 | end = bsize; | ||
210 | pos = nilfs_find_next_zero_bit(bitmap, end, target); | ||
211 | if (pos < end && | ||
212 | !nilfs_set_bit_atomic( | ||
213 | nilfs_mdt_bgl_lock(inode, group), pos, bitmap)) | ||
214 | return pos; | ||
215 | } else | ||
216 | end = 0; | ||
217 | |||
218 | for (i = 0, curr = end; | ||
219 | i < bsize; | ||
220 | i += BITS_PER_LONG, curr += BITS_PER_LONG) { | ||
221 | /* wrap around */ | ||
222 | if (curr >= bsize) | ||
223 | curr = 0; | ||
224 | while (*((unsigned long *)bitmap + curr / BITS_PER_LONG) | ||
225 | != ~0UL) { | ||
226 | end = curr + BITS_PER_LONG; | ||
227 | if (end > bsize) | ||
228 | end = bsize; | ||
229 | pos = nilfs_find_next_zero_bit(bitmap, end, curr); | ||
230 | if ((pos < end) && | ||
231 | !nilfs_set_bit_atomic( | ||
232 | nilfs_mdt_bgl_lock(inode, group), pos, | ||
233 | bitmap)) | ||
234 | return pos; | ||
235 | } | ||
236 | } | ||
237 | return -ENOSPC; | ||
238 | } | ||
239 | |||
240 | static unsigned long | ||
241 | nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, | ||
242 | unsigned long curr, unsigned long max) | ||
243 | { | ||
244 | return min_t(unsigned long, | ||
245 | nilfs_palloc_groups_per_desc_block(inode) - | ||
246 | curr % nilfs_palloc_groups_per_desc_block(inode), | ||
247 | max - curr + 1); | ||
248 | } | ||
249 | |||
250 | int nilfs_palloc_prepare_alloc_entry(struct inode *inode, | ||
251 | struct nilfs_palloc_req *req) | ||
252 | { | ||
253 | struct buffer_head *desc_bh, *bitmap_bh; | ||
254 | struct nilfs_palloc_group_desc *desc; | ||
255 | unsigned char *bitmap; | ||
256 | void *desc_kaddr, *bitmap_kaddr; | ||
257 | unsigned long group, maxgroup, ngroups; | ||
258 | unsigned long group_offset, maxgroup_offset; | ||
259 | unsigned long n, entries_per_group, groups_per_desc_block; | ||
260 | unsigned long i, j; | ||
261 | int pos, ret; | ||
262 | |||
263 | ngroups = nilfs_palloc_groups_count(inode); | ||
264 | maxgroup = ngroups - 1; | ||
265 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
266 | entries_per_group = nilfs_palloc_entries_per_group(inode); | ||
267 | groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode); | ||
268 | |||
269 | for (i = 0; i < ngroups; i += n) { | ||
270 | if (group >= ngroups) { | ||
271 | /* wrap around */ | ||
272 | group = 0; | ||
273 | maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr, | ||
274 | &maxgroup_offset) - 1; | ||
275 | } | ||
276 | ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); | ||
277 | if (ret < 0) | ||
278 | return ret; | ||
279 | desc_kaddr = kmap(desc_bh->b_page); | ||
280 | desc = nilfs_palloc_block_get_group_desc( | ||
281 | inode, group, desc_bh, desc_kaddr); | ||
282 | n = nilfs_palloc_rest_groups_in_desc_block(inode, group, | ||
283 | maxgroup); | ||
284 | for (j = 0; j < n; j++, desc++, group++) { | ||
285 | if (nilfs_palloc_group_desc_nfrees(inode, group, desc) | ||
286 | > 0) { | ||
287 | ret = nilfs_palloc_get_bitmap_block( | ||
288 | inode, group, 1, &bitmap_bh); | ||
289 | if (ret < 0) | ||
290 | goto out_desc; | ||
291 | bitmap_kaddr = kmap(bitmap_bh->b_page); | ||
292 | bitmap = nilfs_palloc_block_get_bitmap( | ||
293 | inode, bitmap_bh, bitmap_kaddr); | ||
294 | pos = nilfs_palloc_find_available_slot( | ||
295 | inode, group, group_offset, bitmap, | ||
296 | entries_per_group); | ||
297 | if (pos >= 0) { | ||
298 | /* found a free entry */ | ||
299 | nilfs_palloc_group_desc_add_entries( | ||
300 | inode, group, desc, -1); | ||
301 | req->pr_entry_nr = | ||
302 | entries_per_group * group + pos; | ||
303 | kunmap(desc_bh->b_page); | ||
304 | kunmap(bitmap_bh->b_page); | ||
305 | |||
306 | req->pr_desc_bh = desc_bh; | ||
307 | req->pr_bitmap_bh = bitmap_bh; | ||
308 | return 0; | ||
309 | } | ||
310 | kunmap(bitmap_bh->b_page); | ||
311 | brelse(bitmap_bh); | ||
312 | } | ||
313 | |||
314 | group_offset = 0; | ||
315 | } | ||
316 | |||
317 | kunmap(desc_bh->b_page); | ||
318 | brelse(desc_bh); | ||
319 | } | ||
320 | |||
321 | /* no entries left */ | ||
322 | return -ENOSPC; | ||
323 | |||
324 | out_desc: | ||
325 | kunmap(desc_bh->b_page); | ||
326 | brelse(desc_bh); | ||
327 | return ret; | ||
328 | } | ||
329 | |||
330 | void nilfs_palloc_commit_alloc_entry(struct inode *inode, | ||
331 | struct nilfs_palloc_req *req) | ||
332 | { | ||
333 | nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); | ||
334 | nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); | ||
335 | nilfs_mdt_mark_dirty(inode); | ||
336 | |||
337 | brelse(req->pr_bitmap_bh); | ||
338 | brelse(req->pr_desc_bh); | ||
339 | } | ||
340 | |||
341 | void nilfs_palloc_commit_free_entry(struct inode *inode, | ||
342 | struct nilfs_palloc_req *req) | ||
343 | { | ||
344 | struct nilfs_palloc_group_desc *desc; | ||
345 | unsigned long group, group_offset; | ||
346 | unsigned char *bitmap; | ||
347 | void *desc_kaddr, *bitmap_kaddr; | ||
348 | |||
349 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
350 | desc_kaddr = kmap(req->pr_desc_bh->b_page); | ||
351 | desc = nilfs_palloc_block_get_group_desc(inode, group, | ||
352 | req->pr_desc_bh, desc_kaddr); | ||
353 | bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); | ||
354 | bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, | ||
355 | bitmap_kaddr); | ||
356 | |||
357 | if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), | ||
358 | group_offset, bitmap)) | ||
359 | printk(KERN_WARNING "%s: entry number %llu already freed\n", | ||
360 | __func__, (unsigned long long)req->pr_entry_nr); | ||
361 | |||
362 | nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); | ||
363 | |||
364 | kunmap(req->pr_bitmap_bh->b_page); | ||
365 | kunmap(req->pr_desc_bh->b_page); | ||
366 | |||
367 | nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); | ||
368 | nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); | ||
369 | nilfs_mdt_mark_dirty(inode); | ||
370 | |||
371 | brelse(req->pr_bitmap_bh); | ||
372 | brelse(req->pr_desc_bh); | ||
373 | } | ||
374 | |||
375 | void nilfs_palloc_abort_alloc_entry(struct inode *inode, | ||
376 | struct nilfs_palloc_req *req) | ||
377 | { | ||
378 | struct nilfs_palloc_group_desc *desc; | ||
379 | void *desc_kaddr, *bitmap_kaddr; | ||
380 | unsigned char *bitmap; | ||
381 | unsigned long group, group_offset; | ||
382 | |||
383 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
384 | desc_kaddr = kmap(req->pr_desc_bh->b_page); | ||
385 | desc = nilfs_palloc_block_get_group_desc(inode, group, | ||
386 | req->pr_desc_bh, desc_kaddr); | ||
387 | bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); | ||
388 | bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, | ||
389 | bitmap_kaddr); | ||
390 | if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), | ||
391 | group_offset, bitmap)) | ||
392 | printk(KERN_WARNING "%s: entry numer %llu already freed\n", | ||
393 | __func__, (unsigned long long)req->pr_entry_nr); | ||
394 | |||
395 | nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); | ||
396 | |||
397 | kunmap(req->pr_bitmap_bh->b_page); | ||
398 | kunmap(req->pr_desc_bh->b_page); | ||
399 | |||
400 | brelse(req->pr_bitmap_bh); | ||
401 | brelse(req->pr_desc_bh); | ||
402 | |||
403 | req->pr_entry_nr = 0; | ||
404 | req->pr_bitmap_bh = NULL; | ||
405 | req->pr_desc_bh = NULL; | ||
406 | } | ||
407 | |||
408 | int nilfs_palloc_prepare_free_entry(struct inode *inode, | ||
409 | struct nilfs_palloc_req *req) | ||
410 | { | ||
411 | struct buffer_head *desc_bh, *bitmap_bh; | ||
412 | unsigned long group, group_offset; | ||
413 | int ret; | ||
414 | |||
415 | group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); | ||
416 | ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); | ||
417 | if (ret < 0) | ||
418 | return ret; | ||
419 | ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh); | ||
420 | if (ret < 0) { | ||
421 | brelse(desc_bh); | ||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | req->pr_desc_bh = desc_bh; | ||
426 | req->pr_bitmap_bh = bitmap_bh; | ||
427 | return 0; | ||
428 | } | ||
429 | |||
430 | void nilfs_palloc_abort_free_entry(struct inode *inode, | ||
431 | struct nilfs_palloc_req *req) | ||
432 | { | ||
433 | brelse(req->pr_bitmap_bh); | ||
434 | brelse(req->pr_desc_bh); | ||
435 | |||
436 | req->pr_entry_nr = 0; | ||
437 | req->pr_bitmap_bh = NULL; | ||
438 | req->pr_desc_bh = NULL; | ||
439 | } | ||
440 | |||
441 | static int | ||
442 | nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) | ||
443 | { | ||
444 | __u64 first, last; | ||
445 | |||
446 | first = group * nilfs_palloc_entries_per_group(inode); | ||
447 | last = first + nilfs_palloc_entries_per_group(inode) - 1; | ||
448 | return (nr >= first) && (nr <= last); | ||
449 | } | ||
450 | |||
451 | int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) | ||
452 | { | ||
453 | struct buffer_head *desc_bh, *bitmap_bh; | ||
454 | struct nilfs_palloc_group_desc *desc; | ||
455 | unsigned char *bitmap; | ||
456 | void *desc_kaddr, *bitmap_kaddr; | ||
457 | unsigned long group, group_offset; | ||
458 | int i, j, n, ret; | ||
459 | |||
460 | for (i = 0; i < nitems; i += n) { | ||
461 | group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); | ||
462 | ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); | ||
463 | if (ret < 0) | ||
464 | return ret; | ||
465 | ret = nilfs_palloc_get_bitmap_block(inode, group, 0, | ||
466 | &bitmap_bh); | ||
467 | if (ret < 0) { | ||
468 | brelse(desc_bh); | ||
469 | return ret; | ||
470 | } | ||
471 | desc_kaddr = kmap(desc_bh->b_page); | ||
472 | desc = nilfs_palloc_block_get_group_desc( | ||
473 | inode, group, desc_bh, desc_kaddr); | ||
474 | bitmap_kaddr = kmap(bitmap_bh->b_page); | ||
475 | bitmap = nilfs_palloc_block_get_bitmap( | ||
476 | inode, bitmap_bh, bitmap_kaddr); | ||
477 | for (j = i, n = 0; | ||
478 | (j < nitems) && nilfs_palloc_group_is_in(inode, group, | ||
479 | entry_nrs[j]); | ||
480 | j++, n++) { | ||
481 | nilfs_palloc_group(inode, entry_nrs[j], &group_offset); | ||
482 | if (!nilfs_clear_bit_atomic( | ||
483 | nilfs_mdt_bgl_lock(inode, group), | ||
484 | group_offset, bitmap)) { | ||
485 | printk(KERN_WARNING | ||
486 | "%s: entry number %llu already freed\n", | ||
487 | __func__, | ||
488 | (unsigned long long)entry_nrs[j]); | ||
489 | } | ||
490 | } | ||
491 | nilfs_palloc_group_desc_add_entries(inode, group, desc, n); | ||
492 | |||
493 | kunmap(bitmap_bh->b_page); | ||
494 | kunmap(desc_bh->b_page); | ||
495 | |||
496 | nilfs_mdt_mark_buffer_dirty(desc_bh); | ||
497 | nilfs_mdt_mark_buffer_dirty(bitmap_bh); | ||
498 | nilfs_mdt_mark_dirty(inode); | ||
499 | |||
500 | brelse(bitmap_bh); | ||
501 | brelse(desc_bh); | ||
502 | } | ||
503 | return 0; | ||
504 | } | ||
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h new file mode 100644 index 000000000000..4ace5475c2c7 --- /dev/null +++ b/fs/nilfs2/alloc.h | |||
@@ -0,0 +1,72 @@ | |||
/*
 * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator
 *
 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Original code was written by Koji Sato <koji@osrg.net>.
 * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
 * Amagai Yoshiji <amagai@osrg.net>.
 */

#ifndef _NILFS_ALLOC_H
#define _NILFS_ALLOC_H

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>

/**
 * nilfs_palloc_entries_per_group - number of allocatable entries per group
 * @inode: inode of the metadata file using this allocator
 *
 * A block group is described by a one-block bitmap holding one bit per
 * entry, so a group contains (block size in bytes) * 8 entries.
 */
static inline unsigned long
nilfs_palloc_entries_per_group(const struct inode *inode)
{
	return 1UL << (inode->i_blkbits + 3 /* log2(8 = CHAR_BITS) */);
}

int nilfs_palloc_init_blockgroup(struct inode *, unsigned);
int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
				 struct buffer_head **);
void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
				   const struct buffer_head *, void *);

/**
 * nilfs_palloc_req - persistent allocator request and reply
 * @pr_entry_nr: entry number (vblocknr or inode number)
 * @pr_desc_bh: buffer head of the buffer containing block group descriptors
 * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap
 * @pr_entry_bh: buffer head of the buffer containing translation entries
 */
struct nilfs_palloc_req {
	__u64 pr_entry_nr;
	struct buffer_head *pr_desc_bh;
	struct buffer_head *pr_bitmap_bh;
	struct buffer_head *pr_entry_bh;
};

/*
 * Allocation and deallocation follow a prepare/commit/abort protocol so
 * callers can roll back when a compound operation fails part-way.
 */
int nilfs_palloc_prepare_alloc_entry(struct inode *,
				     struct nilfs_palloc_req *);
void nilfs_palloc_commit_alloc_entry(struct inode *,
				     struct nilfs_palloc_req *);
void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
void nilfs_palloc_commit_free_entry(struct inode *, struct nilfs_palloc_req *);
int nilfs_palloc_prepare_free_entry(struct inode *, struct nilfs_palloc_req *);
void nilfs_palloc_abort_free_entry(struct inode *, struct nilfs_palloc_req *);
int nilfs_palloc_freev(struct inode *, __u64 *, size_t);

/* Bitmap primitives reuse the ext2 little-endian bit operations. */
#define nilfs_set_bit_atomic		ext2_set_bit_atomic
#define nilfs_clear_bit_atomic		ext2_clear_bit_atomic
#define nilfs_find_next_zero_bit	ext2_find_next_zero_bit

#endif	/* _NILFS_ALLOC_H */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c new file mode 100644 index 000000000000..24638e059bf3 --- /dev/null +++ b/fs/nilfs2/bmap.c | |||
@@ -0,0 +1,783 @@ | |||
1 | /* | ||
2 | * bmap.c - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/fs.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include "nilfs.h" | ||
27 | #include "bmap.h" | ||
28 | #include "sb.h" | ||
29 | #include "btnode.h" | ||
30 | #include "mdt.h" | ||
31 | #include "dat.h" | ||
32 | #include "alloc.h" | ||
33 | |||
34 | int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, | ||
35 | __u64 *ptrp) | ||
36 | { | ||
37 | __u64 ptr; | ||
38 | int ret; | ||
39 | |||
40 | down_read(&bmap->b_sem); | ||
41 | ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp); | ||
42 | if (ret < 0) | ||
43 | goto out; | ||
44 | if (bmap->b_pops->bpop_translate != NULL) { | ||
45 | ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr); | ||
46 | if (ret < 0) | ||
47 | goto out; | ||
48 | *ptrp = ptr; | ||
49 | } | ||
50 | |||
51 | out: | ||
52 | up_read(&bmap->b_sem); | ||
53 | return ret; | ||
54 | } | ||
55 | |||
56 | |||
57 | /** | ||
58 | * nilfs_bmap_lookup - find a record | ||
59 | * @bmap: bmap | ||
60 | * @key: key | ||
61 | * @recp: pointer to record | ||
62 | * | ||
63 | * Description: nilfs_bmap_lookup() finds a record whose key matches @key in | ||
64 | * @bmap. | ||
65 | * | ||
66 | * Return Value: On success, 0 is returned and the record associated with @key | ||
67 | * is stored in the place pointed by @recp. On error, one of the following | ||
68 | * negative error codes is returned. | ||
69 | * | ||
70 | * %-EIO - I/O error. | ||
71 | * | ||
72 | * %-ENOMEM - Insufficient amount of memory available. | ||
73 | * | ||
74 | * %-ENOENT - A record associated with @key does not exist. | ||
75 | */ | ||
76 | int nilfs_bmap_lookup(struct nilfs_bmap *bmap, | ||
77 | unsigned long key, | ||
78 | unsigned long *recp) | ||
79 | { | ||
80 | __u64 ptr; | ||
81 | int ret; | ||
82 | |||
83 | /* XXX: use macro for level 1 */ | ||
84 | ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr); | ||
85 | if (recp != NULL) | ||
86 | *recp = ptr; | ||
87 | return ret; | ||
88 | } | ||
89 | |||
/*
 * nilfs_bmap_do_insert - insert @key/@ptr, converting the bmap if needed
 *
 * When the current representation reports through bop_check_insert that
 * the insertion would exceed its capacity (ret > 0), all existing pairs
 * are gathered and the bmap is rebuilt as a btree ("large" form) together
 * with the new pair; otherwise the pair is inserted in place.
 * Caller must hold b_sem for writing.
 */
static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
{
	__u64 keys[NILFS_BMAP_SMALL_HIGH + 1];
	__u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1];
	int ret, n;

	if (bmap->b_ops->bop_check_insert != NULL) {
		ret = bmap->b_ops->bop_check_insert(bmap, key);
		if (ret > 0) {
			/* Capacity exceeded: collect the current entries. */
			n = bmap->b_ops->bop_gather_data(
				bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1);
			if (n < 0)
				return n;
			/* Rebuild as a btree, inserting @key/@ptr as well. */
			ret = nilfs_btree_convert_and_insert(
				bmap, key, ptr, keys, ptrs, n,
				NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH);
			if (ret == 0)
				bmap->b_u.u_flags |= NILFS_BMAP_LARGE;

			return ret;
		} else if (ret < 0)
			return ret;
	}

	/* Fits in the current representation; insert directly. */
	return bmap->b_ops->bop_insert(bmap, key, ptr);
}
116 | |||
117 | /** | ||
118 | * nilfs_bmap_insert - insert a new key-record pair into a bmap | ||
119 | * @bmap: bmap | ||
120 | * @key: key | ||
121 | * @rec: record | ||
122 | * | ||
123 | * Description: nilfs_bmap_insert() inserts the new key-record pair specified | ||
124 | * by @key and @rec into @bmap. | ||
125 | * | ||
126 | * Return Value: On success, 0 is returned. On error, one of the following | ||
127 | * negative error codes is returned. | ||
128 | * | ||
129 | * %-EIO - I/O error. | ||
130 | * | ||
131 | * %-ENOMEM - Insufficient amount of memory available. | ||
132 | * | ||
133 | * %-EEXIST - A record associated with @key already exist. | ||
134 | */ | ||
135 | int nilfs_bmap_insert(struct nilfs_bmap *bmap, | ||
136 | unsigned long key, | ||
137 | unsigned long rec) | ||
138 | { | ||
139 | int ret; | ||
140 | |||
141 | down_write(&bmap->b_sem); | ||
142 | ret = nilfs_bmap_do_insert(bmap, key, rec); | ||
143 | up_write(&bmap->b_sem); | ||
144 | return ret; | ||
145 | } | ||
146 | |||
/*
 * nilfs_bmap_do_delete - delete @key, converting the bmap back if possible
 *
 * Mirror image of nilfs_bmap_do_insert(): when bop_check_delete reports
 * (ret > 0) that the remaining entries fit the small form, the surviving
 * pairs are gathered and the bmap is converted back to the direct
 * ("small") representation while @key is removed; otherwise @key is
 * deleted in place.  Caller must hold b_sem for writing.
 */
static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
{
	__u64 keys[NILFS_BMAP_LARGE_LOW + 1];
	__u64 ptrs[NILFS_BMAP_LARGE_LOW + 1];
	int ret, n;

	if (bmap->b_ops->bop_check_delete != NULL) {
		ret = bmap->b_ops->bop_check_delete(bmap, key);
		if (ret > 0) {
			/* Small enough after deletion: collect entries. */
			n = bmap->b_ops->bop_gather_data(
				bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1);
			if (n < 0)
				return n;
			/* Rebuild as a direct mapping without @key. */
			ret = nilfs_direct_delete_and_convert(
				bmap, key, keys, ptrs, n,
				NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH);
			if (ret == 0)
				bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE;

			return ret;
		} else if (ret < 0)
			return ret;
	}

	/* No conversion needed; delete directly. */
	return bmap->b_ops->bop_delete(bmap, key);
}
173 | |||
174 | int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key) | ||
175 | { | ||
176 | __u64 lastkey; | ||
177 | int ret; | ||
178 | |||
179 | down_read(&bmap->b_sem); | ||
180 | ret = bmap->b_ops->bop_last_key(bmap, &lastkey); | ||
181 | if (!ret) | ||
182 | *key = lastkey; | ||
183 | up_read(&bmap->b_sem); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | /** | ||
188 | * nilfs_bmap_delete - delete a key-record pair from a bmap | ||
189 | * @bmap: bmap | ||
190 | * @key: key | ||
191 | * | ||
192 | * Description: nilfs_bmap_delete() deletes the key-record pair specified by | ||
193 | * @key from @bmap. | ||
194 | * | ||
195 | * Return Value: On success, 0 is returned. On error, one of the following | ||
196 | * negative error codes is returned. | ||
197 | * | ||
198 | * %-EIO - I/O error. | ||
199 | * | ||
200 | * %-ENOMEM - Insufficient amount of memory available. | ||
201 | * | ||
202 | * %-ENOENT - A record associated with @key does not exist. | ||
203 | */ | ||
204 | int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key) | ||
205 | { | ||
206 | int ret; | ||
207 | |||
208 | down_write(&bmap->b_sem); | ||
209 | ret = nilfs_bmap_do_delete(bmap, key); | ||
210 | up_write(&bmap->b_sem); | ||
211 | return ret; | ||
212 | } | ||
213 | |||
214 | static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key) | ||
215 | { | ||
216 | __u64 lastkey; | ||
217 | int ret; | ||
218 | |||
219 | ret = bmap->b_ops->bop_last_key(bmap, &lastkey); | ||
220 | if (ret < 0) { | ||
221 | if (ret == -ENOENT) | ||
222 | ret = 0; | ||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | while (key <= lastkey) { | ||
227 | ret = nilfs_bmap_do_delete(bmap, lastkey); | ||
228 | if (ret < 0) | ||
229 | return ret; | ||
230 | ret = bmap->b_ops->bop_last_key(bmap, &lastkey); | ||
231 | if (ret < 0) { | ||
232 | if (ret == -ENOENT) | ||
233 | ret = 0; | ||
234 | return ret; | ||
235 | } | ||
236 | } | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | /** | ||
241 | * nilfs_bmap_truncate - truncate a bmap to a specified key | ||
242 | * @bmap: bmap | ||
243 | * @key: key | ||
244 | * | ||
245 | * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are | ||
246 | * greater than or equal to @key from @bmap. | ||
247 | * | ||
248 | * Return Value: On success, 0 is returned. On error, one of the following | ||
249 | * negative error codes is returned. | ||
250 | * | ||
251 | * %-EIO - I/O error. | ||
252 | * | ||
253 | * %-ENOMEM - Insufficient amount of memory available. | ||
254 | */ | ||
255 | int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key) | ||
256 | { | ||
257 | int ret; | ||
258 | |||
259 | down_write(&bmap->b_sem); | ||
260 | ret = nilfs_bmap_do_truncate(bmap, key); | ||
261 | up_write(&bmap->b_sem); | ||
262 | return ret; | ||
263 | } | ||
264 | |||
265 | /** | ||
266 | * nilfs_bmap_clear - free resources a bmap holds | ||
267 | * @bmap: bmap | ||
268 | * | ||
269 | * Description: nilfs_bmap_clear() frees resources associated with @bmap. | ||
270 | */ | ||
271 | void nilfs_bmap_clear(struct nilfs_bmap *bmap) | ||
272 | { | ||
273 | down_write(&bmap->b_sem); | ||
274 | if (bmap->b_ops->bop_clear != NULL) | ||
275 | bmap->b_ops->bop_clear(bmap); | ||
276 | up_write(&bmap->b_sem); | ||
277 | } | ||
278 | |||
279 | /** | ||
280 | * nilfs_bmap_propagate - propagate dirty state | ||
281 | * @bmap: bmap | ||
282 | * @bh: buffer head | ||
283 | * | ||
284 | * Description: nilfs_bmap_propagate() marks the buffers that directly or | ||
285 | * indirectly refer to the block specified by @bh dirty. | ||
286 | * | ||
287 | * Return Value: On success, 0 is returned. On error, one of the following | ||
288 | * negative error codes is returned. | ||
289 | * | ||
290 | * %-EIO - I/O error. | ||
291 | * | ||
292 | * %-ENOMEM - Insufficient amount of memory available. | ||
293 | */ | ||
294 | int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) | ||
295 | { | ||
296 | int ret; | ||
297 | |||
298 | down_write(&bmap->b_sem); | ||
299 | ret = bmap->b_ops->bop_propagate(bmap, bh); | ||
300 | up_write(&bmap->b_sem); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | /** | ||
305 | * nilfs_bmap_lookup_dirty_buffers - | ||
306 | * @bmap: bmap | ||
307 | * @listp: pointer to buffer head list | ||
308 | */ | ||
309 | void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap, | ||
310 | struct list_head *listp) | ||
311 | { | ||
312 | if (bmap->b_ops->bop_lookup_dirty_buffers != NULL) | ||
313 | bmap->b_ops->bop_lookup_dirty_buffers(bmap, listp); | ||
314 | } | ||
315 | |||
316 | /** | ||
317 | * nilfs_bmap_assign - assign a new block number to a block | ||
318 | * @bmap: bmap | ||
319 | * @bhp: pointer to buffer head | ||
320 | * @blocknr: block number | ||
321 | * @binfo: block information | ||
322 | * | ||
323 | * Description: nilfs_bmap_assign() assigns the block number @blocknr to the | ||
324 | * buffer specified by @bh. | ||
325 | * | ||
326 | * Return Value: On success, 0 is returned and the buffer head of a newly | ||
327 | * create buffer and the block information associated with the buffer are | ||
328 | * stored in the place pointed by @bh and @binfo, respectively. On error, one | ||
329 | * of the following negative error codes is returned. | ||
330 | * | ||
331 | * %-EIO - I/O error. | ||
332 | * | ||
333 | * %-ENOMEM - Insufficient amount of memory available. | ||
334 | */ | ||
335 | int nilfs_bmap_assign(struct nilfs_bmap *bmap, | ||
336 | struct buffer_head **bh, | ||
337 | unsigned long blocknr, | ||
338 | union nilfs_binfo *binfo) | ||
339 | { | ||
340 | int ret; | ||
341 | |||
342 | down_write(&bmap->b_sem); | ||
343 | ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo); | ||
344 | up_write(&bmap->b_sem); | ||
345 | return ret; | ||
346 | } | ||
347 | |||
348 | /** | ||
349 | * nilfs_bmap_mark - mark block dirty | ||
350 | * @bmap: bmap | ||
351 | * @key: key | ||
352 | * @level: level | ||
353 | * | ||
354 | * Description: nilfs_bmap_mark() marks the block specified by @key and @level | ||
355 | * as dirty. | ||
356 | * | ||
357 | * Return Value: On success, 0 is returned. On error, one of the following | ||
358 | * negative error codes is returned. | ||
359 | * | ||
360 | * %-EIO - I/O error. | ||
361 | * | ||
362 | * %-ENOMEM - Insufficient amount of memory available. | ||
363 | */ | ||
364 | int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level) | ||
365 | { | ||
366 | int ret; | ||
367 | |||
368 | if (bmap->b_ops->bop_mark == NULL) | ||
369 | return 0; | ||
370 | |||
371 | down_write(&bmap->b_sem); | ||
372 | ret = bmap->b_ops->bop_mark(bmap, key, level); | ||
373 | up_write(&bmap->b_sem); | ||
374 | return ret; | ||
375 | } | ||
376 | |||
377 | /** | ||
378 | * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state | ||
379 | * @bmap: bmap | ||
380 | * | ||
381 | * Description: nilfs_test_and_clear() is the atomic operation to test and | ||
382 | * clear the dirty state of @bmap. | ||
383 | * | ||
384 | * Return Value: 1 is returned if @bmap is dirty, or 0 if clear. | ||
385 | */ | ||
386 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) | ||
387 | { | ||
388 | int ret; | ||
389 | |||
390 | down_write(&bmap->b_sem); | ||
391 | ret = nilfs_bmap_dirty(bmap); | ||
392 | nilfs_bmap_clear_dirty(bmap); | ||
393 | up_write(&bmap->b_sem); | ||
394 | return ret; | ||
395 | } | ||
396 | |||
397 | |||
/*
 * Internal use only
 */

/*
 * nilfs_bmap_add_blocks - account @n newly mapped blocks to the inode
 *
 * Adds @n block-sized chunks to the inode's byte count and marks the
 * owning inode dirty through the metadata-file path when the inode is a
 * metadata file, or through the regular VFS path otherwise.
 */
void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
{
	inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
	if (NILFS_MDT(bmap->b_inode))
		nilfs_mdt_mark_dirty(bmap->b_inode);
	else
		mark_inode_dirty(bmap->b_inode);
}

/*
 * nilfs_bmap_sub_blocks - account @n unmapped blocks; mirror of the above.
 */
void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
{
	inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
	if (NILFS_MDT(bmap->b_inode))
		nilfs_mdt_mark_dirty(bmap->b_inode);
	else
		mark_inode_dirty(bmap->b_inode);
}
419 | |||
/*
 * Thin wrappers that route intermediate (btree node) block access of a
 * bmap through the owning inode's btnode cache.
 */

/* Read an existing btree-node block designated by @ptr into *@bhp. */
int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr,
			 struct buffer_head **bhp)
{
	return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
				ptr, 0, bhp, 0);
}

/* Release a block obtained by nilfs_bmap_get_block(). */
void nilfs_bmap_put_block(const struct nilfs_bmap *bmap,
			  struct buffer_head *bh)
{
	brelse(bh);
}

/*
 * Create a new btree-node block for @ptr; the buffer is marked
 * "nilfs volatile" because no disk address is assigned to it yet.
 */
int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr,
			     struct buffer_head **bhp)
{
	int ret;

	ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
			       ptr, 0, bhp, 1);
	if (ret < 0)
		return ret;
	set_buffer_nilfs_volatile(*bhp);
	return 0;
}

/* Remove a btree-node block from the btnode cache. */
void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap,
			     struct buffer_head *bh)
{
	nilfs_btnode_delete(bh);
}
451 | |||
452 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, | ||
453 | const struct buffer_head *bh) | ||
454 | { | ||
455 | struct buffer_head *pbh; | ||
456 | __u64 key; | ||
457 | |||
458 | key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - | ||
459 | bmap->b_inode->i_blkbits); | ||
460 | for (pbh = page_buffers(bh->b_page); pbh != bh; | ||
461 | pbh = pbh->b_this_page, key++); | ||
462 | |||
463 | return key; | ||
464 | } | ||
465 | |||
466 | __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) | ||
467 | { | ||
468 | __s64 diff; | ||
469 | |||
470 | diff = key - bmap->b_last_allocated_key; | ||
471 | if ((nilfs_bmap_keydiff_abs(diff) < NILFS_INODE_BMAP_SIZE) && | ||
472 | (bmap->b_last_allocated_ptr != NILFS_BMAP_INVALID_PTR) && | ||
473 | (bmap->b_last_allocated_ptr + diff > 0)) | ||
474 | return bmap->b_last_allocated_ptr + diff; | ||
475 | else | ||
476 | return NILFS_BMAP_INVALID_PTR; | ||
477 | } | ||
478 | |||
/* Get the DAT (disk address translation) inode of the filesystem owning
 * this bmap's inode. */
static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
{
	return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
}
483 | |||
/* Number of sub-regions each allocation group is divided into. */
#define NILFS_BMAP_GROUP_DIV	8
/*
 * nilfs_bmap_find_target_in_group - suggest an allocation target inside the
 * DAT group associated with this inode.
 *
 * The inode number selects a group and one of NILFS_BMAP_GROUP_DIV
 * sub-regions within it, presumably to spread inodes across a group —
 * NOTE(review): intent inferred from arithmetic; confirm with design docs.
 */
__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
{
	struct inode *dat = nilfs_bmap_get_dat(bmap);
	unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat);
	unsigned long group = bmap->b_inode->i_ino / entries_per_group;

	return group * entries_per_group +
		(bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) *
		(entries_per_group / NILFS_BMAP_GROUP_DIV);
}
495 | |||
/*
 * Pointer operations for bmaps whose pointers are virtual block numbers
 * managed by the DAT file.  Each callback forwards the prepare/commit/
 * abort step of its request to the matching nilfs_dat_* primitive.
 */

/* Reserve a new virtual block number. */
static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req)
{
	return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}

static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req)
{
	nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}

static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
				     union nilfs_bmap_ptr_req *req)
{
	nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}

/* Start using a virtual block number (assign its disk address). */
static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req)
{
	return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}

static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req,
				      sector_t blocknr)
{
	nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req,
			       blocknr);
}

static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap,
				     union nilfs_bmap_ptr_req *req)
{
	nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}

/* End the lifetime of a virtual block number. */
static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
				    union nilfs_bmap_ptr_req *req)
{
	return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}

/* Commit the end step; last argument 0 vs 1 distinguishes the plain and
 * "vmdt" variants — see nilfs_dat_commit_end() for its meaning. */
static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
				    union nilfs_bmap_ptr_req *req)
{
	nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0);
}

static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap,
				       union nilfs_bmap_ptr_req *req)
{
	nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1);
}

static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
				   union nilfs_bmap_ptr_req *req)
{
	nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}
557 | |||
/* Move virtual block @vblocknr of this bmap to disk block @blocknr via
 * the DAT. */
int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr,
		      sector_t blocknr)
{
	return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr);
}

/* Mark the DAT entry of @vblocknr dirty. */
int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
{
	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
}
568 | |||
569 | int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap, | ||
570 | union nilfs_bmap_ptr_req *oldreq, | ||
571 | union nilfs_bmap_ptr_req *newreq) | ||
572 | { | ||
573 | int ret; | ||
574 | |||
575 | ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq); | ||
576 | if (ret < 0) | ||
577 | return ret; | ||
578 | ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq); | ||
579 | if (ret < 0) | ||
580 | bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq); | ||
581 | |||
582 | return ret; | ||
583 | } | ||
584 | |||
/* Commit both halves of an update prepared by nilfs_bmap_prepare_update(). */
void nilfs_bmap_commit_update(struct nilfs_bmap *bmap,
			      union nilfs_bmap_ptr_req *oldreq,
			      union nilfs_bmap_ptr_req *newreq)
{
	bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq);
	bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq);
}

/* Abort both halves of an update prepared by nilfs_bmap_prepare_update(). */
void nilfs_bmap_abort_update(struct nilfs_bmap *bmap,
			     union nilfs_bmap_ptr_req *oldreq,
			     union nilfs_bmap_ptr_req *newreq)
{
	bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq);
	bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq);
}
600 | |||
601 | static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr, | ||
602 | __u64 *ptrp) | ||
603 | { | ||
604 | sector_t blocknr; | ||
605 | int ret; | ||
606 | |||
607 | ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr); | ||
608 | if (ret < 0) | ||
609 | return ret; | ||
610 | if (ptrp != NULL) | ||
611 | *ptrp = blocknr; | ||
612 | return 0; | ||
613 | } | ||
614 | |||
/*
 * Pointer operations for bmaps whose pointers are plain sequence numbers
 * ("p" variant, used by the DAT itself): allocation just hands out the
 * next value of b_last_allocated_ptr.
 */
static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req)
{
	/* ignore target ptr */
	req->bpr_ptr = bmap->b_last_allocated_ptr++;
	return 0;
}

static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req)
{
	/* do nothing */
}

/* Roll the counter back; only valid for the most recent prepare. */
static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap,
				     union nilfs_bmap_ptr_req *req)
{
	bmap->b_last_allocated_ptr--;
}
634 | |||
/* Virtual pointers translated through the DAT (regular files). */
static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = {
	.bpop_prepare_alloc_ptr =	nilfs_bmap_prepare_alloc_v,
	.bpop_commit_alloc_ptr =	nilfs_bmap_commit_alloc_v,
	.bpop_abort_alloc_ptr =		nilfs_bmap_abort_alloc_v,
	.bpop_prepare_start_ptr =	nilfs_bmap_prepare_start_v,
	.bpop_commit_start_ptr =	nilfs_bmap_commit_start_v,
	.bpop_abort_start_ptr =		nilfs_bmap_abort_start_v,
	.bpop_prepare_end_ptr =		nilfs_bmap_prepare_end_v,
	.bpop_commit_end_ptr =		nilfs_bmap_commit_end_v,
	.bpop_abort_end_ptr =		nilfs_bmap_abort_end_v,

	.bpop_translate =		nilfs_bmap_translate_v,
};

/* Same as above but commits the end step with the "vmdt" flag; selected
 * for the cpfile/sufile metadata files in nilfs_bmap_read(). */
static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = {
	.bpop_prepare_alloc_ptr =	nilfs_bmap_prepare_alloc_v,
	.bpop_commit_alloc_ptr =	nilfs_bmap_commit_alloc_v,
	.bpop_abort_alloc_ptr =		nilfs_bmap_abort_alloc_v,
	.bpop_prepare_start_ptr =	nilfs_bmap_prepare_start_v,
	.bpop_commit_start_ptr =	nilfs_bmap_commit_start_v,
	.bpop_abort_start_ptr =		nilfs_bmap_abort_start_v,
	.bpop_prepare_end_ptr =		nilfs_bmap_prepare_end_v,
	.bpop_commit_end_ptr =		nilfs_bmap_commit_end_vmdt,
	.bpop_abort_end_ptr =		nilfs_bmap_abort_end_v,

	.bpop_translate =		nilfs_bmap_translate_v,
};

/* Plain sequence-number pointers; selected for the DAT inode itself. */
static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = {
	.bpop_prepare_alloc_ptr =	nilfs_bmap_prepare_alloc_p,
	.bpop_commit_alloc_ptr =	nilfs_bmap_commit_alloc_p,
	.bpop_abort_alloc_ptr =		nilfs_bmap_abort_alloc_p,
	.bpop_prepare_start_ptr =	NULL,
	.bpop_commit_start_ptr =	NULL,
	.bpop_abort_start_ptr =		NULL,
	.bpop_prepare_end_ptr =		NULL,
	.bpop_commit_end_ptr =		NULL,
	.bpop_abort_end_ptr =		NULL,

	.bpop_translate =		NULL,
};

/* All-NULL table for GC bmaps: no pointer management at all. */
static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
	.bpop_prepare_alloc_ptr =	NULL,
	.bpop_commit_alloc_ptr =	NULL,
	.bpop_abort_alloc_ptr =		NULL,
	.bpop_prepare_start_ptr =	NULL,
	.bpop_commit_start_ptr =	NULL,
	.bpop_abort_start_ptr =		NULL,
	.bpop_prepare_end_ptr =		NULL,
	.bpop_commit_end_ptr =		NULL,
	.bpop_abort_end_ptr =		NULL,

	.bpop_translate =		NULL,
};
690 | |||
/**
 * nilfs_bmap_read - read a bmap from an inode
 * @bmap: bmap
 * @raw_inode: on-disk inode
 *
 * Description: nilfs_bmap_read() initializes the bmap @bmap.  When
 * @raw_inode is NULL the in-core bmap data is zeroed; otherwise it is
 * copied from the on-disk inode's i_bmap area.  The pointer-operation
 * table is chosen by inode number, and the final representation (btree
 * vs direct) is picked from the NILFS_BMAP_LARGE flag in the loaded data.
 *
 * Return Value: On success, 0 is returned.  On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 */
int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
	if (raw_inode == NULL)
		memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE);
	else
		memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE);

	init_rwsem(&bmap->b_sem);
	bmap->b_state = 0;
	bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
	switch (bmap->b_inode->i_ino) {
	case NILFS_DAT_INO:
		/* The DAT manages translation itself: plain pointers. */
		bmap->b_pops = &nilfs_bmap_ptr_ops_p;
		bmap->b_last_allocated_key = 0;	/* XXX: use macro */
		bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
		break;
	case NILFS_CPFILE_INO:
	case NILFS_SUFILE_INO:
		/* Metadata files use the vmdt variant of virtual pointers. */
		bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt;
		bmap->b_last_allocated_key = 0;	/* XXX: use macro */
		bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
		break;
	default:
		/* Everything else uses DAT-translated virtual pointers. */
		bmap->b_pops = &nilfs_bmap_ptr_ops_v;
		bmap->b_last_allocated_key = 0;	/* XXX: use macro */
		bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
		break;
	}

	return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ?
		nilfs_btree_init(bmap,
				 NILFS_BMAP_LARGE_LOW,
				 NILFS_BMAP_LARGE_HIGH) :
		nilfs_direct_init(bmap,
				  NILFS_BMAP_SMALL_LOW,
				  NILFS_BMAP_SMALL_HIGH);
}
740 | |||
/**
 * nilfs_bmap_write - write back a bmap to an inode
 * @bmap: bmap
 * @raw_inode: on-disk inode
 *
 * Description: nilfs_bmap_write() stores @bmap in the i_bmap area of
 * @raw_inode.  For the DAT inode the last-allocated pointer hint is reset
 * to NILFS_BMAP_NEW_PTR_INIT afterwards.
 */
void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
{
	down_write(&bmap->b_sem);
	memcpy(raw_inode->i_bmap, bmap->b_u.u_data,
	       NILFS_INODE_BMAP_SIZE * sizeof(__le64));
	if (bmap->b_inode->i_ino == NILFS_DAT_INO)
		bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;

	up_write(&bmap->b_sem);
}
758 | |||
/*
 * nilfs_bmap_init_gc - set up a bmap for garbage collection use
 *
 * Starts from zeroed bmap data, uses the no-op GC pointer-operation
 * table, and initializes the representation as a GC btree.
 */
void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
{
	memset(&bmap->b_u, 0, NILFS_BMAP_SIZE);
	init_rwsem(&bmap->b_sem);
	bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
	bmap->b_pops = &nilfs_bmap_ptr_ops_gc;
	bmap->b_last_allocated_key = 0;
	bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
	bmap->b_state = 0;
	nilfs_btree_init_gc(bmap);
}
770 | |||
/*
 * nilfs_bmap_init_gcdat - clone @bmap into @gcbmap for GC DAT processing
 *
 * Copies the whole bmap union, then re-initializes the semaphore and the
 * back-pointer to the owning inode so the clone is independently usable.
 */
void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
{
	memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union));
	init_rwsem(&gcbmap->b_sem);
	gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
}
777 | |||
/*
 * nilfs_bmap_commit_gcdat - copy the GC clone back into the live bmap
 *
 * Reverse of nilfs_bmap_init_gcdat(): overwrites @bmap with @gcbmap and
 * restores @bmap's own semaphore and inode back-pointer.
 */
void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
{
	memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union));
	init_rwsem(&bmap->b_sem);
	bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
}
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h new file mode 100644 index 000000000000..4f2708abb1ba --- /dev/null +++ b/fs/nilfs2/bmap.h | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * bmap.h - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BMAP_H | ||
24 | #define _NILFS_BMAP_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "alloc.h" | ||
31 | |||
32 | #define NILFS_BMAP_INVALID_PTR 0 | ||
33 | |||
34 | #define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey) | ||
35 | #define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key) | ||
36 | #define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr) | ||
37 | #define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr) | ||
38 | |||
39 | #define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) | ||
40 | |||
41 | |||
42 | struct nilfs_bmap; | ||
43 | |||
44 | /** | ||
45 | * union nilfs_bmap_ptr_req - request for bmap ptr | ||
46 | * @bpr_ptr: bmap pointer | ||
47 | * @bpr_req: request for persistent allocator | ||
48 | */ | ||
49 | union nilfs_bmap_ptr_req { | ||
50 | __u64 bpr_ptr; | ||
51 | struct nilfs_palloc_req bpr_req; | ||
52 | }; | ||
53 | |||
54 | /** | ||
55 | * struct nilfs_bmap_stats - bmap statistics | ||
56 | * @bs_nblocks: number of blocks created or deleted | ||
57 | */ | ||
58 | struct nilfs_bmap_stats { | ||
59 | unsigned int bs_nblocks; | ||
60 | }; | ||
61 | |||
62 | /** | ||
63 | * struct nilfs_bmap_operations - bmap operation table | ||
64 | */ | ||
65 | struct nilfs_bmap_operations { | ||
66 | int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); | ||
67 | int (*bop_insert)(struct nilfs_bmap *, __u64, __u64); | ||
68 | int (*bop_delete)(struct nilfs_bmap *, __u64); | ||
69 | void (*bop_clear)(struct nilfs_bmap *); | ||
70 | |||
71 | int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); | ||
72 | void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, | ||
73 | struct list_head *); | ||
74 | |||
75 | int (*bop_assign)(struct nilfs_bmap *, | ||
76 | struct buffer_head **, | ||
77 | sector_t, | ||
78 | union nilfs_binfo *); | ||
79 | int (*bop_mark)(struct nilfs_bmap *, __u64, int); | ||
80 | |||
81 | /* The following functions are internal use only. */ | ||
82 | int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); | ||
83 | int (*bop_check_insert)(const struct nilfs_bmap *, __u64); | ||
84 | int (*bop_check_delete)(struct nilfs_bmap *, __u64); | ||
85 | int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); | ||
86 | }; | ||
87 | |||
88 | |||
89 | /** | ||
90 | * struct nilfs_bmap_ptr_operations - bmap ptr operation table | ||
91 | */ | ||
92 | struct nilfs_bmap_ptr_operations { | ||
93 | int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *, | ||
94 | union nilfs_bmap_ptr_req *); | ||
95 | void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *, | ||
96 | union nilfs_bmap_ptr_req *); | ||
97 | void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *, | ||
98 | union nilfs_bmap_ptr_req *); | ||
99 | int (*bpop_prepare_start_ptr)(struct nilfs_bmap *, | ||
100 | union nilfs_bmap_ptr_req *); | ||
101 | void (*bpop_commit_start_ptr)(struct nilfs_bmap *, | ||
102 | union nilfs_bmap_ptr_req *, | ||
103 | sector_t); | ||
104 | void (*bpop_abort_start_ptr)(struct nilfs_bmap *, | ||
105 | union nilfs_bmap_ptr_req *); | ||
106 | int (*bpop_prepare_end_ptr)(struct nilfs_bmap *, | ||
107 | union nilfs_bmap_ptr_req *); | ||
108 | void (*bpop_commit_end_ptr)(struct nilfs_bmap *, | ||
109 | union nilfs_bmap_ptr_req *); | ||
110 | void (*bpop_abort_end_ptr)(struct nilfs_bmap *, | ||
111 | union nilfs_bmap_ptr_req *); | ||
112 | |||
113 | int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *); | ||
114 | }; | ||
115 | |||
116 | |||
117 | #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) | ||
118 | #define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) | ||
119 | #define NILFS_BMAP_NEW_PTR_INIT \ | ||
120 | (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) | ||
121 | |||
122 | static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) | ||
123 | { | ||
124 | return !!(ptr & NILFS_BMAP_NEW_PTR_INIT); | ||
125 | } | ||
126 | |||
127 | |||
128 | /** | ||
129 | * struct nilfs_bmap - bmap structure | ||
130 | * @b_u: raw data | ||
131 | * @b_sem: semaphore | ||
132 | * @b_inode: owner of bmap | ||
133 | * @b_ops: bmap operation table | ||
134 | * @b_pops: bmap ptr operation table | ||
135 | * @b_low: low watermark of conversion | ||
136 | * @b_high: high watermark of conversion | ||
137 | * @b_last_allocated_key: last allocated key for data block | ||
138 | * @b_last_allocated_ptr: last allocated ptr for data block | ||
139 | * @b_state: state | ||
140 | */ | ||
141 | struct nilfs_bmap { | ||
142 | union { | ||
143 | __u8 u_flags; | ||
144 | __le64 u_data[NILFS_BMAP_SIZE / sizeof(__le64)]; | ||
145 | } b_u; | ||
146 | struct rw_semaphore b_sem; | ||
147 | struct inode *b_inode; | ||
148 | const struct nilfs_bmap_operations *b_ops; | ||
149 | const struct nilfs_bmap_ptr_operations *b_pops; | ||
150 | __u64 b_low; | ||
151 | __u64 b_high; | ||
152 | __u64 b_last_allocated_key; | ||
153 | __u64 b_last_allocated_ptr; | ||
154 | int b_state; | ||
155 | }; | ||
156 | |||
157 | /* state */ | ||
158 | #define NILFS_BMAP_DIRTY 0x00000001 | ||
159 | |||
160 | |||
161 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); | ||
162 | int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); | ||
163 | void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); | ||
164 | int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *); | ||
165 | int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); | ||
166 | int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); | ||
167 | int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *); | ||
168 | int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long); | ||
169 | void nilfs_bmap_clear(struct nilfs_bmap *); | ||
170 | int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *); | ||
171 | void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *); | ||
172 | int nilfs_bmap_assign(struct nilfs_bmap *, struct buffer_head **, | ||
173 | unsigned long, union nilfs_binfo *); | ||
174 | int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); | ||
175 | int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); | ||
176 | |||
177 | void nilfs_bmap_init_gc(struct nilfs_bmap *); | ||
178 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | ||
179 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | ||
180 | |||
181 | |||
182 | /* | ||
183 | * Internal use only | ||
184 | */ | ||
185 | |||
186 | int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t); | ||
187 | int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64); | ||
188 | |||
189 | |||
190 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, | ||
191 | const struct buffer_head *); | ||
192 | |||
193 | __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); | ||
194 | __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); | ||
195 | |||
196 | int nilfs_bmap_prepare_update(struct nilfs_bmap *, | ||
197 | union nilfs_bmap_ptr_req *, | ||
198 | union nilfs_bmap_ptr_req *); | ||
199 | void nilfs_bmap_commit_update(struct nilfs_bmap *, | ||
200 | union nilfs_bmap_ptr_req *, | ||
201 | union nilfs_bmap_ptr_req *); | ||
202 | void nilfs_bmap_abort_update(struct nilfs_bmap *, | ||
203 | union nilfs_bmap_ptr_req *, | ||
204 | union nilfs_bmap_ptr_req *); | ||
205 | |||
206 | void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); | ||
207 | void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); | ||
208 | |||
209 | |||
210 | int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64, | ||
211 | struct buffer_head **); | ||
212 | void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *); | ||
213 | int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64, | ||
214 | struct buffer_head **); | ||
215 | void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *); | ||
216 | |||
217 | |||
218 | /* Assume that bmap semaphore is locked. */ | ||
219 | static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) | ||
220 | { | ||
221 | return !!(bmap->b_state & NILFS_BMAP_DIRTY); | ||
222 | } | ||
223 | |||
224 | /* Assume that bmap semaphore is locked. */ | ||
225 | static inline void nilfs_bmap_set_dirty(struct nilfs_bmap *bmap) | ||
226 | { | ||
227 | bmap->b_state |= NILFS_BMAP_DIRTY; | ||
228 | } | ||
229 | |||
230 | /* Assume that bmap semaphore is locked. */ | ||
231 | static inline void nilfs_bmap_clear_dirty(struct nilfs_bmap *bmap) | ||
232 | { | ||
233 | bmap->b_state &= ~NILFS_BMAP_DIRTY; | ||
234 | } | ||
235 | |||
236 | |||
237 | #define NILFS_BMAP_LARGE 0x1 | ||
238 | |||
239 | #define NILFS_BMAP_SMALL_LOW NILFS_DIRECT_KEY_MIN | ||
240 | #define NILFS_BMAP_SMALL_HIGH NILFS_DIRECT_KEY_MAX | ||
241 | #define NILFS_BMAP_LARGE_LOW NILFS_BTREE_ROOT_NCHILDREN_MAX | ||
242 | #define NILFS_BMAP_LARGE_HIGH NILFS_BTREE_KEY_MAX | ||
243 | |||
244 | #endif /* _NILFS_BMAP_H */ | ||
diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h new file mode 100644 index 000000000000..d41509bff47b --- /dev/null +++ b/fs/nilfs2/bmap_union.h | |||
@@ -0,0 +1,42 @@ | |||
1 | /* | ||
2 | * bmap_union.h - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BMAP_UNION_H | ||
24 | #define _NILFS_BMAP_UNION_H | ||
25 | |||
26 | #include "bmap.h" | ||
27 | #include "direct.h" | ||
28 | #include "btree.h" | ||
29 | |||
30 | /** | ||
31 | * nilfs_bmap_union - | ||
32 | * @bi_bmap: bmap structure | ||
33 | * @bi_btree: direct map structure | ||
34 | * @bi_direct: B-tree structure | ||
35 | */ | ||
36 | union nilfs_bmap_union { | ||
37 | struct nilfs_bmap bi_bmap; | ||
38 | struct nilfs_direct bi_direct; | ||
39 | struct nilfs_btree bi_btree; | ||
40 | }; | ||
41 | |||
42 | #endif /* _NILFS_BMAP_UNION_H */ | ||
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c new file mode 100644 index 000000000000..4cc07b2c30e0 --- /dev/null +++ b/fs/nilfs2/btnode.c | |||
@@ -0,0 +1,316 @@ | |||
1 | /* | ||
2 | * btnode.c - NILFS B-tree node cache | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * This file was originally written by Seiji Kihara <kihara@osrg.net> | ||
21 | * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for | ||
22 | * stabilization and simplification. | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/backing-dev.h> | ||
30 | #include "nilfs.h" | ||
31 | #include "mdt.h" | ||
32 | #include "dat.h" | ||
33 | #include "page.h" | ||
34 | #include "btnode.h" | ||
35 | |||
36 | |||
37 | void nilfs_btnode_cache_init_once(struct address_space *btnc) | ||
38 | { | ||
39 | INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); | ||
40 | spin_lock_init(&btnc->tree_lock); | ||
41 | INIT_LIST_HEAD(&btnc->private_list); | ||
42 | spin_lock_init(&btnc->private_lock); | ||
43 | |||
44 | spin_lock_init(&btnc->i_mmap_lock); | ||
45 | INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); | ||
46 | INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); | ||
47 | } | ||
48 | |||
49 | static struct address_space_operations def_btnode_aops; | ||
50 | |||
51 | void nilfs_btnode_cache_init(struct address_space *btnc) | ||
52 | { | ||
53 | btnc->host = NULL; /* can safely set to host inode ? */ | ||
54 | btnc->flags = 0; | ||
55 | mapping_set_gfp_mask(btnc, GFP_NOFS); | ||
56 | btnc->assoc_mapping = NULL; | ||
57 | btnc->backing_dev_info = &default_backing_dev_info; | ||
58 | btnc->a_ops = &def_btnode_aops; | ||
59 | } | ||
60 | |||
61 | void nilfs_btnode_cache_clear(struct address_space *btnc) | ||
62 | { | ||
63 | invalidate_mapping_pages(btnc, 0, -1); | ||
64 | truncate_inode_pages(btnc, 0); | ||
65 | } | ||
66 | |||
67 | int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | ||
68 | sector_t pblocknr, struct buffer_head **pbh, | ||
69 | int newblk) | ||
70 | { | ||
71 | struct buffer_head *bh; | ||
72 | struct inode *inode = NILFS_BTNC_I(btnc); | ||
73 | int err; | ||
74 | |||
75 | bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); | ||
76 | if (unlikely(!bh)) | ||
77 | return -ENOMEM; | ||
78 | |||
79 | err = -EEXIST; /* internal code */ | ||
80 | if (newblk) { | ||
81 | if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || | ||
82 | buffer_dirty(bh))) { | ||
83 | brelse(bh); | ||
84 | BUG(); | ||
85 | } | ||
86 | bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; | ||
87 | bh->b_blocknr = blocknr; | ||
88 | set_buffer_mapped(bh); | ||
89 | set_buffer_uptodate(bh); | ||
90 | goto found; | ||
91 | } | ||
92 | |||
93 | if (buffer_uptodate(bh) || buffer_dirty(bh)) | ||
94 | goto found; | ||
95 | |||
96 | if (pblocknr == 0) { | ||
97 | pblocknr = blocknr; | ||
98 | if (inode->i_ino != NILFS_DAT_INO) { | ||
99 | struct inode *dat = | ||
100 | nilfs_dat_inode(NILFS_I_NILFS(inode)); | ||
101 | |||
102 | /* blocknr is a virtual block number */ | ||
103 | err = nilfs_dat_translate(dat, blocknr, &pblocknr); | ||
104 | if (unlikely(err)) { | ||
105 | brelse(bh); | ||
106 | goto out_locked; | ||
107 | } | ||
108 | } | ||
109 | } | ||
110 | lock_buffer(bh); | ||
111 | if (buffer_uptodate(bh)) { | ||
112 | unlock_buffer(bh); | ||
113 | err = -EEXIST; /* internal code */ | ||
114 | goto found; | ||
115 | } | ||
116 | set_buffer_mapped(bh); | ||
117 | bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; | ||
118 | bh->b_blocknr = pblocknr; /* set block address for read */ | ||
119 | bh->b_end_io = end_buffer_read_sync; | ||
120 | get_bh(bh); | ||
121 | submit_bh(READ, bh); | ||
122 | bh->b_blocknr = blocknr; /* set back to the given block address */ | ||
123 | err = 0; | ||
124 | found: | ||
125 | *pbh = bh; | ||
126 | |||
127 | out_locked: | ||
128 | unlock_page(bh->b_page); | ||
129 | page_cache_release(bh->b_page); | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, | ||
134 | sector_t pblocknr, struct buffer_head **pbh, int newblk) | ||
135 | { | ||
136 | struct buffer_head *bh; | ||
137 | int err; | ||
138 | |||
139 | err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); | ||
140 | if (err == -EEXIST) /* internal code (cache hit) */ | ||
141 | return 0; | ||
142 | if (unlikely(err)) | ||
143 | return err; | ||
144 | |||
145 | bh = *pbh; | ||
146 | wait_on_buffer(bh); | ||
147 | if (!buffer_uptodate(bh)) { | ||
148 | brelse(bh); | ||
149 | return -EIO; | ||
150 | } | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * nilfs_btnode_delete - delete B-tree node buffer | ||
156 | * @bh: buffer to be deleted | ||
157 | * | ||
158 | * nilfs_btnode_delete() invalidates the specified buffer and delete the page | ||
159 | * including the buffer if the page gets unbusy. | ||
160 | */ | ||
161 | void nilfs_btnode_delete(struct buffer_head *bh) | ||
162 | { | ||
163 | struct address_space *mapping; | ||
164 | struct page *page = bh->b_page; | ||
165 | pgoff_t index = page_index(page); | ||
166 | int still_dirty; | ||
167 | |||
168 | page_cache_get(page); | ||
169 | lock_page(page); | ||
170 | wait_on_page_writeback(page); | ||
171 | |||
172 | nilfs_forget_buffer(bh); | ||
173 | still_dirty = PageDirty(page); | ||
174 | mapping = page->mapping; | ||
175 | unlock_page(page); | ||
176 | page_cache_release(page); | ||
177 | |||
178 | if (!still_dirty && mapping) | ||
179 | invalidate_inode_pages2_range(mapping, index, index); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * nilfs_btnode_prepare_change_key | ||
184 | * prepare to move contents of the block for old key to one of new key. | ||
185 | * the old buffer will not be removed, but might be reused for new buffer. | ||
186 | * it might return -ENOMEM because of memory allocation errors, | ||
187 | * and might return -EIO because of disk read errors. | ||
188 | */ | ||
189 | int nilfs_btnode_prepare_change_key(struct address_space *btnc, | ||
190 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
191 | { | ||
192 | struct buffer_head *obh, *nbh; | ||
193 | struct inode *inode = NILFS_BTNC_I(btnc); | ||
194 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
195 | int err; | ||
196 | |||
197 | if (oldkey == newkey) | ||
198 | return 0; | ||
199 | |||
200 | obh = ctxt->bh; | ||
201 | ctxt->newbh = NULL; | ||
202 | |||
203 | if (inode->i_blkbits == PAGE_CACHE_SHIFT) { | ||
204 | lock_page(obh->b_page); | ||
205 | /* | ||
206 | * We cannot call radix_tree_preload for the kernels older | ||
207 | * than 2.6.23, because it is not exported for modules. | ||
208 | */ | ||
209 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
210 | if (err) | ||
211 | goto failed_unlock; | ||
212 | /* BUG_ON(oldkey != obh->b_page->index); */ | ||
213 | if (unlikely(oldkey != obh->b_page->index)) | ||
214 | NILFS_PAGE_BUG(obh->b_page, | ||
215 | "invalid oldkey %lld (newkey=%lld)", | ||
216 | (unsigned long long)oldkey, | ||
217 | (unsigned long long)newkey); | ||
218 | |||
219 | retry: | ||
220 | spin_lock_irq(&btnc->tree_lock); | ||
221 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); | ||
222 | spin_unlock_irq(&btnc->tree_lock); | ||
223 | /* | ||
224 | * Note: page->index will not change to newkey until | ||
225 | * nilfs_btnode_commit_change_key() will be called. | ||
226 | * To protect the page in intermediate state, the page lock | ||
227 | * is held. | ||
228 | */ | ||
229 | radix_tree_preload_end(); | ||
230 | if (!err) | ||
231 | return 0; | ||
232 | else if (err != -EEXIST) | ||
233 | goto failed_unlock; | ||
234 | |||
235 | err = invalidate_inode_pages2_range(btnc, newkey, newkey); | ||
236 | if (!err) | ||
237 | goto retry; | ||
238 | /* fallback to copy mode */ | ||
239 | unlock_page(obh->b_page); | ||
240 | } | ||
241 | |||
242 | err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); | ||
243 | if (likely(!err)) { | ||
244 | BUG_ON(nbh == obh); | ||
245 | ctxt->newbh = nbh; | ||
246 | } | ||
247 | return err; | ||
248 | |||
249 | failed_unlock: | ||
250 | unlock_page(obh->b_page); | ||
251 | return err; | ||
252 | } | ||
253 | |||
254 | /** | ||
255 | * nilfs_btnode_commit_change_key | ||
256 | * commit the change_key operation prepared by prepare_change_key(). | ||
257 | */ | ||
258 | void nilfs_btnode_commit_change_key(struct address_space *btnc, | ||
259 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
260 | { | ||
261 | struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; | ||
262 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
263 | struct page *opage; | ||
264 | |||
265 | if (oldkey == newkey) | ||
266 | return; | ||
267 | |||
268 | if (nbh == NULL) { /* blocksize == pagesize */ | ||
269 | opage = obh->b_page; | ||
270 | if (unlikely(oldkey != opage->index)) | ||
271 | NILFS_PAGE_BUG(opage, | ||
272 | "invalid oldkey %lld (newkey=%lld)", | ||
273 | (unsigned long long)oldkey, | ||
274 | (unsigned long long)newkey); | ||
275 | if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage)) | ||
276 | BUG(); | ||
277 | |||
278 | spin_lock_irq(&btnc->tree_lock); | ||
279 | radix_tree_delete(&btnc->page_tree, oldkey); | ||
280 | radix_tree_tag_set(&btnc->page_tree, newkey, | ||
281 | PAGECACHE_TAG_DIRTY); | ||
282 | spin_unlock_irq(&btnc->tree_lock); | ||
283 | |||
284 | opage->index = obh->b_blocknr = newkey; | ||
285 | unlock_page(opage); | ||
286 | } else { | ||
287 | nilfs_copy_buffer(nbh, obh); | ||
288 | nilfs_btnode_mark_dirty(nbh); | ||
289 | |||
290 | nbh->b_blocknr = newkey; | ||
291 | ctxt->bh = nbh; | ||
292 | nilfs_btnode_delete(obh); /* will decrement bh->b_count */ | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * nilfs_btnode_abort_change_key | ||
298 | * abort the change_key operation prepared by prepare_change_key(). | ||
299 | */ | ||
300 | void nilfs_btnode_abort_change_key(struct address_space *btnc, | ||
301 | struct nilfs_btnode_chkey_ctxt *ctxt) | ||
302 | { | ||
303 | struct buffer_head *nbh = ctxt->newbh; | ||
304 | __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; | ||
305 | |||
306 | if (oldkey == newkey) | ||
307 | return; | ||
308 | |||
309 | if (nbh == NULL) { /* blocksize == pagesize */ | ||
310 | spin_lock_irq(&btnc->tree_lock); | ||
311 | radix_tree_delete(&btnc->page_tree, newkey); | ||
312 | spin_unlock_irq(&btnc->tree_lock); | ||
313 | unlock_page(ctxt->bh->b_page); | ||
314 | } else | ||
315 | brelse(nbh); | ||
316 | } | ||
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h new file mode 100644 index 000000000000..35faa86444a7 --- /dev/null +++ b/fs/nilfs2/btnode.h | |||
@@ -0,0 +1,58 @@ | |||
1 | /* | ||
2 | * btnode.h - NILFS B-tree node cache | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net> | ||
21 | * Revised by Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_BTNODE_H | ||
25 | #define _NILFS_BTNODE_H | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | |||
32 | |||
33 | struct nilfs_btnode_chkey_ctxt { | ||
34 | __u64 oldkey; | ||
35 | __u64 newkey; | ||
36 | struct buffer_head *bh; | ||
37 | struct buffer_head *newbh; | ||
38 | }; | ||
39 | |||
40 | void nilfs_btnode_cache_init_once(struct address_space *); | ||
41 | void nilfs_btnode_cache_init(struct address_space *); | ||
42 | void nilfs_btnode_cache_clear(struct address_space *); | ||
43 | int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, | ||
44 | struct buffer_head **, int); | ||
45 | int nilfs_btnode_get(struct address_space *, __u64, sector_t, | ||
46 | struct buffer_head **, int); | ||
47 | void nilfs_btnode_delete(struct buffer_head *); | ||
48 | int nilfs_btnode_prepare_change_key(struct address_space *, | ||
49 | struct nilfs_btnode_chkey_ctxt *); | ||
50 | void nilfs_btnode_commit_change_key(struct address_space *, | ||
51 | struct nilfs_btnode_chkey_ctxt *); | ||
52 | void nilfs_btnode_abort_change_key(struct address_space *, | ||
53 | struct nilfs_btnode_chkey_ctxt *); | ||
54 | |||
55 | #define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh) | ||
56 | |||
57 | |||
58 | #endif /* _NILFS_BTNODE_H */ | ||
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c new file mode 100644 index 000000000000..6b37a2767293 --- /dev/null +++ b/fs/nilfs2/btree.c | |||
@@ -0,0 +1,2269 @@ | |||
1 | /* | ||
2 | * btree.c - NILFS B-tree. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/slab.h> | ||
24 | #include <linux/string.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/pagevec.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "page.h" | ||
29 | #include "btnode.h" | ||
30 | #include "btree.h" | ||
31 | #include "alloc.h" | ||
32 | |||
/**
 * struct nilfs_btree_path - A path on which B-tree operations are executed
 * @bp_bh: buffer head of node block
 * @bp_sib_bh: buffer head of sibling node block
 * @bp_index: index of child node
 * @bp_oldreq: ptr end request for old ptr
 * @bp_newreq: ptr alloc request for new ptr
 * @bp_ctxt: context for changing the key of a btnode block
 * @bp_op: rebalance operation
 */
struct nilfs_btree_path {
	struct buffer_head *bp_bh;
	struct buffer_head *bp_sib_bh;
	int bp_index;
	union nilfs_bmap_ptr_req bp_oldreq;
	union nilfs_bmap_ptr_req bp_newreq;
	struct nilfs_btnode_chkey_ctxt bp_ctxt;
	void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
		      int, __u64 *, __u64 *);
};
52 | |||
53 | /* | ||
54 | * B-tree path operations | ||
55 | */ | ||
56 | |||
57 | static struct kmem_cache *nilfs_btree_path_cache; | ||
58 | |||
59 | int __init nilfs_btree_path_cache_init(void) | ||
60 | { | ||
61 | nilfs_btree_path_cache = | ||
62 | kmem_cache_create("nilfs2_btree_path_cache", | ||
63 | sizeof(struct nilfs_btree_path) * | ||
64 | NILFS_BTREE_LEVEL_MAX, 0, 0, NULL); | ||
65 | return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM; | ||
66 | } | ||
67 | |||
/* Destroy the kmem cache created by nilfs_btree_path_cache_init(). */
void nilfs_btree_path_cache_destroy(void)
{
	kmem_cache_destroy(nilfs_btree_path_cache);
}
72 | |||
73 | static inline struct nilfs_btree_path * | ||
74 | nilfs_btree_alloc_path(const struct nilfs_btree *btree) | ||
75 | { | ||
76 | return (struct nilfs_btree_path *) | ||
77 | kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); | ||
78 | } | ||
79 | |||
/* Return a path array to the path cache.  Counterpart of alloc_path. */
static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
					 struct nilfs_btree_path *path)
{
	kmem_cache_free(nilfs_btree_path_cache, path);
}
85 | |||
86 | static void nilfs_btree_init_path(const struct nilfs_btree *btree, | ||
87 | struct nilfs_btree_path *path) | ||
88 | { | ||
89 | int level; | ||
90 | |||
91 | for (level = NILFS_BTREE_LEVEL_DATA; | ||
92 | level < NILFS_BTREE_LEVEL_MAX; | ||
93 | level++) { | ||
94 | path[level].bp_bh = NULL; | ||
95 | path[level].bp_sib_bh = NULL; | ||
96 | path[level].bp_index = 0; | ||
97 | path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
98 | path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; | ||
99 | path[level].bp_op = NULL; | ||
100 | } | ||
101 | } | ||
102 | |||
/*
 * Release every node buffer still held on @path and reset each level
 * back to its idle state, so the path can be reused or freed.
 */
static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
				   struct nilfs_btree_path *path)
{
	int level;

	for (level = NILFS_BTREE_LEVEL_DATA;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++) {
		if (path[level].bp_bh != NULL) {
			nilfs_bmap_put_block(&btree->bt_bmap,
					     path[level].bp_bh);
			path[level].bp_bh = NULL;
		}
		/* sib_bh is released or deleted by prepare or commit
		 * operations. */
		path[level].bp_sib_bh = NULL;
		path[level].bp_index = 0;
		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
		path[level].bp_op = NULL;
	}
}
125 | |||
126 | |||
127 | /* | ||
128 | * B-tree node operations | ||
129 | */ | ||
130 | |||
/* Return the flag bits (e.g. NILFS_BTREE_NODE_ROOT) of @node. */
static inline int
nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
			   const struct nilfs_btree_node *node)
{
	return node->bn_flags;
}
137 | |||
/* Overwrite the flag bits of @node with @flags. */
static inline void
nilfs_btree_node_set_flags(struct nilfs_btree *btree,
			   struct nilfs_btree_node *node,
			   int flags)
{
	node->bn_flags = flags;
}
145 | |||
/* Nonzero iff @node is the root node embedded in the bmap. */
static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
					const struct nilfs_btree_node *node)
{
	return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
}
151 | |||
/* Return the tree level of @node (leaf-most data level is lowest). */
static inline int
nilfs_btree_node_get_level(const struct nilfs_btree *btree,
			   const struct nilfs_btree_node *node)
{
	return node->bn_level;
}
158 | |||
/* Record @level as the tree level of @node. */
static inline void
nilfs_btree_node_set_level(struct nilfs_btree *btree,
			   struct nilfs_btree_node *node,
			   int level)
{
	node->bn_level = level;
}
166 | |||
/* Return the child count of @node, converting from on-disk LE16. */
static inline int
nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
			       const struct nilfs_btree_node *node)
{
	return le16_to_cpu(node->bn_nchildren);
}
173 | |||
/* Store @nchildren as the child count of @node in on-disk LE16 form. */
static inline void
nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
			       struct nilfs_btree_node *node,
			       int nchildren)
{
	node->bn_nchildren = cpu_to_le16(nchildren);
}
181 | |||
/* Size in bytes of a non-root node block: one filesystem block. */
static inline int
nilfs_btree_node_size(const struct nilfs_btree *btree)
{
	return 1 << btree->bt_bmap.b_inode->i_blkbits;
}
187 | |||
/* Minimum legal child count for @node; the root has its own bound. */
static inline int
nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
			       const struct nilfs_btree_node *node)
{
	return nilfs_btree_node_root(btree, node) ?
		NILFS_BTREE_ROOT_NCHILDREN_MIN :
		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
}
196 | |||
/* Maximum legal child count for @node; the root has its own bound. */
static inline int
nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
			       const struct nilfs_btree_node *node)
{
	return nilfs_btree_node_root(btree, node) ?
		NILFS_BTREE_ROOT_NCHILDREN_MAX :
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
}
205 | |||
/*
 * Address of the on-disk key array, which starts right after the node
 * header; non-root nodes carry extra pad bytes before it.
 */
static inline __le64 *
nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
		       const struct nilfs_btree_node *node)
{
	return (__le64 *)((char *)(node + 1) +
			  (nilfs_btree_node_root(btree, node) ?
			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
}
214 | |||
/*
 * Address of the on-disk pointer array, laid out immediately after the
 * key array (which is sized for the node's maximum child count).
 */
static inline __le64 *
nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
		       const struct nilfs_btree_node *node)
{
	return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
			  nilfs_btree_node_nchildren_max(btree, node));
}
222 | |||
223 | static inline __u64 | ||
224 | nilfs_btree_node_get_key(const struct nilfs_btree *btree, | ||
225 | const struct nilfs_btree_node *node, int index) | ||
226 | { | ||
227 | return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) + | ||
228 | index)); | ||
229 | } | ||
230 | |||
231 | static inline void | ||
232 | nilfs_btree_node_set_key(struct nilfs_btree *btree, | ||
233 | struct nilfs_btree_node *node, int index, __u64 key) | ||
234 | { | ||
235 | *(nilfs_btree_node_dkeys(btree, node) + index) = | ||
236 | nilfs_bmap_key_to_dkey(key); | ||
237 | } | ||
238 | |||
239 | static inline __u64 | ||
240 | nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, | ||
241 | const struct nilfs_btree_node *node, | ||
242 | int index) | ||
243 | { | ||
244 | return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) + | ||
245 | index)); | ||
246 | } | ||
247 | |||
248 | static inline void | ||
249 | nilfs_btree_node_set_ptr(struct nilfs_btree *btree, | ||
250 | struct nilfs_btree_node *node, | ||
251 | int index, | ||
252 | __u64 ptr) | ||
253 | { | ||
254 | *(nilfs_btree_node_dptrs(btree, node) + index) = | ||
255 | nilfs_bmap_ptr_to_dptr(ptr); | ||
256 | } | ||
257 | |||
/*
 * Initialize @node's header and populate its first @nchildren slots
 * from the host-order @keys/@ptrs arrays (converted to on-disk form).
 */
static void nilfs_btree_node_init(struct nilfs_btree *btree,
				  struct nilfs_btree_node *node,
				  int flags, int level, int nchildren,
				  const __u64 *keys, const __u64 *ptrs)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int i;

	nilfs_btree_node_set_flags(btree, node, flags);
	nilfs_btree_node_set_level(btree, node, level);
	nilfs_btree_node_set_nchildren(btree, node, nchildren);

	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	for (i = 0; i < nchildren; i++) {
		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
	}
}
278 | |||
/*
 * Move the first @n children of @right onto the tail of @left and
 * update both child counts.
 * Assume the buffer heads corresponding to left and right are locked.
 */
static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
				       struct nilfs_btree_node *left,
				       struct nilfs_btree_node *right,
				       int n)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(btree, left);
	ldptrs = nilfs_btree_node_dptrs(btree, left);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);

	rdkeys = nilfs_btree_node_dkeys(btree, right);
	rdptrs = nilfs_btree_node_dptrs(btree, right);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);

	/* append to left, then close the gap at the head of right */
	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));

	lnchildren += n;
	rnchildren -= n;
	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
}
307 | |||
/*
 * Move the last @n children of @left onto the head of @right and
 * update both child counts.
 * Assume that the buffer heads corresponding to left and right are locked.
 */
static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
					struct nilfs_btree_node *left,
					struct nilfs_btree_node *right,
					int n)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(btree, left);
	ldptrs = nilfs_btree_node_dptrs(btree, left);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);

	rdkeys = nilfs_btree_node_dkeys(btree, right);
	rdptrs = nilfs_btree_node_dptrs(btree, right);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);

	/* open a gap at the head of right, then copy left's tail in */
	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));

	lnchildren -= n;
	rnchildren += n;
	nilfs_btree_node_set_nchildren(btree, left, lnchildren);
	nilfs_btree_node_set_nchildren(btree, right, rnchildren);
}
336 | |||
/*
 * Insert (@key, @ptr) at slot @index of @node, shifting later entries
 * up by one.  Caller guarantees the node has room.
 * Assume that the buffer head corresponding to node is locked.
 */
static void nilfs_btree_node_insert(struct nilfs_btree *btree,
				    struct nilfs_btree_node *node,
				    __u64 key, __u64 ptr, int index)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	if (index < nchildren) {
		memmove(dkeys + index + 1, dkeys + index,
			(nchildren - index) * sizeof(*dkeys));
		memmove(dptrs + index + 1, dptrs + index,
			(nchildren - index) * sizeof(*dptrs));
	}
	dkeys[index] = nilfs_bmap_key_to_dkey(key);
	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
	nchildren++;
	nilfs_btree_node_set_nchildren(btree, node, nchildren);
}
360 | |||
/*
 * Remove the entry at slot @index of @node, shifting later entries
 * down by one.  The removed key/ptr are returned through @keyp/@ptrp
 * when those are non-NULL.
 * Assume that the buffer head corresponding to node is locked.
 */
static void nilfs_btree_node_delete(struct nilfs_btree *btree,
				    struct nilfs_btree_node *node,
				    __u64 *keyp, __u64 *ptrp, int index)
{
	__u64 key;
	__u64 ptr;
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(btree, node);
	dptrs = nilfs_btree_node_dptrs(btree, node);
	key = nilfs_bmap_dkey_to_key(dkeys[index]);
	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	if (keyp != NULL)
		*keyp = key;
	if (ptrp != NULL)
		*ptrp = ptr;

	if (index < nchildren - 1) {
		memmove(dkeys + index, dkeys + index + 1,
			(nchildren - index - 1) * sizeof(*dkeys));
		memmove(dptrs + index, dptrs + index + 1,
			(nchildren - index - 1) * sizeof(*dptrs));
	}
	nchildren--;
	nilfs_btree_node_set_nchildren(btree, node, nchildren);
}
391 | |||
/*
 * nilfs_btree_node_lookup - binary-search @node for @key
 * @btree: b-tree
 * @node: node to search
 * @key: key to look for
 * @indexp: place to store the resulting slot index
 *
 * Returns nonzero iff an exact match was found.  When there is no
 * exact match, the stored index is, for interior nodes, the child to
 * descend into (last slot whose key precedes @key), and for the lowest
 * node level, the slot where @key would be inserted.
 */
static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
				   const struct nilfs_btree_node *node,
				   __u64 key, int *indexp)
{
	__u64 nkey;
	int index, low, high, s;

	/* binary search */
	low = 0;
	high = nilfs_btree_node_get_nchildren(btree, node) - 1;
	index = 0;
	s = 0;
	while (low <= high) {
		index = (low + high) / 2;
		nkey = nilfs_btree_node_get_key(btree, node, index);
		if (nkey == key) {
			s = 0;
			goto out;
		} else if (nkey < key) {
			low = index + 1;
			s = -1;
		} else {
			high = index - 1;
			s = 1;
		}
	}

	/* adjust index */
	/* s records which side of @key the final probe fell on: s > 0
	 * means node key > @key (step back for interior nodes), s < 0
	 * means node key < @key (insertion point is one past, at the
	 * lowest level). */
	if (nilfs_btree_node_get_level(btree, node) >
	    NILFS_BTREE_LEVEL_NODE_MIN) {
		if ((s > 0) && (index > 0))
			index--;
	} else if (s < 0)
		index++;

 out:
	*indexp = index;

	return s == 0;
}
432 | |||
/* The root node lives inline in the bmap's union data area. */
static inline struct nilfs_btree_node *
nilfs_btree_get_root(const struct nilfs_btree *btree)
{
	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
}
438 | |||
/* Node held on @path at @level; valid only when bp_bh is set. */
static inline struct nilfs_btree_node *
nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
			     const struct nilfs_btree_path *path,
			     int level)
{
	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
}
446 | |||
/* Sibling node held on @path at @level; valid only when bp_sib_bh is set. */
static inline struct nilfs_btree_node *
nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
			 const struct nilfs_btree_path *path,
			 int level)
{
	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
}
454 | |||
/* Tree height: root level plus one (the data level below the leaves). */
static inline int nilfs_btree_height(const struct nilfs_btree *btree)
{
	return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
		+ 1;
}
460 | |||
/*
 * Node at @level on @path: the inline root at the top level, otherwise
 * the buffered non-root node.
 */
static inline struct nilfs_btree_node *
nilfs_btree_get_node(const struct nilfs_btree *btree,
		     const struct nilfs_btree_path *path,
		     int level)
{
	return (level == nilfs_btree_height(btree) - 1) ?
		nilfs_btree_get_root(btree) :
		nilfs_btree_get_nonroot_node(btree, path, level);
}
470 | |||
/*
 * nilfs_btree_do_lookup - descend from the root down to @minlevel for @key
 * @btree: b-tree
 * @path: path array filled in per level (node buffer + child index)
 * @key: key to look up
 * @ptrp: place to store the pointer found at @minlevel (may be NULL)
 * @minlevel: lowest level to descend to
 *
 * Returns 0 on success, -ENOENT if @key is absent or the tree is too
 * shallow, or a negative error from reading a node block.
 */
static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
				 struct nilfs_btree_path *path,
				 __u64 key, __u64 *ptrp, int minlevel)
{
	struct nilfs_btree_node *node;
	__u64 ptr;
	int level, index, found, ret;

	node = nilfs_btree_get_root(btree);
	level = nilfs_btree_node_get_level(btree, node);
	if ((level < minlevel) ||
	    (nilfs_btree_node_get_nchildren(btree, node) <= 0))
		return -ENOENT;

	found = nilfs_btree_node_lookup(btree, node, key, &index);
	ptr = nilfs_btree_node_get_ptr(btree, node, index);
	path[level].bp_bh = NULL;
	path[level].bp_index = index;

	for (level--; level >= minlevel; level--) {
		ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
					   &path[level].bp_bh);
		if (ret < 0)
			return ret;
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
		/* once an exact match was seen above, the leftmost child
		 * (index 0) is followed at every lower level */
		if (!found)
			found = nilfs_btree_node_lookup(btree, node, key,
							&index);
		else
			index = 0;
		if (index < nilfs_btree_node_nchildren_max(btree, node))
			ptr = nilfs_btree_node_get_ptr(btree, node, index);
		else {
			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
			/* insert */
			ptr = NILFS_BMAP_INVALID_PTR;
		}
		path[level].bp_index = index;
	}
	if (!found)
		return -ENOENT;

	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}
519 | |||
/*
 * nilfs_btree_do_lookup_last - walk down the rightmost spine of the tree
 * @btree: b-tree
 * @path: path array filled in per level
 * @keyp: place to store the last (largest) key (may be NULL)
 * @ptrp: place to store the corresponding pointer (may be NULL)
 *
 * Returns 0 on success, -ENOENT if the tree is empty, or a negative
 * error from reading a node block.
 */
static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;
	__u64 ptr;
	int index, level, ret;

	node = nilfs_btree_get_root(btree);
	index = nilfs_btree_node_get_nchildren(btree, node) - 1;
	if (index < 0)
		return -ENOENT;
	level = nilfs_btree_node_get_level(btree, node);
	ptr = nilfs_btree_node_get_ptr(btree, node, index);
	path[level].bp_bh = NULL;
	path[level].bp_index = index;

	for (level--; level > 0; level--) {
		ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
					   &path[level].bp_bh);
		if (ret < 0)
			return ret;
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		BUG_ON(level != nilfs_btree_node_get_level(btree, node));
		index = nilfs_btree_node_get_nchildren(btree, node) - 1;
		ptr = nilfs_btree_node_get_ptr(btree, node, index);
		path[level].bp_index = index;
	}

	if (keyp != NULL)
		*keyp = nilfs_btree_node_get_key(btree, node, index);
	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}
556 | |||
557 | static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, | ||
558 | __u64 key, int level, __u64 *ptrp) | ||
559 | { | ||
560 | struct nilfs_btree *btree; | ||
561 | struct nilfs_btree_path *path; | ||
562 | __u64 ptr; | ||
563 | int ret; | ||
564 | |||
565 | btree = (struct nilfs_btree *)bmap; | ||
566 | path = nilfs_btree_alloc_path(btree); | ||
567 | if (path == NULL) | ||
568 | return -ENOMEM; | ||
569 | nilfs_btree_init_path(btree, path); | ||
570 | |||
571 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); | ||
572 | |||
573 | if (ptrp != NULL) | ||
574 | *ptrp = ptr; | ||
575 | |||
576 | nilfs_btree_clear_path(btree, path); | ||
577 | nilfs_btree_free_path(btree, path); | ||
578 | |||
579 | return ret; | ||
580 | } | ||
581 | |||
/*
 * nilfs_btree_promote_key - propagate @key upward along @path
 * @btree: b-tree
 * @path: current path
 * @level: level to start at
 * @key: new key for the path's slot at each visited level
 *
 * Rewrites the key at the path's index starting at @level and keeps
 * climbing while the index is 0 (i.e. while the node's smallest key
 * changed), finally updating the root if reached.
 */
static void nilfs_btree_promote_key(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 key)
{
	if (level < nilfs_btree_height(btree) - 1) {
		do {
			lock_buffer(path[level].bp_bh);
			nilfs_btree_node_set_key(
				btree,
				nilfs_btree_get_nonroot_node(
					btree, path, level),
				path[level].bp_index, key);
			if (!buffer_dirty(path[level].bp_bh))
				nilfs_btnode_mark_dirty(path[level].bp_bh);
			unlock_buffer(path[level].bp_bh);
		} while ((path[level].bp_index == 0) &&
			 (++level < nilfs_btree_height(btree) - 1));
	}

	/* root */
	if (level == nilfs_btree_height(btree) - 1) {
		nilfs_btree_node_set_key(btree,
					 nilfs_btree_get_root(btree),
					 path[level].bp_index, key);
	}
}
608 | |||
/*
 * nilfs_btree_do_insert - insert (*keyp, *ptrp) into the node at @level
 *
 * For a non-root node the buffer is locked, the entry inserted at the
 * path's index, and the buffer dirtied; if the insertion changed the
 * node's smallest key (index 0), that key is promoted to the parents.
 * The root is modified in place without buffer locking.
 */
static void nilfs_btree_do_insert(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;

	if (level < nilfs_btree_height(btree) - 1) {
		lock_buffer(path[level].bp_bh);
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
					path[level].bp_index);
		if (!buffer_dirty(path[level].bp_bh))
			nilfs_btnode_mark_dirty(path[level].bp_bh);
		unlock_buffer(path[level].bp_bh);

		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
						nilfs_btree_node_get_key(
							btree, node, 0));
	} else {
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
					path[level].bp_index);
	}
}
634 | |||
/*
 * nilfs_btree_carry_left - make room by shifting children to the left sibling
 *
 * Balances the current node with its left sibling (already pinned in
 * bp_sib_bh), adjusts the parent's separator key, and then performs
 * the insert.  If the insert point itself migrates into the sibling,
 * the path is re-aimed at the sibling before inserting.
 */
static void nilfs_btree_carry_left(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int nchildren, lnchildren, n, move;

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	left = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);
	move = 0;

	/* n children make the two nodes (plus the new entry) even */
	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
	if (n > path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_left(btree, left, node, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, node, 0));

	if (move) {
		/* insert point moved into the left sibling */
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index += lnchildren;
		path[level + 1].bp_index--;
	} else {
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -= n;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}
685 | |||
/*
 * nilfs_btree_carry_right - make room by shifting children to the right
 * sibling
 *
 * Mirror image of nilfs_btree_carry_left: balances with the right
 * sibling, promotes the sibling's new first key into the parent (the
 * parent index is bumped temporarily to address the sibling's slot),
 * then performs the insert, re-aiming the path if the insert point
 * migrated.
 */
static void nilfs_btree_carry_right(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, rnchildren, n, move;

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);
	move = 0;

	/* n children make the two nodes (plus the new entry) even */
	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
	if (n > nchildren - path[level].bp_index) {
		/* move insert point */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_right(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* promote the sibling's new first key through its parent slot */
	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, right, 0));
	path[level + 1].bp_index--;

	if (move) {
		/* insert point moved into the right sibling */
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
		path[level].bp_index -=
			nilfs_btree_node_get_nchildren(btree, node);
		path[level + 1].bp_index++;
	} else {
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
	}

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
}
738 | |||
/*
 * nilfs_btree_split - split a full node into itself and a new right sibling
 *
 * Moves the upper half of the node's children into the freshly
 * allocated sibling (bp_sib_bh), inserts the new entry into whichever
 * half now contains the insert point, and hands (first key of the new
 * node, its newly allocated ptr) back through *keyp/*ptrp so the
 * caller can insert the sibling into the parent at the incremented
 * parent index.
 */
static void nilfs_btree_split(struct nilfs_btree *btree,
			      struct nilfs_btree_path *path,
			      int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	__u64 newkey;
	__u64 newptr;
	int nchildren, n, move;

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	move = 0;

	n = (nchildren + 1) / 2;
	if (n > nchildren - path[level].bp_index) {
		/* the insert point falls into the new right node */
		n--;
		move = 1;
	}

	nilfs_btree_node_move_right(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	newkey = nilfs_btree_node_get_key(btree, right, 0);
	newptr = path[level].bp_newreq.bpr_ptr;

	if (move) {
		path[level].bp_index -=
			nilfs_btree_node_get_nchildren(btree, node);
		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
					path[level].bp_index);

		*keyp = nilfs_btree_node_get_key(btree, right, 0);
		*ptrp = path[level].bp_newreq.bpr_ptr;

		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
		path[level].bp_bh = path[level].bp_sib_bh;
		path[level].bp_sib_bh = NULL;
	} else {
		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);

		*keyp = nilfs_btree_node_get_key(btree, right, 0);
		*ptrp = path[level].bp_newreq.bpr_ptr;

		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		path[level].bp_sib_bh = NULL;
	}

	path[level + 1].bp_index++;
}
799 | |||
/*
 * nilfs_btree_grow - raise the tree height by one level
 *
 * Moves all of the root's children into a newly allocated child node
 * (bp_sib_bh), raises the root's level, inserts the new entry into the
 * child, and hands (child's first key, its new ptr) back through
 * *keyp/*ptrp for insertion into the now-empty root.
 */
static void nilfs_btree_grow(struct nilfs_btree *btree,
			     struct nilfs_btree_path *path,
			     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *root, *child;
	int n;

	lock_buffer(path[level].bp_sib_bh);

	root = nilfs_btree_get_root(btree);
	child = nilfs_btree_get_sib_node(btree, path, level);

	n = nilfs_btree_node_get_nchildren(btree, root);

	nilfs_btree_node_move_right(btree, root, child, n);
	nilfs_btree_node_set_level(btree, root, level + 1);

	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_sib_bh);

	path[level].bp_bh = path[level].bp_sib_bh;
	path[level].bp_sib_bh = NULL;

	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);

	*keyp = nilfs_btree_node_get_key(btree, child, 0);
	*ptrp = path[level].bp_newreq.bpr_ptr;
}
830 | |||
/*
 * nilfs_btree_find_near - pick a ptr near the insert point as an
 * allocation hint: the left neighbor at the lowest node level if one
 * exists, otherwise the parent's ptr; NILFS_BMAP_INVALID_PTR when
 * neither applies.
 */
static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
				   const struct nilfs_btree_path *path)
{
	struct nilfs_btree_node *node;
	int level;

	if (path == NULL)
		return NILFS_BMAP_INVALID_PTR;

	/* left sibling */
	level = NILFS_BTREE_LEVEL_NODE_MIN;
	if (path[level].bp_index > 0) {
		node = nilfs_btree_get_node(btree, path, level);
		return nilfs_btree_node_get_ptr(btree, node,
						path[level].bp_index - 1);
	}

	/* parent */
	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
	if (level <= nilfs_btree_height(btree) - 1) {
		node = nilfs_btree_get_node(btree, path, level);
		return nilfs_btree_node_get_ptr(btree, node,
						path[level].bp_index);
	}

	return NILFS_BMAP_INVALID_PTR;
}
858 | |||
859 | static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, | ||
860 | const struct nilfs_btree_path *path, | ||
861 | __u64 key) | ||
862 | { | ||
863 | __u64 ptr; | ||
864 | |||
865 | ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); | ||
866 | if (ptr != NILFS_BMAP_INVALID_PTR) | ||
867 | /* sequential access */ | ||
868 | return ptr; | ||
869 | else { | ||
870 | ptr = nilfs_btree_find_near(btree, path); | ||
871 | if (ptr != NILFS_BMAP_INVALID_PTR) | ||
872 | /* near */ | ||
873 | return ptr; | ||
874 | } | ||
875 | /* block group */ | ||
876 | return nilfs_bmap_find_target_in_group(&btree->bt_bmap); | ||
877 | } | ||
878 | |||
879 | static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key, | ||
880 | __u64 ptr) | ||
881 | { | ||
882 | btree->bt_bmap.b_last_allocated_key = key; | ||
883 | btree->bt_bmap.b_last_allocated_ptr = ptr; | ||
884 | } | ||
885 | |||
/*
 * nilfs_btree_prepare_insert - plan an insertion and preallocate resources
 * @btree: btree
 * @path: lookup path for @key (set up by nilfs_btree_do_lookup)
 * @levelp: place to store the highest level the commit phase must process
 * @key: key to insert
 * @ptr: pointer to insert (not used in this phase; consumed at commit time)
 * @stats: place to return the number of blocks the insertion will add
 *
 * Walks from the bottom level upward, recording in path[level].bp_op the
 * operation (do_insert, carry_left, carry_right, split, or grow) that the
 * commit phase will apply at each level, and preallocating the block
 * pointers and sibling buffers those operations need.  The walk stops at
 * the first level with room for one more entry.  On error, everything
 * allocated so far is rolled back.  Returns 0 or a negative error code.
 */
static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int *levelp, __u64 key, __u64 ptr,
				      struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, level, ret;

	stats->bs_nblocks = 0;
	level = NILFS_BTREE_LEVEL_DATA;

	/* allocate a new ptr for data block */
	if (btree->bt_ops->btop_find_target != NULL)
		path[level].bp_newreq.bpr_ptr =
			btree->bt_ops->btop_find_target(btree, path, key);

	ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
		&btree->bt_bmap, &path[level].bp_newreq);
	if (ret < 0)
		goto err_out_data;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		/* node has room: a plain insert at this level finishes it */
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		if (nilfs_btree_node_get_nchildren(btree, node) <
		    nilfs_btree_node_nchildren_max(btree, node)) {
			path[level].bp_op = nilfs_btree_do_insert;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1);
		pindex = path[level + 1].bp_index;

		/* left sibling: shift entries left if it has room */
		if (pindex > 0) {
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex - 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) <
			    nilfs_btree_node_nchildren_max(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_left;
				stats->bs_nblocks++;
				goto out;
			} else
				nilfs_bmap_put_block(&btree->bt_bmap, bh);
		}

		/* right sibling: shift entries right if it has room */
		if (pindex <
		    nilfs_btree_node_get_nchildren(btree, parent) - 1) {
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex + 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_child_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) <
			    nilfs_btree_node_nchildren_max(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_carry_right;
				stats->bs_nblocks++;
				goto out;
			} else
				nilfs_bmap_put_block(&btree->bt_bmap, bh);
		}

		/*
		 * split: allocate a fresh sibling node block; the pointer
		 * request starts one past the one granted a level below as
		 * an allocation hint, and the insertion propagates upward.
		 */
		path[level].bp_newreq.bpr_ptr =
			path[level - 1].bp_newreq.bpr_ptr + 1;
		ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
			&btree->bt_bmap, &path[level].bp_newreq);
		if (ret < 0)
			goto err_out_child_node;
		ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
					       path[level].bp_newreq.bpr_ptr,
					       &bh);
		if (ret < 0)
			goto err_out_curr_node;

		stats->bs_nblocks++;

		lock_buffer(bh);
		nilfs_btree_node_init(btree,
				      (struct nilfs_btree_node *)bh->b_data,
				      0, level, 0, NULL, NULL);
		unlock_buffer(bh);
		path[level].bp_sib_bh = bh;
		path[level].bp_op = nilfs_btree_split;
	}

	/* root */
	node = nilfs_btree_get_root(btree);
	if (nilfs_btree_node_get_nchildren(btree, node) <
	    nilfs_btree_node_nchildren_max(btree, node)) {
		path[level].bp_op = nilfs_btree_do_insert;
		stats->bs_nblocks++;
		goto out;
	}

	/* grow: the root is full too, so the tree gains one level */
	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
	ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
		&btree->bt_bmap, &path[level].bp_newreq);
	if (ret < 0)
		goto err_out_child_node;
	ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
				       path[level].bp_newreq.bpr_ptr, &bh);
	if (ret < 0)
		goto err_out_curr_node;

	lock_buffer(bh);
	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
			      0, level, 0, NULL, NULL);
	unlock_buffer(bh);
	path[level].bp_sib_bh = bh;
	path[level].bp_op = nilfs_btree_grow;

	level++;
	path[level].bp_op = nilfs_btree_do_insert;

	/* a newly-created node block and a data block are added */
	stats->bs_nblocks += 2;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error: roll back this level's pointer allocation, then the
	 * sibling blocks and pointer allocations of every lower level,
	 * finishing with the data-level allocation. */
 err_out_curr_node:
	btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
						    &path[level].bp_newreq);
 err_out_child_node:
	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
		nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
		btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
			&btree->bt_bmap, &path[level].bp_newreq);

	}

	btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
						    &path[level].bp_newreq);
 err_out_data:
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}
1043 | |||
/*
 * nilfs_btree_commit_insert - apply a prepared insertion to the btree
 * @btree: btree
 * @path: lookup path with per-level operations set by the prepare phase
 * @maxlevel: highest level that has an operation to apply
 * @key: key being inserted
 * @ptr: caller-supplied value that actually carries a buffer_head
 *	 pointer (see the cast below), not a disk block number
 */
static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int maxlevel, __u64 key, __u64 ptr)
{
	int level;

	/* mark the data buffer volatile, then replace @ptr with the disk
	 * pointer preallocated for the data block */
	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
	if (btree->bt_ops->btop_set_target != NULL)
		btree->bt_ops->btop_set_target(btree, key, ptr);

	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
		/* commit the pointer allocated for the level below, then
		 * run the operation chosen by nilfs_btree_prepare_insert() */
		if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
			btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
				&btree->bt_bmap, &path[level - 1].bp_newreq);
		}
		path[level].bp_op(btree, path, level, &key, &ptr);
	}

	if (!nilfs_bmap_dirty(&btree->bt_bmap))
		nilfs_bmap_set_dirty(&btree->bt_bmap);
}
1066 | |||
1067 | static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | ||
1068 | { | ||
1069 | struct nilfs_btree *btree; | ||
1070 | struct nilfs_btree_path *path; | ||
1071 | struct nilfs_bmap_stats stats; | ||
1072 | int level, ret; | ||
1073 | |||
1074 | btree = (struct nilfs_btree *)bmap; | ||
1075 | path = nilfs_btree_alloc_path(btree); | ||
1076 | if (path == NULL) | ||
1077 | return -ENOMEM; | ||
1078 | nilfs_btree_init_path(btree, path); | ||
1079 | |||
1080 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | ||
1081 | NILFS_BTREE_LEVEL_NODE_MIN); | ||
1082 | if (ret != -ENOENT) { | ||
1083 | if (ret == 0) | ||
1084 | ret = -EEXIST; | ||
1085 | goto out; | ||
1086 | } | ||
1087 | |||
1088 | ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); | ||
1089 | if (ret < 0) | ||
1090 | goto out; | ||
1091 | nilfs_btree_commit_insert(btree, path, level, key, ptr); | ||
1092 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | ||
1093 | |||
1094 | out: | ||
1095 | nilfs_btree_clear_path(btree, path); | ||
1096 | nilfs_btree_free_path(btree, path); | ||
1097 | return ret; | ||
1098 | } | ||
1099 | |||
/*
 * nilfs_btree_do_delete - remove one entry from the node at @level
 *
 * Deletes the entry at path[level].bp_index.  Non-root nodes are
 * modified under the buffer lock and marked dirty; when the node's
 * first entry was the one removed, the new minimum key is promoted to
 * the parent.  The removed key/ptr are returned via @keyp/@ptrp when
 * those are non-NULL.
 */
static void nilfs_btree_do_delete(struct nilfs_btree *btree,
				  struct nilfs_btree_path *path,
				  int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node;

	if (level < nilfs_btree_height(btree) - 1) {
		lock_buffer(path[level].bp_bh);
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		nilfs_btree_node_delete(btree, node, keyp, ptrp,
					path[level].bp_index);
		if (!buffer_dirty(path[level].bp_bh))
			nilfs_btnode_mark_dirty(path[level].bp_bh);
		unlock_buffer(path[level].bp_bh);
		/* deleting index 0 changes this node's smallest key */
		if (path[level].bp_index == 0)
			nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, node, 0));
	} else {
		/* the root lives in the inode, not in a buffer */
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_delete(btree, node, keyp, ptrp,
					path[level].bp_index);
	}
}
1123 | |||
/*
 * nilfs_btree_borrow_left - delete an entry, then rebalance with the
 * left sibling
 *
 * After the deletion at this level, moves entries from the left sibling
 * into this node so both hold roughly half of the combined total,
 * promotes the node's new first key to the parent, and shifts bp_index
 * by the number of entries that arrived from the left.
 */
static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int nchildren, lnchildren, n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	left = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	lnchildren = nilfs_btree_node_get_nchildren(btree, left);

	/* number of entries to take so both nodes end up balanced */
	n = (nchildren + lnchildren) / 2 - nchildren;

	nilfs_btree_node_move_right(btree, left, node, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* this node's smallest key changed; tell the parent */
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, node, 0));

	nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
	path[level].bp_index += n;
}
1160 | |||
/*
 * nilfs_btree_borrow_right - delete an entry, then rebalance with the
 * right sibling
 *
 * After the deletion at this level, pulls entries from the right
 * sibling into this node so both hold roughly half of the combined
 * total, then promotes the sibling's new first key to the parent.
 */
static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
				     struct nilfs_btree_path *path,
				     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int nchildren, rnchildren, n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	rnchildren = nilfs_btree_node_get_nchildren(btree, right);

	/* number of entries to take so both nodes end up balanced */
	n = (nchildren + rnchildren) / 2 - nchildren;

	nilfs_btree_node_move_left(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);
	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* temporarily point at the sibling's parent slot so the promoted
	 * key lands in the right place, then restore the index */
	path[level + 1].bp_index++;
	nilfs_btree_promote_key(btree, path, level + 1,
				nilfs_btree_node_get_key(btree, right, 0));
	path[level + 1].bp_index--;

	nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
}
1198 | |||
/*
 * nilfs_btree_concat_left - delete an entry, then merge this node into
 * its left sibling
 *
 * Moves all remaining entries of this node into the left sibling and
 * frees this node's block; the sibling buffer takes over as bp_bh.  The
 * parent loses a child, so the caller's loop continues the deletion one
 * level up.
 */
static void nilfs_btree_concat_left(struct nilfs_btree *btree,
				    struct nilfs_btree_path *path,
				    int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *left;
	int n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	left = nilfs_btree_get_sib_node(btree, path, level);

	n = nilfs_btree_node_get_nchildren(btree, node);

	nilfs_btree_node_move_left(btree, left, node, n);

	if (!buffer_dirty(path[level].bp_sib_bh))
		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* drop the emptied node block and continue on the merged node */
	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
	path[level].bp_bh = path[level].bp_sib_bh;
	path[level].bp_sib_bh = NULL;
	path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
}
1229 | |||
/*
 * nilfs_btree_concat_right - delete an entry, then merge the right
 * sibling into this node
 *
 * Moves all entries of the right sibling into this node and frees the
 * sibling's block.  The parent loses a child (the one at pindex + 1,
 * hence the bp_index bump), so the caller's loop continues the deletion
 * one level up.
 */
static void nilfs_btree_concat_right(struct nilfs_btree *btree,
				     struct nilfs_btree_path *path,
				     int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *node, *right;
	int n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	lock_buffer(path[level].bp_sib_bh);

	node = nilfs_btree_get_nonroot_node(btree, path, level);
	right = nilfs_btree_get_sib_node(btree, path, level);

	n = nilfs_btree_node_get_nchildren(btree, right);

	nilfs_btree_node_move_left(btree, node, right, n);

	if (!buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);

	unlock_buffer(path[level].bp_bh);
	unlock_buffer(path[level].bp_sib_bh);

	/* the absorbed sibling's block is no longer needed */
	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
	path[level].bp_sib_bh = NULL;
	path[level + 1].bp_index++;
}
1259 | |||
/*
 * nilfs_btree_shrink - delete an entry, then reduce the tree height by one
 *
 * Called when the root's only child fits into the root after the
 * deletion: the root is emptied, demoted to the child's level, the
 * child's entries are moved into it, and the child's block is freed.
 */
static void nilfs_btree_shrink(struct nilfs_btree *btree,
			       struct nilfs_btree_path *path,
			       int level, __u64 *keyp, __u64 *ptrp)
{
	struct nilfs_btree_node *root, *child;
	int n;

	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);

	lock_buffer(path[level].bp_bh);
	root = nilfs_btree_get_root(btree);
	child = nilfs_btree_get_nonroot_node(btree, path, level);

	/* drop the root's single entry and absorb the child's contents */
	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
	nilfs_btree_node_set_level(btree, root, level);
	n = nilfs_btree_node_get_nchildren(btree, child);
	nilfs_btree_node_move_left(btree, root, child, n);
	unlock_buffer(path[level].bp_bh);

	nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
	path[level].bp_bh = NULL;
}
1282 | |||
1283 | |||
/*
 * nilfs_btree_prepare_delete - plan a deletion and preallocate resources
 * @btree: btree
 * @path: lookup path for the key being deleted
 * @levelp: place to store the highest level the commit phase must process
 * @stats: place to return the number of blocks the deletion will remove
 *
 * Walks from the bottom level upward, recording in path[level].bp_op the
 * operation (do_delete, borrow_left/right, concat_left/right, or shrink)
 * the commit phase will apply.  Borrowing and a plain delete end the
 * walk; concatenation removes a child from the parent, so the walk
 * continues one level up.  On error, the end-pointer preparations and
 * sibling buffers of already-processed levels are rolled back.
 */
static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
				      struct nilfs_btree_path *path,
				      int *levelp,
				      struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree_node *node, *parent, *sib;
	__u64 sibptr;
	int pindex, level, ret;

	ret = 0;
	stats->bs_nblocks = 0;
	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < nilfs_btree_height(btree) - 1;
	     level++) {
		/* prepare disposal of the pointer being removed here */
		node = nilfs_btree_get_nonroot_node(btree, path, level);
		path[level].bp_oldreq.bpr_ptr =
			nilfs_btree_node_get_ptr(btree, node,
						 path[level].bp_index);
		if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
			ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
				&btree->bt_bmap, &path[level].bp_oldreq);
			if (ret < 0)
				goto err_out_child_node;
		}

		/* node keeps enough children: a plain delete suffices */
		if (nilfs_btree_node_get_nchildren(btree, node) >
		    nilfs_btree_node_nchildren_min(btree, node)) {
			path[level].bp_op = nilfs_btree_do_delete;
			stats->bs_nblocks++;
			goto out;
		}

		parent = nilfs_btree_get_node(btree, path, level + 1);
		pindex = path[level + 1].bp_index;

		if (pindex > 0) {
			/* left sibling: borrow if it can spare entries,
			 * otherwise merge into it */
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex - 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_curr_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) >
			    nilfs_btree_node_nchildren_min(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_borrow_left;
				stats->bs_nblocks++;
				goto out;
			} else {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_concat_left;
				stats->bs_nblocks++;
				/* continue; */
			}
		} else if (pindex <
			   nilfs_btree_node_get_nchildren(btree, parent) - 1) {
			/* right sibling: borrow if it can spare entries,
			 * otherwise merge it into this node */
			sibptr = nilfs_btree_node_get_ptr(btree, parent,
							  pindex + 1);
			ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
						   &bh);
			if (ret < 0)
				goto err_out_curr_node;
			sib = (struct nilfs_btree_node *)bh->b_data;
			if (nilfs_btree_node_get_nchildren(btree, sib) >
			    nilfs_btree_node_nchildren_min(btree, sib)) {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_borrow_right;
				stats->bs_nblocks++;
				goto out;
			} else {
				path[level].bp_sib_bh = bh;
				path[level].bp_op = nilfs_btree_concat_right;
				stats->bs_nblocks++;
				/* continue; */
			}
		} else {
			/* no siblings */
			/* the only child of the root node */
			WARN_ON(level != nilfs_btree_height(btree) - 2);
			if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
				/* remaining entries fit in the root: shrink
				 * the tree (frees the child block too) */
				path[level].bp_op = nilfs_btree_shrink;
				stats->bs_nblocks += 2;
			} else {
				path[level].bp_op = nilfs_btree_do_delete;
				stats->bs_nblocks++;
			}

			goto out;

		}
	}

	/* the walk reached the root level itself */
	node = nilfs_btree_get_root(btree);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
	if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
		ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
			&btree->bt_bmap, &path[level].bp_oldreq);
		if (ret < 0)
			goto err_out_child_node;
	}
	/* child of the root node is deleted */
	path[level].bp_op = nilfs_btree_do_delete;
	stats->bs_nblocks++;

	/* success */
 out:
	*levelp = level;
	return ret;

	/* error: abort this level's end-pointer preparation, then undo
	 * every lower level (release sibling buffer, abort end ptr) */
 err_out_curr_node:
	if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
		btree->bt_bmap.b_pops->bpop_abort_end_ptr(
			&btree->bt_bmap, &path[level].bp_oldreq);
 err_out_child_node:
	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
		nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
		if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
			btree->bt_bmap.b_pops->bpop_abort_end_ptr(
				&btree->bt_bmap, &path[level].bp_oldreq);
	}
	*levelp = level;
	stats->bs_nblocks = 0;
	return ret;
}
1415 | |||
1416 | static void nilfs_btree_commit_delete(struct nilfs_btree *btree, | ||
1417 | struct nilfs_btree_path *path, | ||
1418 | int maxlevel) | ||
1419 | { | ||
1420 | int level; | ||
1421 | |||
1422 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { | ||
1423 | if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL) | ||
1424 | btree->bt_bmap.b_pops->bpop_commit_end_ptr( | ||
1425 | &btree->bt_bmap, &path[level].bp_oldreq); | ||
1426 | path[level].bp_op(btree, path, level, NULL, NULL); | ||
1427 | } | ||
1428 | |||
1429 | if (!nilfs_bmap_dirty(&btree->bt_bmap)) | ||
1430 | nilfs_bmap_set_dirty(&btree->bt_bmap); | ||
1431 | } | ||
1432 | |||
1433 | static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) | ||
1434 | |||
1435 | { | ||
1436 | struct nilfs_btree *btree; | ||
1437 | struct nilfs_btree_path *path; | ||
1438 | struct nilfs_bmap_stats stats; | ||
1439 | int level, ret; | ||
1440 | |||
1441 | btree = (struct nilfs_btree *)bmap; | ||
1442 | path = nilfs_btree_alloc_path(btree); | ||
1443 | if (path == NULL) | ||
1444 | return -ENOMEM; | ||
1445 | nilfs_btree_init_path(btree, path); | ||
1446 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | ||
1447 | NILFS_BTREE_LEVEL_NODE_MIN); | ||
1448 | if (ret < 0) | ||
1449 | goto out; | ||
1450 | |||
1451 | ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); | ||
1452 | if (ret < 0) | ||
1453 | goto out; | ||
1454 | nilfs_btree_commit_delete(btree, path, level); | ||
1455 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | ||
1456 | |||
1457 | out: | ||
1458 | nilfs_btree_clear_path(btree, path); | ||
1459 | nilfs_btree_free_path(btree, path); | ||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | ||
1464 | { | ||
1465 | struct nilfs_btree *btree; | ||
1466 | struct nilfs_btree_path *path; | ||
1467 | int ret; | ||
1468 | |||
1469 | btree = (struct nilfs_btree *)bmap; | ||
1470 | path = nilfs_btree_alloc_path(btree); | ||
1471 | if (path == NULL) | ||
1472 | return -ENOMEM; | ||
1473 | nilfs_btree_init_path(btree, path); | ||
1474 | |||
1475 | ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); | ||
1476 | |||
1477 | nilfs_btree_clear_path(btree, path); | ||
1478 | nilfs_btree_free_path(btree, path); | ||
1479 | |||
1480 | return ret; | ||
1481 | } | ||
1482 | |||
/*
 * nilfs_btree_check_delete - test a key against the shrink-back condition
 * @bmap: bmap
 * @key: key about to be deleted
 *
 * Returns nonzero iff @key is the largest key in a small tree (height 2,
 * or height 3 with a single child node) and the second-largest key is
 * below bmap->b_low; taller trees or multi-child roots return 0.
 * NOTE(review): presumably the caller uses this to decide whether the
 * btree can be converted back to a direct mapping -- confirm against
 * callers.
 */
static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	struct nilfs_btree_node *root, *node;
	__u64 maxkey, nextmaxkey;
	__u64 ptr;
	int nchildren, ret;

	btree = (struct nilfs_btree *)bmap;
	root = nilfs_btree_get_root(btree);
	switch (nilfs_btree_height(btree)) {
	case 2:
		/* all entries live in the root itself */
		bh = NULL;
		node = root;
		break;
	case 3:
		/* only a single-child root qualifies; read that child */
		nchildren = nilfs_btree_node_get_nchildren(btree, root);
		if (nchildren > 1)
			return 0;
		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
		ret = nilfs_bmap_get_block(bmap, ptr, &bh);
		if (ret < 0)
			return ret;
		node = (struct nilfs_btree_node *)bh->b_data;
		break;
	default:
		return 0;
	}

	/* compare the two largest keys against @key and the threshold */
	nchildren = nilfs_btree_node_get_nchildren(btree, node);
	maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
	nextmaxkey = (nchildren > 1) ?
		nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
	if (bh != NULL)
		nilfs_bmap_put_block(bmap, bh);

	return (maxkey == key) && (nextmaxkey < bmap->b_low);
}
1522 | |||
1523 | static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, | ||
1524 | __u64 *keys, __u64 *ptrs, int nitems) | ||
1525 | { | ||
1526 | struct buffer_head *bh; | ||
1527 | struct nilfs_btree *btree; | ||
1528 | struct nilfs_btree_node *node, *root; | ||
1529 | __le64 *dkeys; | ||
1530 | __le64 *dptrs; | ||
1531 | __u64 ptr; | ||
1532 | int nchildren, i, ret; | ||
1533 | |||
1534 | btree = (struct nilfs_btree *)bmap; | ||
1535 | root = nilfs_btree_get_root(btree); | ||
1536 | switch (nilfs_btree_height(btree)) { | ||
1537 | case 2: | ||
1538 | bh = NULL; | ||
1539 | node = root; | ||
1540 | break; | ||
1541 | case 3: | ||
1542 | nchildren = nilfs_btree_node_get_nchildren(btree, root); | ||
1543 | WARN_ON(nchildren > 1); | ||
1544 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); | ||
1545 | ret = nilfs_bmap_get_block(bmap, ptr, &bh); | ||
1546 | if (ret < 0) | ||
1547 | return ret; | ||
1548 | node = (struct nilfs_btree_node *)bh->b_data; | ||
1549 | break; | ||
1550 | default: | ||
1551 | node = NULL; | ||
1552 | return -EINVAL; | ||
1553 | } | ||
1554 | |||
1555 | nchildren = nilfs_btree_node_get_nchildren(btree, node); | ||
1556 | if (nchildren < nitems) | ||
1557 | nitems = nchildren; | ||
1558 | dkeys = nilfs_btree_node_dkeys(btree, node); | ||
1559 | dptrs = nilfs_btree_node_dptrs(btree, node); | ||
1560 | for (i = 0; i < nitems; i++) { | ||
1561 | keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); | ||
1562 | ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); | ||
1563 | } | ||
1564 | |||
1565 | if (bh != NULL) | ||
1566 | nilfs_bmap_put_block(bmap, bh); | ||
1567 | |||
1568 | return nitems; | ||
1569 | } | ||
1570 | |||
/*
 * nilfs_btree_prepare_convert_and_insert - preallocate resources for
 * converting a bmap into btree form
 * @bmap: bmap being converted
 * @key: key of the entry to be inserted after conversion
 * @dreq: allocation request for the new data block pointer
 * @nreq: allocation request for a new node block pointer, or NULL when
 *	  every entry will fit in the btree root
 * @bhp: place to return the buffer of the newly created node block
 *	 (set to NULL when @nreq is NULL)
 * @stats: place to return the number of blocks the conversion will add
 *
 * Returns 0 on success or a negative error code, in which case all
 * partial allocations have been aborted.
 */
static int
nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
				       union nilfs_bmap_ptr_req *dreq,
				       union nilfs_bmap_ptr_req *nreq,
				       struct buffer_head **bhp,
				       struct nilfs_bmap_stats *stats)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	int ret;

	btree = (struct nilfs_btree *)bmap;
	stats->bs_nblocks = 0;

	/* for data */
	/* cannot find near ptr */
	if (btree->bt_ops->btop_find_target != NULL)
		dreq->bpr_ptr
			= btree->bt_ops->btop_find_target(btree, NULL, key);
	ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
	if (ret < 0)
		return ret;

	*bhp = NULL;
	stats->bs_nblocks++;
	if (nreq != NULL) {
		/* also allocate a pointer and a fresh block for the
		 * child node; hint at the slot next to the data block */
		nreq->bpr_ptr = dreq->bpr_ptr + 1;
		ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
		if (ret < 0)
			goto err_out_dreq;

		ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
		if (ret < 0)
			goto err_out_nreq;

		*bhp = bh;
		stats->bs_nblocks++;
	}

	/* success */
	return 0;

	/* error */
 err_out_nreq:
	bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
 err_out_dreq:
	bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
	stats->bs_nblocks = 0;
	return ret;

}
1622 | |||
/*
 * nilfs_btree_commit_convert_and_insert - finish converting a bmap into
 * btree form and insert the new entry
 *
 * With @nreq set, builds a two-level tree: the @n gathered entries plus
 * the new one go into the freshly allocated child node (@bh) and the
 * root gets a single entry pointing at it.  Without @nreq, everything
 * fits directly into the root.  @ptr carries the data buffer_head (see
 * the cast below); the entry's disk pointer is dreq->bpr_ptr.
 */
static void
nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
				      __u64 key, __u64 ptr,
				      const __u64 *keys, const __u64 *ptrs,
				      int n, __u64 low, __u64 high,
				      union nilfs_bmap_ptr_req *dreq,
				      union nilfs_bmap_ptr_req *nreq,
				      struct buffer_head *bh)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_node *node;
	__u64 tmpptr;

	/* free resources of the bmap's previous (non-btree) form */
	if (bmap->b_ops->bop_clear != NULL)
		bmap->b_ops->bop_clear(bmap);

	/* ptr must be a pointer to a buffer head. */
	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));

	/* convert and insert */
	btree = (struct nilfs_btree *)bmap;
	nilfs_btree_init(bmap, low, high);
	if (nreq != NULL) {
		if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
			bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
			bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
		}

		/* create child node at level 1 */
		lock_buffer(bh);
		node = (struct nilfs_btree_node *)bh->b_data;
		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
		nilfs_btree_node_insert(btree, node,
					key, dreq->bpr_ptr, n);
		if (!buffer_dirty(bh))
			nilfs_btnode_mark_dirty(bh);
		if (!nilfs_bmap_dirty(bmap))
			nilfs_bmap_set_dirty(bmap);

		unlock_buffer(bh);
		nilfs_bmap_put_block(bmap, bh);

		/* create root node at level 2 */
		node = nilfs_btree_get_root(btree);
		tmpptr = nreq->bpr_ptr;
		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
				      2, 1, &keys[0], &tmpptr);
	} else {
		if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
			bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);

		/* create root node at level 1 */
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
				      1, n, keys, ptrs);
		nilfs_btree_node_insert(btree, node,
					key, dreq->bpr_ptr, n);
		if (!nilfs_bmap_dirty(bmap))
			nilfs_bmap_set_dirty(bmap);
	}

	/* record the new entry as the last allocated position */
	if (btree->bt_ops->btop_set_target != NULL)
		btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
}
1688 | |||
/**
 * nilfs_btree_convert_and_insert - convert a bmap into btree form and
 * insert a new entry
 * @bmap: bmap to convert
 * @key: key of the new entry inserted along with the conversion
 * @ptr: pointer of the new entry (actually carries a buffer_head
 *	 pointer; see nilfs_btree_commit_convert_and_insert())
 * @keys: keys already held by @bmap in its previous form
 * @ptrs: pointers corresponding to @keys
 * @n: number of entries in @keys and @ptrs
 * @low: lower key bound passed through to nilfs_btree_init()
 * @high: upper key bound passed through to nilfs_btree_init()
 *
 * Returns 0 on success or a negative error code from the prepare phase.
 */
int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
				   __u64 key, __u64 ptr,
				   const __u64 *keys, const __u64 *ptrs,
				   int n, __u64 low, __u64 high)
{
	struct buffer_head *bh;
	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
	struct nilfs_bmap_stats stats;
	int ret;

	/* choose the resulting shape: the n + 1 entries either fit in
	 * the root alone, or in one child node; anything larger here is
	 * a bug */
	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
		di = &dreq;
		ni = NULL;
	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
			   1 << bmap->b_inode->i_blkbits)) {
		di = &dreq;
		ni = &nreq;
	} else {
		di = NULL;
		ni = NULL;
		BUG();
	}

	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
						     &stats);
	if (ret < 0)
		return ret;
	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
					      low, high, di, ni, bh);
	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
	return 0;
}
1732 | |||
/*
 * nilfs_btree_propagate_p - propagate dirtiness for a bmap using physical
 * (p) block addresses: mark every ancestor node on @path dirty, starting
 * at the level above @level and stopping below the root.  The walk stops
 * at the first already-dirty node, since its own ancestors were marked
 * when it was first dirtied.  @bh is unused; it is present only to match
 * the btop_propagate signature.  Always returns 0.
 */
static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   int level,
				   struct buffer_head *bh)
{
	while ((++level < nilfs_btree_height(btree) - 1) &&
	       !buffer_dirty(path[level].bp_bh))
		nilfs_btnode_mark_dirty(path[level].bp_bh);

	return 0;
}
1744 | |||
/*
 * nilfs_btree_prepare_update_v - prepare to reassign the virtual block
 * address of the node at @level.  The old address is read from the parent
 * node's pointer slot; the new request starts from old + 1 — presumably an
 * allocation hint for the DAT; confirm against nilfs_bmap_prepare_update.
 *
 * If the buffer belongs to the btnode cache, a key-change (cache
 * relocation from old to new address) is also prepared.  On failure of
 * that second step, the bmap update prepared first is rolled back, so
 * the function either prepares both steps or neither.
 */
static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
					struct nilfs_btree_path *path,
					int level)
{
	struct nilfs_btree_node *parent;
	int ret;

	parent = nilfs_btree_get_node(btree, path, level + 1);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(btree, parent,
					 path[level + 1].bp_index);
	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
	ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
					&path[level].bp_oldreq,
					&path[level].bp_newreq);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(path[level].bp_bh)) {
		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
		path[level].bp_ctxt.bh = path[level].bp_bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		if (ret < 0) {
			/* undo the bmap update prepared above */
			nilfs_bmap_abort_update(&btree->bt_bmap,
						&path[level].bp_oldreq,
						&path[level].bp_newreq);
			return ret;
		}
	}

	return 0;
}
1780 | |||
/*
 * nilfs_btree_commit_update_v - commit the address reassignment prepared
 * by nilfs_btree_prepare_update_v: commit the bmap update, commit the
 * btnode-cache relocation (which may hand back a different buffer head),
 * mark the buffer volatile, and store the new address into the parent
 * node's pointer slot.
 */
static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
					struct nilfs_btree_path *path,
					int level)
{
	struct nilfs_btree_node *parent;

	nilfs_bmap_commit_update(&btree->bt_bmap,
				 &path[level].bp_oldreq,
				 &path[level].bp_newreq);

	if (buffer_nilfs_node(path[level].bp_bh)) {
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		/* relocation may substitute a new buffer head */
		path[level].bp_bh = path[level].bp_ctxt.bh;
	}
	set_buffer_nilfs_volatile(path[level].bp_bh);

	parent = nilfs_btree_get_node(btree, path, level + 1);
	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
				 path[level].bp_newreq.bpr_ptr);
}
1803 | |||
/*
 * nilfs_btree_abort_update_v - roll back both steps prepared by
 * nilfs_btree_prepare_update_v (the bmap update and, for btnode-cache
 * buffers, the pending key change).
 */
static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
				       struct nilfs_btree_path *path,
				       int level)
{
	nilfs_bmap_abort_update(&btree->bt_bmap,
				&path[level].bp_oldreq,
				&path[level].bp_newreq);
	if (buffer_nilfs_node(path[level].bp_bh))
		nilfs_btnode_abort_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
}
1816 | |||
/*
 * nilfs_btree_prepare_propagate_v - prepare address updates for the nodes
 * on @path from @minlevel upward.  The node at @minlevel is prepared only
 * if it is not already volatile (i.e. not yet reassigned in this segment);
 * above that, the walk continues while nodes are clean, stopping below the
 * root or at the first dirty node.  On success *@maxlevelp receives the
 * highest prepared level.  On failure all levels prepared so far are
 * aborted in reverse order, honoring the volatile-skip at @minlevel.
 */
static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
					   struct nilfs_btree_path *path,
					   int minlevel,
					   int *maxlevelp)
{
	int level, ret;

	level = minlevel;
	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
		ret = nilfs_btree_prepare_update_v(btree, path, level);
		if (ret < 0)
			return ret;
	}
	while ((++level < nilfs_btree_height(btree) - 1) &&
	       !buffer_dirty(path[level].bp_bh)) {

		/* a clean node above minlevel must not be volatile */
		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
		ret = nilfs_btree_prepare_update_v(btree, path, level);
		if (ret < 0)
			goto out;
	}

	/* success */
	*maxlevelp = level - 1;
	return 0;

	/* error: unwind every level prepared so far, top-down */
 out:
	while (--level > minlevel)
		nilfs_btree_abort_update_v(btree, path, level);
	if (!buffer_nilfs_volatile(path[level].bp_bh))
		nilfs_btree_abort_update_v(btree, path, level);
	return ret;
}
1851 | |||
/*
 * nilfs_btree_commit_propagate_v - commit the updates prepared by
 * nilfs_btree_prepare_propagate_v for levels @minlevel..@maxlevel,
 * bottom-up, skipping @minlevel if its buffer was already volatile
 * (mirroring the skip in the prepare stage).  @bh is unused here.
 */
static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
					   struct nilfs_btree_path *path,
					   int minlevel,
					   int maxlevel,
					   struct buffer_head *bh)
{
	int level;

	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
		nilfs_btree_commit_update_v(btree, path, minlevel);

	for (level = minlevel + 1; level <= maxlevel; level++)
		nilfs_btree_commit_update_v(btree, path, level);
}
1866 | |||
/*
 * nilfs_btree_propagate_v - propagate dirtiness for a bmap using virtual
 * (v) block addresses.  Temporarily installs @bh as the path buffer at
 * @level, prepares address updates up the tree, and commits them.  If the
 * buffer at @level is volatile (already reassigned), its current virtual
 * address — read from the parent — is marked dirty in the DAT instead of
 * being reassigned again.
 */
static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
				   struct nilfs_btree_path *path,
				   int level,
				   struct buffer_head *bh)
{
	int maxlevel, ret;
	struct nilfs_btree_node *parent;
	__u64 ptr;

	/* hold an extra ref while bh sits in the path; dropped at out: */
	get_bh(bh);
	path[level].bp_bh = bh;
	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
	if (ret < 0)
		goto out;

	if (buffer_nilfs_volatile(path[level].bp_bh)) {
		parent = nilfs_btree_get_node(btree, path, level + 1);
		ptr = nilfs_btree_node_get_ptr(btree, parent,
					       path[level + 1].bp_index);
		ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
		if (ret < 0)
			goto out;
	}

	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);

 out:
	brelse(path[level].bp_bh);
	path[level].bp_bh = NULL;
	return ret;
}
1898 | |||
/*
 * nilfs_btree_propagate - bop_propagate entry point: propagate the
 * dirtiness of buffer @bh up the B-tree.  Derives the key and level from
 * the buffer (a btnode carries its own node header; a data buffer is
 * level DATA and keyed by its file offset), looks up the path down to the
 * parent level, and delegates to the ptr-type-specific btop_propagate
 * (_v for virtual addresses, _p for the DAT's physical addresses).
 *
 * A lookup failure with -ENOENT means a dirty buffer without a mapping
 * entry, which indicates corruption — hence the KERN_CRIT message.
 */
static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
				 struct buffer_head *bh)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	WARN_ON(!buffer_dirty(bh));

	btree = (struct nilfs_btree *)bmap;
	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -ENOMEM;
	nilfs_btree_init_path(btree, path);

	if (buffer_nilfs_node(bh)) {
		node = (struct nilfs_btree_node *)bh->b_data;
		key = nilfs_btree_node_get_key(btree, node, 0);
		level = nilfs_btree_node_get_level(btree, node);
	} else {
		key = nilfs_bmap_data_get_key(bmap, bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
	if (ret < 0) {
		if (unlikely(ret == -ENOENT))
			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
			       __func__, (unsigned long long)key, level);
		goto out;
	}

	ret = btree->bt_ops->btop_propagate(btree, path, level, bh);

 out:
	nilfs_btree_clear_path(btree, path);
	nilfs_btree_free_path(btree, path);

	return ret;
}
1941 | |||
/*
 * nilfs_btree_propagate_gc - bop_propagate for GC (read-only) bmaps:
 * no tree walk is needed; just mark the buffer's virtual block number
 * dirty in the DAT.
 */
static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
				    struct buffer_head *bh)
{
	return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
}
1947 | |||
1948 | static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | ||
1949 | struct list_head *lists, | ||
1950 | struct buffer_head *bh) | ||
1951 | { | ||
1952 | struct list_head *head; | ||
1953 | struct buffer_head *cbh; | ||
1954 | struct nilfs_btree_node *node, *cnode; | ||
1955 | __u64 key, ckey; | ||
1956 | int level; | ||
1957 | |||
1958 | get_bh(bh); | ||
1959 | node = (struct nilfs_btree_node *)bh->b_data; | ||
1960 | key = nilfs_btree_node_get_key(btree, node, 0); | ||
1961 | level = nilfs_btree_node_get_level(btree, node); | ||
1962 | list_for_each(head, &lists[level]) { | ||
1963 | cbh = list_entry(head, struct buffer_head, b_assoc_buffers); | ||
1964 | cnode = (struct nilfs_btree_node *)cbh->b_data; | ||
1965 | ckey = nilfs_btree_node_get_key(btree, cnode, 0); | ||
1966 | if (key < ckey) | ||
1967 | break; | ||
1968 | } | ||
1969 | list_add_tail(&bh->b_assoc_buffers, head); | ||
1970 | } | ||
1971 | |||
/*
 * nilfs_btree_lookup_dirty_buffers - bop_lookup_dirty_buffers: collect
 * every dirty btnode buffer of this bmap onto @listp.  Dirty pages are
 * scanned from the per-inode btnode cache; each dirty buffer is first
 * bucketed into a per-level list kept sorted by first key (see
 * nilfs_btree_add_dirty_buffer), then the level lists are spliced onto
 * the tail of @listp in ascending level order, so callers see leaves
 * before their ancestors.
 */
static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
					     struct list_head *listp)
{
	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	pgoff_t index = 0;
	int level, i;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		INIT_LIST_HEAD(&lists[level]);

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			/* walk the circular buffer ring of each page */
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh))
					nilfs_btree_add_dirty_buffer(btree,
								     lists, bh);
			} while ((bh = bh->b_this_page) != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/* splice per-level lists onto the tail of listp, leaves first */
	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		list_splice(&lists[level], listp->prev);
}
2009 | |||
/*
 * nilfs_btree_assign_p - btop_assign for physical (DAT) bmaps: bind the
 * buffer at @level to its final disk location @blocknr.  For btnode
 * buffers the cache key is changed from the old pointer to @blocknr
 * (prepare + commit back to back, so *@bh may be replaced), then the
 * parent's pointer slot is rewritten and the on-disk binfo (bi_dat)
 * is filled with the entry's key and level.
 */
static int nilfs_btree_assign_p(struct nilfs_btree *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	int ret;

	parent = nilfs_btree_get_node(btree, path, level + 1);
	ptr = nilfs_btree_node_get_ptr(btree, parent,
				       path[level + 1].bp_index);
	if (buffer_nilfs_node(*bh)) {
		path[level].bp_ctxt.oldkey = ptr;
		path[level].bp_ctxt.newkey = blocknr;
		path[level].bp_ctxt.bh = *bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		if (ret < 0)
			return ret;
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
			&path[level].bp_ctxt);
		/* the relocation may have substituted a new buffer */
		*bh = path[level].bp_ctxt.bh;
	}

	nilfs_btree_node_set_ptr(btree, parent,
				 path[level + 1].bp_index, blocknr);

	key = nilfs_btree_node_get_key(btree, parent,
				       path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
	binfo->bi_dat.bi_level = level;

	return 0;
}
2051 | |||
/*
 * nilfs_btree_assign_v - btop_assign for virtual-address bmaps: record
 * in the DAT that virtual block @ptr (read from the parent's pointer
 * slot) now lives at disk block @blocknr, via the bmap's start-ptr
 * prepare/commit pair, and fill the on-disk binfo (bi_v) with the
 * virtual block number and the entry's key.  The buffer itself is not
 * relocated, so *@bh is left untouched.
 */
static int nilfs_btree_assign_v(struct nilfs_btree *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	union nilfs_bmap_ptr_req req;
	int ret;

	parent = nilfs_btree_get_node(btree, path, level + 1);
	ptr = nilfs_btree_node_get_ptr(btree, parent,
				       path[level + 1].bp_index);
	req.bpr_ptr = ptr;
	ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
							    &req);
	if (ret < 0)
		return ret;
	btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
						     &req, blocknr);

	key = nilfs_btree_node_get_key(btree, parent,
				       path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);

	return 0;
}
2084 | |||
/*
 * nilfs_btree_assign - bop_assign entry point: assign the final disk
 * location @blocknr to buffer *@bh and build its on-disk block info.
 * As in nilfs_btree_propagate, key and level are derived from the buffer
 * type, the path is looked up to the parent level, and the work is
 * delegated to the ptr-type-specific btop_assign.  -ENOENT from the
 * lookup is unexpected here (the buffer was mapped when collected).
 */
static int nilfs_btree_assign(struct nilfs_bmap *bmap,
			      struct buffer_head **bh,
			      sector_t blocknr,
			      union nilfs_binfo *binfo)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	btree = (struct nilfs_btree *)bmap;
	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -ENOMEM;
	nilfs_btree_init_path(btree, path);

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(btree, node, 0);
		level = nilfs_btree_node_get_level(btree, node);
	} else {
		key = nilfs_bmap_data_get_key(bmap, *bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	ret = btree->bt_ops->btop_assign(btree, path, level, bh,
					 blocknr, binfo);

 out:
	nilfs_btree_clear_path(btree, path);
	nilfs_btree_free_path(btree, path);

	return ret;
}
2126 | |||
/*
 * nilfs_btree_assign_gc - bop_assign for GC bmaps: no tree walk; tell
 * the DAT to move the block's virtual address (held in b_blocknr for GC
 * buffers) to the new disk block @blocknr, then fill the on-disk binfo
 * with the virtual block number and the key derived from the buffer.
 */
static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct nilfs_btree *btree;
	struct nilfs_btree_node *node;
	__u64 key;
	int ret;

	btree = (struct nilfs_btree *)bmap;
	ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(btree, node, 0);
	} else
		key = nilfs_bmap_data_get_key(bmap, *bh);

	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);

	return 0;
}
2154 | |||
/*
 * nilfs_btree_mark - bop_mark: mark the block addressed by (@key, @level)
 * dirty.  Looks up the pointer one level above @level, loads the block,
 * dirties it if clean, releases it, and flags the whole bmap dirty.
 * -ENOENT from either the lookup or the block read is unexpected.
 */
static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
{
	struct buffer_head *bh;
	struct nilfs_btree *btree;
	struct nilfs_btree_path *path;
	__u64 ptr;
	int ret;

	btree = (struct nilfs_btree *)bmap;
	path = nilfs_btree_alloc_path(btree);
	if (path == NULL)
		return -ENOMEM;
	nilfs_btree_init_path(btree, path);

	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}
	ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	if (!buffer_dirty(bh))
		nilfs_btnode_mark_dirty(bh);
	nilfs_bmap_put_block(&btree->bt_bmap, bh);
	if (!nilfs_bmap_dirty(&btree->bt_bmap))
		nilfs_bmap_set_dirty(&btree->bt_bmap);

 out:
	nilfs_btree_clear_path(btree, path);
	nilfs_btree_free_path(btree, path);
	return ret;
}
2191 | |||
/* bmap operation table for regular (writable) B-tree mappings */
static const struct nilfs_bmap_operations nilfs_btree_ops = {
	.bop_lookup		=	nilfs_btree_lookup,
	.bop_insert		=	nilfs_btree_insert,
	.bop_delete		=	nilfs_btree_delete,
	.bop_clear		=	NULL,

	.bop_propagate		=	nilfs_btree_propagate,

	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,

	.bop_assign		=	nilfs_btree_assign,
	.bop_mark		=	nilfs_btree_mark,

	.bop_last_key		=	nilfs_btree_last_key,
	.bop_check_insert	=	NULL,
	.bop_check_delete	=	nilfs_btree_check_delete,
	.bop_gather_data	=	nilfs_btree_gather_data,
};
2210 | |||
/*
 * bmap operation table for GC (garbage-collection) mappings: only
 * propagate, dirty-buffer collection, and assign are supported; all
 * modifying and query operations are disabled.
 */
static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
	.bop_lookup		=	NULL,
	.bop_insert		=	NULL,
	.bop_delete		=	NULL,
	.bop_clear		=	NULL,

	.bop_propagate		=	nilfs_btree_propagate_gc,

	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,

	.bop_assign		=	nilfs_btree_assign_gc,
	.bop_mark		=	NULL,

	.bop_last_key		=	NULL,
	.bop_check_insert	=	NULL,
	.bop_check_delete	=	NULL,
	.bop_gather_data	=	NULL,
};
2229 | |||
/* B-tree operations for bmaps using virtual block addresses (non-DAT) */
static const struct nilfs_btree_operations nilfs_btree_ops_v = {
	.btop_find_target	=	nilfs_btree_find_target_v,
	.btop_set_target	=	nilfs_btree_set_target_v,
	.btop_propagate		=	nilfs_btree_propagate_v,
	.btop_assign		=	nilfs_btree_assign_v,
};
2236 | |||
/* B-tree operations for the DAT, which uses physical block addresses */
static const struct nilfs_btree_operations nilfs_btree_ops_p = {
	.btop_find_target	=	NULL,
	.btop_set_target	=	NULL,
	.btop_propagate		=	nilfs_btree_propagate_p,
	.btop_assign		=	nilfs_btree_assign_p,
};
2243 | |||
2244 | int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) | ||
2245 | { | ||
2246 | struct nilfs_btree *btree; | ||
2247 | |||
2248 | btree = (struct nilfs_btree *)bmap; | ||
2249 | bmap->b_ops = &nilfs_btree_ops; | ||
2250 | bmap->b_low = low; | ||
2251 | bmap->b_high = high; | ||
2252 | switch (bmap->b_inode->i_ino) { | ||
2253 | case NILFS_DAT_INO: | ||
2254 | btree->bt_ops = &nilfs_btree_ops_p; | ||
2255 | break; | ||
2256 | default: | ||
2257 | btree->bt_ops = &nilfs_btree_ops_v; | ||
2258 | break; | ||
2259 | } | ||
2260 | |||
2261 | return 0; | ||
2262 | } | ||
2263 | |||
2264 | void nilfs_btree_init_gc(struct nilfs_bmap *bmap) | ||
2265 | { | ||
2266 | bmap->b_low = NILFS_BMAP_LARGE_LOW; | ||
2267 | bmap->b_high = NILFS_BMAP_LARGE_HIGH; | ||
2268 | bmap->b_ops = &nilfs_btree_ops_gc; | ||
2269 | } | ||
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h new file mode 100644 index 000000000000..4766deb52fb1 --- /dev/null +++ b/fs/nilfs2/btree.h | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * btree.h - NILFS B-tree. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BTREE_H | ||
24 | #define _NILFS_BTREE_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/list.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "btnode.h" | ||
31 | #include "bmap.h" | ||
32 | |||
33 | struct nilfs_btree; | ||
34 | struct nilfs_btree_path; | ||
35 | |||
36 | /** | ||
37 | * struct nilfs_btree_operations - B-tree operation table | ||
38 | */ | ||
39 | struct nilfs_btree_operations { | ||
40 | __u64 (*btop_find_target)(const struct nilfs_btree *, | ||
41 | const struct nilfs_btree_path *, __u64); | ||
42 | void (*btop_set_target)(struct nilfs_btree *, __u64, __u64); | ||
43 | |||
44 | struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *); | ||
45 | |||
46 | int (*btop_propagate)(struct nilfs_btree *, | ||
47 | struct nilfs_btree_path *, | ||
48 | int, | ||
49 | struct buffer_head *); | ||
50 | int (*btop_assign)(struct nilfs_btree *, | ||
51 | struct nilfs_btree_path *, | ||
52 | int, | ||
53 | struct buffer_head **, | ||
54 | sector_t, | ||
55 | union nilfs_binfo *); | ||
56 | }; | ||
57 | |||
58 | /** | ||
59 | * struct nilfs_btree_node - B-tree node | ||
60 | * @bn_flags: flags | ||
61 | * @bn_level: level | ||
62 | * @bn_nchildren: number of children | ||
63 | * @bn_pad: padding | ||
64 | */ | ||
65 | struct nilfs_btree_node { | ||
66 | __u8 bn_flags; | ||
67 | __u8 bn_level; | ||
68 | __le16 bn_nchildren; | ||
69 | __le32 bn_pad; | ||
70 | }; | ||
71 | |||
/* flags */
#define NILFS_BTREE_NODE_ROOT   0x01	/* node embedded in the bmap root */

/* level */
#define NILFS_BTREE_LEVEL_DATA          0	/* data blocks (leaves) */
#define NILFS_BTREE_LEVEL_NODE_MIN      (NILFS_BTREE_LEVEL_DATA + 1)
#define NILFS_BTREE_LEVEL_MAX           14	/* exclusive upper bound */
79 | |||
80 | /** | ||
81 | * struct nilfs_btree - B-tree structure | ||
82 | * @bt_bmap: bmap base structure | ||
83 | * @bt_ops: B-tree operation table | ||
84 | */ | ||
85 | struct nilfs_btree { | ||
86 | struct nilfs_bmap bt_bmap; | ||
87 | |||
88 | /* B-tree-specific members */ | ||
89 | const struct nilfs_btree_operations *bt_ops; | ||
90 | }; | ||
91 | |||
92 | |||
93 | #define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE | ||
94 | #define NILFS_BTREE_ROOT_NCHILDREN_MAX \ | ||
95 | ((NILFS_BTREE_ROOT_SIZE - sizeof(struct nilfs_btree_node)) / \ | ||
96 | (sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */))) | ||
97 | #define NILFS_BTREE_ROOT_NCHILDREN_MIN 0 | ||
98 | #define NILFS_BTREE_NODE_EXTRA_PAD_SIZE (sizeof(__le64)) | ||
99 | #define NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) \ | ||
100 | (((nodesize) - sizeof(struct nilfs_btree_node) - \ | ||
101 | NILFS_BTREE_NODE_EXTRA_PAD_SIZE) / \ | ||
102 | (sizeof(__le64 /* dkey */) + sizeof(__le64 /* dptr */))) | ||
103 | #define NILFS_BTREE_NODE_NCHILDREN_MIN(nodesize) \ | ||
104 | ((NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) - 1) / 2 + 1) | ||
105 | #define NILFS_BTREE_KEY_MIN ((__u64)0) | ||
106 | #define NILFS_BTREE_KEY_MAX (~(__u64)0) | ||
107 | |||
108 | |||
109 | int nilfs_btree_path_cache_init(void); | ||
110 | void nilfs_btree_path_cache_destroy(void); | ||
111 | int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64); | ||
112 | int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, | ||
113 | const __u64 *, const __u64 *, | ||
114 | int, __u64, __u64); | ||
115 | void nilfs_btree_init_gc(struct nilfs_bmap *); | ||
116 | |||
117 | #endif /* _NILFS_BTREE_H */ | ||
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c new file mode 100644 index 000000000000..e90b60dfced9 --- /dev/null +++ b/fs/nilfs2/cpfile.c | |||
@@ -0,0 +1,925 @@ | |||
1 | /* | ||
2 | * cpfile.c - NILFS checkpoint file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/fs.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "mdt.h" | ||
30 | #include "cpfile.h" | ||
31 | |||
32 | |||
/* number of checkpoint entries that fit in one cpfile block */
static inline unsigned long
nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile)
{
	return NILFS_MDT(cpfile)->mi_entries_per_block;
}
38 | |||
/* block number from the beginning of the file */
static unsigned long
nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
{
	/* checkpoint numbers start at 1, hence the "- 1" after shifting
	 * past the entries reserved for the file header */
	__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
	/* do_div() divides tcno in place; the quotient is the block # */
	do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
	return (unsigned long)tcno;
}
47 | |||
/* offset in block */
static unsigned long
nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
{
	__u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
	/* do_div() returns the remainder: the entry index within block */
	return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
}
55 | |||
/*
 * Number of checkpoint entries from @curr to the end of its block,
 * clamped so that the range never reaches checkpoint number @max.
 */
static unsigned long
nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
				  __u64 curr,
				  __u64 max)
{
	return min_t(__u64,
		     nilfs_cpfile_checkpoints_per_block(cpfile) -
		     nilfs_cpfile_get_offset(cpfile, curr),
		     max - curr);
}
66 | |||
/* whether @cno lives in the first block (which also holds the header) */
static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile,
					   __u64 cno)
{
	return nilfs_cpfile_get_blkoff(cpfile, cno) == 0;
}
72 | |||
/*
 * Add @n to the block's valid-checkpoint counter and return the new
 * count.  The counter is kept in the cp_checkpoints_count field of the
 * first checkpoint slot of the block (cp points at bh_offset, i.e. the
 * block start) — presumably why the first block, shared with the header,
 * is exempted by callers; confirm against the on-disk format docs.
 */
static unsigned int
nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile,
					 struct buffer_head *bh,
					 void *kaddr,
					 unsigned int n)
{
	struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
	unsigned int count;

	count = le32_to_cpu(cp->cp_checkpoints_count) + n;
	cp->cp_checkpoints_count = cpu_to_le32(count);
	return count;
}
86 | |||
/*
 * Subtract @n from the block's valid-checkpoint counter (stored in the
 * first checkpoint slot; see the _add_ variant above) and return the
 * new count.  Underflow indicates an accounting bug, hence the WARN_ON.
 */
static unsigned int
nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile,
					 struct buffer_head *bh,
					 void *kaddr,
					 unsigned int n)
{
	struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
	unsigned int count;

	WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n);
	count = le32_to_cpu(cp->cp_checkpoints_count) - n;
	cp->cp_checkpoints_count = cpu_to_le32(count);
	return count;
}
101 | |||
/* header structure located at the start of the (first) cpfile block */
static inline struct nilfs_cpfile_header *
nilfs_cpfile_block_get_header(const struct inode *cpfile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	return kaddr + bh_offset(bh);
}
109 | |||
/* address of checkpoint @cno's entry within its mapped block */
static struct nilfs_checkpoint *
nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno,
				  struct buffer_head *bh,
				  void *kaddr)
{
	return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) *
		NILFS_MDT(cpfile)->mi_entry_size;
}
118 | |||
119 | static void nilfs_cpfile_block_init(struct inode *cpfile, | ||
120 | struct buffer_head *bh, | ||
121 | void *kaddr) | ||
122 | { | ||
123 | struct nilfs_checkpoint *cp = kaddr + bh_offset(bh); | ||
124 | size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; | ||
125 | int n = nilfs_cpfile_checkpoints_per_block(cpfile); | ||
126 | |||
127 | while (n-- > 0) { | ||
128 | nilfs_checkpoint_set_invalid(cp); | ||
129 | cp = (void *)cp + cpsz; | ||
130 | } | ||
131 | } | ||
132 | |||
/* read (never create: create flag is 0) the header block, block 0 */
static inline int nilfs_cpfile_get_header_block(struct inode *cpfile,
						struct buffer_head **bhp)
{
	return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
}
138 | |||
/*
 * Read (and, if @create, allocate) the block containing checkpoint @cno;
 * newly created blocks have all slots marked invalid by the init hook.
 */
static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
						    __u64 cno,
						    int create,
						    struct buffer_head **bhp)
{
	return nilfs_mdt_get_block(cpfile,
				   nilfs_cpfile_get_blkoff(cpfile, cno),
				   create, nilfs_cpfile_block_init, bhp);
}
148 | |||
/* delete the entire block that holds checkpoint @cno */
static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
						       __u64 cno)
{
	return nilfs_mdt_delete_block(cpfile,
				      nilfs_cpfile_get_blkoff(cpfile, cno));
}
155 | |||
156 | /** | ||
157 | * nilfs_cpfile_get_checkpoint - get a checkpoint | ||
158 | * @cpfile: inode of checkpoint file | ||
159 | * @cno: checkpoint number | ||
160 | * @create: create flag | ||
161 | * @cpp: pointer to a checkpoint | ||
162 | * @bhp: pointer to a buffer head | ||
163 | * | ||
164 | * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint | ||
165 | * specified by @cno. A new checkpoint will be created if @cno is the current | ||
166 | * checkpoint number and @create is nonzero. | ||
167 | * | ||
168 | * Return Value: On success, 0 is returned, and the checkpoint and the | ||
169 | * buffer head of the buffer on which the checkpoint is located are stored in | ||
170 | * the place pointed by @cpp and @bhp, respectively. On error, one of the | ||
171 | * following negative error codes is returned. | ||
172 | * | ||
173 | * %-EIO - I/O error. | ||
174 | * | ||
175 | * %-ENOMEM - Insufficient amount of memory available. | ||
176 | * | ||
177 | * %-ENOENT - No such checkpoint. | ||
178 | * | ||
179 | * %-EINVAL - invalid checkpoint. | ||
180 | */ | ||
181 | int nilfs_cpfile_get_checkpoint(struct inode *cpfile, | ||
182 | __u64 cno, | ||
183 | int create, | ||
184 | struct nilfs_checkpoint **cpp, | ||
185 | struct buffer_head **bhp) | ||
186 | { | ||
187 | struct buffer_head *header_bh, *cp_bh; | ||
188 | struct nilfs_cpfile_header *header; | ||
189 | struct nilfs_checkpoint *cp; | ||
190 | void *kaddr; | ||
191 | int ret; | ||
192 | |||
193 | if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) || | ||
194 | (cno < nilfs_mdt_cno(cpfile) && create))) | ||
195 | return -EINVAL; | ||
196 | |||
197 | down_write(&NILFS_MDT(cpfile)->mi_sem); | ||
198 | |||
199 | ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); | ||
200 | if (ret < 0) | ||
201 | goto out_sem; | ||
202 | ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh); | ||
203 | if (ret < 0) | ||
204 | goto out_header; | ||
205 | kaddr = kmap(cp_bh->b_page); | ||
206 | cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); | ||
207 | if (nilfs_checkpoint_invalid(cp)) { | ||
208 | if (!create) { | ||
209 | kunmap(cp_bh->b_page); | ||
210 | brelse(cp_bh); | ||
211 | ret = -ENOENT; | ||
212 | goto out_header; | ||
213 | } | ||
214 | /* a newly-created checkpoint */ | ||
215 | nilfs_checkpoint_clear_invalid(cp); | ||
216 | if (!nilfs_cpfile_is_in_first(cpfile, cno)) | ||
217 | nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, | ||
218 | kaddr, 1); | ||
219 | nilfs_mdt_mark_buffer_dirty(cp_bh); | ||
220 | |||
221 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
222 | header = nilfs_cpfile_block_get_header(cpfile, header_bh, | ||
223 | kaddr); | ||
224 | le64_add_cpu(&header->ch_ncheckpoints, 1); | ||
225 | kunmap_atomic(kaddr, KM_USER0); | ||
226 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
227 | nilfs_mdt_mark_dirty(cpfile); | ||
228 | } | ||
229 | |||
230 | if (cpp != NULL) | ||
231 | *cpp = cp; | ||
232 | *bhp = cp_bh; | ||
233 | |||
234 | out_header: | ||
235 | brelse(header_bh); | ||
236 | |||
237 | out_sem: | ||
238 | up_write(&NILFS_MDT(cpfile)->mi_sem); | ||
239 | return ret; | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * nilfs_cpfile_put_checkpoint - put a checkpoint | ||
244 | * @cpfile: inode of checkpoint file | ||
245 | * @cno: checkpoint number | ||
246 | * @bh: buffer head | ||
247 | * | ||
248 | * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint | ||
249 | * specified by @cno. @bh must be the buffer head which has been returned by | ||
250 | * a previous call to nilfs_cpfile_get_checkpoint() with @cno. | ||
251 | */ | ||
252 | void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno, | ||
253 | struct buffer_head *bh) | ||
254 | { | ||
255 | kunmap(bh->b_page); | ||
256 | brelse(bh); | ||
257 | } | ||
258 | |||
259 | /** | ||
260 | * nilfs_cpfile_delete_checkpoints - delete checkpoints | ||
261 | * @cpfile: inode of checkpoint file | ||
262 | * @start: start checkpoint number | ||
263 | * @end: end checkpoint numer | ||
264 | * | ||
265 | * Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in | ||
266 | * the period from @start to @end, excluding @end itself. The checkpoints | ||
267 | * which have been already deleted are ignored. | ||
268 | * | ||
269 | * Return Value: On success, 0 is returned. On error, one of the following | ||
270 | * negative error codes is returned. | ||
271 | * | ||
272 | * %-EIO - I/O error. | ||
273 | * | ||
274 | * %-ENOMEM - Insufficient amount of memory available. | ||
275 | * | ||
276 | * %-EINVAL - invalid checkpoints. | ||
277 | */ | ||
278 | int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, | ||
279 | __u64 start, | ||
280 | __u64 end) | ||
281 | { | ||
282 | struct buffer_head *header_bh, *cp_bh; | ||
283 | struct nilfs_cpfile_header *header; | ||
284 | struct nilfs_checkpoint *cp; | ||
285 | size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size; | ||
286 | __u64 cno; | ||
287 | void *kaddr; | ||
288 | unsigned long tnicps; | ||
289 | int ret, ncps, nicps, count, i; | ||
290 | |||
291 | if (unlikely(start == 0 || start > end)) { | ||
292 | printk(KERN_ERR "%s: invalid range of checkpoint numbers: " | ||
293 | "[%llu, %llu)\n", __func__, | ||
294 | (unsigned long long)start, (unsigned long long)end); | ||
295 | return -EINVAL; | ||
296 | } | ||
297 | |||
298 | /* cannot delete the latest checkpoint */ | ||
299 | if (start == nilfs_mdt_cno(cpfile) - 1) | ||
300 | return -EPERM; | ||
301 | |||
302 | down_write(&NILFS_MDT(cpfile)->mi_sem); | ||
303 | |||
304 | ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); | ||
305 | if (ret < 0) | ||
306 | goto out_sem; | ||
307 | tnicps = 0; | ||
308 | |||
309 | for (cno = start; cno < end; cno += ncps) { | ||
310 | ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end); | ||
311 | ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); | ||
312 | if (ret < 0) { | ||
313 | if (ret != -ENOENT) | ||
314 | goto out_sem; | ||
315 | /* skip hole */ | ||
316 | ret = 0; | ||
317 | continue; | ||
318 | } | ||
319 | |||
320 | kaddr = kmap_atomic(cp_bh->b_page, KM_USER0); | ||
321 | cp = nilfs_cpfile_block_get_checkpoint( | ||
322 | cpfile, cno, cp_bh, kaddr); | ||
323 | nicps = 0; | ||
324 | for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) { | ||
325 | WARN_ON(nilfs_checkpoint_snapshot(cp)); | ||
326 | if (!nilfs_checkpoint_invalid(cp)) { | ||
327 | nilfs_checkpoint_set_invalid(cp); | ||
328 | nicps++; | ||
329 | } | ||
330 | } | ||
331 | if (nicps > 0) { | ||
332 | tnicps += nicps; | ||
333 | nilfs_mdt_mark_buffer_dirty(cp_bh); | ||
334 | nilfs_mdt_mark_dirty(cpfile); | ||
335 | if (!nilfs_cpfile_is_in_first(cpfile, cno) && | ||
336 | (count = nilfs_cpfile_block_sub_valid_checkpoints( | ||
337 | cpfile, cp_bh, kaddr, nicps)) == 0) { | ||
338 | /* make hole */ | ||
339 | kunmap_atomic(kaddr, KM_USER0); | ||
340 | brelse(cp_bh); | ||
341 | ret = nilfs_cpfile_delete_checkpoint_block( | ||
342 | cpfile, cno); | ||
343 | if (ret == 0) | ||
344 | continue; | ||
345 | printk(KERN_ERR "%s: cannot delete block\n", | ||
346 | __func__); | ||
347 | goto out_sem; | ||
348 | } | ||
349 | } | ||
350 | |||
351 | kunmap_atomic(kaddr, KM_USER0); | ||
352 | brelse(cp_bh); | ||
353 | } | ||
354 | |||
355 | if (tnicps > 0) { | ||
356 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
357 | header = nilfs_cpfile_block_get_header(cpfile, header_bh, | ||
358 | kaddr); | ||
359 | le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); | ||
360 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
361 | nilfs_mdt_mark_dirty(cpfile); | ||
362 | kunmap_atomic(kaddr, KM_USER0); | ||
363 | } | ||
364 | brelse(header_bh); | ||
365 | |||
366 | out_sem: | ||
367 | up_write(&NILFS_MDT(cpfile)->mi_sem); | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile, | ||
372 | struct nilfs_checkpoint *cp, | ||
373 | struct nilfs_cpinfo *ci) | ||
374 | { | ||
375 | ci->ci_flags = le32_to_cpu(cp->cp_flags); | ||
376 | ci->ci_cno = le64_to_cpu(cp->cp_cno); | ||
377 | ci->ci_create = le64_to_cpu(cp->cp_create); | ||
378 | ci->ci_nblk_inc = le64_to_cpu(cp->cp_nblk_inc); | ||
379 | ci->ci_inodes_count = le64_to_cpu(cp->cp_inodes_count); | ||
380 | ci->ci_blocks_count = le64_to_cpu(cp->cp_blocks_count); | ||
381 | ci->ci_next = le64_to_cpu(cp->cp_snapshot_list.ssl_next); | ||
382 | } | ||
383 | |||
/*
 * Scan valid checkpoints from *@cnop towards the current checkpoint
 * number and copy up to @nci of them into @ci, skipping block holes
 * left by deleted ranges.  On success the number of entries written is
 * returned and *@cnop is advanced just past the last reported
 * checkpoint so a subsequent call resumes the scan; a negative error
 * code is returned on failure.
 */
static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
					  struct nilfs_cpinfo *ci, size_t nci)
{
	struct nilfs_checkpoint *cp;
	struct buffer_head *bh;
	size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;	/* on-disk entry size */
	__u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
	void *kaddr;
	int n, ret;
	int ncps, i;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_read(&NILFS_MDT(cpfile)->mi_sem);

	for (n = 0; cno < cur_cno && n < nci; cno += ncps) {
		/* number of checkpoint entries sharing this cpfile block */
		ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
		if (ret < 0) {
			if (ret != -ENOENT)
				goto out;
			continue; /* skip hole */
		}

		kaddr = kmap_atomic(bh->b_page, KM_USER0);
		cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
		for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
			/* report only entries not marked invalid */
			if (!nilfs_checkpoint_invalid(cp))
				nilfs_cpfile_checkpoint_to_cpinfo(
					cpfile, cp, &ci[n++]);
		}
		kunmap_atomic(kaddr, KM_USER0);
		brelse(bh);
	}

	ret = n;
	if (n > 0)
		*cnop = ci[n - 1].ci_cno + 1;	/* resume point for next call */

 out:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
427 | |||
/*
 * Walk the snapshot list and copy up to @nci entries into @ci,
 * starting from snapshot number *@cnop; 0 means "start from the list
 * head in the cpfile header".  On return *@cnop holds the next
 * snapshot number to continue from, or ~0 once the end of the list has
 * been reached.  Returns the number of entries stored, or a negative
 * error code.
 */
static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
					  struct nilfs_cpinfo *ci, size_t nci)
{
	struct buffer_head *bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	__u64 curr = *cnop, next;
	unsigned long curr_blkoff, next_blkoff;
	void *kaddr;
	int n = 0, ret;

	down_read(&NILFS_MDT(cpfile)->mi_sem);

	if (curr == 0) {
		/* begin with the first snapshot linked off the header */
		ret = nilfs_cpfile_get_header_block(cpfile, &bh);
		if (ret < 0)
			goto out;
		kaddr = kmap_atomic(bh->b_page, KM_USER0);
		header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
		curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
		kunmap_atomic(kaddr, KM_USER0);
		brelse(bh);
		if (curr == 0) {
			ret = 0;	/* the snapshot list is empty */
			goto out;
		}
	} else if (unlikely(curr == ~(__u64)0)) {
		/* a previous call already reached the terminator */
		ret = 0;
		goto out;
	}

	curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr);
	ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh);
	if (unlikely(ret < 0)) {
		if (ret == -ENOENT)
			ret = 0; /* No snapshots (started from a hole block) */
		goto out;
	}
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	while (n < nci) {
		cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
		curr = ~(__u64)0; /* Terminator */
		/* stop if the entry is no longer a live snapshot */
		if (unlikely(nilfs_checkpoint_invalid(cp) ||
			     !nilfs_checkpoint_snapshot(cp)))
			break;
		nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]);
		next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
		if (next == 0)
			break; /* reach end of the snapshot list */

		/* remap only when the next snapshot lives in another block */
		next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
		if (curr_blkoff != next_blkoff) {
			kunmap_atomic(kaddr, KM_USER0);
			brelse(bh);
			ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
								0, &bh);
			if (unlikely(ret < 0)) {
				WARN_ON(ret == -ENOENT);
				goto out;
			}
			kaddr = kmap_atomic(bh->b_page, KM_USER0);
		}
		curr = next;
		curr_blkoff = next_blkoff;
	}
	kunmap_atomic(kaddr, KM_USER0);
	brelse(bh);
	*cnop = curr;	/* resume point, or ~0 if the list ended */
	ret = n;

 out:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
502 | |||
503 | /** | ||
504 | * nilfs_cpfile_get_cpinfo - | ||
505 | * @cpfile: | ||
506 | * @cno: | ||
507 | * @ci: | ||
508 | * @nci: | ||
509 | */ | ||
510 | |||
511 | ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, | ||
512 | struct nilfs_cpinfo *ci, size_t nci) | ||
513 | { | ||
514 | switch (mode) { | ||
515 | case NILFS_CHECKPOINT: | ||
516 | return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci); | ||
517 | case NILFS_SNAPSHOT: | ||
518 | return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci); | ||
519 | default: | ||
520 | return -EINVAL; | ||
521 | } | ||
522 | } | ||
523 | |||
/**
 * nilfs_cpfile_delete_checkpoint - delete a single checkpoint
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number to delete
 *
 * Description: nilfs_cpfile_delete_checkpoint() deletes the checkpoint
 * specified by @cno.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOENT - No valid checkpoint @cno exists.
 *
 * %-EPERM - @cno is a snapshot or the latest checkpoint.
 *
 * Other negative codes may be propagated from the lookup or deletion.
 */
int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
{
	struct nilfs_cpinfo ci;
	__u64 tcno = cno;
	ssize_t nci;
	int ret;

	/*
	 * Fetch one entry starting at @cno; if the first valid
	 * checkpoint found is not @cno itself, @cno does not exist.
	 */
	nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1);
	if (nci < 0)
		return nci;
	else if (nci == 0 || ci.ci_cno != cno)
		return -ENOENT;

	/* cannot delete the latest checkpoint nor snapshots */
	ret = nilfs_cpinfo_snapshot(&ci);
	if (ret < 0)
		return ret;
	else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1)
		return -EPERM;

	return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
}
551 | |||
552 | static struct nilfs_snapshot_list * | ||
553 | nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile, | ||
554 | __u64 cno, | ||
555 | struct buffer_head *bh, | ||
556 | void *kaddr) | ||
557 | { | ||
558 | struct nilfs_cpfile_header *header; | ||
559 | struct nilfs_checkpoint *cp; | ||
560 | struct nilfs_snapshot_list *list; | ||
561 | |||
562 | if (cno != 0) { | ||
563 | cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); | ||
564 | list = &cp->cp_snapshot_list; | ||
565 | } else { | ||
566 | header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); | ||
567 | list = &header->ch_snapshot_list; | ||
568 | } | ||
569 | return list; | ||
570 | } | ||
571 | |||
/*
 * Turn checkpoint @cno into a snapshot by linking it into the snapshot
 * list.  The list is doubly linked through the cpfile header, which
 * acts as a sentinel (list position 0), and appears to be kept sorted
 * by checkpoint number; the insertion point is found by walking
 * backwards from the header via ssl_prev.  Returns 0 on success or if
 * @cno is already a snapshot, -ENOENT if the checkpoint is invalid or
 * missing.
 */
static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
{
	struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	struct nilfs_snapshot_list *list;
	__u64 curr, prev;
	unsigned long curr_blkoff, prev_blkoff;
	void *kaddr;
	int ret;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_write(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
	if (ret < 0)
		goto out_sem;
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	if (nilfs_checkpoint_invalid(cp)) {
		ret = -ENOENT;
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}
	if (nilfs_checkpoint_snapshot(cp)) {
		ret = 0;	/* already a snapshot: nothing to do */
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}
	kunmap_atomic(kaddr, KM_USER0);

	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
	if (ret < 0)
		goto out_cp;
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
	list = &header->ch_snapshot_list;
	curr_bh = header_bh;
	get_bh(curr_bh);
	curr = 0;
	curr_blkoff = 0;
	/*
	 * Walk backwards from the sentinel until the previous element
	 * is no larger than @cno; @cno is then spliced in between
	 * @prev and @curr.
	 */
	prev = le64_to_cpu(list->ssl_prev);
	while (prev > cno) {
		prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
		curr = prev;
		if (curr_blkoff != prev_blkoff) {
			/* moved to a different cpfile block: remap */
			kunmap_atomic(kaddr, KM_USER0);
			brelse(curr_bh);
			ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
								0, &curr_bh);
			if (ret < 0)
				goto out_header;
			kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
		}
		curr_blkoff = prev_blkoff;
		cp = nilfs_cpfile_block_get_checkpoint(
			cpfile, curr, curr_bh, kaddr);
		list = &cp->cp_snapshot_list;
		prev = le64_to_cpu(list->ssl_prev);
	}
	kunmap_atomic(kaddr, KM_USER0);

	if (prev != 0) {
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
							&prev_bh);
		if (ret < 0)
			goto out_curr;
	} else {
		/* @prev == 0 denotes the sentinel in the header block */
		prev_bh = header_bh;
		get_bh(prev_bh);
	}

	/* link @cno in: curr->prev = cno */
	kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, curr, curr_bh, kaddr);
	list->ssl_prev = cpu_to_le64(cno);
	kunmap_atomic(kaddr, KM_USER0);

	/* cno->next = curr, cno->prev = prev, and mark it a snapshot */
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
	cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
	nilfs_checkpoint_set_snapshot(cp);
	kunmap_atomic(kaddr, KM_USER0);

	/* prev->next = cno */
	kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, prev, prev_bh, kaddr);
	list->ssl_next = cpu_to_le64(cno);
	kunmap_atomic(kaddr, KM_USER0);

	/* one more snapshot in the global count */
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
	le64_add_cpu(&header->ch_nsnapshots, 1);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(prev_bh);
	nilfs_mdt_mark_buffer_dirty(curr_bh);
	nilfs_mdt_mark_buffer_dirty(cp_bh);
	nilfs_mdt_mark_buffer_dirty(header_bh);
	nilfs_mdt_mark_dirty(cpfile);

	brelse(prev_bh);

 out_curr:
	brelse(curr_bh);

 out_header:
	brelse(header_bh);

 out_cp:
	brelse(cp_bh);

 out_sem:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
690 | |||
/*
 * Revert snapshot @cno back to a plain checkpoint by unlinking it from
 * the doubly-linked snapshot list and clearing its snapshot flag.
 * Returns 0 on success or if @cno is not a snapshot (nothing to do),
 * -ENOENT if the checkpoint is invalid or missing.
 */
static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
{
	struct buffer_head *header_bh, *next_bh, *prev_bh, *cp_bh;
	struct nilfs_cpfile_header *header;
	struct nilfs_checkpoint *cp;
	struct nilfs_snapshot_list *list;
	__u64 next, prev;
	void *kaddr;
	int ret;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_write(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
	if (ret < 0)
		goto out_sem;
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	if (nilfs_checkpoint_invalid(cp)) {
		ret = -ENOENT;
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}
	if (!nilfs_checkpoint_snapshot(cp)) {
		ret = 0;	/* not a snapshot: nothing to do */
		kunmap_atomic(kaddr, KM_USER0);
		goto out_cp;
	}

	/* remember the neighbours before dropping the mapping */
	list = &cp->cp_snapshot_list;
	next = le64_to_cpu(list->ssl_next);
	prev = le64_to_cpu(list->ssl_prev);
	kunmap_atomic(kaddr, KM_USER0);

	ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
	if (ret < 0)
		goto out_cp;
	/* list position 0 denotes the sentinel in the header block */
	if (next != 0) {
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0,
							&next_bh);
		if (ret < 0)
			goto out_header;
	} else {
		next_bh = header_bh;
		get_bh(next_bh);
	}
	if (prev != 0) {
		ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
							&prev_bh);
		if (ret < 0)
			goto out_next;
	} else {
		prev_bh = header_bh;
		get_bh(prev_bh);
	}

	/* next->prev = prev */
	kaddr = kmap_atomic(next_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, next, next_bh, kaddr);
	list->ssl_prev = cpu_to_le64(prev);
	kunmap_atomic(kaddr, KM_USER0);

	/* prev->next = next */
	kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
	list = nilfs_cpfile_block_get_snapshot_list(
		cpfile, prev, prev_bh, kaddr);
	list->ssl_next = cpu_to_le64(next);
	kunmap_atomic(kaddr, KM_USER0);

	/* detach @cno and clear its snapshot flag */
	kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
	cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
	cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
	nilfs_checkpoint_clear_snapshot(cp);
	kunmap_atomic(kaddr, KM_USER0);

	/* one fewer snapshot in the global count */
	kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
	header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
	le64_add_cpu(&header->ch_nsnapshots, -1);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(next_bh);
	nilfs_mdt_mark_buffer_dirty(prev_bh);
	nilfs_mdt_mark_buffer_dirty(cp_bh);
	nilfs_mdt_mark_buffer_dirty(header_bh);
	nilfs_mdt_mark_dirty(cpfile);

	brelse(prev_bh);

 out_next:
	brelse(next_bh);

 out_header:
	brelse(header_bh);

 out_cp:
	brelse(cp_bh);

 out_sem:
	up_write(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
793 | |||
/**
 * nilfs_cpfile_is_snapshot - determine whether a checkpoint is a snapshot
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 *
 * Description: nilfs_cpfile_is_snapshot() tests the snapshot flag of the
 * checkpoint specified by @cno.
 *
 * Return Value: On success, 1 is returned if the checkpoint specified by
 * @cno is a snapshot, or 0 if not. On error, one of the following negative
 * error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 */
int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
{
	struct buffer_head *bh;
	struct nilfs_checkpoint *cp;
	void *kaddr;
	int ret;

	if (cno == 0)
		return -ENOENT; /* checkpoint number 0 is invalid */
	down_read(&NILFS_MDT(cpfile)->mi_sem);

	ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
	if (ret < 0)
		goto out;
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
	/* the flag test itself is the return value (1 or 0) */
	ret = nilfs_checkpoint_snapshot(cp);
	kunmap_atomic(kaddr, KM_USER0);
	brelse(bh);

 out:
	up_read(&NILFS_MDT(cpfile)->mi_sem);
	return ret;
}
835 | |||
/**
 * nilfs_cpfile_change_cpmode - change checkpoint mode
 * @cpfile: inode of checkpoint file
 * @cno: checkpoint number
 * @mode: mode of checkpoint (%NILFS_CHECKPOINT or %NILFS_SNAPSHOT)
 *
 * Description: nilfs_cpfile_change_cpmode() changes the mode of the
 * checkpoint specified by @cno.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - No such checkpoint.
 *
 * %-EBUSY - The snapshot is currently mounted and cannot be changed back
 * to a checkpoint.
 */
int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
{
	struct the_nilfs *nilfs;
	int ret;

	nilfs = NILFS_MDT(cpfile)->mi_nilfs;

	switch (mode) {
	case NILFS_CHECKPOINT:
		/*
		 * Check for protecting existing snapshot mounts:
		 * bd_mount_sem is used to make this operation atomic and
		 * exclusive with a new mount job. Though it doesn't cover
		 * umount, it's enough for the purpose.
		 */
		down(&nilfs->ns_bdev->bd_mount_sem);
		if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
			/* Current implementation does not have to protect
			   plain read-only mounts since they are exclusive
			   with a read/write mount and are protected from the
			   cleaner. */
			ret = -EBUSY;
		} else
			ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
		up(&nilfs->ns_bdev->bd_mount_sem);
		return ret;
	case NILFS_SNAPSHOT:
		return nilfs_cpfile_set_snapshot(cpfile, cno);
	default:
		return -EINVAL;
	}
}
886 | |||
887 | /** | ||
888 | * nilfs_cpfile_get_stat - get checkpoint statistics | ||
889 | * @cpfile: inode of checkpoint file | ||
890 | * @stat: pointer to a structure of checkpoint statistics | ||
891 | * | ||
892 | * Description: nilfs_cpfile_get_stat() returns information about checkpoints. | ||
893 | * | ||
894 | * Return Value: On success, 0 is returned, and checkpoints information is | ||
895 | * stored in the place pointed by @stat. On error, one of the following | ||
896 | * negative error codes is returned. | ||
897 | * | ||
898 | * %-EIO - I/O error. | ||
899 | * | ||
900 | * %-ENOMEM - Insufficient amount of memory available. | ||
901 | */ | ||
902 | int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) | ||
903 | { | ||
904 | struct buffer_head *bh; | ||
905 | struct nilfs_cpfile_header *header; | ||
906 | void *kaddr; | ||
907 | int ret; | ||
908 | |||
909 | down_read(&NILFS_MDT(cpfile)->mi_sem); | ||
910 | |||
911 | ret = nilfs_cpfile_get_header_block(cpfile, &bh); | ||
912 | if (ret < 0) | ||
913 | goto out_sem; | ||
914 | kaddr = kmap_atomic(bh->b_page, KM_USER0); | ||
915 | header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); | ||
916 | cpstat->cs_cno = nilfs_mdt_cno(cpfile); | ||
917 | cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); | ||
918 | cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); | ||
919 | kunmap_atomic(kaddr, KM_USER0); | ||
920 | brelse(bh); | ||
921 | |||
922 | out_sem: | ||
923 | up_read(&NILFS_MDT(cpfile)->mi_sem); | ||
924 | return ret; | ||
925 | } | ||
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h new file mode 100644 index 000000000000..1a8a1008c342 --- /dev/null +++ b/fs/nilfs2/cpfile.h | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * cpfile.h - NILFS checkpoint file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
#ifndef _NILFS_CPFILE_H
#define _NILFS_CPFILE_H

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/nilfs2_fs.h>

/* GFP allocation mask used for cpfile metadata pages */
#define NILFS_CPFILE_GFP NILFS_MDT_GFP


/* acquire checkpoint @cno, optionally creating it; pairs with
   nilfs_cpfile_put_checkpoint() */
int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
				struct nilfs_checkpoint **,
				struct buffer_head **);
/* release a checkpoint obtained with nilfs_cpfile_get_checkpoint() */
void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
/* invalidate all checkpoints in the range [start, end) */
int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
/* delete a single checkpoint (must not be a snapshot or the latest) */
int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
/* switch a checkpoint between NILFS_CHECKPOINT and NILFS_SNAPSHOT */
int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
/* return 1 if the checkpoint is a snapshot, 0 if not, or a negative errno */
int nilfs_cpfile_is_snapshot(struct inode *, __u64);
/* fill a nilfs_cpstat with checkpoint/snapshot statistics */
int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
/* read information on checkpoints or snapshots into an array */
ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int,
				struct nilfs_cpinfo *, size_t);

#endif	/* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c new file mode 100644 index 000000000000..bb8a5818e7f1 --- /dev/null +++ b/fs/nilfs2/dat.c | |||
@@ -0,0 +1,430 @@ | |||
1 | /* | ||
2 | * dat.c - NILFS disk address translation. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/types.h> | ||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/errno.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "mdt.h" | ||
29 | #include "alloc.h" | ||
30 | #include "dat.h" | ||
31 | |||
32 | |||
33 | #define NILFS_CNO_MIN ((__u64)1) | ||
34 | #define NILFS_CNO_MAX (~(__u64)0) | ||
35 | |||
/*
 * Load (or, if @create, instantiate) the DAT entry block containing entry
 * @req->pr_entry_nr.  On success the buffer head is stored in
 * req->pr_entry_bh with a reference that the matching commit/abort
 * helper releases.
 */
static int nilfs_dat_prepare_entry(struct inode *dat,
				   struct nilfs_palloc_req *req, int create)
{
	return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
					    create, &req->pr_entry_bh);
}
42 | |||
/*
 * Complete an entry operation: mark the entry block and the DAT inode
 * dirty, then drop the buffer reference taken by nilfs_dat_prepare_entry().
 */
static void nilfs_dat_commit_entry(struct inode *dat,
				   struct nilfs_palloc_req *req)
{
	nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
	nilfs_mdt_mark_dirty(dat);
	brelse(req->pr_entry_bh);
}
50 | |||
/*
 * Back out of an entry operation: release the buffer reference taken by
 * nilfs_dat_prepare_entry() without dirtying anything.
 */
static void nilfs_dat_abort_entry(struct inode *dat,
				  struct nilfs_palloc_req *req)
{
	brelse(req->pr_entry_bh);
}
56 | |||
/*
 * Prepare allocation of a new virtual block number: reserve a free entry
 * via the persistent allocator, then pin the entry block that will hold
 * it.  Completed by nilfs_dat_commit_alloc(), undone by
 * nilfs_dat_abort_alloc().  Returns 0 on success or a negative error code.
 */
int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	int ret;

	ret = nilfs_palloc_prepare_alloc_entry(dat, req);
	if (ret < 0)
		return ret;

	ret = nilfs_dat_prepare_entry(dat, req, 1);
	if (ret < 0)
		/* roll back the allocator reservation on failure */
		nilfs_palloc_abort_alloc_entry(dat, req);

	return ret;
}
71 | |||
/*
 * Commit a prepared allocation: initialize the new DAT entry to the
 * "allocated but unassigned" state (lifetime [NILFS_CNO_MIN,
 * NILFS_CNO_MAX), no physical block yet), then commit both the
 * allocator reservation and the entry block.
 */
void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	void *kaddr;

	/* atomic kmap: no sleeping between map and unmap */
	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
	entry->de_blocknr = cpu_to_le64(0);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_palloc_commit_alloc_entry(dat, req);
	nilfs_dat_commit_entry(dat, req);
}
88 | |||
/*
 * Abort a prepared allocation, releasing the entry buffer first and then
 * the allocator reservation (reverse of the prepare order).
 */
void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
	nilfs_palloc_abort_alloc_entry(dat, req);
}
94 | |||
/*
 * Prepare deallocation of a virtual block number: reserve the free
 * operation in the persistent allocator, then pin the entry block.
 * Completed by nilfs_dat_commit_free(), undone by nilfs_dat_abort_free().
 * Returns 0 on success or a negative error code.
 */
int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err;

	err = nilfs_palloc_prepare_free_entry(dat, req);
	if (err < 0)
		return err;

	err = nilfs_dat_prepare_entry(dat, req, 0);
	if (err < 0)
		/* roll back the allocator-side preparation */
		nilfs_palloc_abort_free_entry(dat, req);
	return err;
}
109 | |||
/*
 * Commit a prepared deallocation: collapse the entry's lifetime to the
 * empty interval [NILFS_CNO_MIN, NILFS_CNO_MIN) and clear its block
 * number, then commit the entry block and the allocator-side free.
 */
void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
	entry->de_blocknr = cpu_to_le64(0);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_dat_commit_entry(dat, req);
	nilfs_palloc_commit_free_entry(dat, req);
}
126 | |||
/*
 * Abort a prepared deallocation, releasing the entry buffer and the
 * allocator-side reservation (reverse of the prepare order).
 */
void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
	nilfs_palloc_abort_free_entry(dat, req);
}
132 | |||
133 | int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) | ||
134 | { | ||
135 | int ret; | ||
136 | |||
137 | ret = nilfs_dat_prepare_entry(dat, req, 0); | ||
138 | WARN_ON(ret == -ENOENT); | ||
139 | return ret; | ||
140 | } | ||
141 | |||
/*
 * Commit the start of an entry's lifetime: stamp de_start with the
 * current checkpoint number and record the assigned physical block.
 * A consistency check complains if the entry already had a block number
 * or a finite end.
 */
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
			    sector_t blocknr)
{
	struct nilfs_dat_entry *entry;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
	if (entry->de_blocknr != cpu_to_le64(0) ||
	    entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) {
		/*
		 * NOTE(review): de_start was overwritten just above, so
		 * this message reports the new start value, not the one
		 * that was found to be inconsistent.
		 */
		printk(KERN_CRIT
		       "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n",
		       __func__, (unsigned long long)req->pr_entry_nr,
		       (unsigned long long)le64_to_cpu(entry->de_start),
		       (unsigned long long)le64_to_cpu(entry->de_end),
		       (unsigned long long)le64_to_cpu(entry->de_blocknr));
	}
	entry->de_blocknr = cpu_to_le64(blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_dat_commit_entry(dat, req);
}
166 | |||
/* Abort a prepared "start": only the entry buffer needs releasing. */
void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
}
171 | |||
/*
 * Prepare to terminate an entry's lifetime.  Pins the entry block and,
 * when the entry has no physical block assigned (de_blocknr == 0, i.e.
 * it is dead), additionally prepares freeing the virtual block number so
 * that nilfs_dat_commit_end() can release it.  The entry must exist;
 * -ENOENT triggers a warning.
 */
int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	__u64 start;
	sector_t blocknr;
	void *kaddr;
	int ret;

	ret = nilfs_dat_prepare_entry(dat, req, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		return ret;
	}

	/* peek at the entry to decide whether it must also be freed */
	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	start = le64_to_cpu(entry->de_start);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	if (blocknr == 0) {
		ret = nilfs_palloc_prepare_free_entry(dat, req);
		if (ret < 0) {
			nilfs_dat_abort_entry(dat, req);
			return ret;
		}
	}

	return 0;
}
203 | |||
/*
 * Commit the end of an entry's lifetime.  If @dead, the lifetime is
 * collapsed to the empty interval [start, start); otherwise de_end is
 * set to the current checkpoint number.  Entries without a physical
 * block are handed to nilfs_dat_commit_free() so the virtual block
 * number itself is released.
 */
void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
			  int dead)
{
	struct nilfs_dat_entry *entry;
	__u64 start, end;
	sector_t blocknr;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	end = start = le64_to_cpu(entry->de_start);
	if (!dead) {
		end = nilfs_mdt_cno(dat);
		WARN_ON(start > end);	/* lifetime must not be inverted */
	}
	entry->de_end = cpu_to_le64(end);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	if (blocknr == 0)
		nilfs_dat_commit_free(dat, req);
	else
		nilfs_dat_commit_entry(dat, req);
}
229 | |||
/*
 * Abort a prepared "end".  If nilfs_dat_prepare_end() also prepared a
 * free (entry dead: start equals the current checkpoint number and no
 * block assigned), that allocator-side preparation is rolled back first,
 * then the entry buffer is released.
 */
void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
{
	struct nilfs_dat_entry *entry;
	__u64 start;
	sector_t blocknr;
	void *kaddr;

	kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
	entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
					     req->pr_entry_bh, kaddr);
	start = le64_to_cpu(entry->de_start);
	blocknr = le64_to_cpu(entry->de_blocknr);
	kunmap_atomic(kaddr, KM_USER0);

	if (start == nilfs_mdt_cno(dat) && blocknr == 0)
		nilfs_palloc_abort_free_entry(dat, req);
	nilfs_dat_abort_entry(dat, req);
}
248 | |||
249 | /** | ||
250 | * nilfs_dat_mark_dirty - | ||
251 | * @dat: DAT file inode | ||
252 | * @vblocknr: virtual block number | ||
253 | * | ||
254 | * Description: | ||
255 | * | ||
256 | * Return Value: On success, 0 is returned. On error, one of the following | ||
257 | * negative error codes is returned. | ||
258 | * | ||
259 | * %-EIO - I/O error. | ||
260 | * | ||
261 | * %-ENOMEM - Insufficient amount of memory available. | ||
262 | */ | ||
263 | int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) | ||
264 | { | ||
265 | struct nilfs_palloc_req req; | ||
266 | int ret; | ||
267 | |||
268 | req.pr_entry_nr = vblocknr; | ||
269 | ret = nilfs_dat_prepare_entry(dat, &req, 0); | ||
270 | if (ret == 0) | ||
271 | nilfs_dat_commit_entry(dat, &req); | ||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * nilfs_dat_freev - free virtual block numbers | ||
277 | * @dat: DAT file inode | ||
278 | * @vblocknrs: array of virtual block numbers | ||
279 | * @nitems: number of virtual block numbers | ||
280 | * | ||
281 | * Description: nilfs_dat_freev() frees the virtual block numbers specified by | ||
282 | * @vblocknrs and @nitems. | ||
283 | * | ||
284 | * Return Value: On success, 0 is returned. On error, one of the following | ||
285 | * nagative error codes is returned. | ||
286 | * | ||
287 | * %-EIO - I/O error. | ||
288 | * | ||
289 | * %-ENOMEM - Insufficient amount of memory available. | ||
290 | * | ||
291 | * %-ENOENT - The virtual block number have not been allocated. | ||
292 | */ | ||
293 | int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems) | ||
294 | { | ||
295 | return nilfs_palloc_freev(dat, vblocknrs, nitems); | ||
296 | } | ||
297 | |||
298 | /** | ||
299 | * nilfs_dat_move - change a block number | ||
300 | * @dat: DAT file inode | ||
301 | * @vblocknr: virtual block number | ||
302 | * @blocknr: block number | ||
303 | * | ||
304 | * Description: nilfs_dat_move() changes the block number associated with | ||
305 | * @vblocknr to @blocknr. | ||
306 | * | ||
307 | * Return Value: On success, 0 is returned. On error, one of the following | ||
308 | * negative error codes is returned. | ||
309 | * | ||
310 | * %-EIO - I/O error. | ||
311 | * | ||
312 | * %-ENOMEM - Insufficient amount of memory available. | ||
313 | */ | ||
314 | int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) | ||
315 | { | ||
316 | struct buffer_head *entry_bh; | ||
317 | struct nilfs_dat_entry *entry; | ||
318 | void *kaddr; | ||
319 | int ret; | ||
320 | |||
321 | ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); | ||
322 | if (ret < 0) | ||
323 | return ret; | ||
324 | kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); | ||
325 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); | ||
326 | if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { | ||
327 | printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__, | ||
328 | (unsigned long long)vblocknr, | ||
329 | (unsigned long long)le64_to_cpu(entry->de_start), | ||
330 | (unsigned long long)le64_to_cpu(entry->de_end)); | ||
331 | kunmap_atomic(kaddr, KM_USER0); | ||
332 | brelse(entry_bh); | ||
333 | return -EINVAL; | ||
334 | } | ||
335 | WARN_ON(blocknr == 0); | ||
336 | entry->de_blocknr = cpu_to_le64(blocknr); | ||
337 | kunmap_atomic(kaddr, KM_USER0); | ||
338 | |||
339 | nilfs_mdt_mark_buffer_dirty(entry_bh); | ||
340 | nilfs_mdt_mark_dirty(dat); | ||
341 | |||
342 | brelse(entry_bh); | ||
343 | |||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | /** | ||
348 | * nilfs_dat_translate - translate a virtual block number to a block number | ||
349 | * @dat: DAT file inode | ||
350 | * @vblocknr: virtual block number | ||
351 | * @blocknrp: pointer to a block number | ||
352 | * | ||
353 | * Description: nilfs_dat_translate() maps the virtual block number @vblocknr | ||
354 | * to the corresponding block number. | ||
355 | * | ||
356 | * Return Value: On success, 0 is returned and the block number associated | ||
357 | * with @vblocknr is stored in the place pointed by @blocknrp. On error, one | ||
358 | * of the following negative error codes is returned. | ||
359 | * | ||
360 | * %-EIO - I/O error. | ||
361 | * | ||
362 | * %-ENOMEM - Insufficient amount of memory available. | ||
363 | * | ||
364 | * %-ENOENT - A block number associated with @vblocknr does not exist. | ||
365 | */ | ||
366 | int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) | ||
367 | { | ||
368 | struct buffer_head *entry_bh; | ||
369 | struct nilfs_dat_entry *entry; | ||
370 | sector_t blocknr; | ||
371 | void *kaddr; | ||
372 | int ret; | ||
373 | |||
374 | ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); | ||
375 | if (ret < 0) | ||
376 | return ret; | ||
377 | |||
378 | kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); | ||
379 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); | ||
380 | blocknr = le64_to_cpu(entry->de_blocknr); | ||
381 | if (blocknr == 0) { | ||
382 | ret = -ENOENT; | ||
383 | goto out; | ||
384 | } | ||
385 | if (blocknrp != NULL) | ||
386 | *blocknrp = blocknr; | ||
387 | |||
388 | out: | ||
389 | kunmap_atomic(kaddr, KM_USER0); | ||
390 | brelse(entry_bh); | ||
391 | return ret; | ||
392 | } | ||
393 | |||
/*
 * nilfs_dat_get_vinfo - fill in lifetime and block information for an
 * array of virtual block numbers.  @vinfo must be sorted so that entries
 * sharing a DAT entry block are adjacent; each entry block is then read
 * only once.  Returns @nvi on success or a negative error code.
 */
ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo,
			    size_t nvi)
{
	struct buffer_head *entry_bh;
	struct nilfs_dat_entry *entry;
	__u64 first, last;
	void *kaddr;
	unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
	int i, j, n, ret;

	for (i = 0; i < nvi; i += n) {
		ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr,
						   0, &entry_bh);
		if (ret < 0)
			return ret;
		kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
		/* first/last virtual block numbers covered by this block */
		first = vinfo[i].vi_vblocknr;
		do_div(first, entries_per_block);
		first *= entries_per_block;
		last = first + entries_per_block - 1;
		/* consume all following requests that fall in [first, last] */
		for (j = i, n = 0;
		     j < nvi && vinfo[j].vi_vblocknr >= first &&
			     vinfo[j].vi_vblocknr <= last;
		     j++, n++) {
			entry = nilfs_palloc_block_get_entry(
				dat, vinfo[j].vi_vblocknr, entry_bh, kaddr);
			vinfo[j].vi_start = le64_to_cpu(entry->de_start);
			vinfo[j].vi_end = le64_to_cpu(entry->de_end);
			vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr);
		}
		kunmap_atomic(kaddr, KM_USER0);
		brelse(entry_bh);
	}

	return nvi;
}
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h new file mode 100644 index 000000000000..d9560654a4b7 --- /dev/null +++ b/fs/nilfs2/dat.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * dat.h - NILFS disk address translation. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_DAT_H | ||
24 | #define _NILFS_DAT_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/fs.h> | ||
29 | |||
30 | #define NILFS_DAT_GFP NILFS_MDT_GFP | ||
31 | |||
32 | struct nilfs_palloc_req; | ||
33 | |||
34 | int nilfs_dat_translate(struct inode *, __u64, sector_t *); | ||
35 | |||
36 | int nilfs_dat_prepare_alloc(struct inode *, struct nilfs_palloc_req *); | ||
37 | void nilfs_dat_commit_alloc(struct inode *, struct nilfs_palloc_req *); | ||
38 | void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *); | ||
39 | int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); | ||
40 | void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, | ||
41 | sector_t); | ||
42 | void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *); | ||
43 | int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); | ||
44 | void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); | ||
45 | void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); | ||
46 | |||
47 | int nilfs_dat_mark_dirty(struct inode *, __u64); | ||
48 | int nilfs_dat_freev(struct inode *, __u64 *, size_t); | ||
49 | int nilfs_dat_move(struct inode *, __u64, sector_t); | ||
50 | ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t); | ||
51 | |||
52 | #endif /* _NILFS_DAT_H */ | ||
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c new file mode 100644 index 000000000000..54100acc1102 --- /dev/null +++ b/fs/nilfs2/dir.c | |||
@@ -0,0 +1,711 @@ | |||
1 | /* | ||
2 | * dir.c - NILFS directory entry operations | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net> | ||
21 | */ | ||
22 | /* | ||
23 | * linux/fs/ext2/dir.c | ||
24 | * | ||
25 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
26 | * Remy Card (card@masi.ibp.fr) | ||
27 | * Laboratoire MASI - Institut Blaise Pascal | ||
28 | * Universite Pierre et Marie Curie (Paris VI) | ||
29 | * | ||
30 | * from | ||
31 | * | ||
32 | * linux/fs/minix/dir.c | ||
33 | * | ||
34 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
35 | * | ||
36 | * ext2 directory handling functions | ||
37 | * | ||
38 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
39 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
40 | * | ||
41 | * All code that works with directory layout had been switched to pagecache | ||
42 | * and moved here. AV | ||
43 | */ | ||
44 | |||
45 | #include <linux/pagemap.h> | ||
46 | #include <linux/smp_lock.h> | ||
47 | #include "nilfs.h" | ||
48 | #include "page.h" | ||
49 | |||
50 | /* | ||
51 | * nilfs uses block-sized chunks. Arguably, sector-sized ones would be | ||
52 | * more robust, but we have what we have | ||
53 | */ | ||
54 | static inline unsigned nilfs_chunk_size(struct inode *inode) | ||
55 | { | ||
56 | return inode->i_sb->s_blocksize; | ||
57 | } | ||
58 | |||
/* Undo nilfs_get_page(): unmap the page and drop its cache reference. */
static inline void nilfs_put_page(struct page *page)
{
	kunmap(page);
	page_cache_release(page);
}
64 | |||
65 | static inline unsigned long dir_pages(struct inode *inode) | ||
66 | { | ||
67 | return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * Return the offset into page `page_nr' of the last valid | ||
72 | * byte in that page, plus one. | ||
73 | */ | ||
74 | static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) | ||
75 | { | ||
76 | unsigned last_byte = inode->i_size; | ||
77 | |||
78 | last_byte -= page_nr << PAGE_CACHE_SHIFT; | ||
79 | if (last_byte > PAGE_CACHE_SIZE) | ||
80 | last_byte = PAGE_CACHE_SIZE; | ||
81 | return last_byte; | ||
82 | } | ||
83 | |||
/*
 * Begin a write to the byte range [from, to) of a directory page,
 * mapping blocks as needed.  The uninterruptible variant passes
 * AOP_FLAG_UNINTERRUPTIBLE so the write cannot be cut short.
 */
static int nilfs_prepare_chunk_uninterruptible(struct page *page,
					       struct address_space *mapping,
					       unsigned from, unsigned to)
{
	loff_t pos = page_offset(page) + from;
	return block_write_begin(NULL, mapping, pos, to - from,
				 AOP_FLAG_UNINTERRUPTIBLE, &page,
				 NULL, nilfs_get_block);
}
93 | |||
/*
 * Begin a write to the byte range [from, to) of a directory page,
 * mapping blocks as needed (interruptible variant, no AOP flags).
 */
static int nilfs_prepare_chunk(struct page *page,
			       struct address_space *mapping,
			       unsigned from, unsigned to)
{
	loff_t pos = page_offset(page) + from;
	return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
				 NULL, nilfs_get_block);
}
102 | |||
/*
 * Finish a write to the byte range [from, to) of a directory page:
 * complete the block write, extend i_size if the write grew the
 * directory, request a synchronous transaction for DIRSYNC directories,
 * and register the dirtied buffers with the segment constructor.
 * Unlocks the page.  Returns 0 or a negative error code.
 */
static int nilfs_commit_chunk(struct page *page,
			      struct address_space *mapping,
			      unsigned from, unsigned to)
{
	struct inode *dir = mapping->host;
	struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
	loff_t pos = page_offset(page) + from;
	unsigned len = to - from;
	unsigned nr_dirty, copied;
	int err;

	/* count clean buffers before block_write_end() dirties them */
	nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
	copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
	if (pos + copied > dir->i_size) {
		i_size_write(dir, pos + copied);
		mark_inode_dirty(dir);
	}
	if (IS_DIRSYNC(dir))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
	unlock_page(page);
	return err;
}
126 | |||
127 | static void nilfs_check_page(struct page *page) | ||
128 | { | ||
129 | struct inode *dir = page->mapping->host; | ||
130 | struct super_block *sb = dir->i_sb; | ||
131 | unsigned chunk_size = nilfs_chunk_size(dir); | ||
132 | char *kaddr = page_address(page); | ||
133 | unsigned offs, rec_len; | ||
134 | unsigned limit = PAGE_CACHE_SIZE; | ||
135 | struct nilfs_dir_entry *p; | ||
136 | char *error; | ||
137 | |||
138 | if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { | ||
139 | limit = dir->i_size & ~PAGE_CACHE_MASK; | ||
140 | if (limit & (chunk_size - 1)) | ||
141 | goto Ebadsize; | ||
142 | if (!limit) | ||
143 | goto out; | ||
144 | } | ||
145 | for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { | ||
146 | p = (struct nilfs_dir_entry *)(kaddr + offs); | ||
147 | rec_len = le16_to_cpu(p->rec_len); | ||
148 | |||
149 | if (rec_len < NILFS_DIR_REC_LEN(1)) | ||
150 | goto Eshort; | ||
151 | if (rec_len & 3) | ||
152 | goto Ealign; | ||
153 | if (rec_len < NILFS_DIR_REC_LEN(p->name_len)) | ||
154 | goto Enamelen; | ||
155 | if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) | ||
156 | goto Espan; | ||
157 | } | ||
158 | if (offs != limit) | ||
159 | goto Eend; | ||
160 | out: | ||
161 | SetPageChecked(page); | ||
162 | return; | ||
163 | |||
164 | /* Too bad, we had an error */ | ||
165 | |||
166 | Ebadsize: | ||
167 | nilfs_error(sb, "nilfs_check_page", | ||
168 | "size of directory #%lu is not a multiple of chunk size", | ||
169 | dir->i_ino | ||
170 | ); | ||
171 | goto fail; | ||
172 | Eshort: | ||
173 | error = "rec_len is smaller than minimal"; | ||
174 | goto bad_entry; | ||
175 | Ealign: | ||
176 | error = "unaligned directory entry"; | ||
177 | goto bad_entry; | ||
178 | Enamelen: | ||
179 | error = "rec_len is too small for name_len"; | ||
180 | goto bad_entry; | ||
181 | Espan: | ||
182 | error = "directory entry across blocks"; | ||
183 | bad_entry: | ||
184 | nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - " | ||
185 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | ||
186 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
187 | (unsigned long) le64_to_cpu(p->inode), | ||
188 | rec_len, p->name_len); | ||
189 | goto fail; | ||
190 | Eend: | ||
191 | p = (struct nilfs_dir_entry *)(kaddr + offs); | ||
192 | nilfs_error(sb, "nilfs_check_page", | ||
193 | "entry in directory #%lu spans the page boundary" | ||
194 | "offset=%lu, inode=%lu", | ||
195 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | ||
196 | (unsigned long) le64_to_cpu(p->inode)); | ||
197 | fail: | ||
198 | SetPageChecked(page); | ||
199 | SetPageError(page); | ||
200 | } | ||
201 | |||
/*
 * Read directory page @n through the pagecache, kmap it, and validate
 * its entries on first use (nilfs_check_page).  On success the page is
 * returned mapped; release with nilfs_put_page().  Returns ERR_PTR(-EIO)
 * on read or validation failure.
 */
static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
{
	struct address_space *mapping = dir->i_mapping;
	struct page *page = read_cache_page(mapping, n,
				(filler_t *)mapping->a_ops->readpage, NULL);
	if (!IS_ERR(page)) {
		wait_on_page_locked(page);
		kmap(page);
		if (!PageUptodate(page))
			goto fail;
		if (!PageChecked(page))
			nilfs_check_page(page);
		if (PageError(page))
			goto fail;
	}
	return page;

 fail:
	nilfs_put_page(page);
	return ERR_PTR(-EIO);
}
223 | |||
224 | /* | ||
225 | * NOTE! unlike strncmp, nilfs_match returns 1 for success, 0 for failure. | ||
226 | * | ||
227 | * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller. | ||
228 | */ | ||
229 | static int | ||
230 | nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de) | ||
231 | { | ||
232 | if (len != de->name_len) | ||
233 | return 0; | ||
234 | if (!de->inode) | ||
235 | return 0; | ||
236 | return !memcmp(name, de->name, len); | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * p is at least 6 bytes before the end of page | ||
241 | */ | ||
242 | static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) | ||
243 | { | ||
244 | return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); | ||
245 | } | ||
246 | |||
/* Map on-disk NILFS_FT_* file types to readdir DT_* d_type codes. */
static unsigned char
nilfs_filetype_table[NILFS_FT_MAX] = {
	[NILFS_FT_UNKNOWN]	= DT_UNKNOWN,
	[NILFS_FT_REG_FILE]	= DT_REG,
	[NILFS_FT_DIR]		= DT_DIR,
	[NILFS_FT_CHRDEV]	= DT_CHR,
	[NILFS_FT_BLKDEV]	= DT_BLK,
	[NILFS_FT_FIFO]		= DT_FIFO,
	[NILFS_FT_SOCK]		= DT_SOCK,
	[NILFS_FT_SYMLINK]	= DT_LNK,
};
258 | |||
/* Map i_mode S_IF* format bits (shifted down by S_SHIFT) to NILFS_FT_*. */
#define S_SHIFT 12
static unsigned char
nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= NILFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= NILFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= NILFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= NILFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= NILFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= NILFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= NILFS_FT_SYMLINK,
};
270 | |||
271 | static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) | ||
272 | { | ||
273 | mode_t mode = inode->i_mode; | ||
274 | |||
275 | de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | ||
276 | } | ||
277 | |||
/*
 * readdir implementation: walk directory pages from the current f_pos,
 * emitting each live entry through @filldir until the callback asks to
 * stop or the directory is exhausted.  f_pos tracks the byte offset of
 * the next entry.  Returns 0 on success or -EIO on a bad page/entry.
 */
static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	loff_t pos = filp->f_pos;
	struct inode *inode = filp->f_dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	unsigned int offset = pos & ~PAGE_CACHE_MASK;
	unsigned long n = pos >> PAGE_CACHE_SHIFT;
	unsigned long npages = dir_pages(inode);
/*	unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
	unsigned char *types = NULL;
	int ret;

	/* past the last possible entry: nothing to emit */
	if (pos > inode->i_size - NILFS_DIR_REC_LEN(1))
		goto success;

	types = nilfs_filetype_table;

	for ( ; n < npages; n++, offset = 0) {
		char *kaddr, *limit;
		struct nilfs_dir_entry *de;
		struct page *page = nilfs_get_page(inode, n);

		if (IS_ERR(page)) {
			nilfs_error(sb, __func__, "bad page in #%lu",
				    inode->i_ino);
			/* skip the unreadable page so we don't loop on it */
			filp->f_pos += PAGE_CACHE_SIZE - offset;
			ret = -EIO;
			goto done;
		}
		kaddr = page_address(page);
		de = (struct nilfs_dir_entry *)(kaddr + offset);
		limit = kaddr + nilfs_last_byte(inode, n) -
			NILFS_DIR_REC_LEN(1);
		for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) {
			if (de->rec_len == 0) {
				nilfs_error(sb, __func__,
					    "zero-length directory entry");
				ret = -EIO;
				nilfs_put_page(page);
				goto done;
			}
			if (de->inode) {
				int over;
				unsigned char d_type = DT_UNKNOWN;

				if (types && de->file_type < NILFS_FT_MAX)
					d_type = types[de->file_type];

				offset = (char *)de - kaddr;
				over = filldir(dirent, de->name, de->name_len,
						(n<<PAGE_CACHE_SHIFT) | offset,
						le64_to_cpu(de->inode), d_type);
				if (over) {
					/* caller's buffer is full */
					nilfs_put_page(page);
					goto success;
				}
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		nilfs_put_page(page);
	}

success:
	ret = 0;
done:
	return ret;
}
345 | |||
346 | /* | ||
347 | * nilfs_find_entry() | ||
348 | * | ||
349 | * finds an entry in the specified directory with the wanted name. It | ||
350 | * returns the page in which the entry was found, and the entry itself | ||
351 | * (as a parameter - res_dir). Page is returned mapped and unlocked. | ||
352 | * Entry is guaranteed to be valid. | ||
353 | */ | ||
354 | struct nilfs_dir_entry * | ||
355 | nilfs_find_entry(struct inode *dir, struct dentry *dentry, | ||
356 | struct page **res_page) | ||
357 | { | ||
358 | const char *name = dentry->d_name.name; | ||
359 | int namelen = dentry->d_name.len; | ||
360 | unsigned reclen = NILFS_DIR_REC_LEN(namelen); | ||
361 | unsigned long start, n; | ||
362 | unsigned long npages = dir_pages(dir); | ||
363 | struct page *page = NULL; | ||
364 | struct nilfs_inode_info *ei = NILFS_I(dir); | ||
365 | struct nilfs_dir_entry *de; | ||
366 | |||
367 | if (npages == 0) | ||
368 | goto out; | ||
369 | |||
370 | /* OFFSET_CACHE */ | ||
371 | *res_page = NULL; | ||
372 | |||
373 | start = ei->i_dir_start_lookup; | ||
374 | if (start >= npages) | ||
375 | start = 0; | ||
376 | n = start; | ||
377 | do { | ||
378 | char *kaddr; | ||
379 | page = nilfs_get_page(dir, n); | ||
380 | if (!IS_ERR(page)) { | ||
381 | kaddr = page_address(page); | ||
382 | de = (struct nilfs_dir_entry *)kaddr; | ||
383 | kaddr += nilfs_last_byte(dir, n) - reclen; | ||
384 | while ((char *) de <= kaddr) { | ||
385 | if (de->rec_len == 0) { | ||
386 | nilfs_error(dir->i_sb, __func__, | ||
387 | "zero-length directory entry"); | ||
388 | nilfs_put_page(page); | ||
389 | goto out; | ||
390 | } | ||
391 | if (nilfs_match(namelen, name, de)) | ||
392 | goto found; | ||
393 | de = nilfs_next_entry(de); | ||
394 | } | ||
395 | nilfs_put_page(page); | ||
396 | } | ||
397 | if (++n >= npages) | ||
398 | n = 0; | ||
399 | /* next page is past the blocks we've got */ | ||
400 | if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { | ||
401 | nilfs_error(dir->i_sb, __func__, | ||
402 | "dir %lu size %lld exceeds block cout %llu", | ||
403 | dir->i_ino, dir->i_size, | ||
404 | (unsigned long long)dir->i_blocks); | ||
405 | goto out; | ||
406 | } | ||
407 | } while (n != start); | ||
408 | out: | ||
409 | return NULL; | ||
410 | |||
411 | found: | ||
412 | *res_page = page; | ||
413 | ei->i_dir_start_lookup = n; | ||
414 | return de; | ||
415 | } | ||
416 | |||
417 | struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) | ||
418 | { | ||
419 | struct page *page = nilfs_get_page(dir, 0); | ||
420 | struct nilfs_dir_entry *de = NULL; | ||
421 | |||
422 | if (!IS_ERR(page)) { | ||
423 | de = nilfs_next_entry( | ||
424 | (struct nilfs_dir_entry *)page_address(page)); | ||
425 | *p = page; | ||
426 | } | ||
427 | return de; | ||
428 | } | ||
429 | |||
430 | ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry) | ||
431 | { | ||
432 | ino_t res = 0; | ||
433 | struct nilfs_dir_entry *de; | ||
434 | struct page *page; | ||
435 | |||
436 | de = nilfs_find_entry(dir, dentry, &page); | ||
437 | if (de) { | ||
438 | res = le64_to_cpu(de->inode); | ||
439 | kunmap(page); | ||
440 | page_cache_release(page); | ||
441 | } | ||
442 | return res; | ||
443 | } | ||
444 | |||
/*
 * nilfs_set_link - repoint an existing directory entry at @inode.
 * @dir:   directory containing @de
 * @de:    entry to rewrite (already mapped via @page)
 * @page:  page holding @de; released before returning
 * @inode: inode the entry refers to after the update
 *
 * Releases the page.
 */
void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
		    struct page *page, struct inode *inode)
{
	unsigned from = (char *) de - (char *) page_address(page);
	unsigned to = from + le16_to_cpu(de->rec_len);
	struct address_space *mapping = page->mapping;
	int err;

	lock_page(page);
	/* the uninterruptible variant is not expected to fail here;
	   a failure would be a bug, hence the BUG_ON */
	err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to);
	BUG_ON(err);
	de->inode = cpu_to_le64(inode->i_ino);
	nilfs_set_de_type(de, inode);
	err = nilfs_commit_chunk(page, mapping, from, to);
	nilfs_put_page(page);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
	mark_inode_dirty(dir);
}
465 | |||
/*
 * nilfs_add_link - add a directory entry for @dentry pointing to @inode.
 *
 * Parent directory inode is locked by the caller.
 * Returns 0 on success, -EEXIST if the name already exists, -EIO on a
 * corrupted entry, or an error from page lookup / chunk preparation.
 */
int nilfs_add_link(struct dentry *dentry, struct inode *inode)
{
	struct inode *dir = dentry->d_parent->d_inode;
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	unsigned chunk_size = nilfs_chunk_size(dir);
	unsigned reclen = NILFS_DIR_REC_LEN(namelen);
	unsigned short rec_len, name_len;
	struct page *page = NULL;
	struct nilfs_dir_entry *de;
	unsigned long npages = dir_pages(dir);
	unsigned long n;
	char *kaddr;
	unsigned from, to;
	int err;

	/*
	 * We take care of directory expansion in the same loop.
	 * This code plays outside i_size, so it locks the page
	 * to protect that region.
	 */
	for (n = 0; n <= npages; n++) {
		char *dir_end;

		page = nilfs_get_page(dir, n);
		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto out;
		lock_page(page);
		kaddr = page_address(page);
		dir_end = kaddr + nilfs_last_byte(dir, n);
		de = (struct nilfs_dir_entry *)kaddr;
		/* last offset where a record of the needed size fits */
		kaddr += PAGE_CACHE_SIZE - reclen;
		while ((char *)de <= kaddr) {
			if ((char *)de == dir_end) {
				/* We hit i_size: append a fresh chunk */
				name_len = 0;
				rec_len = chunk_size;
				de->rec_len = cpu_to_le16(chunk_size);
				de->inode = 0;
				goto got_it;
			}
			if (de->rec_len == 0) {
				nilfs_error(dir->i_sb, __func__,
					    "zero-length directory entry");
				err = -EIO;
				goto out_unlock;
			}
			err = -EEXIST;
			if (nilfs_match(namelen, name, de))
				goto out_unlock;
			/* name_len: space the current entry really needs;
			   rec_len: space the current entry occupies */
			name_len = NILFS_DIR_REC_LEN(de->name_len);
			rec_len = le16_to_cpu(de->rec_len);
			/* reuse a deleted entry that is big enough ... */
			if (!de->inode && rec_len >= reclen)
				goto got_it;
			/* ... or split a live entry with enough slack */
			if (rec_len >= name_len + reclen)
				goto got_it;
			de = (struct nilfs_dir_entry *)((char *)de + rec_len);
		}
		unlock_page(page);
		nilfs_put_page(page);
	}
	/* the loop must find room or fail; falling through is a bug */
	BUG();
	return -EINVAL;

got_it:
	from = (char *)de - (char *)page_address(page);
	to = from + rec_len;
	err = nilfs_prepare_chunk(page, page->mapping, from, to);
	if (err)
		goto out_unlock;
	if (de->inode) {
		/* splitting a live entry: de keeps its own name,
		   de1 receives the leftover tail */
		struct nilfs_dir_entry *de1;

		de1 = (struct nilfs_dir_entry *)((char *)de + name_len);
		de1->rec_len = cpu_to_le16(rec_len - name_len);
		de->rec_len = cpu_to_le16(name_len);
		de = de1;
	}
	de->name_len = namelen;
	memcpy(de->name, name, namelen);
	de->inode = cpu_to_le64(inode->i_ino);
	nilfs_set_de_type(de, inode);
	err = nilfs_commit_chunk(page, page->mapping, from, to);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
	mark_inode_dirty(dir);
	/* OFFSET_CACHE */
out_put:
	nilfs_put_page(page);
out:
	return err;
out_unlock:
	unlock_page(page);
	goto out_put;
}
565 | |||
/*
 * nilfs_delete_entry deletes a directory entry by merging it with the
 * previous entry. Page is up-to-date. Releases the page.
 *
 * @dir is the entry being deleted (not the directory inode); @page is
 * the mapped page containing it. Returns 0, or -EIO on corruption.
 */
int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;
	char *kaddr = page_address(page);
	/* start of the chunk that contains the doomed entry */
	unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
	unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
	struct nilfs_dir_entry *pde = NULL;
	struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
	int err;

	/* walk the chunk to find the entry preceding @dir */
	while ((char *)de < (char *)dir) {
		if (de->rec_len == 0) {
			nilfs_error(inode->i_sb, __func__,
				    "zero-length directory entry");
			err = -EIO;
			goto out;
		}
		pde = de;
		de = nilfs_next_entry(de);
	}
	if (pde)
		from = (char *)pde - (char *)page_address(page);
	lock_page(page);
	err = nilfs_prepare_chunk(page, mapping, from, to);
	BUG_ON(err);
	if (pde)
		/* absorb the deleted entry into its predecessor */
		pde->rec_len = cpu_to_le16(to - from);
	dir->inode = 0;
	err = nilfs_commit_chunk(page, mapping, from, to);
	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
	/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
	mark_inode_dirty(inode);
out:
	nilfs_put_page(page);
	return err;
}
607 | |||
/*
 * Set the first fragment of directory: write the "." and ".." entries
 * into block 0 of the new directory @inode, whose parent is @parent.
 * Returns 0 on success or a negative error.
 */
int nilfs_make_empty(struct inode *inode, struct inode *parent)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page = grab_cache_page(mapping, 0);
	unsigned chunk_size = nilfs_chunk_size(inode);
	struct nilfs_dir_entry *de;
	int err;
	void *kaddr;

	if (!page)
		return -ENOMEM;

	err = nilfs_prepare_chunk(page, mapping, 0, chunk_size);
	if (unlikely(err)) {
		unlock_page(page);
		goto fail;
	}
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr, 0, chunk_size);
	/* "." entry at the start of the chunk */
	de = (struct nilfs_dir_entry *)kaddr;
	de->name_len = 1;
	de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1));
	memcpy(de->name, ".\0\0", 4);
	de->inode = cpu_to_le64(inode->i_ino);
	nilfs_set_de_type(de, inode);

	/* ".." entry takes the remainder of the chunk */
	de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
	de->name_len = 2;
	de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1));
	de->inode = cpu_to_le64(parent->i_ino);
	memcpy(de->name, "..\0", 4);
	nilfs_set_de_type(de, inode);
	kunmap_atomic(kaddr, KM_USER0);
	err = nilfs_commit_chunk(page, mapping, 0, chunk_size);
fail:
	page_cache_release(page);
	return err;
}
649 | |||
/*
 * routine to check that the specified directory is empty (for rmdir)
 *
 * Returns 1 when @inode contains only live "." and ".." entries,
 * 0 otherwise (including on corruption).
 */
int nilfs_empty_dir(struct inode *inode)
{
	struct page *page = NULL;
	unsigned long i, npages = dir_pages(inode);

	for (i = 0; i < npages; i++) {
		char *kaddr;
		struct nilfs_dir_entry *de;

		/* NOTE(review): an unreadable page is skipped, i.e.
		   treated as if it held no entries — confirm intended */
		page = nilfs_get_page(inode, i);
		if (IS_ERR(page))
			continue;

		kaddr = page_address(page);
		de = (struct nilfs_dir_entry *)kaddr;
		/* last offset at which a minimal record can start */
		kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);

		while ((char *)de <= kaddr) {
			if (de->rec_len == 0) {
				nilfs_error(inode->i_sb, __func__,
					    "zero-length directory entry "
					    "(kaddr=%p, de=%p)\n", kaddr, de);
				goto not_empty;
			}
			if (de->inode != 0) {
				/* check for . and .. */
				if (de->name[0] != '.')
					goto not_empty;
				if (de->name_len > 2)
					goto not_empty;
				if (de->name_len < 2) {
					/* "." must point at this inode */
					if (de->inode !=
					    cpu_to_le64(inode->i_ino))
						goto not_empty;
				} else if (de->name[1] != '.')
					goto not_empty;
			}
			de = nilfs_next_entry(de);
		}
		nilfs_put_page(page);
	}
	return 1;

not_empty:
	nilfs_put_page(page);
	return 0;
}
700 | |||
/* File operations for NILFS directories: readdir plus the shared
   nilfs ioctl/fsync entry points; reads go through generic_read_dir
   (which only returns -EISDIR). */
struct file_operations nilfs_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= nilfs_readdir,
	.unlocked_ioctl	= nilfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= nilfs_ioctl,
#endif	/* CONFIG_COMPAT */
	.fsync		= nilfs_sync_file,

};
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c new file mode 100644 index 000000000000..c6379e482781 --- /dev/null +++ b/fs/nilfs2/direct.c | |||
@@ -0,0 +1,436 @@ | |||
1 | /* | ||
2 | * direct.c - NILFS direct block pointer. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/errno.h> | ||
24 | #include "nilfs.h" | ||
25 | #include "page.h" | ||
26 | #include "direct.h" | ||
27 | #include "alloc.h" | ||
28 | |||
29 | static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) | ||
30 | { | ||
31 | return (__le64 *) | ||
32 | ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1); | ||
33 | } | ||
34 | |||
35 | static inline __u64 | ||
36 | nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key) | ||
37 | { | ||
38 | return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key)); | ||
39 | } | ||
40 | |||
41 | static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct, | ||
42 | __u64 key, __u64 ptr) | ||
43 | { | ||
44 | *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr); | ||
45 | } | ||
46 | |||
47 | static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, | ||
48 | __u64 key, int level, __u64 *ptrp) | ||
49 | { | ||
50 | struct nilfs_direct *direct; | ||
51 | __u64 ptr; | ||
52 | |||
53 | direct = (struct nilfs_direct *)bmap; | ||
54 | if ((key > NILFS_DIRECT_KEY_MAX) || | ||
55 | (level != 1) || /* XXX: use macro for level 1 */ | ||
56 | ((ptr = nilfs_direct_get_ptr(direct, key)) == | ||
57 | NILFS_BMAP_INVALID_PTR)) | ||
58 | return -ENOENT; | ||
59 | |||
60 | if (ptrp != NULL) | ||
61 | *ptrp = ptr; | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static __u64 | ||
66 | nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) | ||
67 | { | ||
68 | __u64 ptr; | ||
69 | |||
70 | ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); | ||
71 | if (ptr != NILFS_BMAP_INVALID_PTR) | ||
72 | /* sequential access */ | ||
73 | return ptr; | ||
74 | else | ||
75 | /* block group */ | ||
76 | return nilfs_bmap_find_target_in_group(&direct->d_bmap); | ||
77 | } | ||
78 | |||
/* Remember the most recently allocated (key, ptr) pair so the next
   allocation near @key can be placed sequentially. */
static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
				      __u64 key, __u64 ptr)
{
	direct->d_bmap.b_last_allocated_key = key;
	direct->d_bmap.b_last_allocated_ptr = ptr;
}
85 | |||
/*
 * Reserve a block pointer for inserting @key: choose a target block
 * when the mapping type has a placement heuristic, then pre-allocate
 * the pointer through the bmap's pointer operations.
 * Returns 0 on success or a negative error from the allocator.
 */
static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
				       __u64 key,
				       union nilfs_bmap_ptr_req *req,
				       struct nilfs_bmap_stats *stats)
{
	int ret;

	if (direct->d_ops->dop_find_target != NULL)
		req->bpr_ptr = direct->d_ops->dop_find_target(direct, key);
	ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap,
							    req);
	if (ret < 0)
		return ret;

	/* a direct-mapping insert always adds exactly one block */
	stats->bs_nblocks = 1;
	return 0;
}
103 | |||
/*
 * Finish inserting @key: commit the pre-allocated pointer from @req,
 * publish it in the slot for @key, and record the allocation target.
 * Note @ptr smuggles the buffer head of the new block as an integer.
 */
static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
				       union nilfs_bmap_ptr_req *req,
				       __u64 key, __u64 ptr)
{
	struct buffer_head *bh;

	/* ptr must be a pointer to a buffer head. */
	bh = (struct buffer_head *)((unsigned long)ptr);
	set_buffer_nilfs_volatile(bh);

	if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL)
		direct->d_bmap.b_pops->bpop_commit_alloc_ptr(
			&direct->d_bmap, req);
	nilfs_direct_set_ptr(direct, key, req->bpr_ptr);

	if (!nilfs_bmap_dirty(&direct->d_bmap))
		nilfs_bmap_set_dirty(&direct->d_bmap);

	if (direct->d_ops->dop_set_target != NULL)
		direct->d_ops->dop_set_target(direct, key, req->bpr_ptr);
}
125 | |||
126 | static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | ||
127 | { | ||
128 | struct nilfs_direct *direct; | ||
129 | union nilfs_bmap_ptr_req req; | ||
130 | struct nilfs_bmap_stats stats; | ||
131 | int ret; | ||
132 | |||
133 | direct = (struct nilfs_direct *)bmap; | ||
134 | if (key > NILFS_DIRECT_KEY_MAX) | ||
135 | return -ENOENT; | ||
136 | if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) | ||
137 | return -EEXIST; | ||
138 | |||
139 | ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); | ||
140 | if (ret < 0) | ||
141 | return ret; | ||
142 | nilfs_direct_commit_insert(direct, &req, key, ptr); | ||
143 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | ||
144 | |||
145 | return 0; | ||
146 | } | ||
147 | |||
/*
 * Prepare deletion of @key: look up its pointer and let the bmap's
 * pointer operations stage the release (when the mapping type defines
 * one).  Returns 0 on success or a negative error.
 */
static int nilfs_direct_prepare_delete(struct nilfs_direct *direct,
				       union nilfs_bmap_ptr_req *req,
				       __u64 key,
				       struct nilfs_bmap_stats *stats)
{
	int ret;

	if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
		req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
		ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr(
			&direct->d_bmap, req);
		if (ret < 0)
			return ret;
	}

	/* a direct-mapping delete always removes exactly one block */
	stats->bs_nblocks = 1;
	return 0;
}
166 | |||
/* Finish deleting @key: commit the staged pointer release (if any)
   and invalidate the slot. */
static void nilfs_direct_commit_delete(struct nilfs_direct *direct,
				       union nilfs_bmap_ptr_req *req,
				       __u64 key)
{
	if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL)
		direct->d_bmap.b_pops->bpop_commit_end_ptr(
			&direct->d_bmap, req);
	nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
}
176 | |||
177 | static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) | ||
178 | { | ||
179 | struct nilfs_direct *direct; | ||
180 | union nilfs_bmap_ptr_req req; | ||
181 | struct nilfs_bmap_stats stats; | ||
182 | int ret; | ||
183 | |||
184 | direct = (struct nilfs_direct *)bmap; | ||
185 | if ((key > NILFS_DIRECT_KEY_MAX) || | ||
186 | nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) | ||
187 | return -ENOENT; | ||
188 | |||
189 | ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); | ||
190 | if (ret < 0) | ||
191 | return ret; | ||
192 | nilfs_direct_commit_delete(direct, &req, key); | ||
193 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | ||
199 | { | ||
200 | struct nilfs_direct *direct; | ||
201 | __u64 key, lastkey; | ||
202 | |||
203 | direct = (struct nilfs_direct *)bmap; | ||
204 | lastkey = NILFS_DIRECT_KEY_MAX + 1; | ||
205 | for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) | ||
206 | if (nilfs_direct_get_ptr(direct, key) != | ||
207 | NILFS_BMAP_INVALID_PTR) | ||
208 | lastkey = key; | ||
209 | |||
210 | if (lastkey == NILFS_DIRECT_KEY_MAX + 1) | ||
211 | return -ENOENT; | ||
212 | |||
213 | *keyp = lastkey; | ||
214 | |||
215 | return 0; | ||
216 | } | ||
217 | |||
/* Return nonzero when @key lies beyond the direct-mapping key range,
   so the caller must handle the insert some other way. */
static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key)
{
	return key > NILFS_DIRECT_KEY_MAX;
}
222 | |||
223 | static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, | ||
224 | __u64 *keys, __u64 *ptrs, int nitems) | ||
225 | { | ||
226 | struct nilfs_direct *direct; | ||
227 | __u64 key; | ||
228 | __u64 ptr; | ||
229 | int n; | ||
230 | |||
231 | direct = (struct nilfs_direct *)bmap; | ||
232 | if (nitems > NILFS_DIRECT_NBLOCKS) | ||
233 | nitems = NILFS_DIRECT_NBLOCKS; | ||
234 | n = 0; | ||
235 | for (key = 0; key < nitems; key++) { | ||
236 | ptr = nilfs_direct_get_ptr(direct, key); | ||
237 | if (ptr != NILFS_BMAP_INVALID_PTR) { | ||
238 | keys[n] = key; | ||
239 | ptrs[n] = ptr; | ||
240 | n++; | ||
241 | } | ||
242 | } | ||
243 | return n; | ||
244 | } | ||
245 | |||
/*
 * nilfs_direct_delete_and_convert - delete @key through the bmap's
 * current (non-direct) operations, then re-initialize @bmap as a
 * direct mapping populated with the @n surviving (@keys[i], @ptrs[i])
 * pairs supplied by the caller.  @low/@high become the new key bounds.
 * Returns 0 or a negative error from the delete operation.
 */
int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
				    __u64 key, __u64 *keys, __u64 *ptrs,
				    int n, __u64 low, __u64 high)
{
	struct nilfs_direct *direct;
	__le64 *dptrs;
	int ret, i, j;

	/* no need to allocate any resource for conversion */

	/* delete */
	ret = bmap->b_ops->bop_delete(bmap, key);
	if (ret < 0)
		return ret;

	/* free resources */
	if (bmap->b_ops->bop_clear != NULL)
		bmap->b_ops->bop_clear(bmap);

	/* convert: copy the gathered pairs into the direct slots,
	   leaving the deleted @key (and all other keys) invalid */
	direct = (struct nilfs_direct *)bmap;
	dptrs = nilfs_direct_dptrs(direct);
	for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) {
		if ((j < n) && (i == keys[j])) {
			dptrs[i] = (i != key) ?
				nilfs_bmap_ptr_to_dptr(ptrs[j]) :
				NILFS_BMAP_INVALID_PTR;
			j++;
		} else
			dptrs[i] = NILFS_BMAP_INVALID_PTR;
	}

	nilfs_direct_init(bmap, low, high);

	return 0;
}
282 | |||
/*
 * Propagate dirtiness of @bh (virtual-address variant): on the first
 * propagation of a buffer (not yet marked nilfs_volatile) its pointer
 * is moved via a prepare/commit update and republished; afterwards the
 * existing pointer is simply marked dirty.
 */
static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
				    struct buffer_head *bh)
{
	union nilfs_bmap_ptr_req oldreq, newreq;
	__u64 key;
	__u64 ptr;
	int ret;

	key = nilfs_bmap_data_get_key(&direct->d_bmap, bh);
	ptr = nilfs_direct_get_ptr(direct, key);
	if (!buffer_nilfs_volatile(bh)) {
		oldreq.bpr_ptr = ptr;
		newreq.bpr_ptr = ptr;
		ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq,
						&newreq);
		if (ret < 0)
			return ret;
		nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq);
		set_buffer_nilfs_volatile(bh);
		nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr);
	} else
		ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr);

	return ret;
}
308 | |||
309 | static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, | ||
310 | struct buffer_head *bh) | ||
311 | { | ||
312 | struct nilfs_direct *direct; | ||
313 | |||
314 | direct = (struct nilfs_direct *)bmap; | ||
315 | return (direct->d_ops->dop_propagate != NULL) ? | ||
316 | direct->d_ops->dop_propagate(direct, bh) : | ||
317 | 0; | ||
318 | } | ||
319 | |||
/*
 * Virtual-address assignment: bind the virtual pointer @ptr of @key to
 * the physical location @blocknr through the bmap's start-pointer
 * operations, and record the (virtual blocknr, block offset) pair in
 * @binfo.  @bh is unused here but keeps the dop_assign signature.
 */
static int nilfs_direct_assign_v(struct nilfs_direct *direct,
				 __u64 key, __u64 ptr,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	union nilfs_bmap_ptr_req req;
	int ret;

	req.bpr_ptr = ptr;
	ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr(
		&direct->d_bmap, &req);
	if (ret < 0)
		return ret;
	direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap,
						     &req, blocknr);

	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);

	return 0;
}
342 | |||
/*
 * Physical-address assignment: store @blocknr directly in the slot for
 * @key and record the block offset (level 0) in @binfo.  @ptr and @bh
 * are unused here but keep the dop_assign signature.
 */
static int nilfs_direct_assign_p(struct nilfs_direct *direct,
				 __u64 key, __u64 ptr,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	nilfs_direct_set_ptr(direct, key, blocknr);

	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
	binfo->bi_dat.bi_level = 0;

	return 0;
}
356 | |||
357 | static int nilfs_direct_assign(struct nilfs_bmap *bmap, | ||
358 | struct buffer_head **bh, | ||
359 | sector_t blocknr, | ||
360 | union nilfs_binfo *binfo) | ||
361 | { | ||
362 | struct nilfs_direct *direct; | ||
363 | __u64 key; | ||
364 | __u64 ptr; | ||
365 | |||
366 | direct = (struct nilfs_direct *)bmap; | ||
367 | key = nilfs_bmap_data_get_key(bmap, *bh); | ||
368 | if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { | ||
369 | printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, | ||
370 | (unsigned long long)key); | ||
371 | return -EINVAL; | ||
372 | } | ||
373 | ptr = nilfs_direct_get_ptr(direct, key); | ||
374 | if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { | ||
375 | printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, | ||
376 | (unsigned long long)ptr); | ||
377 | return -EINVAL; | ||
378 | } | ||
379 | |||
380 | return direct->d_ops->dop_assign(direct, key, ptr, bh, | ||
381 | blocknr, binfo); | ||
382 | } | ||
383 | |||
/*
 * bmap operation table shared by both direct-mapping flavors; NULL
 * slots are operations a direct mapping does not need.
 */
static const struct nilfs_bmap_operations nilfs_direct_ops = {
	.bop_lookup = nilfs_direct_lookup,
	.bop_insert = nilfs_direct_insert,
	.bop_delete = nilfs_direct_delete,
	.bop_clear = NULL,

	.bop_propagate = nilfs_direct_propagate,

	.bop_lookup_dirty_buffers = NULL,

	.bop_assign = nilfs_direct_assign,
	.bop_mark = NULL,

	.bop_last_key = nilfs_direct_last_key,
	.bop_check_insert = nilfs_direct_check_insert,
	.bop_check_delete = NULL,
	.bop_gather_data = nilfs_direct_gather_data,
};
402 | |||
403 | |||
/* Operation table for inodes addressed through virtual block numbers. */
static const struct nilfs_direct_operations nilfs_direct_ops_v = {
	.dop_find_target = nilfs_direct_find_target_v,
	.dop_set_target = nilfs_direct_set_target_v,
	.dop_propagate = nilfs_direct_propagate_v,
	.dop_assign = nilfs_direct_assign_v,
};

/* Operation table for the DAT inode, which stores physical block
   numbers directly (see nilfs_direct_init). */
static const struct nilfs_direct_operations nilfs_direct_ops_p = {
	.dop_find_target = NULL,
	.dop_set_target = NULL,
	.dop_propagate = NULL,
	.dop_assign = nilfs_direct_assign_p,
};
417 | |||
418 | int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high) | ||
419 | { | ||
420 | struct nilfs_direct *direct; | ||
421 | |||
422 | direct = (struct nilfs_direct *)bmap; | ||
423 | bmap->b_ops = &nilfs_direct_ops; | ||
424 | bmap->b_low = low; | ||
425 | bmap->b_high = high; | ||
426 | switch (bmap->b_inode->i_ino) { | ||
427 | case NILFS_DAT_INO: | ||
428 | direct->d_ops = &nilfs_direct_ops_p; | ||
429 | break; | ||
430 | default: | ||
431 | direct->d_ops = &nilfs_direct_ops_v; | ||
432 | break; | ||
433 | } | ||
434 | |||
435 | return 0; | ||
436 | } | ||
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h new file mode 100644 index 000000000000..45d2c5cda812 --- /dev/null +++ b/fs/nilfs2/direct.h | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * direct.h - NILFS direct block pointer. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_DIRECT_H | ||
24 | #define _NILFS_DIRECT_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include "bmap.h" | ||
29 | |||
30 | |||
31 | struct nilfs_direct; | ||
32 | |||
/**
 * struct nilfs_direct_operations - direct mapping operation table
 * @dop_find_target: pick a target block address for a new allocation
 * @dop_set_target: record the last allocated (key, ptr) pair
 * @dop_propagate: propagate buffer dirtiness through the mapping
 * @dop_assign: bind a block pointer to its on-disk location
 */
struct nilfs_direct_operations {
	__u64 (*dop_find_target)(const struct nilfs_direct *, __u64);
	void (*dop_set_target)(struct nilfs_direct *, __u64, __u64);
	int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *);
	int (*dop_assign)(struct nilfs_direct *, __u64, __u64,
			  struct buffer_head **, sector_t,
			  union nilfs_binfo *);
};
44 | |||
/**
 * struct nilfs_direct_node - direct node
 * @dn_flags: flags
 * @pad: padding up to an 8-byte node size
 */
struct nilfs_direct_node {
	__u8 dn_flags;
	__u8 pad[7];
};
54 | |||
/**
 * struct nilfs_direct - direct mapping
 * @d_bmap: bmap structure; must remain the first member so that
 *          struct nilfs_bmap and struct nilfs_direct can be cast to
 *          each other, as direct.c does throughout
 * @d_ops: direct mapping operation table
 */
struct nilfs_direct {
	struct nilfs_bmap d_bmap;

	/* direct-mapping-specific members */
	const struct nilfs_direct_operations *d_ops;
};
66 | |||
67 | |||
68 | #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) | ||
69 | #define NILFS_DIRECT_KEY_MIN 0 | ||
70 | #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) | ||
71 | |||
72 | |||
73 | int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64); | ||
74 | int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *, | ||
75 | __u64 *, int, __u64, __u64); | ||
76 | |||
77 | |||
78 | #endif /* _NILFS_DIRECT_H */ | ||
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c new file mode 100644 index 000000000000..6bd84a0d8238 --- /dev/null +++ b/fs/nilfs2/file.c | |||
@@ -0,0 +1,160 @@ | |||
1 | /* | ||
2 | * file.c - NILFS regular file handling primitives including fsync(). | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | |||
24 | #include <linux/fs.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "segment.h" | ||
29 | |||
30 | int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | ||
31 | { | ||
32 | /* | ||
33 | * Called from fsync() system call | ||
34 | * This is the only entry point that can catch write and synch | ||
35 | * timing for both data blocks and intermediate blocks. | ||
36 | * | ||
37 | * This function should be implemented when the writeback function | ||
38 | * will be implemented. | ||
39 | */ | ||
40 | struct inode *inode = dentry->d_inode; | ||
41 | int err; | ||
42 | |||
43 | if (!nilfs_inode_dirty(inode)) | ||
44 | return 0; | ||
45 | |||
46 | if (datasync) | ||
47 | err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, | ||
48 | LLONG_MAX); | ||
49 | else | ||
50 | err = nilfs_construct_segment(inode->i_sb); | ||
51 | |||
52 | return err; | ||
53 | } | ||
54 | |||
/*
 * nilfs_page_mkwrite - vm_operations page_mkwrite method
 *
 * Called when a previously read-only mmapped page is about to be made
 * writable.  Allocates any hole blocks backing the page inside a nilfs
 * transaction so that later writeback cannot fail for lack of space.
 */
static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = vma->vm_file->f_dentry->d_inode;
	struct nilfs_transaction_info ti;
	int ret;

	/* fail early when the filesystem is close to full */
	if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs)))
		return VM_FAULT_SIGBUS; /* -ENOSPC */

	lock_page(page);
	/* the page may have been truncated or invalidated before we locked it */
	if (page->mapping != inode->i_mapping ||
	    page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
		unlock_page(page);
		return VM_FAULT_NOPAGE; /* make the VM retry the fault */
	}

	/*
	 * check to see if the page is mapped already (no holes)
	 */
	if (PageMappedToDisk(page)) {
		unlock_page(page);
		goto mapped;
	}
	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int fully_mapped = 1;

		/* walk the buffer ring; any unmapped buffer means a hole */
		bh = head = page_buffers(page);
		do {
			if (!buffer_mapped(bh)) {
				fully_mapped = 0;
				break;
			}
		} while (bh = bh->b_this_page, bh != head);

		if (fully_mapped) {
			/* cache the result so later faults skip this scan */
			SetPageMappedToDisk(page);
			unlock_page(page);
			goto mapped;
		}
	}
	unlock_page(page);

	/*
	 * fill hole blocks
	 */
	ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
	/* never returns -ENOMEM, but may return -ENOSPC */
	if (unlikely(ret))
		return VM_FAULT_SIGBUS;

	ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
	if (unlikely(ret)) {
		nilfs_transaction_abort(inode->i_sb);
		return ret;
	}
	nilfs_transaction_commit(inode->i_sb);

mapped:
	/* mark for the segment constructor and wait for in-flight writeback */
	SetPageChecked(page);
	wait_on_page_writeback(page);
	return 0;
}
119 | |||
/* vm operations: hook page_mkwrite so holes get filled before a write fault */
struct vm_operations_struct nilfs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = nilfs_page_mkwrite,
};
124 | |||
/* mmap method: install nilfs-specific vm operations on the mapping */
static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);	/* update atime according to mount options */
	vma->vm_ops = &nilfs_file_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;
	return 0;
}
132 | |||
/*
 * We have mostly NULL's here: the current defaults are ok for
 * the nilfs filesystem.
 */
struct file_operations nilfs_file_operations = {
	.llseek = generic_file_llseek,
	.read = do_sync_read,
	.write = do_sync_write,
	.aio_read = generic_file_aio_read,
	.aio_write = generic_file_aio_write,
	.unlocked_ioctl = nilfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nilfs_ioctl,
#endif /* CONFIG_COMPAT */
	.mmap = nilfs_file_mmap,
	.open = generic_file_open,
	/* .release = nilfs_release_file, */
	/* fsync catches write timing for data and intermediate blocks */
	.fsync = nilfs_sync_file,
	.splice_read = generic_file_splice_read,
};
153 | |||
/* inode operations for regular files */
struct inode_operations nilfs_file_inode_operations = {
	.truncate = nilfs_truncate,
	.setattr = nilfs_setattr,
	.permission = nilfs_permission,
};
159 | |||
160 | /* end of file */ | ||
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c new file mode 100644 index 000000000000..93383c5cee90 --- /dev/null +++ b/fs/nilfs2/gcdat.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * gcdat.c - NILFS shadow DAT inode for GC | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * and Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/buffer_head.h> | ||
26 | #include "nilfs.h" | ||
27 | #include "page.h" | ||
28 | #include "mdt.h" | ||
29 | |||
/**
 * nilfs_init_gcdat_inode - set up the shadow (GC) DAT inode
 * @nilfs: the_nilfs owning both the real DAT and the GC DAT
 *
 * Copies the current state of the DAT inode (block count, flags, bmap
 * root, dirty data pages and dirty btree-node pages) into the GC DAT
 * inode so garbage collection can operate on a private copy.
 *
 * Returns 0 on success or a negative error code if copying the dirty
 * pages fails.
 */
int nilfs_init_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
	struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat);
	int err;

	gcdat->i_state = 0;
	gcdat->i_blocks = dat->i_blocks;
	gii->i_flags = dii->i_flags;
	/* inherit the DAT state and tag the copy as the GC DAT */
	gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT);
	gii->i_cno = 0;
	nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap);
	err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping);
	if (unlikely(err))
		return err;

	/* also duplicate the dirty btree-node pages */
	return nilfs_copy_dirty_pages(&gii->i_btnode_cache,
				      &dii->i_btnode_cache);
}
49 | |||
/**
 * nilfs_commit_gcdat_inode - copy GC DAT results back to the real DAT
 * @nilfs: the_nilfs owning both inodes
 *
 * Moves the block count, flags, bmap root, and page caches of the GC
 * DAT inode back into the live DAT inode under mi_sem, making the GC
 * result visible.
 */
void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
	struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat);
	struct address_space *mapping = dat->i_mapping;
	struct address_space *gmapping = gcdat->i_mapping;

	/* exclude other accessors of the DAT while swapping state */
	down_write(&NILFS_MDT(dat)->mi_sem);
	dat->i_blocks = gcdat->i_blocks;
	dii->i_flags = gii->i_flags;
	/* drop the GC DAT tag when copying the state back */
	dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT);

	nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap);

	/* replace DAT pages with the GC DAT pages */
	nilfs_clear_dirty_pages(mapping);
	nilfs_copy_back_pages(mapping, gmapping);
	/* note: mdt dirty flags should be cleared by segctor. */

	/* do the same for the btree-node cache */
	nilfs_clear_dirty_pages(&dii->i_btnode_cache);
	nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache);

	up_write(&NILFS_MDT(dat)->mi_sem);
}
73 | |||
/**
 * nilfs_clear_gcdat_inode - reset the GC DAT inode and drop its pages
 * @nilfs: the_nilfs owning the GC DAT inode
 */
void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
{
	struct inode *gcdat = nilfs->ns_gc_dat;
	struct nilfs_inode_info *gii = NILFS_I(gcdat);

	gcdat->i_state = I_CLEAR;
	gii->i_flags = 0;

	/* release both the data pages and the btree-node pages */
	truncate_inode_pages(gcdat->i_mapping, 0);
	truncate_inode_pages(&gii->i_btnode_cache, 0);
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c new file mode 100644 index 000000000000..19d2102b6a69 --- /dev/null +++ b/fs/nilfs2/gcinode.c | |||
@@ -0,0 +1,288 @@ | |||
1 | /* | ||
2 | * gcinode.c - dummy inodes to buffer blocks for garbage collection | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * and Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * Revised by Ryusuke Konishi <ryusuke@osrg.net>. | ||
23 | * | ||
24 | */ | ||
25 | /* | ||
26 | * This file adds the cache of on-disk blocks to be moved in garbage | ||
27 | * collection. The disk blocks are held with dummy inodes (called | ||
28 | * gcinodes), and this file provides lookup function of the dummy | ||
29 | * inodes and their buffer read function. | ||
30 | * | ||
 * Since NILFS2 keeps up multiple checkpoints/snapshots across GC, it
 * has to treat blocks that belong to the same file but have different
 * checkpoint numbers.  To avoid interference among generations, dummy
 * inodes are managed separately from actual inodes, and their lookup
 * function (nilfs_gc_iget) is designed to be specified with a
 * checkpoint number argument as well as an inode number.
37 | * | ||
38 | * Buffers and pages held by the dummy inodes will be released each | ||
39 | * time after they are copied to a new log. Dirty blocks made on the | ||
40 | * current generation and the blocks to be moved by GC never overlap | ||
41 | * because the dirty blocks make a new generation; they rather must be | ||
42 | * written individually. | ||
43 | */ | ||
44 | |||
45 | #include <linux/buffer_head.h> | ||
46 | #include <linux/mpage.h> | ||
47 | #include <linux/hash.h> | ||
48 | #include <linux/swap.h> | ||
49 | #include "nilfs.h" | ||
50 | #include "page.h" | ||
51 | #include "mdt.h" | ||
52 | #include "dat.h" | ||
53 | #include "ifile.h" | ||
54 | |||
55 | static struct address_space_operations def_gcinode_aops = {}; | ||
56 | /* XXX need def_gcinode_iops/fops? */ | ||
57 | |||
/*
 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
 * @inode - gc inode
 * @blkoff - dummy offset treated as the key for the page cache
 * @pbn - physical block number of the block
 * @vbn - virtual block number of the block, 0 for non-virtual block
 * @out_bh - indirect pointer to a buffer_head struct to receive the results
 *
 * Description: nilfs_gccache_submit_read_data() registers the data buffer
 * specified by @pbn to the GC pagecache with the key @blkoff.
 * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
 *
 * Return Value: On success, 0 is returned. On Error, one of the following
 * negative error code is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The block specified with @pbn does not exist.
 */
int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
				   sector_t pbn, __u64 vbn,
				   struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	int err;

	/* get (or create) the page-cache buffer keyed by @blkoff */
	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
	if (unlikely(!bh))
		return -ENOMEM;

	if (buffer_uptodate(bh))
		goto out;

	if (pbn == 0) {
		struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
					  /* use original dat, not gc dat. */
		/* resolve the virtual block number to a physical one */
		err = nilfs_dat_translate(dat_inode, vbn, &pbn);
		if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
			brelse(bh);
			goto failed;
		}
	}

	lock_buffer(bh);
	/* re-check under the buffer lock: another reader may have filled it */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		goto out;
	}

	if (!buffer_mapped(bh)) {
		bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
		set_buffer_mapped(bh);
	}
	/* read from the physical address... */
	bh->b_blocknr = pbn;
	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(READ, bh);
	/* ...then record the virtual address for later identification */
	if (vbn)
		bh->b_blocknr = vbn;
 out:
	err = 0;
	*out_bh = bh;

 failed:
	/* release the page reference taken by nilfs_grab_buffer() */
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	return err;
}
128 | |||
129 | /* | ||
130 | * nilfs_gccache_submit_read_node() - add node buffer and submit read request | ||
131 | * @inode - gc inode | ||
132 | * @pbn - physical block number for the block | ||
133 | * @vbn - virtual block number for the block | ||
134 | * @out_bh - indirect pointer to a buffer_head struct to receive the results | ||
135 | * | ||
136 | * Description: nilfs_gccache_submit_read_node() registers the node buffer | ||
137 | * specified by @vbn to the GC pagecache. @pbn can be supplied by the | ||
138 | * caller to avoid translation of the disk block address. | ||
139 | * | ||
140 | * Return Value: On success, 0 is returned. On Error, one of the following | ||
141 | * negative error code is returned. | ||
142 | * | ||
143 | * %-EIO - I/O error. | ||
144 | * | ||
145 | * %-ENOMEM - Insufficient amount of memory available. | ||
146 | */ | ||
147 | int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, | ||
148 | __u64 vbn, struct buffer_head **out_bh) | ||
149 | { | ||
150 | int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, | ||
151 | vbn ? : pbn, pbn, out_bh, 0); | ||
152 | if (ret == -EEXIST) /* internal code (cache hit) */ | ||
153 | ret = 0; | ||
154 | return ret; | ||
155 | } | ||
156 | |||
/*
 * nilfs_gccache_wait_and_mark_dirty - wait for a gc buffer read and dirty it
 * @bh: buffer previously submitted for read by the gccache functions
 *
 * Returns 0 on success, -EIO if the read failed, or -EEXIST if the
 * buffer was already dirty (internal code; not an error to the caller).
 */
int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
{
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh))
		return -EIO;
	if (buffer_dirty(bh))
		return -EEXIST;

	/* btree-node buffers and data buffers are dirtied differently */
	if (buffer_nilfs_node(bh))
		nilfs_btnode_mark_dirty(bh);
	else
		nilfs_mdt_mark_buffer_dirty(bh);
	return 0;
}
171 | |||
172 | /* | ||
173 | * nilfs_init_gccache() - allocate and initialize gc_inode hash table | ||
174 | * @nilfs - the_nilfs | ||
175 | * | ||
176 | * Return Value: On success, 0. | ||
177 | * On error, a negative error code is returned. | ||
178 | */ | ||
179 | int nilfs_init_gccache(struct the_nilfs *nilfs) | ||
180 | { | ||
181 | int loop; | ||
182 | |||
183 | BUG_ON(nilfs->ns_gc_inodes_h); | ||
184 | |||
185 | INIT_LIST_HEAD(&nilfs->ns_gc_inodes); | ||
186 | |||
187 | nilfs->ns_gc_inodes_h = | ||
188 | kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, | ||
189 | GFP_NOFS); | ||
190 | if (nilfs->ns_gc_inodes_h == NULL) | ||
191 | return -ENOMEM; | ||
192 | |||
193 | for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) | ||
194 | INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * nilfs_destroy_gccache() - free gc_inode hash table | ||
200 | * @nilfs - the nilfs | ||
201 | */ | ||
202 | void nilfs_destroy_gccache(struct the_nilfs *nilfs) | ||
203 | { | ||
204 | if (nilfs->ns_gc_inodes_h) { | ||
205 | nilfs_remove_all_gcinode(nilfs); | ||
206 | kfree(nilfs->ns_gc_inodes_h); | ||
207 | nilfs->ns_gc_inodes_h = NULL; | ||
208 | } | ||
209 | } | ||
210 | |||
/*
 * alloc_gcinode - allocate a dummy gc inode for the given (ino, cno) pair
 *
 * Returns the new inode on success, or NULL on allocation failure.
 */
static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino,
				   __u64 cno)
{
	/* reuse the common meta-data-file inode allocator */
	struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS);
	struct nilfs_inode_info *ii;

	if (!inode)
		return NULL;

	/* gc inodes are never exposed through the VFS */
	inode->i_op = NULL;
	inode->i_fop = NULL;
	inode->i_mapping->a_ops = &def_gcinode_aops;

	ii = NILFS_I(inode);
	ii->i_cno = cno;
	ii->i_flags = 0;
	ii->i_state = 1 << NILFS_I_GCINODE;
	ii->i_bh = NULL;
	nilfs_bmap_init_gc(ii->i_bmap);

	return inode;
}
233 | |||
234 | static unsigned long ihash(ino_t ino, __u64 cno) | ||
235 | { | ||
236 | return hash_long((unsigned long)((ino << 2) + cno), | ||
237 | NILFS_GCINODE_HASH_BITS); | ||
238 | } | ||
239 | |||
/*
 * nilfs_gc_iget() - find or create gc inode with specified (ino,cno)
 *
 * Looks the pair up in the gc inode hash table; if no matching dummy
 * inode exists, a new one is allocated, hashed, and linked onto the
 * ns_gc_inodes list.  Returns NULL on allocation failure.
 */
struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
{
	struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno);
	struct hlist_node *node;
	struct inode *inode;

	hlist_for_each_entry(inode, node, head, i_hash) {
		if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno)
			return inode;
	}

	inode = alloc_gcinode(nilfs, ino, cno);
	if (likely(inode)) {
		hlist_add_head(&inode->i_hash, head);
		list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
	}
	return inode;
}
261 | |||
/*
 * nilfs_clear_gcinode() - clear and free a gc inode
 *
 * Invalidates the mdt state of @inode and then destroys it.
 */
void nilfs_clear_gcinode(struct inode *inode)
{
	nilfs_mdt_clear(inode);
	nilfs_mdt_destroy(inode);
}
270 | |||
/*
 * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs
 *
 * Walks every hash bucket, unhashes each gc inode, unlinks it from the
 * dirty list, and frees it.
 */
void nilfs_remove_all_gcinode(struct the_nilfs *nilfs)
{
	struct hlist_head *head = nilfs->ns_gc_inodes_h;
	struct hlist_node *node, *n;
	struct inode *inode;
	int loop;

	for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) {
		/* _safe variant: entries are deleted while iterating */
		hlist_for_each_entry_safe(inode, node, n, head, i_hash) {
			hlist_del_init(&inode->i_hash);
			list_del_init(&NILFS_I(inode)->i_dirty);
			nilfs_clear_gcinode(inode); /* might sleep */
		}
	}
}
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c new file mode 100644 index 000000000000..de86401f209f --- /dev/null +++ b/fs/nilfs2/ifile.c | |||
@@ -0,0 +1,150 @@ | |||
1 | /* | ||
2 | * ifile.c - NILFS inode file | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Amagai Yoshiji <amagai@osrg.net>. | ||
21 | * Revised by Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "mdt.h" | ||
29 | #include "alloc.h" | ||
30 | #include "ifile.h" | ||
31 | |||
/**
 * nilfs_ifile_create_inode - create a new disk inode
 * @ifile: ifile inode
 * @out_ino: pointer to a variable to store inode number
 * @out_bh: buffer_head contains newly allocated disk inode
 *
 * Return Value: On success, 0 is returned and the newly allocated inode
 * number is stored in the place pointed by @out_ino, and buffer_head
 * pointer that contains newly allocated disk inode structure is stored in
 * the place pointed by @out_bh
 * On error, one of the following negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOSPC - No inode left.
 */
int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
			     struct buffer_head **out_bh)
{
	struct nilfs_palloc_req req;
	int ret;

	req.pr_entry_nr = 0;  /* 0 says find free inode from beginning of
				 a group. dull code!! */
	req.pr_entry_bh = NULL;

	ret = nilfs_palloc_prepare_alloc_entry(ifile, &req);
	if (!ret) {
		/* load (creating if necessary) the block holding the entry */
		ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
						   &req.pr_entry_bh);
		if (ret < 0)
			nilfs_palloc_abort_alloc_entry(ifile, &req);
	}
	if (ret < 0) {
		brelse(req.pr_entry_bh);
		return ret;
	}
	nilfs_palloc_commit_alloc_entry(ifile, &req);
	/* dirty both the entry block and the ifile itself for writeback */
	nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
	nilfs_mdt_mark_dirty(ifile);
	*out_ino = (ino_t)req.pr_entry_nr;
	*out_bh = req.pr_entry_bh;
	return 0;
}
78 | |||
/**
 * nilfs_ifile_delete_inode - delete a disk inode
 * @ifile: ifile inode
 * @ino: inode number
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-EIO - I/O error.
 *
 * %-ENOMEM - Insufficient amount of memory available.
 *
 * %-ENOENT - The inode number @ino have not been allocated.
 */
int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
{
	struct nilfs_palloc_req req = {
		.pr_entry_nr = ino, .pr_entry_bh = NULL
	};
	struct nilfs_inode *raw_inode;
	void *kaddr;
	int ret;

	ret = nilfs_palloc_prepare_free_entry(ifile, &req);
	if (!ret) {
		ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 0,
						   &req.pr_entry_bh);
		if (ret < 0)
			nilfs_palloc_abort_free_entry(ifile, &req);
	}
	if (ret < 0) {
		brelse(req.pr_entry_bh);
		return ret;
	}

	/* clear i_flags on the raw disk inode to mark it free */
	kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0);
	raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
						 req.pr_entry_bh, kaddr);
	raw_inode->i_flags = 0;
	kunmap_atomic(kaddr, KM_USER0);

	nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
	brelse(req.pr_entry_bh);

	/* release the entry back to the persistent object allocator */
	nilfs_palloc_commit_free_entry(ifile, &req);

	return 0;
}
127 | |||
128 | int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, | ||
129 | struct buffer_head **out_bh) | ||
130 | { | ||
131 | struct super_block *sb = ifile->i_sb; | ||
132 | int err; | ||
133 | |||
134 | if (unlikely(!NILFS_VALID_INODE(sb, ino))) { | ||
135 | nilfs_error(sb, __func__, "bad inode number: %lu", | ||
136 | (unsigned long) ino); | ||
137 | return -EINVAL; | ||
138 | } | ||
139 | |||
140 | err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh); | ||
141 | if (unlikely(err)) { | ||
142 | if (err == -EINVAL) | ||
143 | nilfs_error(sb, __func__, "ifile is broken"); | ||
144 | else | ||
145 | nilfs_warning(sb, __func__, | ||
146 | "unable to read inode: %lu", | ||
147 | (unsigned long) ino); | ||
148 | } | ||
149 | return err; | ||
150 | } | ||
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h new file mode 100644 index 000000000000..5d30a35679b5 --- /dev/null +++ b/fs/nilfs2/ifile.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * ifile.h - NILFS inode file | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Amagai Yoshiji <amagai@osrg.net> | ||
21 | * Revised by Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifndef _NILFS_IFILE_H | ||
26 | #define _NILFS_IFILE_H | ||
27 | |||
28 | #include <linux/fs.h> | ||
29 | #include <linux/buffer_head.h> | ||
30 | #include <linux/nilfs2_fs.h> | ||
31 | #include "mdt.h" | ||
32 | #include "alloc.h" | ||
33 | |||
/* allocation flags used for ifile page cache pages */
#define NILFS_IFILE_GFP	NILFS_MDT_GFP

/*
 * nilfs_ifile_map_inode - map the raw disk inode for @ino into memory
 *
 * The page stays kmapped until nilfs_ifile_unmap_inode() is called;
 * callers must pair the two.
 */
static inline struct nilfs_inode *
nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
{
	void *kaddr = kmap(ibh->b_page);
	return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
}
42 | |||
/*
 * nilfs_ifile_unmap_inode - release the kmap taken by nilfs_ifile_map_inode
 */
static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
					   struct buffer_head *ibh)
{
	kunmap(ibh->b_page);
}

int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
int nilfs_ifile_delete_inode(struct inode *, ino_t);
int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
52 | |||
53 | #endif /* _NILFS_IFILE_H */ | ||
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c new file mode 100644 index 000000000000..49ab4a49bb4f --- /dev/null +++ b/fs/nilfs2/inode.c | |||
@@ -0,0 +1,785 @@ | |||
1 | /* | ||
2 | * inode.c - NILFS inode operations. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/mpage.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include <linux/uio.h> | ||
28 | #include "nilfs.h" | ||
29 | #include "segment.h" | ||
30 | #include "page.h" | ||
31 | #include "mdt.h" | ||
32 | #include "cpfile.h" | ||
33 | #include "ifile.h" | ||
34 | |||
35 | |||
/**
 * nilfs_get_block() - get a file block on the filesystem (callback function)
 * @inode - inode struct of the target file
 * @blkoff - file block number
 * @bh_result - buffer head to be mapped on
 * @create - indicate whether allocating the block or not when it has not
 *      been allocated yet.
 *
 * This function does not issue actual read request of the specified data
 * block. It is done by VFS.
 * Bulk read for direct-io is not supported yet. (should be supported)
 */
int nilfs_get_block(struct inode *inode, sector_t blkoff,
		    struct buffer_head *bh_result, int create)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	unsigned long blknum = 0;
	int err = 0, ret;
	struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode));

	/* This exclusion control is a workaround; should be revised */
	down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
	ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum);
	up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
	if (ret == 0) {	/* found: map the existing disk block and return */
		map_bh(bh_result, inode->i_sb, blknum);
		goto out;
	}
	/* data block was not found */
	if (ret == -ENOENT && create) {
		struct nilfs_transaction_info ti;

		bh_result->b_blocknr = 0;
		/* Inserting into the bmap mutates filesystem metadata,
		   so it must run inside a transaction. */
		err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
		if (unlikely(err))
			goto out;
		err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff,
					(unsigned long)bh_result);
		if (unlikely(err != 0)) {
			if (err == -EEXIST) {
				/*
				 * The get_block() function could be called
				 * from multiple callers for an inode.
				 * However, the page having this block must
				 * be locked in this case.
				 */
				printk(KERN_WARNING
				       "nilfs_get_block: a race condition "
				       "while inserting a data block. "
				       "(inode number=%lu, file block "
				       "offset=%llu)\n",
				       inode->i_ino,
				       (unsigned long long)blkoff);
				err = 0;
			} else if (err == -EINVAL) {
				nilfs_error(inode->i_sb, __func__,
					    "broken bmap (inode=%lu)\n",
					    inode->i_ino);
				err = -EIO;
			}
			nilfs_transaction_abort(inode->i_sb);
			goto out;
		}
		nilfs_transaction_commit(inode->i_sb); /* never fails */
		/* Error handling should be detailed */
		set_buffer_new(bh_result);
		map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
						      to proper value */
	} else if (ret == -ENOENT) {
		/* not found is not error (e.g. hole); must return without
		   the mapped state flag. */
		;
	} else {
		err = ret;
	}

 out:
	return err;
}
115 | |||
116 | /** | ||
117 | * nilfs_readpage() - implement readpage() method of nilfs_aops {} | ||
118 | * address_space_operations. | ||
119 | * @file - file struct of the file to be read | ||
120 | * @page - the page to be read | ||
121 | */ | ||
122 | static int nilfs_readpage(struct file *file, struct page *page) | ||
123 | { | ||
124 | return mpage_readpage(page, nilfs_get_block); | ||
125 | } | ||
126 | |||
127 | /** | ||
128 | * nilfs_readpages() - implement readpages() method of nilfs_aops {} | ||
129 | * address_space_operations. | ||
130 | * @file - file struct of the file to be read | ||
131 | * @mapping - address_space struct used for reading multiple pages | ||
132 | * @pages - the pages to be read | ||
133 | * @nr_pages - number of pages to be read | ||
134 | */ | ||
135 | static int nilfs_readpages(struct file *file, struct address_space *mapping, | ||
136 | struct list_head *pages, unsigned nr_pages) | ||
137 | { | ||
138 | return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); | ||
139 | } | ||
140 | |||
141 | static int nilfs_writepages(struct address_space *mapping, | ||
142 | struct writeback_control *wbc) | ||
143 | { | ||
144 | struct inode *inode = mapping->host; | ||
145 | int err = 0; | ||
146 | |||
147 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
148 | err = nilfs_construct_dsync_segment(inode->i_sb, inode, | ||
149 | wbc->range_start, | ||
150 | wbc->range_end); | ||
151 | return err; | ||
152 | } | ||
153 | |||
154 | static int nilfs_writepage(struct page *page, struct writeback_control *wbc) | ||
155 | { | ||
156 | struct inode *inode = page->mapping->host; | ||
157 | int err; | ||
158 | |||
159 | redirty_page_for_writepage(wbc, page); | ||
160 | unlock_page(page); | ||
161 | |||
162 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
163 | err = nilfs_construct_segment(inode->i_sb); | ||
164 | if (unlikely(err)) | ||
165 | return err; | ||
166 | } else if (wbc->for_reclaim) | ||
167 | nilfs_flush_segment(inode->i_sb, inode->i_ino); | ||
168 | |||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | static int nilfs_set_page_dirty(struct page *page) | ||
173 | { | ||
174 | int ret = __set_page_dirty_buffers(page); | ||
175 | |||
176 | if (ret) { | ||
177 | struct inode *inode = page->mapping->host; | ||
178 | struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); | ||
179 | unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); | ||
180 | |||
181 | nilfs_set_file_dirty(sbi, inode, nr_dirty); | ||
182 | } | ||
183 | return ret; | ||
184 | } | ||
185 | |||
186 | static int nilfs_write_begin(struct file *file, struct address_space *mapping, | ||
187 | loff_t pos, unsigned len, unsigned flags, | ||
188 | struct page **pagep, void **fsdata) | ||
189 | |||
190 | { | ||
191 | struct inode *inode = mapping->host; | ||
192 | int err = nilfs_transaction_begin(inode->i_sb, NULL, 1); | ||
193 | |||
194 | if (unlikely(err)) | ||
195 | return err; | ||
196 | |||
197 | *pagep = NULL; | ||
198 | err = block_write_begin(file, mapping, pos, len, flags, pagep, | ||
199 | fsdata, nilfs_get_block); | ||
200 | if (unlikely(err)) | ||
201 | nilfs_transaction_abort(inode->i_sb); | ||
202 | return err; | ||
203 | } | ||
204 | |||
205 | static int nilfs_write_end(struct file *file, struct address_space *mapping, | ||
206 | loff_t pos, unsigned len, unsigned copied, | ||
207 | struct page *page, void *fsdata) | ||
208 | { | ||
209 | struct inode *inode = mapping->host; | ||
210 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); | ||
211 | unsigned nr_dirty; | ||
212 | int err; | ||
213 | |||
214 | nr_dirty = nilfs_page_count_clean_buffers(page, start, | ||
215 | start + copied); | ||
216 | copied = generic_write_end(file, mapping, pos, len, copied, page, | ||
217 | fsdata); | ||
218 | nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty); | ||
219 | err = nilfs_transaction_commit(inode->i_sb); | ||
220 | return err ? : copied; | ||
221 | } | ||
222 | |||
223 | static ssize_t | ||
224 | nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | ||
225 | loff_t offset, unsigned long nr_segs) | ||
226 | { | ||
227 | struct file *file = iocb->ki_filp; | ||
228 | struct inode *inode = file->f_mapping->host; | ||
229 | ssize_t size; | ||
230 | |||
231 | if (rw == WRITE) | ||
232 | return 0; | ||
233 | |||
234 | /* Needs synchronization with the cleaner */ | ||
235 | size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | ||
236 | offset, nr_segs, nilfs_get_block, NULL); | ||
237 | return size; | ||
238 | } | ||
239 | |||
/*
 * Address space operations shared by nilfs regular files, directories,
 * and symlinks.  Write-out goes through the log writer (see
 * nilfs_writepage()/nilfs_writepages()) rather than generic block
 * writeback; sync_page and releasepage are not implemented yet.
 */
struct address_space_operations nilfs_aops = {
	.writepage		= nilfs_writepage,
	.readpage		= nilfs_readpage,
	/* .sync_page		= nilfs_sync_page, */
	.writepages		= nilfs_writepages,
	.set_page_dirty		= nilfs_set_page_dirty,
	.readpages		= nilfs_readpages,
	.write_begin		= nilfs_write_begin,
	.write_end		= nilfs_write_end,
	/* .releasepage	= nilfs_releasepage, */
	.invalidatepage	= block_invalidatepage,
	.direct_IO		= nilfs_direct_IO,
};
253 | |||
/**
 * nilfs_new_inode - create a new inode in directory @dir
 * @dir: directory the new inode belongs to
 * @mode: file type and permission bits of the new inode
 *
 * Allocates an inode number and on-disk inode entry via the ifile,
 * initializes the in-core inode (ownership, timestamps, flags, bmap)
 * and marks it dirty.
 *
 * Return: the new inode on success, or an ERR_PTR() value on failure.
 * Note the error paths differ: before the ifile entry exists the inode
 * is made bad and dropped; afterwards i_nlink is cleared so iput()
 * deletes the on-disk entry.
 */
struct inode *nilfs_new_inode(struct inode *dir, int mode)
{
	struct super_block *sb = dir->i_sb;
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct inode *inode;
	struct nilfs_inode_info *ii;
	int err = -ENOMEM;
	ino_t ino;

	inode = new_inode(sb);
	if (unlikely(!inode))
		goto failed;

	/* Forbid GFP_FS allocations on this mapping to avoid recursing
	   into the filesystem under memory pressure. */
	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);

	ii = NILFS_I(inode);
	ii->i_state = 1 << NILFS_I_NEW;

	err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh);
	if (unlikely(err))
		goto failed_ifile_create_inode;
	/* reference count of i_bh inherits from nilfs_mdt_read_block() */

	atomic_inc(&sbi->s_inodes_count);

	inode->i_uid = current_fsuid();
	if (dir->i_mode & S_ISGID) {
		/* BSD-style group inheritance from the parent directory */
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current_fsgid();

	inode->i_mode = mode;
	inode->i_ino = ino;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;

	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		/* Only these types carry block mappings; initialize an
		   empty bmap (NULL raw inode). */
		err = nilfs_bmap_read(ii->i_bmap, NULL);
		if (err < 0)
			goto failed_bmap;

		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}

	/* Inherit per-file flags from the parent, dropping ones that do
	   not make sense for the new file type. */
	ii->i_flags = NILFS_I(dir)->i_flags;
	if (S_ISLNK(mode))
		ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL);
	if (!S_ISDIR(mode))
		ii->i_flags &= ~NILFS_DIRSYNC_FL;

	/* ii->i_file_acl = 0; */
	/* ii->i_dir_acl = 0; */
	ii->i_dir_start_lookup = 0;
#ifdef CONFIG_NILFS_FS_POSIX_ACL
	ii->i_acl = NULL;
	ii->i_default_acl = NULL;
#endif
	ii->i_cno = 0;
	nilfs_set_inode_flags(inode);
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
	insert_inode_hash(inode);

	err = nilfs_init_acl(inode, dir);
	if (unlikely(err))
		goto failed_acl; /* never occur. When supporting
				    nilfs_init_acl(), proper cancellation of
				    above jobs should be considered */

	mark_inode_dirty(inode);
	return inode;

 failed_acl:
 failed_bmap:
	inode->i_nlink = 0;
	iput(inode);  /* raw_inode will be deleted through
			 generic_delete_inode() */
	goto failed;

 failed_ifile_create_inode:
	make_bad_inode(inode);
	iput(inode);  /* if i_nlink == 1, generic_forget_inode() will be
			 called */
 failed:
	return ERR_PTR(err);
}
344 | |||
345 | void nilfs_free_inode(struct inode *inode) | ||
346 | { | ||
347 | struct super_block *sb = inode->i_sb; | ||
348 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
349 | |||
350 | clear_inode(inode); | ||
351 | /* XXX: check error code? Is there any thing I can do? */ | ||
352 | (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); | ||
353 | atomic_dec(&sbi->s_inodes_count); | ||
354 | } | ||
355 | |||
/*
 * nilfs_set_inode_flags - propagate nilfs flag bits to generic VFS flags
 * @inode: inode whose i_flags are recalculated from NILFS_I(inode)->i_flags
 */
void nilfs_set_inode_flags(struct inode *inode)
{
	unsigned int flags = NILFS_I(inode)->i_flags;

	inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
			    S_DIRSYNC);
	if (flags & NILFS_SYNC_FL)
		inode->i_flags |= S_SYNC;
	if (flags & NILFS_APPEND_FL)
		inode->i_flags |= S_APPEND;
	if (flags & NILFS_IMMUTABLE_FL)
		inode->i_flags |= S_IMMUTABLE;
	/* NOTE: the #ifndef below removes only the condition, so when
	   NILFS_ATIME_DISABLE is defined, S_NOATIME is set
	   unconditionally for every inode. */
#ifndef NILFS_ATIME_DISABLE
	if (flags & NILFS_NOATIME_FL)
#endif
		inode->i_flags |= S_NOATIME;
	if (flags & NILFS_DIRSYNC_FL)
		inode->i_flags |= S_DIRSYNC;
	/* Forbid GFP_FS allocations on this mapping to avoid recursing
	   into the filesystem under memory pressure. */
	mapping_set_gfp_mask(inode->i_mapping,
			     mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
}
377 | |||
/*
 * nilfs_read_inode_common - fill an in-core inode from a raw on-disk inode
 * @inode: in-core inode to initialize
 * @raw_inode: little-endian on-disk inode entry
 *
 * Returns 0 on success, -EINVAL when the entry describes a deleted
 * inode, or a negative error from nilfs_bmap_read().
 */
int nilfs_read_inode_common(struct inode *inode,
			    struct nilfs_inode *raw_inode)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
	inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
	inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
	inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
	inode->i_size = le64_to_cpu(raw_inode->i_size);
	/* atime is loaded from the on-disk mtime — presumably the disk
	   format stores no separate atime; verify against nilfs2_fs.h */
	inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
	if (inode->i_nlink == 0 && inode->i_mode == 0)
		return -EINVAL; /* this inode is deleted */

	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
#if 0
	ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
	ii->i_dir_acl = S_ISREG(inode->i_mode) ?
		0 : le32_to_cpu(raw_inode->i_dir_acl);
#endif
	ii->i_cno = 0;
	inode->i_generation = le32_to_cpu(raw_inode->i_generation);

	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	    S_ISLNK(inode->i_mode)) {
		/* Only these types carry block mappings. */
		err = nilfs_bmap_read(ii->i_bmap, raw_inode);
		if (err < 0)
			return err;
		set_bit(NILFS_I_BMAP, &ii->i_state);
		/* No lock is needed; iget() ensures it. */
	}
	return 0;
}
418 | |||
419 | static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | ||
420 | struct inode *inode) | ||
421 | { | ||
422 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
423 | struct inode *dat = nilfs_dat_inode(sbi->s_nilfs); | ||
424 | struct buffer_head *bh; | ||
425 | struct nilfs_inode *raw_inode; | ||
426 | int err; | ||
427 | |||
428 | down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
429 | err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); | ||
430 | if (unlikely(err)) | ||
431 | goto bad_inode; | ||
432 | |||
433 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); | ||
434 | |||
435 | #ifdef CONFIG_NILFS_FS_POSIX_ACL | ||
436 | ii->i_acl = NILFS_ACL_NOT_CACHED; | ||
437 | ii->i_default_acl = NILFS_ACL_NOT_CACHED; | ||
438 | #endif | ||
439 | if (nilfs_read_inode_common(inode, raw_inode)) | ||
440 | goto failed_unmap; | ||
441 | |||
442 | if (S_ISREG(inode->i_mode)) { | ||
443 | inode->i_op = &nilfs_file_inode_operations; | ||
444 | inode->i_fop = &nilfs_file_operations; | ||
445 | inode->i_mapping->a_ops = &nilfs_aops; | ||
446 | } else if (S_ISDIR(inode->i_mode)) { | ||
447 | inode->i_op = &nilfs_dir_inode_operations; | ||
448 | inode->i_fop = &nilfs_dir_operations; | ||
449 | inode->i_mapping->a_ops = &nilfs_aops; | ||
450 | } else if (S_ISLNK(inode->i_mode)) { | ||
451 | inode->i_op = &nilfs_symlink_inode_operations; | ||
452 | inode->i_mapping->a_ops = &nilfs_aops; | ||
453 | } else { | ||
454 | inode->i_op = &nilfs_special_inode_operations; | ||
455 | init_special_inode( | ||
456 | inode, inode->i_mode, | ||
457 | new_decode_dev(le64_to_cpu(raw_inode->i_device_code))); | ||
458 | } | ||
459 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); | ||
460 | brelse(bh); | ||
461 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
462 | nilfs_set_inode_flags(inode); | ||
463 | return 0; | ||
464 | |||
465 | failed_unmap: | ||
466 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); | ||
467 | brelse(bh); | ||
468 | |||
469 | bad_inode: | ||
470 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
471 | return err; | ||
472 | } | ||
473 | |||
474 | struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) | ||
475 | { | ||
476 | struct inode *inode; | ||
477 | int err; | ||
478 | |||
479 | inode = iget_locked(sb, ino); | ||
480 | if (unlikely(!inode)) | ||
481 | return ERR_PTR(-ENOMEM); | ||
482 | if (!(inode->i_state & I_NEW)) | ||
483 | return inode; | ||
484 | |||
485 | err = __nilfs_read_inode(sb, ino, inode); | ||
486 | if (unlikely(err)) { | ||
487 | iget_failed(inode); | ||
488 | return ERR_PTR(err); | ||
489 | } | ||
490 | unlock_new_inode(inode); | ||
491 | return inode; | ||
492 | } | ||
493 | |||
494 | void nilfs_write_inode_common(struct inode *inode, | ||
495 | struct nilfs_inode *raw_inode, int has_bmap) | ||
496 | { | ||
497 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
498 | |||
499 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | ||
500 | raw_inode->i_uid = cpu_to_le32(inode->i_uid); | ||
501 | raw_inode->i_gid = cpu_to_le32(inode->i_gid); | ||
502 | raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); | ||
503 | raw_inode->i_size = cpu_to_le64(inode->i_size); | ||
504 | raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
505 | raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); | ||
506 | raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
507 | raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | ||
508 | raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); | ||
509 | |||
510 | raw_inode->i_flags = cpu_to_le32(ii->i_flags); | ||
511 | raw_inode->i_generation = cpu_to_le32(inode->i_generation); | ||
512 | |||
513 | if (has_bmap) | ||
514 | nilfs_bmap_write(ii->i_bmap, raw_inode); | ||
515 | else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | ||
516 | raw_inode->i_device_code = | ||
517 | cpu_to_le64(new_encode_dev(inode->i_rdev)); | ||
518 | /* When extending inode, nilfs->ns_inode_size should be checked | ||
519 | for substitutions of appended fields */ | ||
520 | } | ||
521 | |||
522 | void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) | ||
523 | { | ||
524 | ino_t ino = inode->i_ino; | ||
525 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
526 | struct super_block *sb = inode->i_sb; | ||
527 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
528 | struct nilfs_inode *raw_inode; | ||
529 | |||
530 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); | ||
531 | |||
532 | /* The buffer is guarded with lock_buffer() by the caller */ | ||
533 | if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) | ||
534 | memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); | ||
535 | set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); | ||
536 | |||
537 | nilfs_write_inode_common(inode, raw_inode, 0); | ||
538 | /* XXX: call with has_bmap = 0 is a workaround to avoid | ||
539 | deadlock of bmap. This delays update of i_bmap to just | ||
540 | before writing */ | ||
541 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh); | ||
542 | } | ||
543 | |||
544 | #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ | ||
545 | |||
/*
 * nilfs_truncate_bmap - drop bmap entries at or beyond block @from
 * @ii: nilfs inode info whose bmap is truncated
 * @from: first file block offset to remove (0 removes everything)
 *
 * Truncates in chunks of at most NILFS_MAX_TRUNCATE_BLOCKS per pass so
 * memory pressure can be relieved between passes; an -ENOMEM from one
 * pass is retried once after nilfs_relax_pressure_in_lock().
 */
static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
				unsigned long from)
{
	unsigned long b;
	int ret;

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;		/* nothing mapped for this inode type */
 repeat:
	ret = nilfs_bmap_last_key(ii->i_bmap, &b);
	if (ret == -ENOENT)
		return;		/* bmap already empty */
	else if (ret < 0)
		goto failed;

	if (b < from)
		return;		/* everything >= from already gone */

	/* Move the cut point down by at most one chunk per iteration. */
	b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
	ret = nilfs_bmap_truncate(ii->i_bmap, b);
	nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
	if (!ret || (ret == -ENOMEM &&
		     nilfs_bmap_truncate(ii->i_bmap, b) == 0))
		goto repeat;

 failed:
	if (ret == -EINVAL)
		nilfs_error(ii->vfs_inode.i_sb, __func__,
			    "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino);
	else
		nilfs_warning(ii->vfs_inode.i_sb, __func__,
			      "failed to truncate bmap (ino=%lu, err=%d)",
			      ii->vfs_inode.i_ino, ret);
}
580 | |||
/*
 * nilfs_truncate - truncate method (inode size already set by caller)
 * @inode: inode being truncated to inode->i_size
 *
 * Zeroes the partial tail block, drops bmap entries beyond the new
 * size, and commits the whole operation as one transaction.  Errors
 * cannot be reported because truncate has no return value.
 */
void nilfs_truncate(struct inode *inode)
{
	unsigned long blkoff;
	unsigned int blocksize;
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (!test_bit(NILFS_I_BMAP, &ii->i_state))
		return;		/* no block mapping to truncate */
	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
		return;

	blocksize = sb->s_blocksize;
	/* First block offset fully beyond the new size (round up). */
	blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);

	nilfs_truncate_bmap(ii, blkoff);

	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);

	nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
	nilfs_transaction_commit(sb);
	/* May construct a logical segment and may fail in sync mode.
	   But truncate has no return value. */
}
611 | |||
/*
 * nilfs_delete_inode - delete_inode method: free an unlinked inode
 * @inode: inode whose last reference has been dropped (i_nlink == 0)
 *
 * Truncates all data blocks and releases the ifile entry inside a
 * single transaction.  Bad inodes are only cleared, since they have no
 * valid on-disk state to undo.
 */
void nilfs_delete_inode(struct inode *inode)
{
	struct nilfs_transaction_info ti;
	struct super_block *sb = inode->i_sb;
	struct nilfs_inode_info *ii = NILFS_I(inode);

	if (unlikely(is_bad_inode(inode))) {
		if (inode->i_data.nrpages)
			truncate_inode_pages(&inode->i_data, 0);
		clear_inode(inode);
		return;
	}
	nilfs_transaction_begin(sb, &ti, 0); /* never fails */

	if (inode->i_data.nrpages)
		truncate_inode_pages(&inode->i_data, 0);

	nilfs_truncate_bmap(ii, 0);	/* drop every data block */
	nilfs_free_inode(inode);
	/* nilfs_free_inode() marks inode buffer dirty */
	if (IS_SYNC(inode))
		nilfs_set_transaction_flag(NILFS_TI_SYNC);
	nilfs_transaction_commit(sb);
	/* May construct a logical segment and may fail in sync mode.
	   But delete_inode has no return value. */
}
638 | |||
639 | int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
640 | { | ||
641 | struct nilfs_transaction_info ti; | ||
642 | struct inode *inode = dentry->d_inode; | ||
643 | struct super_block *sb = inode->i_sb; | ||
644 | int err; | ||
645 | |||
646 | err = inode_change_ok(inode, iattr); | ||
647 | if (err) | ||
648 | return err; | ||
649 | |||
650 | err = nilfs_transaction_begin(sb, &ti, 0); | ||
651 | if (unlikely(err)) | ||
652 | return err; | ||
653 | err = inode_setattr(inode, iattr); | ||
654 | if (!err && (iattr->ia_valid & ATTR_MODE)) | ||
655 | err = nilfs_acl_chmod(inode); | ||
656 | if (likely(!err)) | ||
657 | err = nilfs_transaction_commit(sb); | ||
658 | else | ||
659 | nilfs_transaction_abort(sb); | ||
660 | |||
661 | return err; | ||
662 | } | ||
663 | |||
/*
 * nilfs_load_inode_block - get the buffer head of the inode's ifile block
 * @sbi: nilfs super block info
 * @inode: inode whose on-disk entry block is wanted
 * @pbh: place to store the buffer head (reference taken for the caller)
 *
 * Caches the buffer head in ii->i_bh using a drop-lock/re-check pattern:
 * the lock is released around the (possibly blocking) block read, so a
 * concurrent caller may have filled i_bh meanwhile and its copy wins.
 * Returns 0 on success or a negative error code.
 */
int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
			   struct buffer_head **pbh)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	int err;

	spin_lock(&sbi->s_inode_lock);
	/* Caller of this function MUST lock s_inode_lock */
	if (ii->i_bh == NULL) {
		spin_unlock(&sbi->s_inode_lock);
		err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino,
						  pbh);
		if (unlikely(err))
			return err;
		spin_lock(&sbi->s_inode_lock);
		if (ii->i_bh == NULL)
			ii->i_bh = *pbh;
		else {
			/* lost the race: keep the cached buffer instead */
			brelse(*pbh);
			*pbh = ii->i_bh;
		}
	} else
		*pbh = ii->i_bh;

	get_bh(*pbh);	/* extra reference handed to the caller */
	spin_unlock(&sbi->s_inode_lock);
	return 0;
}
692 | |||
693 | int nilfs_inode_dirty(struct inode *inode) | ||
694 | { | ||
695 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
696 | struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); | ||
697 | int ret = 0; | ||
698 | |||
699 | if (!list_empty(&ii->i_dirty)) { | ||
700 | spin_lock(&sbi->s_inode_lock); | ||
701 | ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || | ||
702 | test_bit(NILFS_I_BUSY, &ii->i_state); | ||
703 | spin_unlock(&sbi->s_inode_lock); | ||
704 | } | ||
705 | return ret; | ||
706 | } | ||
707 | |||
/*
 * nilfs_set_file_dirty - account dirty blocks and queue the inode
 * @sbi: nilfs super block info
 * @inode: inode that became (more) dirty
 * @nr_dirty: number of newly dirtied blocks to account
 *
 * Adds @nr_dirty to the filesystem-wide dirty block counter and, on
 * the inode's first clean-to-dirty transition, pins the inode (igrab)
 * and moves it onto the dirty-files list for the segment constructor.
 * Returns 0 on success or -EINVAL if the inode is being freed.
 */
int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
			 unsigned nr_dirty)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);

	atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);

	if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
		return 0;	/* already queued; counter update is enough */

	spin_lock(&sbi->s_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		/* Because this routine may race with nilfs_dispose_list(),
		   we have to check NILFS_I_QUEUED here, too. */
		if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
			/* This will happen when somebody is freeing
			   this inode. */
			nilfs_warning(sbi->s_super, __func__,
				      "cannot get inode (ino=%lu)\n",
				      inode->i_ino);
			spin_unlock(&sbi->s_inode_lock);
			return -EINVAL; /* NILFS_I_DIRTY may remain for
					   freeing inode */
		}
		list_del(&ii->i_dirty);
		list_add_tail(&ii->i_dirty, &sbi->s_dirty_files);
		set_bit(NILFS_I_QUEUED, &ii->i_state);
	}
	spin_unlock(&sbi->s_inode_lock);
	return 0;
}
740 | |||
741 | int nilfs_mark_inode_dirty(struct inode *inode) | ||
742 | { | ||
743 | struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); | ||
744 | struct buffer_head *ibh; | ||
745 | int err; | ||
746 | |||
747 | err = nilfs_load_inode_block(sbi, inode, &ibh); | ||
748 | if (unlikely(err)) { | ||
749 | nilfs_warning(inode->i_sb, __func__, | ||
750 | "failed to reget inode block.\n"); | ||
751 | return err; | ||
752 | } | ||
753 | lock_buffer(ibh); | ||
754 | nilfs_update_inode(inode, ibh); | ||
755 | unlock_buffer(ibh); | ||
756 | nilfs_mdt_mark_buffer_dirty(ibh); | ||
757 | nilfs_mdt_mark_dirty(sbi->s_ifile); | ||
758 | brelse(ibh); | ||
759 | return 0; | ||
760 | } | ||
761 | |||
762 | /** | ||
763 | * nilfs_dirty_inode - reflect changes on given inode to an inode block. | ||
764 | * @inode: inode of the file to be registered. | ||
765 | * | ||
766 | * nilfs_dirty_inode() loads a inode block containing the specified | ||
767 | * @inode and copies data from a nilfs_inode to a corresponding inode | ||
768 | * entry in the inode block. This operation is excluded from the segment | ||
769 | * construction. This function can be called both as a single operation | ||
770 | * and as a part of indivisible file operations. | ||
771 | */ | ||
772 | void nilfs_dirty_inode(struct inode *inode) | ||
773 | { | ||
774 | struct nilfs_transaction_info ti; | ||
775 | |||
776 | if (is_bad_inode(inode)) { | ||
777 | nilfs_warning(inode->i_sb, __func__, | ||
778 | "tried to mark bad_inode dirty. ignored.\n"); | ||
779 | dump_stack(); | ||
780 | return; | ||
781 | } | ||
782 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
783 | nilfs_mark_inode_dirty(inode); | ||
784 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
785 | } | ||
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c new file mode 100644 index 000000000000..108d281ebca5 --- /dev/null +++ b/fs/nilfs2/ioctl.c | |||
@@ -0,0 +1,654 @@ | |||
1 | /* | ||
2 | * ioctl.c - NILFS ioctl operations. | ||
3 | * | ||
4 | * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/fs.h> | ||
24 | #include <linux/wait.h> | ||
25 | #include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ | ||
26 | #include <linux/capability.h> /* capable() */ | ||
27 | #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "nilfs.h" | ||
30 | #include "segment.h" | ||
31 | #include "bmap.h" | ||
32 | #include "cpfile.h" | ||
33 | #include "sufile.h" | ||
34 | #include "dat.h" | ||
35 | |||
36 | |||
37 | static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, | ||
38 | struct nilfs_argv *argv, int dir, | ||
39 | ssize_t (*dofunc)(struct the_nilfs *, | ||
40 | __u64 *, int, | ||
41 | void *, size_t, size_t)) | ||
42 | { | ||
43 | void *buf; | ||
44 | void __user *base = (void __user *)(unsigned long)argv->v_base; | ||
45 | size_t maxmembs, total, n; | ||
46 | ssize_t nr; | ||
47 | int ret, i; | ||
48 | __u64 pos, ppos; | ||
49 | |||
50 | if (argv->v_nmembs == 0) | ||
51 | return 0; | ||
52 | |||
53 | if (argv->v_size > PAGE_SIZE) | ||
54 | return -EINVAL; | ||
55 | |||
56 | buf = (void *)__get_free_pages(GFP_NOFS, 0); | ||
57 | if (unlikely(!buf)) | ||
58 | return -ENOMEM; | ||
59 | maxmembs = PAGE_SIZE / argv->v_size; | ||
60 | |||
61 | ret = 0; | ||
62 | total = 0; | ||
63 | pos = argv->v_index; | ||
64 | for (i = 0; i < argv->v_nmembs; i += n) { | ||
65 | n = (argv->v_nmembs - i < maxmembs) ? | ||
66 | argv->v_nmembs - i : maxmembs; | ||
67 | if ((dir & _IOC_WRITE) && | ||
68 | copy_from_user(buf, base + argv->v_size * i, | ||
69 | argv->v_size * n)) { | ||
70 | ret = -EFAULT; | ||
71 | break; | ||
72 | } | ||
73 | ppos = pos; | ||
74 | nr = dofunc(nilfs, &pos, argv->v_flags, buf, argv->v_size, | ||
75 | n); | ||
76 | if (nr < 0) { | ||
77 | ret = nr; | ||
78 | break; | ||
79 | } | ||
80 | if ((dir & _IOC_READ) && | ||
81 | copy_to_user(base + argv->v_size * i, buf, | ||
82 | argv->v_size * nr)) { | ||
83 | ret = -EFAULT; | ||
84 | break; | ||
85 | } | ||
86 | total += nr; | ||
87 | if ((size_t)nr < n) | ||
88 | break; | ||
89 | if (pos == ppos) | ||
90 | pos += n; | ||
91 | } | ||
92 | argv->v_nmembs = total; | ||
93 | |||
94 | free_pages((unsigned long)buf, 0); | ||
95 | return ret; | ||
96 | } | ||
97 | |||
98 | static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, | ||
99 | unsigned int cmd, void __user *argp) | ||
100 | { | ||
101 | struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; | ||
102 | struct nilfs_transaction_info ti; | ||
103 | struct nilfs_cpmode cpmode; | ||
104 | int ret; | ||
105 | |||
106 | if (!capable(CAP_SYS_ADMIN)) | ||
107 | return -EPERM; | ||
108 | if (copy_from_user(&cpmode, argp, sizeof(cpmode))) | ||
109 | return -EFAULT; | ||
110 | |||
111 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
112 | ret = nilfs_cpfile_change_cpmode( | ||
113 | cpfile, cpmode.cm_cno, cpmode.cm_mode); | ||
114 | if (unlikely(ret < 0)) { | ||
115 | nilfs_transaction_abort(inode->i_sb); | ||
116 | return ret; | ||
117 | } | ||
118 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
119 | return ret; | ||
120 | } | ||
121 | |||
122 | static int | ||
123 | nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, | ||
124 | unsigned int cmd, void __user *argp) | ||
125 | { | ||
126 | struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; | ||
127 | struct nilfs_transaction_info ti; | ||
128 | __u64 cno; | ||
129 | int ret; | ||
130 | |||
131 | if (!capable(CAP_SYS_ADMIN)) | ||
132 | return -EPERM; | ||
133 | if (copy_from_user(&cno, argp, sizeof(cno))) | ||
134 | return -EFAULT; | ||
135 | |||
136 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
137 | ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); | ||
138 | if (unlikely(ret < 0)) { | ||
139 | nilfs_transaction_abort(inode->i_sb); | ||
140 | return ret; | ||
141 | } | ||
142 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | static ssize_t | ||
147 | nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
148 | void *buf, size_t size, size_t nmembs) | ||
149 | { | ||
150 | return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf, | ||
151 | nmembs); | ||
152 | } | ||
153 | |||
154 | static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp, | ||
155 | unsigned int cmd, void __user *argp) | ||
156 | { | ||
157 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
158 | struct nilfs_argv argv; | ||
159 | int ret; | ||
160 | |||
161 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
162 | return -EFAULT; | ||
163 | |||
164 | down_read(&nilfs->ns_segctor_sem); | ||
165 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
166 | nilfs_ioctl_do_get_cpinfo); | ||
167 | up_read(&nilfs->ns_segctor_sem); | ||
168 | if (ret < 0) | ||
169 | return ret; | ||
170 | |||
171 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
172 | ret = -EFAULT; | ||
173 | return ret; | ||
174 | } | ||
175 | |||
176 | static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, | ||
177 | unsigned int cmd, void __user *argp) | ||
178 | { | ||
179 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
180 | struct nilfs_cpstat cpstat; | ||
181 | int ret; | ||
182 | |||
183 | down_read(&nilfs->ns_segctor_sem); | ||
184 | ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat); | ||
185 | up_read(&nilfs->ns_segctor_sem); | ||
186 | if (ret < 0) | ||
187 | return ret; | ||
188 | |||
189 | if (copy_to_user(argp, &cpstat, sizeof(cpstat))) | ||
190 | ret = -EFAULT; | ||
191 | return ret; | ||
192 | } | ||
193 | |||
194 | static ssize_t | ||
195 | nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
196 | void *buf, size_t size, size_t nmembs) | ||
197 | { | ||
198 | return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs); | ||
199 | } | ||
200 | |||
201 | static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp, | ||
202 | unsigned int cmd, void __user *argp) | ||
203 | { | ||
204 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
205 | struct nilfs_argv argv; | ||
206 | int ret; | ||
207 | |||
208 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
209 | return -EFAULT; | ||
210 | |||
211 | down_read(&nilfs->ns_segctor_sem); | ||
212 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
213 | nilfs_ioctl_do_get_suinfo); | ||
214 | up_read(&nilfs->ns_segctor_sem); | ||
215 | if (ret < 0) | ||
216 | return ret; | ||
217 | |||
218 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
219 | ret = -EFAULT; | ||
220 | return ret; | ||
221 | } | ||
222 | |||
223 | static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, | ||
224 | unsigned int cmd, void __user *argp) | ||
225 | { | ||
226 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
227 | struct nilfs_sustat sustat; | ||
228 | int ret; | ||
229 | |||
230 | down_read(&nilfs->ns_segctor_sem); | ||
231 | ret = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat); | ||
232 | up_read(&nilfs->ns_segctor_sem); | ||
233 | if (ret < 0) | ||
234 | return ret; | ||
235 | |||
236 | if (copy_to_user(argp, &sustat, sizeof(sustat))) | ||
237 | ret = -EFAULT; | ||
238 | return ret; | ||
239 | } | ||
240 | |||
241 | static ssize_t | ||
242 | nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
243 | void *buf, size_t size, size_t nmembs) | ||
244 | { | ||
245 | return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs); | ||
246 | } | ||
247 | |||
248 | static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp, | ||
249 | unsigned int cmd, void __user *argp) | ||
250 | { | ||
251 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
252 | struct nilfs_argv argv; | ||
253 | int ret; | ||
254 | |||
255 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
256 | return -EFAULT; | ||
257 | |||
258 | down_read(&nilfs->ns_segctor_sem); | ||
259 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
260 | nilfs_ioctl_do_get_vinfo); | ||
261 | up_read(&nilfs->ns_segctor_sem); | ||
262 | if (ret < 0) | ||
263 | return ret; | ||
264 | |||
265 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
266 | ret = -EFAULT; | ||
267 | return ret; | ||
268 | } | ||
269 | |||
270 | static ssize_t | ||
271 | nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
272 | void *buf, size_t size, size_t nmembs) | ||
273 | { | ||
274 | struct inode *dat = nilfs_dat_inode(nilfs); | ||
275 | struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap; | ||
276 | struct nilfs_bdesc *bdescs = buf; | ||
277 | int ret, i; | ||
278 | |||
279 | for (i = 0; i < nmembs; i++) { | ||
280 | ret = nilfs_bmap_lookup_at_level(bmap, | ||
281 | bdescs[i].bd_offset, | ||
282 | bdescs[i].bd_level + 1, | ||
283 | &bdescs[i].bd_blocknr); | ||
284 | if (ret < 0) { | ||
285 | if (ret != -ENOENT) | ||
286 | return ret; | ||
287 | bdescs[i].bd_blocknr = 0; | ||
288 | } | ||
289 | } | ||
290 | return nmembs; | ||
291 | } | ||
292 | |||
293 | static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, | ||
294 | unsigned int cmd, void __user *argp) | ||
295 | { | ||
296 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
297 | struct nilfs_argv argv; | ||
298 | int ret; | ||
299 | |||
300 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
301 | return -EFAULT; | ||
302 | |||
303 | down_read(&nilfs->ns_segctor_sem); | ||
304 | ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), | ||
305 | nilfs_ioctl_do_get_bdescs); | ||
306 | up_read(&nilfs->ns_segctor_sem); | ||
307 | if (ret < 0) | ||
308 | return ret; | ||
309 | |||
310 | if (copy_to_user(argp, &argv, sizeof(argv))) | ||
311 | ret = -EFAULT; | ||
312 | return ret; | ||
313 | } | ||
314 | |||
/*
 * nilfs_ioctl_move_inode_block - stage one block of a GC-target inode
 * @inode: GC inode the block belongs to
 * @vdesc: descriptor of the virtual block to be moved
 * @buffers: list collecting the submitted buffer heads
 *
 * Submits a read for the data or node block named by @vdesc into the
 * GC page cache and queues the buffer on @buffers so the caller can
 * wait for the I/O later.  Returns 0 on success or a negative error.
 */
static int nilfs_ioctl_move_inode_block(struct inode *inode,
					struct nilfs_vdesc *vdesc,
					struct list_head *buffers)
{
	struct buffer_head *bh;
	int ret;

	/* vd_flags == 0 selects a data block, otherwise a node block */
	if (vdesc->vd_flags == 0)
		ret = nilfs_gccache_submit_read_data(
			inode, vdesc->vd_offset, vdesc->vd_blocknr,
			vdesc->vd_vblocknr, &bh);
	else
		ret = nilfs_gccache_submit_read_node(
			inode, vdesc->vd_blocknr, vdesc->vd_vblocknr, &bh);

	if (unlikely(ret < 0)) {
		/* -ENOENT here means userland passed a stale address */
		if (ret == -ENOENT)
			printk(KERN_CRIT
			       "%s: invalid virtual block address (%s): "
			       "ino=%llu, cno=%llu, offset=%llu, "
			       "blocknr=%llu, vblocknr=%llu\n",
			       __func__, vdesc->vd_flags ? "node" : "data",
			       (unsigned long long)vdesc->vd_ino,
			       (unsigned long long)vdesc->vd_cno,
			       (unsigned long long)vdesc->vd_offset,
			       (unsigned long long)vdesc->vd_blocknr,
			       (unsigned long long)vdesc->vd_vblocknr);
		return ret;
	}
	/* stash the descriptor so failures can be reported later */
	bh->b_private = vdesc;
	list_add_tail(&bh->b_assoc_buffers, buffers);
	return 0;
}
348 | |||
/*
 * nilfs_ioctl_do_move_blocks - stage all GC source blocks given in @buf
 *
 * @buf holds @nmembs nilfs_vdesc records; records with the same
 * (ino, cno) pair are expected to be adjacent, and each such group is
 * read through a single GC inode.  After all reads are submitted, the
 * buffers are waited on and marked dirty.  On any failure every queued
 * buffer is unhooked and released before the error is returned.
 */
static ssize_t
nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags,
			   void *buf, size_t size, size_t nmembs)
{
	struct inode *inode;
	struct nilfs_vdesc *vdesc;
	struct buffer_head *bh, *n;
	LIST_HEAD(buffers);
	ino_t ino;
	__u64 cno;
	int i, ret;

	for (i = 0, vdesc = buf; i < nmembs; ) {
		ino = vdesc->vd_ino;
		cno = vdesc->vd_cno;
		inode = nilfs_gc_iget(nilfs, ino, cno);
		if (unlikely(inode == NULL)) {
			ret = -ENOMEM;
			goto failed;
		}
		/* consume the whole run of records for this (ino, cno) */
		do {
			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
							   &buffers);
			if (unlikely(ret < 0))
				goto failed;
			vdesc++;
		} while (++i < nmembs &&
			 vdesc->vd_ino == ino && vdesc->vd_cno == cno);
	}

	/* wait for each read and redirty the block in the GC cache */
	list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) {
		ret = nilfs_gccache_wait_and_mark_dirty(bh);
		if (unlikely(ret < 0)) {
			/* -EEXIST: the block was modified concurrently */
			if (ret == -EEXIST) {
				vdesc = bh->b_private;
				printk(KERN_CRIT
				       "%s: conflicting %s buffer: "
				       "ino=%llu, cno=%llu, offset=%llu, "
				       "blocknr=%llu, vblocknr=%llu\n",
				       __func__,
				       vdesc->vd_flags ? "node" : "data",
				       (unsigned long long)vdesc->vd_ino,
				       (unsigned long long)vdesc->vd_cno,
				       (unsigned long long)vdesc->vd_offset,
				       (unsigned long long)vdesc->vd_blocknr,
				       (unsigned long long)vdesc->vd_vblocknr);
			}
			goto failed;
		}
		list_del_init(&bh->b_assoc_buffers);
		bh->b_private = NULL;
		brelse(bh);
	}
	return nmembs;

 failed:
	/* drop every buffer still queued, including unwaited ones */
	list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) {
		list_del_init(&bh->b_assoc_buffers);
		bh->b_private = NULL;
		brelse(bh);
	}
	return ret;
}
412 | |||
413 | static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, | ||
414 | struct nilfs_argv *argv, | ||
415 | int dir) | ||
416 | { | ||
417 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
418 | nilfs_ioctl_do_move_blocks); | ||
419 | } | ||
420 | |||
421 | static ssize_t | ||
422 | nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp, | ||
423 | int flags, void *buf, size_t size, | ||
424 | size_t nmembs) | ||
425 | { | ||
426 | struct inode *cpfile = nilfs->ns_cpfile; | ||
427 | struct nilfs_period *periods = buf; | ||
428 | int ret, i; | ||
429 | |||
430 | for (i = 0; i < nmembs; i++) { | ||
431 | ret = nilfs_cpfile_delete_checkpoints( | ||
432 | cpfile, periods[i].p_start, periods[i].p_end); | ||
433 | if (ret < 0) | ||
434 | return ret; | ||
435 | } | ||
436 | return nmembs; | ||
437 | } | ||
438 | |||
439 | static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs, | ||
440 | struct nilfs_argv *argv, | ||
441 | int dir) | ||
442 | { | ||
443 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
444 | nilfs_ioctl_do_delete_checkpoints); | ||
445 | } | ||
446 | |||
447 | static ssize_t | ||
448 | nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
449 | void *buf, size_t size, size_t nmembs) | ||
450 | { | ||
451 | int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs); | ||
452 | |||
453 | return (ret < 0) ? ret : nmembs; | ||
454 | } | ||
455 | |||
456 | static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs, | ||
457 | struct nilfs_argv *argv, | ||
458 | int dir) | ||
459 | { | ||
460 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
461 | nilfs_ioctl_do_free_vblocknrs); | ||
462 | } | ||
463 | |||
/*
 * nilfs_ioctl_do_mark_blocks_dirty - redirty live DAT blocks for relocation
 *
 * For each block descriptor, the current disk address is looked up; an
 * entry whose address differs from bd_oblocknr is dead and skipped.
 * Live level-0 blocks are dirtied through the DAT page cache, live
 * node blocks through the bmap.  Returns @nmembs or a negative error.
 */
static ssize_t
nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp,
				 int flags, void *buf, size_t size,
				 size_t nmembs)
{
	struct inode *dat = nilfs_dat_inode(nilfs);
	struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
	struct nilfs_bdesc *bdescs = buf;
	int ret, i;

	for (i = 0; i < nmembs; i++) {
		/* XXX: use macro or inline func to check liveness */
		ret = nilfs_bmap_lookup_at_level(bmap,
						 bdescs[i].bd_offset,
						 bdescs[i].bd_level + 1,
						 &bdescs[i].bd_blocknr);
		if (ret < 0) {
			if (ret != -ENOENT)
				return ret;
			/* hole: treat as block number zero */
			bdescs[i].bd_blocknr = 0;
		}
		if (bdescs[i].bd_blocknr != bdescs[i].bd_oblocknr)
			/* skip dead block */
			continue;
		if (bdescs[i].bd_level == 0) {
			/* leaf block: dirty it directly in the DAT cache */
			ret = nilfs_mdt_mark_block_dirty(dat,
							 bdescs[i].bd_offset);
			if (ret < 0) {
				WARN_ON(ret == -ENOENT);
				return ret;
			}
		} else {
			/* intermediate node: dirty it through the bmap */
			ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset,
					      bdescs[i].bd_level);
			if (ret < 0) {
				WARN_ON(ret == -ENOENT);
				return ret;
			}
		}
	}
	return nmembs;
}
506 | |||
507 | static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs, | ||
508 | struct nilfs_argv *argv, | ||
509 | int dir) | ||
510 | { | ||
511 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
512 | nilfs_ioctl_do_mark_blocks_dirty); | ||
513 | } | ||
514 | |||
515 | static ssize_t | ||
516 | nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags, | ||
517 | void *buf, size_t size, size_t nmembs) | ||
518 | { | ||
519 | struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs); | ||
520 | int ret; | ||
521 | |||
522 | if (unlikely(!sbi)) | ||
523 | return -EROFS; | ||
524 | ret = nilfs_segctor_add_segments_to_be_freed( | ||
525 | NILFS_SC(sbi), buf, nmembs); | ||
526 | nilfs_put_writer(nilfs); | ||
527 | |||
528 | return (ret < 0) ? ret : nmembs; | ||
529 | } | ||
530 | |||
531 | static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs, | ||
532 | struct nilfs_argv *argv, | ||
533 | int dir) | ||
534 | { | ||
535 | return nilfs_ioctl_wrap_copy(nilfs, argv, dir, | ||
536 | nilfs_ioctl_do_free_segments); | ||
537 | } | ||
538 | |||
/*
 * nilfs_ioctl_prepare_clean_segments - set up GC state before reclaiming
 * @nilfs: nilfs object
 * @argp: user pointer to an array of five nilfs_argv vectors:
 *	[0] blocks to move, [1] checkpoint ranges to delete,
 *	[2] virtual block numbers to free, [3] blocks to mark dirty,
 *	[4] segments to be freed
 *
 * Each step below is safe to abort (see the per-step comments).  On
 * failure, all GC inodes collected so far are dropped and the cause is
 * logged.  Returns 0 on success or a negative error code.
 */
int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
				       void __user *argp)
{
	struct nilfs_argv argv[5];
	const char *msg;
	int dir, ret;

	if (copy_from_user(argv, argp, sizeof(argv)))
		return -EFAULT;

	/* all five vectors carry data from user space into the kernel */
	dir = _IOC_WRITE;
	ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir);
	if (ret < 0) {
		msg = "cannot read source blocks";
		goto failed;
	}
	ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir);
	if (ret < 0) {
		/*
		 * can safely abort because checkpoints can be removed
		 * independently.
		 */
		msg = "cannot delete checkpoints";
		goto failed;
	}
	ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir);
	if (ret < 0) {
		/*
		 * can safely abort because DAT file is updated atomically
		 * using a copy-on-write technique.
		 */
		msg = "cannot delete virtual blocks from DAT file";
		goto failed;
	}
	ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir);
	if (ret < 0) {
		/*
		 * can safely abort because the operation is nondestructive.
		 */
		msg = "cannot mark copying blocks dirty";
		goto failed;
	}
	ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir);
	if (ret < 0) {
		/*
		 * can safely abort because this operation is atomic.
		 */
		msg = "cannot set segments to be freed";
		goto failed;
	}
	return 0;

 failed:
	/* drop every GC inode staged by the move_blocks step */
	nilfs_remove_all_gcinode(nilfs);
	printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n",
	       msg, ret);
	return ret;
}
597 | |||
598 | static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | ||
599 | unsigned int cmd, void __user *argp) | ||
600 | { | ||
601 | if (!capable(CAP_SYS_ADMIN)) | ||
602 | return -EPERM; | ||
603 | return nilfs_clean_segments(inode->i_sb, argp); | ||
604 | } | ||
605 | |||
606 | static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, | ||
607 | unsigned int cmd, void __user *argp) | ||
608 | { | ||
609 | __u64 cno; | ||
610 | int ret; | ||
611 | |||
612 | ret = nilfs_construct_segment(inode->i_sb); | ||
613 | if (ret < 0) | ||
614 | return ret; | ||
615 | |||
616 | if (argp != NULL) { | ||
617 | cno = NILFS_SB(inode->i_sb)->s_nilfs->ns_cno - 1; | ||
618 | if (copy_to_user(argp, &cno, sizeof(cno))) | ||
619 | return -EFAULT; | ||
620 | } | ||
621 | return 0; | ||
622 | } | ||
623 | |||
624 | long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
625 | { | ||
626 | struct inode *inode = filp->f_dentry->d_inode; | ||
627 | void __user *argp = (void * __user *)arg; | ||
628 | |||
629 | switch (cmd) { | ||
630 | case NILFS_IOCTL_CHANGE_CPMODE: | ||
631 | return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); | ||
632 | case NILFS_IOCTL_DELETE_CHECKPOINT: | ||
633 | return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp); | ||
634 | case NILFS_IOCTL_GET_CPINFO: | ||
635 | return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp); | ||
636 | case NILFS_IOCTL_GET_CPSTAT: | ||
637 | return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp); | ||
638 | case NILFS_IOCTL_GET_SUINFO: | ||
639 | return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp); | ||
640 | case NILFS_IOCTL_GET_SUSTAT: | ||
641 | return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); | ||
642 | case NILFS_IOCTL_GET_VINFO: | ||
643 | /* XXX: rename to ??? */ | ||
644 | return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp); | ||
645 | case NILFS_IOCTL_GET_BDESCS: | ||
646 | return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp); | ||
647 | case NILFS_IOCTL_CLEAN_SEGMENTS: | ||
648 | return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); | ||
649 | case NILFS_IOCTL_SYNC: | ||
650 | return nilfs_ioctl_sync(inode, filp, cmd, argp); | ||
651 | default: | ||
652 | return -ENOTTY; | ||
653 | } | ||
654 | } | ||
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c new file mode 100644 index 000000000000..47dd815433fd --- /dev/null +++ b/fs/nilfs2/mdt.c | |||
@@ -0,0 +1,563 @@ | |||
1 | /* | ||
2 | * mdt.c - meta data file for NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | |||
23 | #include <linux/buffer_head.h> | ||
24 | #include <linux/mpage.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include <linux/backing-dev.h> | ||
28 | #include <linux/swap.h> | ||
29 | #include "nilfs.h" | ||
30 | #include "segment.h" | ||
31 | #include "page.h" | ||
32 | #include "mdt.h" | ||
33 | |||
34 | |||
35 | #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) | ||
36 | |||
37 | #define INIT_UNUSED_INODE_FIELDS | ||
38 | |||
/*
 * nilfs_mdt_insert_new_block - register a freshly created block
 * @inode: metadata file inode
 * @block: block offset within the file
 * @bh: buffer head grabbed for the new block (page locked by caller)
 * @init_block: optional initializer run on the zeroed block contents
 *
 * Inserts the block into the file's bmap, zero-fills it, runs
 * @init_block if given, and marks buffer and inode dirty.
 * Returns 0 on success or the error from nilfs_bmap_insert().
 */
static int
nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
			   struct buffer_head *bh,
			   void (*init_block)(struct inode *,
					      struct buffer_head *, void *))
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	void *kaddr;
	int ret;

	/* Caller exclude read accesses using page lock */

	/* set_buffer_new(bh); */
	bh->b_blocknr = 0;	/* presumably assigned at write-out — TODO confirm */

	ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
	if (unlikely(ret))
		return ret;

	set_buffer_mapped(bh);

	/* zero the block, then let the caller lay out its initial content */
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
	if (init_block)
		init_block(inode, bh, kaddr);
	flush_dcache_page(bh->b_page);
	kunmap_atomic(kaddr, KM_USER0);

	set_buffer_uptodate(bh);
	nilfs_mark_buffer_dirty(bh);
	nilfs_mdt_mark_dirty(inode);
	return 0;
}
72 | |||
/*
 * nilfs_mdt_create_block - allocate and initialize a metadata file block
 * @inode: metadata file inode
 * @block: block offset to create
 * @out_bh: output buffer head (referenced) on success
 * @init_block: optional initializer for the new block contents
 *
 * When the inode carries no superblock, one is borrowed from the
 * current writer; the whole creation runs inside a transaction.
 * Returns 0 on success, -EEXIST if the block already exists, -EROFS
 * if no writer is available, or another negative error code.
 */
static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
				  struct buffer_head **out_bh,
				  void (*init_block)(struct inode *,
						     struct buffer_head *,
						     void *))
{
	struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
	struct nilfs_sb_info *writer = NULL;
	struct super_block *sb = inode->i_sb;
	struct nilfs_transaction_info ti;
	struct buffer_head *bh;
	int err;

	if (!sb) {
		/* no superblock attached: borrow the active writer's */
		writer = nilfs_get_writer(nilfs);
		if (!writer) {
			err = -EROFS;
			goto out;
		}
		sb = writer->s_super;
	}

	nilfs_transaction_begin(sb, &ti, 0);

	err = -ENOMEM;
	bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
	if (unlikely(!bh))
		goto failed_unlock;

	err = -EEXIST;
	if (buffer_uptodate(bh) || buffer_mapped(bh))
		goto failed_bh;
#if 0
	/* The uptodate flag is not protected by the page lock, but
	   the mapped flag is.  Thus, we don't have to wait the buffer. */
	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		goto failed_bh;
#endif

	bh->b_bdev = nilfs->ns_bdev;
	err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
	if (likely(!err)) {
		get_bh(bh);	/* extra ref handed to the caller */
		*out_bh = bh;
	}

 failed_bh:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	brelse(bh);

 failed_unlock:
	/* commit on success, abort otherwise; both release the transaction */
	if (likely(!err))
		err = nilfs_transaction_commit(sb);
	else
		nilfs_transaction_abort(sb);
	if (writer)
		nilfs_put_writer(nilfs);
 out:
	return err;
}
135 | |||
/*
 * nilfs_mdt_submit_block - submit a read for one metadata file block
 * @inode: metadata file inode
 * @blkoff: block offset
 * @mode: READ or READA (readahead)
 * @out_bh: output buffer head (referenced) unless an error is returned
 *
 * Returns 0 when a read was submitted, -EEXIST (internal code) when
 * the buffer is already uptodate, -EBUSY when a READA request could
 * not trylock the buffer, or another negative error code.  On 0 or
 * -EEXIST, *out_bh carries an extra reference the caller must release.
 */
static int
nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
		       int mode, struct buffer_head **out_bh)
{
	struct buffer_head *bh;
	unsigned long blknum = 0;
	int ret = -ENOMEM;

	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
	if (unlikely(!bh))
		goto failed;

	ret = -EEXIST; /* internal code */
	if (buffer_uptodate(bh))
		goto out;

	if (mode == READA) {
		/* readahead must not block on a contended buffer lock */
		if (!trylock_buffer(bh)) {
			ret = -EBUSY;
			goto failed_bh;
		}
	} else /* mode == READ */
		lock_buffer(bh);

	/* recheck: someone may have filled it while we took the lock */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		goto out;
	}
	if (!buffer_mapped(bh)) { /* unused buffer */
		ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff,
					&blknum);
		if (unlikely(ret)) {
			unlock_buffer(bh);
			goto failed_bh;
		}
		bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
		bh->b_blocknr = blknum;
		set_buffer_mapped(bh);
	}

	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);	/* reference consumed by end_buffer_read_sync */
	submit_bh(mode, bh);
	ret = 0;
 out:
	get_bh(bh);	/* reference handed to the caller via *out_bh */
	*out_bh = bh;

 failed_bh:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	brelse(bh);	/* drop the reference from nilfs_grab_buffer */
 failed:
	return ret;
}
191 | |||
/*
 * nilfs_mdt_read_block - read a metadata file block with readahead
 * @inode: metadata file inode
 * @block: block offset to read
 * @out_bh: output buffer head (referenced) on success
 *
 * Submits a read for @block, then opportunistically reads ahead up to
 * NILFS_MDT_MAX_RA_BLOCKS following blocks while the first read is in
 * flight.  Returns 0 on success, -EIO if the block did not become
 * uptodate, or the error from submitting the first read.
 */
static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
				struct buffer_head **out_bh)
{
	struct buffer_head *first_bh, *bh;
	unsigned long blkoff;
	int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
	int err;

	err = nilfs_mdt_submit_block(inode, block, READ, &first_bh);
	if (err == -EEXIST) /* internal code */
		goto out;	/* already uptodate: no wait needed */

	if (unlikely(err))
		goto failed;

	blkoff = block + 1;
	for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
		err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
		if (likely(!err || err == -EEXIST))
			brelse(bh);	/* readahead result not kept */
		else if (err != -EBUSY)
			break; /* abort readahead if bmap lookup failed */

		/* stop readahead as soon as the target read completes */
		if (!buffer_locked(first_bh))
			goto out_no_wait;
	}

	wait_on_buffer(first_bh);

 out_no_wait:
	err = -EIO;
	if (!buffer_uptodate(first_bh))
		goto failed_bh;
 out:
	*out_bh = first_bh;
	return 0;

 failed_bh:
	brelse(first_bh);
 failed:
	return err;
}
234 | |||
235 | /** | ||
236 | * nilfs_mdt_get_block - read or create a buffer on meta data file. | ||
237 | * @inode: inode of the meta data file | ||
238 | * @blkoff: block offset | ||
239 | * @create: create flag | ||
240 | * @init_block: initializer used for newly allocated block | ||
241 | * @out_bh: output of a pointer to the buffer_head | ||
242 | * | ||
243 | * nilfs_mdt_get_block() looks up the specified buffer and tries to create | ||
244 | * a new buffer if @create is not zero. On success, the returned buffer is | ||
245 | * assured to be either existing or formatted using a buffer lock on success. | ||
246 | * @out_bh is substituted only when zero is returned. | ||
247 | * | ||
248 | * Return Value: On success, it returns 0. On error, the following negative | ||
249 | * error code is returned. | ||
250 | * | ||
251 | * %-ENOMEM - Insufficient memory available. | ||
252 | * | ||
253 | * %-EIO - I/O error | ||
254 | * | ||
255 | * %-ENOENT - the specified block does not exist (hole block) | ||
256 | * | ||
257 | * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) | ||
258 | * | ||
259 | * %-EROFS - Read only filesystem (for create mode) | ||
260 | */ | ||
261 | int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, | ||
262 | void (*init_block)(struct inode *, | ||
263 | struct buffer_head *, void *), | ||
264 | struct buffer_head **out_bh) | ||
265 | { | ||
266 | int ret; | ||
267 | |||
268 | /* Should be rewritten with merging nilfs_mdt_read_block() */ | ||
269 | retry: | ||
270 | ret = nilfs_mdt_read_block(inode, blkoff, out_bh); | ||
271 | if (!create || ret != -ENOENT) | ||
272 | return ret; | ||
273 | |||
274 | ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block); | ||
275 | if (unlikely(ret == -EEXIST)) { | ||
276 | /* create = 0; */ /* limit read-create loop retries */ | ||
277 | goto retry; | ||
278 | } | ||
279 | return ret; | ||
280 | } | ||
281 | |||
282 | /** | ||
283 | * nilfs_mdt_delete_block - make a hole on the meta data file. | ||
284 | * @inode: inode of the meta data file | ||
285 | * @block: block offset | ||
286 | * | ||
287 | * Return Value: On success, zero is returned. | ||
288 | * On error, one of the following negative error code is returned. | ||
289 | * | ||
290 | * %-ENOMEM - Insufficient memory available. | ||
291 | * | ||
292 | * %-EIO - I/O error | ||
293 | * | ||
294 | * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) | ||
295 | */ | ||
296 | int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) | ||
297 | { | ||
298 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
299 | int err; | ||
300 | |||
301 | err = nilfs_bmap_delete(ii->i_bmap, block); | ||
302 | if (likely(!err)) { | ||
303 | nilfs_mdt_mark_dirty(inode); | ||
304 | nilfs_mdt_forget_block(inode, block); | ||
305 | } | ||
306 | return err; | ||
307 | } | ||
308 | |||
309 | /** | ||
310 | * nilfs_mdt_forget_block - discard dirty state and try to remove the page | ||
311 | * @inode: inode of the meta data file | ||
312 | * @block: block offset | ||
313 | * | ||
314 | * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and | ||
315 | * tries to release the page including the buffer from a page cache. | ||
316 | * | ||
317 | * Return Value: On success, 0 is returned. On error, one of the following | ||
318 | * negative error code is returned. | ||
319 | * | ||
320 | * %-EBUSY - page has an active buffer. | ||
321 | * | ||
322 | * %-ENOENT - page cache has no page addressed by the offset. | ||
323 | */ | ||
324 | int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) | ||
325 | { | ||
326 | pgoff_t index = (pgoff_t)block >> | ||
327 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
328 | struct page *page; | ||
329 | unsigned long first_block; | ||
330 | int ret = 0; | ||
331 | int still_dirty; | ||
332 | |||
333 | page = find_lock_page(inode->i_mapping, index); | ||
334 | if (!page) | ||
335 | return -ENOENT; | ||
336 | |||
337 | wait_on_page_writeback(page); | ||
338 | |||
339 | first_block = (unsigned long)index << | ||
340 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
341 | if (page_has_buffers(page)) { | ||
342 | struct buffer_head *bh; | ||
343 | |||
344 | bh = nilfs_page_get_nth_block(page, block - first_block); | ||
345 | nilfs_forget_buffer(bh); | ||
346 | } | ||
347 | still_dirty = PageDirty(page); | ||
348 | unlock_page(page); | ||
349 | page_cache_release(page); | ||
350 | |||
351 | if (still_dirty || | ||
352 | invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) | ||
353 | ret = -EBUSY; | ||
354 | return ret; | ||
355 | } | ||
356 | |||
/**
 * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty.
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
 */
int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
{
        struct buffer_head *bh;
        int ret;

        ret = nilfs_mdt_read_block(inode, block, &bh);
        if (unlikely(ret))
                return ret;

        /* dirty the buffer, then the file itself, and drop our reference */
        nilfs_mark_buffer_dirty(bh);
        nilfs_mdt_mark_dirty(inode);
        brelse(bh);
        return 0;
}
386 | |||
387 | int nilfs_mdt_fetch_dirty(struct inode *inode) | ||
388 | { | ||
389 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
390 | |||
391 | if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { | ||
392 | set_bit(NILFS_I_DIRTY, &ii->i_state); | ||
393 | return 1; | ||
394 | } | ||
395 | return test_bit(NILFS_I_DIRTY, &ii->i_state); | ||
396 | } | ||
397 | |||
/*
 * nilfs_mdt_write_page - writepage callback for meta data files.
 *
 * Meta data pages are never written back directly by the VM; the page is
 * re-dirtied and a segment construction/flush request is issued instead.
 */
static int
nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
{
        /* some meta data inodes have no valid i_sb; recover the inode
           from the mapping embedded in it (i_data) */
        struct inode *inode = container_of(page->mapping,
                                           struct inode, i_data);
        struct super_block *sb = inode->i_sb;
        struct nilfs_sb_info *writer = NULL;
        int err = 0;

        /* hand the page back to the flusher and release the lock */
        redirty_page_for_writepage(wbc, page);
        unlock_page(page);

        if (page->mapping->assoc_mapping)
                return 0; /* Do not request flush for shadow page cache */
        if (!sb) {
                /* pseudo inode without a super block (see
                   nilfs_mdt_new_common); borrow a writable sb from nilfs */
                writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs);
                if (!writer)
                        return -EROFS;
                sb = writer->s_super;
        }

        if (wbc->sync_mode == WB_SYNC_ALL)
                err = nilfs_construct_segment(sb);
        else if (wbc->for_reclaim)
                nilfs_flush_segment(sb, inode->i_ino);

        if (writer)
                nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
        return err;
}
428 | |||
429 | |||
430 | static struct address_space_operations def_mdt_aops = { | ||
431 | .writepage = nilfs_mdt_write_page, | ||
432 | }; | ||
433 | |||
434 | static struct inode_operations def_mdt_iops; | ||
435 | static struct file_operations def_mdt_fops; | ||
436 | |||
/*
 * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile,
 * ifile, or gcinodes.  This allows the B-tree code and segment constructor
 * to treat them like regular files, and this helps to simplify the
 * implementation.
 * On the other hand, some of the pseudo inodes have an irregular point:
 * They don't have valid inode->i_sb pointer because their lifetimes are
 * longer than those of the super block structs; they may continue for
 * several consecutive mounts/umounts.  This would need discussions.
 */
struct inode *
nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
                     ino_t ino, gfp_t gfp_mask)
{
        struct inode *inode = nilfs_alloc_inode(sb);

        if (!inode)
                return NULL;
        else {
                struct address_space * const mapping = &inode->i_data;
                struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS);

                if (!mi) {
                        nilfs_destroy_inode(inode);
                        return NULL;
                }
                mi->mi_nilfs = nilfs;
                init_rwsem(&mi->mi_sem);

                /* hand-initialize the inode instead of going through iget;
                   the private area carries the mdt bookkeeping */
                inode->i_sb = sb; /* sb may be NULL for some meta data files */
                inode->i_blkbits = nilfs->ns_blocksize_bits;
                inode->i_flags = 0;
                atomic_set(&inode->i_count, 1);
                inode->i_nlink = 1;
                inode->i_ino = ino;
                inode->i_mode = S_IFREG;
                inode->i_private = mi;

#ifdef INIT_UNUSED_INODE_FIELDS
                /* fields below are unused by the mdt code paths; this
                   explicit zeroing is kept for kernels that need it */
                atomic_set(&inode->i_writecount, 0);
                inode->i_size = 0;
                inode->i_blocks = 0;
                inode->i_bytes = 0;
                inode->i_generation = 0;
#ifdef CONFIG_QUOTA
                memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
#endif
                inode->i_pipe = NULL;
                inode->i_bdev = NULL;
                inode->i_cdev = NULL;
                inode->i_rdev = 0;
#ifdef CONFIG_SECURITY
                inode->i_security = NULL;
#endif
                inode->dirtied_when = 0;

                INIT_LIST_HEAD(&inode->i_list);
                INIT_LIST_HEAD(&inode->i_sb_list);
                inode->i_state = 0;
#endif

                spin_lock_init(&inode->i_lock);
                mutex_init(&inode->i_mutex);
                init_rwsem(&inode->i_alloc_sem);

                mapping->host = NULL; /* instead of inode */
                mapping->flags = 0;
                mapping_set_gfp_mask(mapping, gfp_mask);
                mapping->assoc_mapping = NULL;
                mapping->backing_dev_info = nilfs->ns_bdi;

                inode->i_mapping = mapping;
        }

        return inode;
}
513 | |||
514 | struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, | ||
515 | ino_t ino, gfp_t gfp_mask) | ||
516 | { | ||
517 | struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); | ||
518 | |||
519 | if (!inode) | ||
520 | return NULL; | ||
521 | |||
522 | inode->i_op = &def_mdt_iops; | ||
523 | inode->i_fop = &def_mdt_fops; | ||
524 | inode->i_mapping->a_ops = &def_mdt_aops; | ||
525 | return inode; | ||
526 | } | ||
527 | |||
528 | void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, | ||
529 | unsigned header_size) | ||
530 | { | ||
531 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
532 | |||
533 | mi->mi_entry_size = entry_size; | ||
534 | mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; | ||
535 | mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); | ||
536 | } | ||
537 | |||
538 | void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) | ||
539 | { | ||
540 | shadow->i_mapping->assoc_mapping = orig->i_mapping; | ||
541 | NILFS_I(shadow)->i_btnode_cache.assoc_mapping = | ||
542 | &NILFS_I(orig)->i_btnode_cache; | ||
543 | } | ||
544 | |||
/* Drop all cached state of a meta data file: page cache, block mapping,
 * and the B-tree node cache. */
void nilfs_mdt_clear(struct inode *inode)
{
        struct nilfs_inode_info *ii = NILFS_I(inode);

        /* throw away every cached page of the data mapping */
        invalidate_mapping_pages(inode->i_mapping, 0, -1);
        truncate_inode_pages(inode->i_mapping, 0);

        nilfs_bmap_clear(ii->i_bmap);
        nilfs_btnode_cache_clear(&ii->i_btnode_cache);
}
555 | |||
556 | void nilfs_mdt_destroy(struct inode *inode) | ||
557 | { | ||
558 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | ||
559 | |||
560 | kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ | ||
561 | kfree(mdi); | ||
562 | nilfs_destroy_inode(inode); | ||
563 | } | ||
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h new file mode 100644 index 000000000000..df683e0bca6a --- /dev/null +++ b/fs/nilfs2/mdt.h | |||
@@ -0,0 +1,125 @@ | |||
1 | /* | ||
2 | * mdt.h - NILFS meta data file prototype and definitions | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_MDT_H | ||
24 | #define _NILFS_MDT_H | ||
25 | |||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/blockgroup_lock.h> | ||
28 | #include "nilfs.h" | ||
29 | #include "page.h" | ||
30 | |||
/**
 * struct nilfs_mdt_info - on-memory private data of meta data files
 * @mi_nilfs: back pointer to the_nilfs struct
 * @mi_sem: reader/writer semaphore for meta data operations
 * @mi_bgl: per-blockgroup locking (optional; may be NULL)
 * @mi_entry_size: size of an entry
 * @mi_first_entry_offset: offset to the first entry
 * @mi_entries_per_block: number of entries in a block
 * @mi_blocks_per_group: number of blocks in a group
 * @mi_blocks_per_desc_block: number of blocks per descriptor block
 *
 * One instance is attached to inode->i_private of each meta data file
 * and retrieved with NILFS_MDT().
 */
struct nilfs_mdt_info {
        struct the_nilfs *mi_nilfs;
        struct rw_semaphore mi_sem;
        struct blockgroup_lock *mi_bgl;
        unsigned mi_entry_size;
        unsigned mi_first_entry_offset;
        unsigned long mi_entries_per_block;
        unsigned long mi_blocks_per_group;
        unsigned long mi_blocks_per_desc_block;
};
52 | |||
/* Fetch the mdt private area stashed in inode->i_private. */
static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
{
        return inode->i_private;
}
57 | |||
58 | static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) | ||
59 | { | ||
60 | struct super_block *sb = inode->i_sb; | ||
61 | |||
62 | return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs; | ||
63 | } | ||
64 | |||
65 | /* Default GFP flags using highmem */ | ||
66 | #define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) | ||
67 | |||
68 | int nilfs_mdt_get_block(struct inode *, unsigned long, int, | ||
69 | void (*init_block)(struct inode *, | ||
70 | struct buffer_head *, void *), | ||
71 | struct buffer_head **); | ||
72 | int nilfs_mdt_delete_block(struct inode *, unsigned long); | ||
73 | int nilfs_mdt_forget_block(struct inode *, unsigned long); | ||
74 | int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); | ||
75 | int nilfs_mdt_fetch_dirty(struct inode *); | ||
76 | |||
77 | struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, | ||
78 | gfp_t); | ||
79 | struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, | ||
80 | ino_t, gfp_t); | ||
81 | void nilfs_mdt_destroy(struct inode *); | ||
82 | void nilfs_mdt_clear(struct inode *); | ||
83 | void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); | ||
84 | void nilfs_mdt_set_shadow(struct inode *, struct inode *); | ||
85 | |||
86 | |||
87 | #define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) | ||
88 | |||
/* Flag the meta data file dirty.  The test_bit check skips the atomic
 * set_bit when the flag is already raised. */
static inline void nilfs_mdt_mark_dirty(struct inode *inode)
{
        if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
                set_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
}
94 | |||
/* Clear the dirty flag of a meta data file. */
static inline void nilfs_mdt_clear_dirty(struct inode *inode)
{
        clear_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
}
99 | |||
/* Current checkpoint number of the nilfs instance owning this mdt inode. */
static inline __u64 nilfs_mdt_cno(struct inode *inode)
{
        return NILFS_MDT(inode)->mi_nilfs->ns_cno;
}
104 | |||
105 | #define nilfs_mdt_bgl_lock(inode, bg) \ | ||
106 | (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) | ||
107 | |||
108 | |||
/* Read an on-disk inode image located at byte offset @n inside buffer @bh
 * into the in-memory @inode. */
static inline int
nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh,
                            unsigned n)
{
        return nilfs_read_inode_common(
                inode, (struct nilfs_inode *)(bh->b_data + n));
}
116 | |||
/* Write the in-memory @inode into the on-disk inode image at byte offset
 * @n inside buffer @bh. */
static inline void
nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh,
                             unsigned n)
{
        nilfs_write_inode_common(
                inode, (struct nilfs_inode *)(bh->b_data + n), 1);
}
124 | |||
125 | #endif /* _NILFS_MDT_H */ | ||
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c new file mode 100644 index 000000000000..df70dadb336f --- /dev/null +++ b/fs/nilfs2/namei.c | |||
@@ -0,0 +1,474 @@ | |||
1 | /* | ||
2 | * namei.c - NILFS pathname lookup operations. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | /* | ||
24 | * linux/fs/ext2/namei.c | ||
25 | * | ||
26 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
27 | * Remy Card (card@masi.ibp.fr) | ||
28 | * Laboratoire MASI - Institut Blaise Pascal | ||
29 | * Universite Pierre et Marie Curie (Paris VI) | ||
30 | * | ||
31 | * from | ||
32 | * | ||
33 | * linux/fs/minix/namei.c | ||
34 | * | ||
35 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
36 | * | ||
37 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
38 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
39 | */ | ||
40 | |||
41 | #include <linux/pagemap.h> | ||
42 | #include "nilfs.h" | ||
43 | |||
44 | |||
/* Attach @inode to @dentry via a directory entry.  On failure, the caller's
 * link count and inode reference are rolled back here. */
static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
        int err = nilfs_add_link(dentry, inode);

        if (err) {
                inode_dec_link_count(inode);
                iput(inode);
                return err;
        }
        d_instantiate(dentry, inode);
        return 0;
}
56 | |||
57 | /* | ||
58 | * Methods themselves. | ||
59 | */ | ||
60 | |||
61 | static struct dentry * | ||
62 | nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
63 | { | ||
64 | struct inode *inode; | ||
65 | ino_t ino; | ||
66 | |||
67 | if (dentry->d_name.len > NILFS_NAME_LEN) | ||
68 | return ERR_PTR(-ENAMETOOLONG); | ||
69 | |||
70 | ino = nilfs_inode_by_name(dir, dentry); | ||
71 | inode = NULL; | ||
72 | if (ino) { | ||
73 | inode = nilfs_iget(dir->i_sb, ino); | ||
74 | if (IS_ERR(inode)) | ||
75 | return ERR_CAST(inode); | ||
76 | } | ||
77 | return d_splice_alias(inode, dentry); | ||
78 | } | ||
79 | |||
80 | struct dentry *nilfs_get_parent(struct dentry *child) | ||
81 | { | ||
82 | unsigned long ino; | ||
83 | struct inode *inode; | ||
84 | struct dentry dotdot; | ||
85 | |||
86 | dotdot.d_name.name = ".."; | ||
87 | dotdot.d_name.len = 2; | ||
88 | |||
89 | ino = nilfs_inode_by_name(child->d_inode, &dotdot); | ||
90 | if (!ino) | ||
91 | return ERR_PTR(-ENOENT); | ||
92 | |||
93 | inode = nilfs_iget(child->d_inode->i_sb, ino); | ||
94 | if (IS_ERR(inode)) | ||
95 | return ERR_CAST(inode); | ||
96 | return d_obtain_alias(inode); | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * By the time this is called, we already have created | ||
101 | * the directory cache entry for the new file, but it | ||
102 | * is so far negative - it has no inode. | ||
103 | * | ||
104 | * If the create succeeds, we fill in the inode information | ||
105 | * with d_instantiate(). | ||
106 | */ | ||
107 | static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, | ||
108 | struct nameidata *nd) | ||
109 | { | ||
110 | struct inode *inode; | ||
111 | struct nilfs_transaction_info ti; | ||
112 | int err; | ||
113 | |||
114 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
115 | if (err) | ||
116 | return err; | ||
117 | inode = nilfs_new_inode(dir, mode); | ||
118 | err = PTR_ERR(inode); | ||
119 | if (!IS_ERR(inode)) { | ||
120 | inode->i_op = &nilfs_file_inode_operations; | ||
121 | inode->i_fop = &nilfs_file_operations; | ||
122 | inode->i_mapping->a_ops = &nilfs_aops; | ||
123 | mark_inode_dirty(inode); | ||
124 | err = nilfs_add_nondir(dentry, inode); | ||
125 | } | ||
126 | if (!err) | ||
127 | err = nilfs_transaction_commit(dir->i_sb); | ||
128 | else | ||
129 | nilfs_transaction_abort(dir->i_sb); | ||
130 | |||
131 | return err; | ||
132 | } | ||
133 | |||
134 | static int | ||
135 | nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) | ||
136 | { | ||
137 | struct inode *inode; | ||
138 | struct nilfs_transaction_info ti; | ||
139 | int err; | ||
140 | |||
141 | if (!new_valid_dev(rdev)) | ||
142 | return -EINVAL; | ||
143 | |||
144 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
145 | if (err) | ||
146 | return err; | ||
147 | inode = nilfs_new_inode(dir, mode); | ||
148 | err = PTR_ERR(inode); | ||
149 | if (!IS_ERR(inode)) { | ||
150 | init_special_inode(inode, inode->i_mode, rdev); | ||
151 | mark_inode_dirty(inode); | ||
152 | err = nilfs_add_nondir(dentry, inode); | ||
153 | } | ||
154 | if (!err) | ||
155 | err = nilfs_transaction_commit(dir->i_sb); | ||
156 | else | ||
157 | nilfs_transaction_abort(dir->i_sb); | ||
158 | |||
159 | return err; | ||
160 | } | ||
161 | |||
/* Create a symlink in @dir whose target text is @symname.  The target is
 * stored in the page cache ("slow symlink"), so it must fit in one block. */
static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
                         const char *symname)
{
        struct nilfs_transaction_info ti;
        struct super_block *sb = dir->i_sb;
        unsigned l = strlen(symname)+1; /* length including trailing NUL */
        struct inode *inode;
        int err;

        if (l > sb->s_blocksize)
                return -ENAMETOOLONG;

        err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
        if (err)
                return err;

        inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out;

        /* slow symlink */
        inode->i_op = &nilfs_symlink_inode_operations;
        inode->i_mapping->a_ops = &nilfs_aops;
        err = page_symlink(inode, symname, l);
        if (err)
                goto out_fail;

        /* mark_inode_dirty(inode); */
        /* nilfs_new_inode() and page_symlink() do this */

        err = nilfs_add_nondir(dentry, inode);
out:
        /* commit on success, abort on any failure (also reached from
           out_fail with err != 0) */
        if (!err)
                err = nilfs_transaction_commit(dir->i_sb);
        else
                nilfs_transaction_abort(dir->i_sb);

        return err;

out_fail:
        /* undo nilfs_new_inode(): drop the link and the final reference */
        inode_dec_link_count(inode);
        iput(inode);
        goto out;
}
207 | |||
208 | static int nilfs_link(struct dentry *old_dentry, struct inode *dir, | ||
209 | struct dentry *dentry) | ||
210 | { | ||
211 | struct inode *inode = old_dentry->d_inode; | ||
212 | struct nilfs_transaction_info ti; | ||
213 | int err; | ||
214 | |||
215 | if (inode->i_nlink >= NILFS_LINK_MAX) | ||
216 | return -EMLINK; | ||
217 | |||
218 | err = nilfs_transaction_begin(dir->i_sb, &ti, 1); | ||
219 | if (err) | ||
220 | return err; | ||
221 | |||
222 | inode->i_ctime = CURRENT_TIME; | ||
223 | inode_inc_link_count(inode); | ||
224 | atomic_inc(&inode->i_count); | ||
225 | |||
226 | err = nilfs_add_nondir(dentry, inode); | ||
227 | if (!err) | ||
228 | err = nilfs_transaction_commit(dir->i_sb); | ||
229 | else | ||
230 | nilfs_transaction_abort(dir->i_sb); | ||
231 | |||
232 | return err; | ||
233 | } | ||
234 | |||
/* Create directory @dentry in @dir.  Link-count bookkeeping: the parent
 * gains a link for the child's "..", the child holds one base link plus
 * one for its own "." entry. */
static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
        struct inode *inode;
        struct nilfs_transaction_info ti;
        int err;

        if (dir->i_nlink >= NILFS_LINK_MAX)
                return -EMLINK;

        err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
        if (err)
                return err;

        /* account for the new directory's ".." entry up front */
        inode_inc_link_count(dir);

        inode = nilfs_new_inode(dir, S_IFDIR | mode);
        err = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_dir;

        inode->i_op = &nilfs_dir_inode_operations;
        inode->i_fop = &nilfs_dir_operations;
        inode->i_mapping->a_ops = &nilfs_aops;

        /* second link on the new inode: its own "." entry */
        inode_inc_link_count(inode);

        err = nilfs_make_empty(inode, dir);
        if (err)
                goto out_fail;

        err = nilfs_add_link(dentry, inode);
        if (err)
                goto out_fail;

        d_instantiate(dentry, inode);
out:
        if (!err)
                err = nilfs_transaction_commit(dir->i_sb);
        else
                nilfs_transaction_abort(dir->i_sb);

        return err;

out_fail:
        /* drop both the "." link and the base link, then release */
        inode_dec_link_count(inode);
        inode_dec_link_count(inode);
        iput(inode);
out_dir:
        /* undo the ".." accounting taken on the parent above */
        inode_dec_link_count(dir);
        goto out;
}
286 | |||
/* Remove the directory entry of @dentry from @dir and drop one link on
 * the target inode. */
static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
{
        struct inode *inode;
        struct nilfs_dir_entry *de;
        struct page *page;
        struct nilfs_transaction_info ti;
        int err;

        err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
        if (err)
                return err;

        err = -ENOENT;
        de = nilfs_find_entry(dir, dentry, &page);
        if (!de)
                goto out;

        inode = dentry->d_inode;
        err = -EIO;
        /* the on-disk entry must match the inode bound to the dentry */
        if (le64_to_cpu(de->inode) != inode->i_ino)
                goto out;

        if (!inode->i_nlink) {
                /* corrupted link count: repair to 1 so the decrement
                   below lands on zero instead of wrapping */
                nilfs_warning(inode->i_sb, __func__,
                              "deleting nonexistent file (%lu), %d\n",
                              inode->i_ino, inode->i_nlink);
                inode->i_nlink = 1;
        }
        err = nilfs_delete_entry(de, page);
        if (err)
                goto out;

        inode->i_ctime = dir->i_ctime;
        inode_dec_link_count(inode);
        err = 0;
out:
        if (!err)
                err = nilfs_transaction_commit(dir->i_sb);
        else
                nilfs_transaction_abort(dir->i_sb);

        return err;
}
330 | |||
331 | static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) | ||
332 | { | ||
333 | struct inode *inode = dentry->d_inode; | ||
334 | struct nilfs_transaction_info ti; | ||
335 | int err; | ||
336 | |||
337 | err = nilfs_transaction_begin(dir->i_sb, &ti, 0); | ||
338 | if (err) | ||
339 | return err; | ||
340 | |||
341 | err = -ENOTEMPTY; | ||
342 | if (nilfs_empty_dir(inode)) { | ||
343 | err = nilfs_unlink(dir, dentry); | ||
344 | if (!err) { | ||
345 | inode->i_size = 0; | ||
346 | inode_dec_link_count(inode); | ||
347 | inode_dec_link_count(dir); | ||
348 | } | ||
349 | } | ||
350 | if (!err) | ||
351 | err = nilfs_transaction_commit(dir->i_sb); | ||
352 | else | ||
353 | nilfs_transaction_abort(dir->i_sb); | ||
354 | |||
355 | return err; | ||
356 | } | ||
357 | |||
/* Rename @old_dentry in @old_dir to @new_dentry in @new_dir, either
 * overwriting an existing target or adding a fresh entry.  Directory moves
 * additionally rewrite the child's ".." entry and adjust parent links. */
static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry)
{
        struct inode *old_inode = old_dentry->d_inode;
        struct inode *new_inode = new_dentry->d_inode;
        struct page *dir_page = NULL;
        struct nilfs_dir_entry *dir_de = NULL;
        struct page *old_page;
        struct nilfs_dir_entry *old_de;
        struct nilfs_transaction_info ti;
        int err;

        err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
        if (unlikely(err))
                return err;

        err = -ENOENT;
        old_de = nilfs_find_entry(old_dir, old_dentry, &old_page);
        if (!old_de)
                goto out;

        if (S_ISDIR(old_inode->i_mode)) {
                /* a directory being moved must have a readable ".." entry */
                err = -EIO;
                dir_de = nilfs_dotdot(old_inode, &dir_page);
                if (!dir_de)
                        goto out_old;
        }

        if (new_inode) {
                struct page *new_page;
                struct nilfs_dir_entry *new_de;

                /* a directory target may only be replaced when empty */
                err = -ENOTEMPTY;
                if (dir_de && !nilfs_empty_dir(new_inode))
                        goto out_dir;

                err = -ENOENT;
                new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
                if (!new_de)
                        goto out_dir;
                inode_inc_link_count(old_inode);
                /* repoint the existing entry at old_inode */
                nilfs_set_link(new_dir, new_de, new_page, old_inode);
                new_inode->i_ctime = CURRENT_TIME;
                /* when replacing a directory, its ".." link goes too */
                if (dir_de)
                        drop_nlink(new_inode);
                inode_dec_link_count(new_inode);
        } else {
                if (dir_de) {
                        err = -EMLINK;
                        if (new_dir->i_nlink >= NILFS_LINK_MAX)
                                goto out_dir;
                }
                inode_inc_link_count(old_inode);
                err = nilfs_add_link(new_dentry, old_inode);
                if (err) {
                        inode_dec_link_count(old_inode);
                        goto out_dir;
                }
                /* new parent gains a ".." link from the moved directory */
                if (dir_de)
                        inode_inc_link_count(new_dir);
        }

        /*
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         * inode_dec_link_count() will mark the inode dirty.
         */
        old_inode->i_ctime = CURRENT_TIME;

        nilfs_delete_entry(old_de, old_page);
        inode_dec_link_count(old_inode);

        if (dir_de) {
                /* rewrite the child's ".." to point at the new parent */
                nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
                inode_dec_link_count(old_dir);
        }

        err = nilfs_transaction_commit(old_dir->i_sb);
        return err;

out_dir:
        if (dir_de) {
                kunmap(dir_page);
                page_cache_release(dir_page);
        }
out_old:
        kunmap(old_page);
        page_cache_release(old_page);
out:
        nilfs_transaction_abort(old_dir->i_sb);
        return err;
}
450 | |||
/* Inode operations for directories. */
struct inode_operations nilfs_dir_inode_operations = {
        .create         = nilfs_create,
        .lookup         = nilfs_lookup,
        .link           = nilfs_link,
        .unlink         = nilfs_unlink,
        .symlink        = nilfs_symlink,
        .mkdir          = nilfs_mkdir,
        .rmdir          = nilfs_rmdir,
        .mknod          = nilfs_mknod,
        .rename         = nilfs_rename,
        .setattr        = nilfs_setattr,
        .permission     = nilfs_permission,
};

/* Inode operations for special files (set up via init_special_inode()). */
struct inode_operations nilfs_special_inode_operations = {
        .setattr        = nilfs_setattr,
        .permission     = nilfs_permission,
};

/* Symlink targets live in the page cache (see nilfs_symlink()), so the
 * generic page-based helpers implement readlink/follow/put. */
struct inode_operations nilfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
};
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h new file mode 100644 index 000000000000..7558c977db02 --- /dev/null +++ b/fs/nilfs2/nilfs.h | |||
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * nilfs.h - NILFS local header file. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net> | ||
21 | * Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_H | ||
25 | #define _NILFS_H | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/blkdev.h> | ||
31 | #include <linux/nilfs2_fs.h> | ||
32 | #include "the_nilfs.h" | ||
33 | #include "sb.h" | ||
34 | #include "bmap.h" | ||
35 | #include "bmap_union.h" | ||
36 | |||
37 | /* | ||
38 | * NILFS filesystem version | ||
39 | */ | ||
40 | #define NILFS_VERSION "2.0.5" | ||
41 | |||
/*
 * nilfs inode data in memory.  Wraps the generic VFS inode; obtain it
 * from a struct inode with NILFS_I().
 */
struct nilfs_inode_info {
	__u32 i_flags;
	unsigned long  i_state;		/* Dynamic state flags (NILFS_I_*) */
	struct nilfs_bmap *i_bmap;	/* block mapping; storage lives in
					   i_bmap_union below */
	union nilfs_bmap_union i_bmap_union;
	__u64 i_xattr;	/* sector_t ??? */
	__u32 i_dir_start_lookup;
	__u64 i_cno;		/* check point number for GC inode */
	struct address_space i_btnode_cache;	/* page cache for B-tree node
						   blocks of this inode */
	struct list_head i_dirty;	/* List for connecting dirty files */

#ifdef CONFIG_NILFS_XATTR
	/*
	 * Extended attributes can be read independently of the main file
	 * data.  Taking i_sem even when reading would cause contention
	 * between readers of EAs and writers of regular file data, so
	 * instead we synchronize on xattr_sem when reading or changing
	 * EAs.
	 */
	struct rw_semaphore xattr_sem;
#endif
#ifdef CONFIG_NILFS_POSIX_ACL
	struct posix_acl *i_acl;
	struct posix_acl *i_default_acl;
#endif
	struct buffer_head *i_bh;	/* i_bh contains a new or dirty
					   disk inode */
	struct inode vfs_inode;		/* embedded VFS inode */
};
74 | |||
/* NILFS_I - convert a VFS inode into its containing nilfs_inode_info. */
static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode)
{
	return container_of(inode, struct nilfs_inode_info, vfs_inode);
}
79 | |||
/*
 * NILFS_BMAP_I - get the nilfs_inode_info embedding the given bmap.
 *
 * The bmap object is a member of i_bmap_union; since all union members
 * share the same address, casting the bmap pointer to the union type
 * before container_of() is valid.
 */
static inline struct nilfs_inode_info *
NILFS_BMAP_I(const struct nilfs_bmap *bmap)
{
	return container_of((union nilfs_bmap_union *)bmap,
			    struct nilfs_inode_info,
			    i_bmap_union);
}
87 | |||
/* NILFS_BTNC_I - get the inode owning the given B-tree node page cache. */
static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
{
	struct nilfs_inode_info *ii =
		container_of(btnc, struct nilfs_inode_info, i_btnode_cache);
	return &ii->vfs_inode;
}
94 | |||
95 | static inline struct inode *NILFS_AS_I(struct address_space *mapping) | ||
96 | { | ||
97 | return (mapping->host) ? : | ||
98 | container_of(mapping, struct inode, i_data); | ||
99 | } | ||
100 | |||
/*
 * Dynamic state flags of NILFS on-memory inode (i_state).
 * Each enumerator is a bit position within nilfs_inode_info.i_state.
 */
enum {
	NILFS_I_NEW = 0,		/* Inode is newly created */
	NILFS_I_DIRTY,			/* The file is dirty */
	NILFS_I_QUEUED,			/* inode is in dirty_files list */
	NILFS_I_BUSY,			/* inode is grabbed by a segment
					   constructor */
	NILFS_I_COLLECTED,		/* All dirty blocks are collected */
	NILFS_I_UPDATED,		/* The file has been written back */
	NILFS_I_INODE_DIRTY,		/* write_inode is requested */
	NILFS_I_BMAP,			/* has bmap and btnode_cache */
	NILFS_I_GCINODE,		/* inode for GC, on memory only */
	NILFS_I_GCDAT,			/* shadow DAT, on memory only */
};
117 | |||
118 | /* | ||
119 | * Macros to check inode numbers | ||
120 | */ | ||
121 | #define NILFS_MDT_INO_BITS \ | ||
122 | ((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \ | ||
123 | 1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \ | ||
124 | 1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO)) | ||
125 | |||
126 | #define NILFS_SYS_INO_BITS \ | ||
127 | ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) | ||
128 | |||
129 | #define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) | ||
130 | |||
131 | #define NILFS_MDT_INODE(sb, ino) \ | ||
132 | ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) | ||
133 | #define NILFS_VALID_INODE(sb, ino) \ | ||
134 | ((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino)))) | ||
135 | |||
/**
 * struct nilfs_transaction_info: context information for synchronization
 * @ti_magic: Magic number (NILFS_TI_MAGIC); distinguishes a nilfs context
 *	from journal_info pointers installed by other filesystems
 * @ti_save: Backup of journal_info field of task_struct
 * @ti_flags: Flags (NILFS_TI_*)
 * @ti_count: Nest level
 * @ti_garbage: List of inode to be put when releasing semaphore
 */
struct nilfs_transaction_info {
	u32			ti_magic;
	void		       *ti_save;
				/* This should never be used.  If it is,
				   one of the other filesystems has a bug. */
	unsigned short		ti_flags;
	unsigned short		ti_count;
	struct list_head	ti_garbage;
};
153 | |||
154 | /* ti_magic */ | ||
155 | #define NILFS_TI_MAGIC 0xd9e392fb | ||
156 | |||
157 | /* ti_flags */ | ||
158 | #define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */ | ||
159 | #define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the | ||
160 | end of transaction. */ | ||
161 | #define NILFS_TI_GC 0x0004 /* GC context */ | ||
162 | #define NILFS_TI_COMMIT 0x0008 /* Change happened or not */ | ||
163 | #define NILFS_TI_WRITER 0x0010 /* Constructor context */ | ||
164 | |||
165 | |||
166 | int nilfs_transaction_begin(struct super_block *, | ||
167 | struct nilfs_transaction_info *, int); | ||
168 | int nilfs_transaction_commit(struct super_block *); | ||
169 | void nilfs_transaction_abort(struct super_block *); | ||
170 | |||
/*
 * nilfs_set_transaction_flag - set a NILFS_TI_* flag on the current
 * task's transaction context.
 *
 * Note: current->journal_info is dereferenced without a NULL or magic
 * check, so this must only be called while a transaction is active.
 */
static inline void nilfs_set_transaction_flag(unsigned int flag)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	ti->ti_flags |= flag;
}
177 | |||
178 | static inline int nilfs_test_transaction_flag(unsigned int flag) | ||
179 | { | ||
180 | struct nilfs_transaction_info *ti = current->journal_info; | ||
181 | |||
182 | if (ti == NULL || ti->ti_magic != NILFS_TI_MAGIC) | ||
183 | return 0; | ||
184 | return !!(ti->ti_flags & flag); | ||
185 | } | ||
186 | |||
/* Nonzero if the current task runs inside a GC transaction context. */
static inline int nilfs_doing_gc(void)
{
	return nilfs_test_transaction_flag(NILFS_TI_GC);
}

/* Nonzero if the current task is the segment constructor (writer). */
static inline int nilfs_doing_construction(void)
{
	return nilfs_test_transaction_flag(NILFS_TI_WRITER);
}
196 | |||
197 | static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) | ||
198 | { | ||
199 | return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * function prototype | ||
204 | */ | ||
205 | #ifdef CONFIG_NILFS_POSIX_ACL | ||
206 | #error "NILFS: not yet supported POSIX ACL" | ||
207 | extern int nilfs_permission(struct inode *, int, struct nameidata *); | ||
208 | extern int nilfs_acl_chmod(struct inode *); | ||
209 | extern int nilfs_init_acl(struct inode *, struct inode *); | ||
210 | #else | ||
211 | #define nilfs_permission NULL | ||
212 | |||
213 | static inline int nilfs_acl_chmod(struct inode *inode) | ||
214 | { | ||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | static inline int nilfs_init_acl(struct inode *inode, struct inode *dir) | ||
219 | { | ||
220 | inode->i_mode &= ~current_umask(); | ||
221 | return 0; | ||
222 | } | ||
223 | #endif | ||
224 | |||
225 | #define NILFS_ATIME_DISABLE | ||
226 | |||
227 | /* dir.c */ | ||
228 | extern int nilfs_add_link(struct dentry *, struct inode *); | ||
229 | extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *); | ||
230 | extern int nilfs_make_empty(struct inode *, struct inode *); | ||
231 | extern struct nilfs_dir_entry * | ||
232 | nilfs_find_entry(struct inode *, struct dentry *, struct page **); | ||
233 | extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); | ||
234 | extern int nilfs_empty_dir(struct inode *); | ||
235 | extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); | ||
236 | extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, | ||
237 | struct page *, struct inode *); | ||
238 | |||
239 | /* file.c */ | ||
240 | extern int nilfs_sync_file(struct file *, struct dentry *, int); | ||
241 | |||
242 | /* ioctl.c */ | ||
243 | long nilfs_ioctl(struct file *, unsigned int, unsigned long); | ||
244 | int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *); | ||
245 | |||
246 | /* inode.c */ | ||
247 | extern struct inode *nilfs_new_inode(struct inode *, int); | ||
248 | extern void nilfs_free_inode(struct inode *); | ||
249 | extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | ||
250 | extern void nilfs_set_inode_flags(struct inode *); | ||
251 | extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); | ||
252 | extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); | ||
253 | extern struct inode *nilfs_iget(struct super_block *, unsigned long); | ||
254 | extern void nilfs_update_inode(struct inode *, struct buffer_head *); | ||
255 | extern void nilfs_truncate(struct inode *); | ||
256 | extern void nilfs_delete_inode(struct inode *); | ||
257 | extern int nilfs_setattr(struct dentry *, struct iattr *); | ||
258 | extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, | ||
259 | struct buffer_head **); | ||
260 | extern int nilfs_inode_dirty(struct inode *); | ||
261 | extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, | ||
262 | unsigned); | ||
263 | extern int nilfs_mark_inode_dirty(struct inode *); | ||
264 | extern void nilfs_dirty_inode(struct inode *); | ||
265 | |||
266 | /* namei.c */ | ||
267 | extern struct dentry *nilfs_get_parent(struct dentry *); | ||
268 | |||
269 | /* super.c */ | ||
270 | extern struct inode *nilfs_alloc_inode(struct super_block *); | ||
271 | extern void nilfs_destroy_inode(struct inode *); | ||
272 | extern void nilfs_error(struct super_block *, const char *, const char *, ...) | ||
273 | __attribute__ ((format (printf, 3, 4))); | ||
274 | extern void nilfs_warning(struct super_block *, const char *, const char *, ...) | ||
275 | __attribute__ ((format (printf, 3, 4))); | ||
276 | extern struct nilfs_super_block * | ||
277 | nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); | ||
278 | extern int nilfs_store_magic_and_option(struct super_block *, | ||
279 | struct nilfs_super_block *, char *); | ||
280 | extern int nilfs_commit_super(struct nilfs_sb_info *, int); | ||
281 | extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); | ||
282 | extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); | ||
283 | |||
284 | /* gcinode.c */ | ||
285 | int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, | ||
286 | struct buffer_head **); | ||
287 | int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, | ||
288 | struct buffer_head **); | ||
289 | int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); | ||
290 | int nilfs_init_gccache(struct the_nilfs *); | ||
291 | void nilfs_destroy_gccache(struct the_nilfs *); | ||
292 | void nilfs_clear_gcinode(struct inode *); | ||
293 | struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); | ||
294 | void nilfs_remove_all_gcinode(struct the_nilfs *); | ||
295 | |||
296 | /* gcdat.c */ | ||
297 | int nilfs_init_gcdat_inode(struct the_nilfs *); | ||
298 | void nilfs_commit_gcdat_inode(struct the_nilfs *); | ||
299 | void nilfs_clear_gcdat_inode(struct the_nilfs *); | ||
300 | |||
301 | /* | ||
302 | * Inodes and files operations | ||
303 | */ | ||
304 | extern struct file_operations nilfs_dir_operations; | ||
305 | extern struct inode_operations nilfs_file_inode_operations; | ||
306 | extern struct file_operations nilfs_file_operations; | ||
307 | extern struct address_space_operations nilfs_aops; | ||
308 | extern struct inode_operations nilfs_dir_inode_operations; | ||
309 | extern struct inode_operations nilfs_special_inode_operations; | ||
310 | extern struct inode_operations nilfs_symlink_inode_operations; | ||
311 | |||
312 | /* | ||
313 | * filesystem type | ||
314 | */ | ||
315 | extern struct file_system_type nilfs_fs_type; | ||
316 | |||
317 | |||
318 | #endif /* _NILFS_H */ | ||
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c new file mode 100644 index 000000000000..1bfbba9c0e9a --- /dev/null +++ b/fs/nilfs2/page.c | |||
@@ -0,0 +1,540 @@ | |||
1 | /* | ||
2 | * page.c - buffer/page management specific to NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | ||
21 | * Seiji Kihara <kihara@osrg.net>. | ||
22 | */ | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | #include <linux/writeback.h> | ||
26 | #include <linux/swap.h> | ||
27 | #include <linux/bitops.h> | ||
28 | #include <linux/page-flags.h> | ||
29 | #include <linux/list.h> | ||
30 | #include <linux/highmem.h> | ||
31 | #include <linux/pagevec.h> | ||
32 | #include "nilfs.h" | ||
33 | #include "page.h" | ||
34 | #include "mdt.h" | ||
35 | |||
36 | |||
37 | #define NILFS_BUFFER_INHERENT_BITS \ | ||
38 | ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ | ||
39 | (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) | ||
40 | |||
/*
 * __nilfs_get_page_block - get the buffer for a block in a locked page
 * @page: page holding (or about to hold) the buffer; caller keeps it locked
 * @block: block number (in units of the inode's block size) to look up
 * @index: page index of @page within its mapping
 * @blkbits: block size shift
 * @b_state: initial buffer state bits used when buffers must be created
 *
 * Attaches empty buffers to the page if it has none yet, locates the
 * buffer head for @block (presumably taking a reference -- see
 * nilfs_page_get_nth_block), then waits for any in-flight I/O on it to
 * finish before returning it.
 */
static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)

{
	unsigned long first_block;
	struct buffer_head *bh;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

	/* translate the page index into the block number of its first buffer */
	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
	bh = nilfs_page_get_nth_block(page, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}
59 | |||
60 | /* | ||
61 | * Since the page cache of B-tree node pages or data page cache of pseudo | ||
62 | * inodes does not have a valid mapping->host pointer, calling | ||
63 | * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; | ||
64 | * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). | ||
65 | * To avoid this problem, the old style mark_buffer_dirty() is used instead. | ||
66 | */ | ||
67 | void nilfs_mark_buffer_dirty(struct buffer_head *bh) | ||
68 | { | ||
69 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | ||
70 | __set_page_dirty_nobuffers(bh->b_page); | ||
71 | } | ||
72 | |||
/**
 * nilfs_grab_buffer - get a buffer from the given page cache, creating
 *	the page and its buffers as needed
 * @inode: inode that determines the block size
 * @mapping: page cache to search (may be a shadow cache)
 * @blkoff: block offset within the file
 * @b_state: initial buffer state bits for newly created buffers
 *
 * On success the page containing the returned buffer is left LOCKED and
 * referenced; the failure paths unlock and release it and return NULL.
 *
 * If @mapping is a shadow cache (assoc_mapping != NULL) and the buffer
 * found here is not up to date, the original cache is consulted; an
 * up-to-date buffer found there is copied into this one, and a dirty
 * original also makes the copy dirty.
 */
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
	struct page *page, *opage;
	struct buffer_head *bh, *obh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;
	}
	if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
		/*
		 * Shadow page cache uses assoc_mapping to point its original
		 * page cache. The following code tries the original cache
		 * if the given cache is a shadow and it didn't hit.
		 */
		opage = find_lock_page(mapping->assoc_mapping, index);
		if (!opage)
			return bh;

		obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
					     b_state);
		if (buffer_uptodate(obh)) {
			nilfs_copy_buffer(bh, obh);
			if (buffer_dirty(obh)) {
				nilfs_mark_buffer_dirty(bh);
				if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
					nilfs_mdt_mark_dirty(inode);
			}
		}
		brelse(obh);
		unlock_page(opage);
		page_cache_release(opage);
	}
	return bh;
}
119 | |||
/**
 * nilfs_forget_buffer - discard the dirty state and contents of a buffer
 * @bh: buffer head of the buffer to be discarded
 *
 * Clears the dirty, volatile, uptodate and mapped states of @bh and
 * detaches it from its disk block (b_blocknr = -1).  If this leaves the
 * page without any dirty buffer, the page-level dirty state is canceled
 * as well; the page's Uptodate and MappedToDisk flags are always
 * cleared.  Finally drops one reference on @bh.
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;

	lock_buffer(bh);
	clear_buffer_nilfs_volatile(bh);
	if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	clear_buffer_uptodate(bh);
	clear_buffer_mapped(bh);
	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}
142 | |||
/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 *
 * Copies the data block, the inheritable state bits
 * (NILFS_BUFFER_INHERENT_BITS), the block number and the block device
 * from @sbh to @dbh.  The destination page's Uptodate and MappedToDisk
 * flags are then recomputed as the intersection of those bits over all
 * buffers attached to the page.
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	/* two distinct atomic-kmap slots: both pages are mapped at once */
	kaddr0 = kmap_atomic(spage, KM_USER0);
	kaddr1 = kmap_atomic(dpage, KM_USER1);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1, KM_USER1);
	kunmap_atomic(kaddr0, KM_USER0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

	/* AND the Uptodate/Mapped bits of every other buffer on the page */
	bh = dbh;
	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & (1UL << BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & (1UL << BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}
181 | |||
182 | /** | ||
183 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | ||
184 | * @page: page to be checked | ||
185 | * | ||
186 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | ||
187 | * Otherwise, it returns non-zero value. | ||
188 | */ | ||
189 | int nilfs_page_buffers_clean(struct page *page) | ||
190 | { | ||
191 | struct buffer_head *bh, *head; | ||
192 | |||
193 | bh = head = page_buffers(page); | ||
194 | do { | ||
195 | if (buffer_dirty(bh)) | ||
196 | return 0; | ||
197 | bh = bh->b_this_page; | ||
198 | } while (bh != head); | ||
199 | return 1; | ||
200 | } | ||
201 | |||
/*
 * nilfs_page_bug - dump diagnostic information about a suspect page
 * @page: page to dump (NULL is tolerated and reported)
 *
 * Prints the page's reference count, index, flags, mapping and owner
 * inode number, followed by one line per attached buffer head.  Purely
 * informational: the page state is not modified.
 */
void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino = 0;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	if (m) {
		struct inode *inode = NILFS_AS_I(m);
		if (inode != NULL)
			ino = inode->i_ino;
	}
	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, atomic_read(&page->_count),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}
237 | |||
/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device the new buffers will belong to
 * @size: buffer size in bytes
 * @state: initial buffer state bits (BH_NILFS_Allocated is always added)
 *
 * The returned page is locked, carries a single page reference, and is
 * not attached to any mapping.  Its buffers are linked into the usual
 * circular b_this_page list.  Release with nilfs_free_private_page().
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
				      unsigned long state)
{
	struct buffer_head *bh, *head, *tail;
	struct page *page;

	page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
	if (unlikely(!page))
		return NULL;

	lock_page(page);
	head = alloc_page_buffers(page, size, 0);
	if (unlikely(!head)) {
		unlock_page(page);
		__free_page(page);
		return NULL;
	}

	bh = head;
	do {
		bh->b_state = (1UL << BH_NILFS_Allocated) | state;
		tail = bh;
		bh->b_bdev = bdev;
		bh = bh->b_this_page;
	} while (bh);

	/* close the buffer ring and attach it to the page */
	tail->b_this_page = head;
	attach_page_buffers(page, head);

	return page;
}
275 | |||
/*
 * nilfs_free_private_page - release a page from nilfs_alloc_private_page()
 *
 * The page must be locked and must not belong to any mapping.  Frees
 * the attached buffer heads and the page itself; if the buffers cannot
 * be freed, NILFS_PAGE_BUG() reports the broken page.
 */
void nilfs_free_private_page(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping);

	if (page_has_buffers(page) && !try_to_free_buffers(page))
		NILFS_PAGE_BUG(page, "failed to free page");

	unlock_page(page);
	__free_page(page);
}
287 | |||
/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * of the page should be treated by the caller.  The page must not be
 * under i/o.  Both src and dst page must be locked.
 *
 * Every buffer of both pages stays locked from the per-buffer state copy
 * until after the data copy and the page-flag synchronization, so no
 * intermediate state is visible.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= (1UL << BH_Dirty);

	/* first pass: lock each buffer pair and copy per-buffer state */
	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	/* mirror the page-level flags of src onto dst */
	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

	/* second pass: unlock everything in the same order */
	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}
341 | |||
/**
 * nilfs_copy_dirty_pages - copy all dirty pages of one cache into another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Walks the pages tagged dirty in @smap in batches of PAGEVEC_SIZE,
 * copies each one (buffers included) into a freshly grabbed page of
 * @dmap and marks the copy dirty.  The source pages keep their dirty
 * state.
 *
 * Returns 0 on success, or -ENOMEM when a destination page could not
 * be grabbed.
 */
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	pagevec_init(&pvec, 0);
repeat:
	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
				PAGEVEC_SIZE))
		return 0;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;

		lock_page(page);
		if (unlikely(!PageDirty(page)))
			NILFS_PAGE_BUG(page, "inconsistent dirty state");

		dpage = grab_cache_page(dmap, page->index);
		if (unlikely(!dpage)) {
			/* No empty page is added to the page cache */
			err = -ENOMEM;
			unlock_page(page);
			break;
		}
		if (unlikely(!page_has_buffers(page)))
			NILFS_PAGE_BUG(page,
				       "found empty page in dat page cache");

		nilfs_copy_page(dpage, page, 1);
		__set_page_dirty_nobuffers(dpage);

		unlock_page(dpage);
		page_cache_release(dpage);
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	/* keep scanning from the updated index until no dirty page is left */
	if (likely(!err))
		goto repeat;
	return err;
}
388 | |||
/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 *
 * A page already present in @dmap is overwritten in place; otherwise
 * the page is moved between the two radix trees directly, carrying its
 * dirty tag along.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec, 0);
repeat:
	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
	if (!n)
		return;
	index = pvec.pages[n - 1]->index + 1;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* override existing page on the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			page_cache_release(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			spin_lock_irq(&smap->tree_lock);
			page2 = radix_tree_delete(&smap->page_tree, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			spin_unlock_irq(&smap->tree_lock);

			spin_lock_irq(&dmap->tree_lock);
			err = radix_tree_insert(&dmap->page_tree, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				page_cache_release(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->page_tree,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			spin_unlock_irq(&dmap->tree_lock);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}
458 | |||
/**
 * nilfs_clear_dirty_pages - discard the dirty state of a whole page cache
 * @mapping: page cache whose dirty pages are to be cleaned
 *
 * For every page tagged dirty in @mapping: clears the page-level
 * Uptodate and MappedToDisk flags, resets the state of each attached
 * buffer (dirty, nilfs-volatile, uptodate, mapped), and finally cancels
 * the page's own dirty state.
 */
void nilfs_clear_dirty_pages(struct address_space *mapping)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			lock_page(page);
			ClearPageUptodate(page);
			ClearPageMappedToDisk(page);
			bh = head = page_buffers(page);
			do {
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				clear_buffer_nilfs_volatile(bh);
				clear_buffer_uptodate(bh);
				clear_buffer_mapped(bh);
				unlock_buffer(bh);
				bh = bh->b_this_page;
			} while (bh != head);

			__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}
494 | |||
495 | unsigned nilfs_page_count_clean_buffers(struct page *page, | ||
496 | unsigned from, unsigned to) | ||
497 | { | ||
498 | unsigned block_start, block_end; | ||
499 | struct buffer_head *bh, *head; | ||
500 | unsigned nc = 0; | ||
501 | |||
502 | for (bh = head = page_buffers(page), block_start = 0; | ||
503 | bh != head || !block_start; | ||
504 | block_start = block_end, bh = bh->b_this_page) { | ||
505 | block_end = block_start + bh->b_size; | ||
506 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | ||
507 | nc++; | ||
508 | } | ||
509 | return nc; | ||
510 | } | ||
511 | |||
/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 *
 * The radix-tree dirty tag is cleared under tree_lock before
 * clear_page_dirty_for_io() is called, so the tag and the page flag
 * cannot disagree when the lock is dropped.  Pages without a mapping
 * just have their dirty flag cleared.  Returns the previous dirty state.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		spin_lock_irq(&mapping->tree_lock);
		if (test_bit(PG_dirty, &page->flags)) {
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			spin_unlock_irq(&mapping->tree_lock);
			return clear_page_dirty_for_io(page);
		}
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}
	return TestClearPageDirty(page);
}
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h new file mode 100644 index 000000000000..8abca4d1c1f8 --- /dev/null +++ b/fs/nilfs2/page.h | |||
@@ -0,0 +1,76 @@ | |||
1 | /* | ||
2 | * page.h - buffer/page management specific to NILFS | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | ||
21 | * Seiji Kihara <kihara@osrg.net>. | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_PAGE_H | ||
25 | #define _NILFS_PAGE_H | ||
26 | |||
27 | #include <linux/buffer_head.h> | ||
28 | #include "nilfs.h" | ||
29 | |||
30 | /* | ||
31 | * Extended buffer state bits | ||
32 | */ | ||
33 | enum { | ||
34 | BH_NILFS_Allocated = BH_PrivateStart, | ||
35 | BH_NILFS_Node, | ||
36 | BH_NILFS_Volatile, | ||
37 | }; | ||
38 | |||
39 | BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ | ||
40 | BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ | ||
41 | BUFFER_FNS(NILFS_Volatile, nilfs_volatile) | ||
42 | |||
43 | |||
44 | void nilfs_mark_buffer_dirty(struct buffer_head *bh); | ||
45 | int __nilfs_clear_page_dirty(struct page *); | ||
46 | |||
47 | struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, | ||
48 | unsigned long, unsigned long); | ||
49 | void nilfs_forget_buffer(struct buffer_head *); | ||
50 | void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); | ||
51 | int nilfs_page_buffers_clean(struct page *); | ||
52 | void nilfs_page_bug(struct page *); | ||
53 | struct page *nilfs_alloc_private_page(struct block_device *, int, | ||
54 | unsigned long); | ||
55 | void nilfs_free_private_page(struct page *); | ||
56 | |||
57 | int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); | ||
58 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); | ||
59 | void nilfs_clear_dirty_pages(struct address_space *); | ||
60 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); | ||
61 | |||
62 | #define NILFS_PAGE_BUG(page, m, a...) \ | ||
63 | do { nilfs_page_bug(page); BUG(); } while (0) | ||
64 | |||
65 | static inline struct buffer_head * | ||
66 | nilfs_page_get_nth_block(struct page *page, unsigned int count) | ||
67 | { | ||
68 | struct buffer_head *bh = page_buffers(page); | ||
69 | |||
70 | while (count-- > 0) | ||
71 | bh = bh->b_this_page; | ||
72 | get_bh(bh); | ||
73 | return bh; | ||
74 | } | ||
75 | |||
76 | #endif /* _NILFS_PAGE_H */ | ||
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c new file mode 100644 index 000000000000..6ade0963fc1d --- /dev/null +++ b/fs/nilfs2/recovery.c | |||
@@ -0,0 +1,929 @@ | |||
1 | /* | ||
2 | * recovery.c - NILFS recovery logic | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | |||
23 | #include <linux/buffer_head.h> | ||
24 | #include <linux/blkdev.h> | ||
25 | #include <linux/swap.h> | ||
26 | #include <linux/crc32.h> | ||
27 | #include "nilfs.h" | ||
28 | #include "segment.h" | ||
29 | #include "sufile.h" | ||
30 | #include "page.h" | ||
31 | #include "seglist.h" | ||
32 | #include "segbuf.h" | ||
33 | |||
34 | /* | ||
35 | * Segment check result | ||
36 | */ | ||
37 | enum { | ||
38 | NILFS_SEG_VALID, | ||
39 | NILFS_SEG_NO_SUPER_ROOT, | ||
40 | NILFS_SEG_FAIL_IO, | ||
41 | NILFS_SEG_FAIL_MAGIC, | ||
42 | NILFS_SEG_FAIL_SEQ, | ||
43 | NILFS_SEG_FAIL_CHECKSUM_SEGSUM, | ||
44 | NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT, | ||
45 | NILFS_SEG_FAIL_CHECKSUM_FULL, | ||
46 | NILFS_SEG_FAIL_CONSISTENCY, | ||
47 | }; | ||
48 | |||
49 | /* work structure for recovery */ | ||
50 | struct nilfs_recovery_block { | ||
51 | ino_t ino; /* Inode number of the file that this block | ||
52 | belongs to */ | ||
53 | sector_t blocknr; /* block number */ | ||
54 | __u64 vblocknr; /* virtual block number */ | ||
55 | unsigned long blkoff; /* File offset of the data block (per block) */ | ||
56 | struct list_head list; | ||
57 | }; | ||
58 | |||
59 | |||
/*
 * nilfs_warn_segment_error - log a warning for a segment check result
 * @err: one of the NILFS_SEG_* status codes
 *
 * Emits a kernel warning describing the failure and converts the status
 * to a negative errno: -EIO for an I/O failure, -EINVAL for every other
 * (including unrecognized) status.
 */
static int nilfs_warn_segment_error(int err)
{
	switch (err) {
	case NILFS_SEG_FAIL_IO:
		printk(KERN_WARNING
		       "NILFS warning: I/O error on loading last segment\n");
		return -EIO;
	case NILFS_SEG_FAIL_MAGIC:
		printk(KERN_WARNING
		       "NILFS warning: Segment magic number invalid\n");
		break;
	case NILFS_SEG_FAIL_SEQ:
		printk(KERN_WARNING
		       "NILFS warning: Sequence number mismatch\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_SEGSUM:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in segment summary\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in super root\n");
		break;
	case NILFS_SEG_FAIL_CHECKSUM_FULL:
		printk(KERN_WARNING
		       "NILFS warning: Checksum error in segment payload\n");
		break;
	case NILFS_SEG_FAIL_CONSISTENCY:
		printk(KERN_WARNING
		       "NILFS warning: Inconsistent segment\n");
		break;
	case NILFS_SEG_NO_SUPER_ROOT:
		printk(KERN_WARNING
		       "NILFS warning: No super root in the last segment\n");
		break;
	}
	return -EINVAL;
}
98 | |||
99 | static void store_segsum_info(struct nilfs_segsum_info *ssi, | ||
100 | struct nilfs_segment_summary *sum, | ||
101 | unsigned int blocksize) | ||
102 | { | ||
103 | ssi->flags = le16_to_cpu(sum->ss_flags); | ||
104 | ssi->seg_seq = le64_to_cpu(sum->ss_seq); | ||
105 | ssi->ctime = le64_to_cpu(sum->ss_create); | ||
106 | ssi->next = le64_to_cpu(sum->ss_next); | ||
107 | ssi->nblocks = le32_to_cpu(sum->ss_nblocks); | ||
108 | ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo); | ||
109 | ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes); | ||
110 | |||
111 | ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); | ||
112 | ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * calc_crc_cont - check CRC of blocks continuously | ||
117 | * @sbi: nilfs_sb_info | ||
118 | * @bhs: buffer head of start block | ||
119 | * @sum: place to store result | ||
120 | * @offset: offset bytes in the first block | ||
121 | * @check_bytes: number of bytes to be checked | ||
122 | * @start: DBN of start block | ||
123 | * @nblock: number of blocks to be checked | ||
124 | */ | ||
125 | static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, | ||
126 | u32 *sum, unsigned long offset, u64 check_bytes, | ||
127 | sector_t start, unsigned long nblock) | ||
128 | { | ||
129 | unsigned long blocksize = sbi->s_super->s_blocksize; | ||
130 | unsigned long size; | ||
131 | u32 crc; | ||
132 | |||
133 | BUG_ON(offset >= blocksize); | ||
134 | check_bytes -= offset; | ||
135 | size = min_t(u64, check_bytes, blocksize - offset); | ||
136 | crc = crc32_le(sbi->s_nilfs->ns_crc_seed, | ||
137 | (unsigned char *)bhs->b_data + offset, size); | ||
138 | if (--nblock > 0) { | ||
139 | do { | ||
140 | struct buffer_head *bh | ||
141 | = sb_bread(sbi->s_super, ++start); | ||
142 | if (!bh) | ||
143 | return -EIO; | ||
144 | check_bytes -= size; | ||
145 | size = min_t(u64, check_bytes, blocksize); | ||
146 | crc = crc32_le(crc, bh->b_data, size); | ||
147 | brelse(bh); | ||
148 | } while (--nblock > 0); | ||
149 | } | ||
150 | *sum = crc; | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * nilfs_read_super_root_block - read super root block | ||
156 | * @sb: super_block | ||
157 | * @sr_block: disk block number of the super root block | ||
158 | * @pbh: address of a buffer_head pointer to return super root buffer | ||
159 | * @check: CRC check flag | ||
160 | */ | ||
161 | int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, | ||
162 | struct buffer_head **pbh, int check) | ||
163 | { | ||
164 | struct buffer_head *bh_sr; | ||
165 | struct nilfs_super_root *sr; | ||
166 | u32 crc; | ||
167 | int ret; | ||
168 | |||
169 | *pbh = NULL; | ||
170 | bh_sr = sb_bread(sb, sr_block); | ||
171 | if (unlikely(!bh_sr)) { | ||
172 | ret = NILFS_SEG_FAIL_IO; | ||
173 | goto failed; | ||
174 | } | ||
175 | |||
176 | sr = (struct nilfs_super_root *)bh_sr->b_data; | ||
177 | if (check) { | ||
178 | unsigned bytes = le16_to_cpu(sr->sr_bytes); | ||
179 | |||
180 | if (bytes == 0 || bytes > sb->s_blocksize) { | ||
181 | ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; | ||
182 | goto failed_bh; | ||
183 | } | ||
184 | if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc, | ||
185 | sizeof(sr->sr_sum), bytes, sr_block, 1)) { | ||
186 | ret = NILFS_SEG_FAIL_IO; | ||
187 | goto failed_bh; | ||
188 | } | ||
189 | if (crc != le32_to_cpu(sr->sr_sum)) { | ||
190 | ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; | ||
191 | goto failed_bh; | ||
192 | } | ||
193 | } | ||
194 | *pbh = bh_sr; | ||
195 | return 0; | ||
196 | |||
197 | failed_bh: | ||
198 | brelse(bh_sr); | ||
199 | |||
200 | failed: | ||
201 | return nilfs_warn_segment_error(ret); | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * load_segment_summary - read segment summary of the specified partial segment | ||
206 | * @sbi: nilfs_sb_info | ||
207 | * @pseg_start: start disk block number of partial segment | ||
208 | * @seg_seq: sequence number requested | ||
209 | * @ssi: pointer to nilfs_segsum_info struct to store information | ||
210 | * @full_check: full check flag | ||
211 | * (0: only checks segment summary CRC, 1: data CRC) | ||
212 | */ | ||
213 | static int | ||
214 | load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start, | ||
215 | u64 seg_seq, struct nilfs_segsum_info *ssi, | ||
216 | int full_check) | ||
217 | { | ||
218 | struct buffer_head *bh_sum; | ||
219 | struct nilfs_segment_summary *sum; | ||
220 | unsigned long offset, nblock; | ||
221 | u64 check_bytes; | ||
222 | u32 crc, crc_sum; | ||
223 | int ret = NILFS_SEG_FAIL_IO; | ||
224 | |||
225 | bh_sum = sb_bread(sbi->s_super, pseg_start); | ||
226 | if (!bh_sum) | ||
227 | goto out; | ||
228 | |||
229 | sum = (struct nilfs_segment_summary *)bh_sum->b_data; | ||
230 | |||
231 | /* Check consistency of segment summary */ | ||
232 | if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) { | ||
233 | ret = NILFS_SEG_FAIL_MAGIC; | ||
234 | goto failed; | ||
235 | } | ||
236 | store_segsum_info(ssi, sum, sbi->s_super->s_blocksize); | ||
237 | if (seg_seq != ssi->seg_seq) { | ||
238 | ret = NILFS_SEG_FAIL_SEQ; | ||
239 | goto failed; | ||
240 | } | ||
241 | if (full_check) { | ||
242 | offset = sizeof(sum->ss_datasum); | ||
243 | check_bytes = | ||
244 | ((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits); | ||
245 | nblock = ssi->nblocks; | ||
246 | crc_sum = le32_to_cpu(sum->ss_datasum); | ||
247 | ret = NILFS_SEG_FAIL_CHECKSUM_FULL; | ||
248 | } else { /* only checks segment summary */ | ||
249 | offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum); | ||
250 | check_bytes = ssi->sumbytes; | ||
251 | nblock = ssi->nsumblk; | ||
252 | crc_sum = le32_to_cpu(sum->ss_sumsum); | ||
253 | ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM; | ||
254 | } | ||
255 | |||
256 | if (unlikely(nblock == 0 || | ||
257 | nblock > sbi->s_nilfs->ns_blocks_per_segment)) { | ||
258 | /* This limits the number of blocks read in the CRC check */ | ||
259 | ret = NILFS_SEG_FAIL_CONSISTENCY; | ||
260 | goto failed; | ||
261 | } | ||
262 | if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes, | ||
263 | pseg_start, nblock)) { | ||
264 | ret = NILFS_SEG_FAIL_IO; | ||
265 | goto failed; | ||
266 | } | ||
267 | if (crc == crc_sum) | ||
268 | ret = 0; | ||
269 | failed: | ||
270 | brelse(bh_sum); | ||
271 | out: | ||
272 | return ret; | ||
273 | } | ||
274 | |||
275 | static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, | ||
276 | unsigned int *offset, unsigned int bytes) | ||
277 | { | ||
278 | void *ptr; | ||
279 | sector_t blocknr; | ||
280 | |||
281 | BUG_ON((*pbh)->b_size < *offset); | ||
282 | if (bytes > (*pbh)->b_size - *offset) { | ||
283 | blocknr = (*pbh)->b_blocknr; | ||
284 | brelse(*pbh); | ||
285 | *pbh = sb_bread(sb, blocknr + 1); | ||
286 | if (unlikely(!*pbh)) | ||
287 | return NULL; | ||
288 | *offset = 0; | ||
289 | } | ||
290 | ptr = (*pbh)->b_data + *offset; | ||
291 | *offset += bytes; | ||
292 | return ptr; | ||
293 | } | ||
294 | |||
/*
 * segsum_skip - advance the summary cursor over @count items of @bytes each
 * @sb: super block, used to read a later summary block
 * @pbh: cursor buffer head; replaced when the skip crosses block limits
 * @offset: byte position inside *pbh; updated to just past the skipped items
 * @bytes: size of one item
 * @count: number of items to skip
 *
 * NOTE(review): when the skip crosses into another block, the sb_bread()
 * result is stored into *pbh without a NULL check here; callers are
 * expected to test *pbh afterwards (collect_blocks_from_segsum does).
 */
static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
			unsigned int *offset, unsigned int bytes,
			unsigned long count)
{
	/* Items of this size still addressable in the current block */
	unsigned int rest_item_in_current_block
		= ((*pbh)->b_size - *offset) / bytes;

	if (count <= rest_item_in_current_block) {
		*offset += bytes * count;
	} else {
		sector_t blocknr = (*pbh)->b_blocknr;
		unsigned int nitem_per_block = (*pbh)->b_size / bytes;
		unsigned int bcnt;

		/* Number of whole blocks to jump, and the offset of the
		   first unskipped item within the final block */
		count -= rest_item_in_current_block;
		bcnt = DIV_ROUND_UP(count, nitem_per_block);
		*offset = bytes * (count - (bcnt - 1) * nitem_per_block);

		brelse(*pbh);
		*pbh = sb_bread(sb, blocknr + bcnt);
	}
}
317 | |||
/*
 * collect_blocks_from_segsum - list the data blocks described by a
 * partial segment's summary
 * @sbi: nilfs_sb_info
 * @sum_blocknr: disk block number of the segment summary
 * @ssi: decoded summary information for the partial segment
 * @head: list to append nilfs_recovery_block entries to
 *
 * Walks the finfo/binfo records of the summary and appends one
 * nilfs_recovery_block per data block.  On failure, entries already
 * appended to @head are left for the caller to dispose of
 * (dispose_recovery_list).  Returns 0, -EIO, or -ENOMEM.
 */
static int
collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
			   struct nilfs_segsum_info *ssi,
			   struct list_head *head)
{
	struct buffer_head *bh;
	unsigned int offset;
	unsigned long nfinfo = ssi->nfinfo;
	/* Payload blocks start right after the summary blocks */
	sector_t blocknr = sum_blocknr + ssi->nsumblk;
	ino_t ino;
	int err = -EIO;

	if (!nfinfo)
		return 0;

	bh = sb_bread(sbi->s_super, sum_blocknr);
	if (unlikely(!bh))
		goto out;

	/* Records begin just past the fixed segment summary header */
	offset = le16_to_cpu(
		((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
	for (;;) {
		unsigned long nblocks, ndatablk, nnodeblk;
		struct nilfs_finfo *finfo;

		finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
		if (unlikely(!finfo))
			goto out;

		ino = le64_to_cpu(finfo->fi_ino);
		nblocks = le32_to_cpu(finfo->fi_nblocks);
		ndatablk = le32_to_cpu(finfo->fi_ndatablk);
		nnodeblk = nblocks - ndatablk;

		while (ndatablk-- > 0) {
			struct nilfs_recovery_block *rb;
			struct nilfs_binfo_v *binfo;

			binfo = segsum_get(sbi->s_super, &bh, &offset,
					   sizeof(*binfo));
			if (unlikely(!binfo))
				goto out;

			rb = kmalloc(sizeof(*rb), GFP_NOFS);
			if (unlikely(!rb)) {
				err = -ENOMEM;
				goto out;
			}
			rb->ino = ino;
			rb->blocknr = blocknr++;
			rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
			rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
			/* INIT_LIST_HEAD(&rb->list); */
			list_add_tail(&rb->list, head);
		}
		if (--nfinfo == 0)
			break;
		blocknr += nnodeblk; /* always 0 for the data sync segments */
		segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
			    nnodeblk);
		if (unlikely(!bh))
			goto out;
	}
	err = 0;
 out:
	brelse(bh); /* brelse(NULL) is just ignored */
	return err;
}
386 | |||
387 | static void dispose_recovery_list(struct list_head *head) | ||
388 | { | ||
389 | while (!list_empty(head)) { | ||
390 | struct nilfs_recovery_block *rb | ||
391 | = list_entry(head->next, | ||
392 | struct nilfs_recovery_block, list); | ||
393 | list_del(&rb->list); | ||
394 | kfree(rb); | ||
395 | } | ||
396 | } | ||
397 | |||
398 | void nilfs_dispose_segment_list(struct list_head *head) | ||
399 | { | ||
400 | while (!list_empty(head)) { | ||
401 | struct nilfs_segment_entry *ent | ||
402 | = list_entry(head->next, | ||
403 | struct nilfs_segment_entry, list); | ||
404 | list_del(&ent->list); | ||
405 | nilfs_free_segment_entry(ent); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, | ||
410 | struct nilfs_recovery_info *ri) | ||
411 | { | ||
412 | struct list_head *head = &ri->ri_used_segments; | ||
413 | struct nilfs_segment_entry *ent, *n; | ||
414 | struct inode *sufile = nilfs->ns_sufile; | ||
415 | __u64 segnum[4]; | ||
416 | time_t mtime; | ||
417 | int err; | ||
418 | int i; | ||
419 | |||
420 | segnum[0] = nilfs->ns_segnum; | ||
421 | segnum[1] = nilfs->ns_nextnum; | ||
422 | segnum[2] = ri->ri_segnum; | ||
423 | segnum[3] = ri->ri_nextnum; | ||
424 | |||
425 | /* | ||
426 | * Releasing the next segment of the latest super root. | ||
427 | * The next segment is invalidated by this recovery. | ||
428 | */ | ||
429 | err = nilfs_sufile_free(sufile, segnum[1]); | ||
430 | if (unlikely(err)) | ||
431 | goto failed; | ||
432 | |||
433 | err = -ENOMEM; | ||
434 | for (i = 1; i < 4; i++) { | ||
435 | ent = nilfs_alloc_segment_entry(segnum[i]); | ||
436 | if (unlikely(!ent)) | ||
437 | goto failed; | ||
438 | list_add_tail(&ent->list, head); | ||
439 | } | ||
440 | |||
441 | /* | ||
442 | * Collecting segments written after the latest super root. | ||
443 | * These are marked dirty to avoid being reallocated in the next write. | ||
444 | */ | ||
445 | mtime = get_seconds(); | ||
446 | list_for_each_entry_safe(ent, n, head, list) { | ||
447 | if (ent->segnum == segnum[0]) { | ||
448 | list_del(&ent->list); | ||
449 | nilfs_free_segment_entry(ent); | ||
450 | continue; | ||
451 | } | ||
452 | err = nilfs_open_segment_entry(ent, sufile); | ||
453 | if (unlikely(err)) | ||
454 | goto failed; | ||
455 | if (!nilfs_segment_usage_dirty(ent->raw_su)) { | ||
456 | /* make the segment garbage */ | ||
457 | ent->raw_su->su_nblocks = cpu_to_le32(0); | ||
458 | ent->raw_su->su_lastmod = cpu_to_le32(mtime); | ||
459 | nilfs_segment_usage_set_dirty(ent->raw_su); | ||
460 | } | ||
461 | list_del(&ent->list); | ||
462 | nilfs_close_segment_entry(ent, sufile); | ||
463 | nilfs_free_segment_entry(ent); | ||
464 | } | ||
465 | |||
466 | /* Allocate new segments for recovery */ | ||
467 | err = nilfs_sufile_alloc(sufile, &segnum[0]); | ||
468 | if (unlikely(err)) | ||
469 | goto failed; | ||
470 | |||
471 | nilfs->ns_pseg_offset = 0; | ||
472 | nilfs->ns_seg_seq = ri->ri_seq + 2; | ||
473 | nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0]; | ||
474 | return 0; | ||
475 | |||
476 | failed: | ||
477 | /* No need to recover sufile because it will be destroyed on error */ | ||
478 | return err; | ||
479 | } | ||
480 | |||
481 | static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, | ||
482 | struct nilfs_recovery_block *rb, | ||
483 | struct page *page) | ||
484 | { | ||
485 | struct buffer_head *bh_org; | ||
486 | void *kaddr; | ||
487 | |||
488 | bh_org = sb_bread(sbi->s_super, rb->blocknr); | ||
489 | if (unlikely(!bh_org)) | ||
490 | return -EIO; | ||
491 | |||
492 | kaddr = kmap_atomic(page, KM_USER0); | ||
493 | memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size); | ||
494 | kunmap_atomic(kaddr, KM_USER0); | ||
495 | brelse(bh_org); | ||
496 | return 0; | ||
497 | } | ||
498 | |||
/*
 * recover_dsync_blocks - re-insert salvaged data blocks into files
 * @sbi: nilfs_sb_info
 * @head: list of nilfs_recovery_block entries (consumed by this call)
 * @nr_salvaged_blocks: counter incremented once per recovered block
 *
 * For each listed block, reads its inode, prepares the corresponding
 * page, copies the salvaged contents in, and marks the file dirty so a
 * later segment construction writes it out.  Failing entries are logged
 * and skipped; the first failure's errno is returned after the whole
 * list has been processed and freed.
 */
static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
				struct list_head *head,
				unsigned long *nr_salvaged_blocks)
{
	struct inode *inode;
	struct nilfs_recovery_block *rb, *n;
	unsigned blocksize = sbi->s_super->s_blocksize;
	struct page *page;
	loff_t pos;
	int err = 0, err2 = 0;

	list_for_each_entry_safe(rb, n, head, list) {
		inode = nilfs_iget(sbi->s_super, rb->ino);
		if (IS_ERR(inode)) {
			err = PTR_ERR(inode);
			inode = NULL;
			goto failed_inode;
		}

		pos = rb->blkoff << inode->i_blkbits;
		page = NULL;
		err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
					0, &page, NULL, nilfs_get_block);
		if (unlikely(err))
			goto failed_inode;

		err = nilfs_recovery_copy_block(sbi, rb, page);
		if (unlikely(err))
			goto failed_page;

		err = nilfs_set_file_dirty(sbi, inode, 1);
		if (unlikely(err))
			goto failed_page;

		block_write_end(NULL, inode->i_mapping, pos, blocksize,
				blocksize, page, NULL);

		unlock_page(page);
		page_cache_release(page);

		(*nr_salvaged_blocks)++;
		goto next;

 failed_page:
		unlock_page(page);
		page_cache_release(page);

 failed_inode:
		printk(KERN_WARNING
		       "NILFS warning: error recovering data block "
		       "(err=%d, ino=%lu, block-offset=%llu)\n",
		       err, rb->ino, (unsigned long long)rb->blkoff);
		if (!err2)
			err2 = err;
 next:
		iput(inode); /* iput(NULL) is just ignored */
		list_del_init(&rb->list);
		kfree(rb);
	}
	return err2;
}
560 | |||
561 | /** | ||
562 | * nilfs_do_roll_forward - salvage logical segments newer than the latest | ||
563 | * checkpoint | ||
564 | * @sbi: nilfs_sb_info | ||
565 | * @nilfs: the_nilfs | ||
566 | * @ri: pointer to a nilfs_recovery_info | ||
567 | */ | ||
568 | static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | ||
569 | struct nilfs_sb_info *sbi, | ||
570 | struct nilfs_recovery_info *ri) | ||
571 | { | ||
572 | struct nilfs_segsum_info ssi; | ||
573 | sector_t pseg_start; | ||
574 | sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ | ||
575 | unsigned long nsalvaged_blocks = 0; | ||
576 | u64 seg_seq; | ||
577 | __u64 segnum, nextnum = 0; | ||
578 | int empty_seg = 0; | ||
579 | int err = 0, ret; | ||
580 | LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */ | ||
581 | enum { | ||
582 | RF_INIT_ST, | ||
583 | RF_DSYNC_ST, /* scanning data-sync segments */ | ||
584 | }; | ||
585 | int state = RF_INIT_ST; | ||
586 | |||
587 | nilfs_attach_writer(nilfs, sbi); | ||
588 | pseg_start = ri->ri_lsegs_start; | ||
589 | seg_seq = ri->ri_lsegs_start_seq; | ||
590 | segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); | ||
591 | nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); | ||
592 | |||
593 | while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { | ||
594 | |||
595 | ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); | ||
596 | if (ret) { | ||
597 | if (ret == NILFS_SEG_FAIL_IO) { | ||
598 | err = -EIO; | ||
599 | goto failed; | ||
600 | } | ||
601 | goto strayed; | ||
602 | } | ||
603 | if (unlikely(NILFS_SEG_HAS_SR(&ssi))) | ||
604 | goto confused; | ||
605 | |||
606 | /* Found a valid partial segment; do recovery actions */ | ||
607 | nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); | ||
608 | empty_seg = 0; | ||
609 | nilfs->ns_ctime = ssi.ctime; | ||
610 | if (!(ssi.flags & NILFS_SS_GC)) | ||
611 | nilfs->ns_nongc_ctime = ssi.ctime; | ||
612 | |||
613 | switch (state) { | ||
614 | case RF_INIT_ST: | ||
615 | if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi)) | ||
616 | goto try_next_pseg; | ||
617 | state = RF_DSYNC_ST; | ||
618 | /* Fall through */ | ||
619 | case RF_DSYNC_ST: | ||
620 | if (!NILFS_SEG_DSYNC(&ssi)) | ||
621 | goto confused; | ||
622 | |||
623 | err = collect_blocks_from_segsum( | ||
624 | sbi, pseg_start, &ssi, &dsync_blocks); | ||
625 | if (unlikely(err)) | ||
626 | goto failed; | ||
627 | if (NILFS_SEG_LOGEND(&ssi)) { | ||
628 | err = recover_dsync_blocks( | ||
629 | sbi, &dsync_blocks, &nsalvaged_blocks); | ||
630 | if (unlikely(err)) | ||
631 | goto failed; | ||
632 | state = RF_INIT_ST; | ||
633 | } | ||
634 | break; /* Fall through to try_next_pseg */ | ||
635 | } | ||
636 | |||
637 | try_next_pseg: | ||
638 | if (pseg_start == ri->ri_lsegs_end) | ||
639 | break; | ||
640 | pseg_start += ssi.nblocks; | ||
641 | if (pseg_start < seg_end) | ||
642 | continue; | ||
643 | goto feed_segment; | ||
644 | |||
645 | strayed: | ||
646 | if (pseg_start == ri->ri_lsegs_end) | ||
647 | break; | ||
648 | |||
649 | feed_segment: | ||
650 | /* Looking to the next full segment */ | ||
651 | if (empty_seg++) | ||
652 | break; | ||
653 | seg_seq++; | ||
654 | segnum = nextnum; | ||
655 | nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); | ||
656 | pseg_start = seg_start; | ||
657 | } | ||
658 | |||
659 | if (nsalvaged_blocks) { | ||
660 | printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", | ||
661 | sbi->s_super->s_id, nsalvaged_blocks); | ||
662 | ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; | ||
663 | } | ||
664 | out: | ||
665 | dispose_recovery_list(&dsync_blocks); | ||
666 | nilfs_detach_writer(sbi->s_nilfs, sbi); | ||
667 | return err; | ||
668 | |||
669 | confused: | ||
670 | err = -EINVAL; | ||
671 | failed: | ||
672 | printk(KERN_ERR | ||
673 | "NILFS (device %s): Error roll-forwarding " | ||
674 | "(err=%d, pseg block=%llu). ", | ||
675 | sbi->s_super->s_id, err, (unsigned long long)pseg_start); | ||
676 | goto out; | ||
677 | } | ||
678 | |||
/*
 * nilfs_finish_roll_forward - invalidate the head of the salvaged logs
 * @nilfs: the_nilfs
 * @sbi: nilfs_sb_info
 * @ri: recovery information
 *
 * When the first salvaged log shares a full segment with the latest
 * super root, zero its first block on disk so the stale log is not
 * picked up by a future recovery.  Does nothing otherwise.
 */
static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
				      struct nilfs_sb_info *sbi,
				      struct nilfs_recovery_info *ri)
{
	struct buffer_head *bh;
	int err;

	if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
	    nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
		return;

	bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
	/* NOTE(review): sb_getblk() can return NULL under memory
	   pressure — confirm that BUG() is the intended response */
	BUG_ON(!bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_dirty(bh);
	err = sync_dirty_buffer(bh);
	if (unlikely(err))
		printk(KERN_WARNING
		       "NILFS warning: buffer sync write failed during "
		       "post-cleaning of recovery.\n");
	brelse(bh);
}
701 | |||
702 | /** | ||
703 | * nilfs_recover_logical_segments - salvage logical segments written after | ||
704 | * the latest super root | ||
705 | * @nilfs: the_nilfs | ||
706 | * @sbi: nilfs_sb_info | ||
707 | * @ri: pointer to a nilfs_recovery_info struct to store search results. | ||
708 | * | ||
709 | * Return Value: On success, 0 is returned. On error, one of the following | ||
710 | * negative error code is returned. | ||
711 | * | ||
712 | * %-EINVAL - Inconsistent filesystem state. | ||
713 | * | ||
714 | * %-EIO - I/O error | ||
715 | * | ||
716 | * %-ENOSPC - No space left on device (only in a panic state). | ||
717 | * | ||
718 | * %-ERESTARTSYS - Interrupted. | ||
719 | * | ||
720 | * %-ENOMEM - Insufficient memory available. | ||
721 | */ | ||
722 | int nilfs_recover_logical_segments(struct the_nilfs *nilfs, | ||
723 | struct nilfs_sb_info *sbi, | ||
724 | struct nilfs_recovery_info *ri) | ||
725 | { | ||
726 | int err; | ||
727 | |||
728 | if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) | ||
729 | return 0; | ||
730 | |||
731 | err = nilfs_attach_checkpoint(sbi, ri->ri_cno); | ||
732 | if (unlikely(err)) { | ||
733 | printk(KERN_ERR | ||
734 | "NILFS: error loading the latest checkpoint.\n"); | ||
735 | return err; | ||
736 | } | ||
737 | |||
738 | err = nilfs_do_roll_forward(nilfs, sbi, ri); | ||
739 | if (unlikely(err)) | ||
740 | goto failed; | ||
741 | |||
742 | if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { | ||
743 | err = nilfs_prepare_segment_for_recovery(nilfs, ri); | ||
744 | if (unlikely(err)) { | ||
745 | printk(KERN_ERR "NILFS: Error preparing segments for " | ||
746 | "recovery.\n"); | ||
747 | goto failed; | ||
748 | } | ||
749 | |||
750 | err = nilfs_attach_segment_constructor(sbi); | ||
751 | if (unlikely(err)) | ||
752 | goto failed; | ||
753 | |||
754 | set_nilfs_discontinued(nilfs); | ||
755 | err = nilfs_construct_segment(sbi->s_super); | ||
756 | nilfs_detach_segment_constructor(sbi); | ||
757 | |||
758 | if (unlikely(err)) { | ||
759 | printk(KERN_ERR "NILFS: Oops! recovery failed. " | ||
760 | "(err=%d)\n", err); | ||
761 | goto failed; | ||
762 | } | ||
763 | |||
764 | nilfs_finish_roll_forward(nilfs, sbi, ri); | ||
765 | } | ||
766 | |||
767 | nilfs_detach_checkpoint(sbi); | ||
768 | return 0; | ||
769 | |||
770 | failed: | ||
771 | nilfs_detach_checkpoint(sbi); | ||
772 | nilfs_mdt_clear(nilfs->ns_cpfile); | ||
773 | nilfs_mdt_clear(nilfs->ns_sufile); | ||
774 | nilfs_mdt_clear(nilfs->ns_dat); | ||
775 | return err; | ||
776 | } | ||
777 | |||
778 | /** | ||
779 | * nilfs_search_super_root - search the latest valid super root | ||
780 | * @nilfs: the_nilfs | ||
781 | * @sbi: nilfs_sb_info | ||
782 | * @ri: pointer to a nilfs_recovery_info struct to store search results. | ||
783 | * | ||
784 | * nilfs_search_super_root() looks for the latest super-root from a partial | ||
785 | * segment pointed by the superblock. It sets up struct the_nilfs through | ||
786 | * this search. It fills nilfs_recovery_info (ri) required for recovery. | ||
787 | * | ||
788 | * Return Value: On success, 0 is returned. On error, one of the following | ||
789 | * negative error code is returned. | ||
790 | * | ||
791 | * %-EINVAL - No valid segment found | ||
792 | * | ||
793 | * %-EIO - I/O error | ||
794 | */ | ||
795 | int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | ||
796 | struct nilfs_recovery_info *ri) | ||
797 | { | ||
798 | struct nilfs_segsum_info ssi; | ||
799 | sector_t pseg_start, pseg_end, sr_pseg_start = 0; | ||
800 | sector_t seg_start, seg_end; /* range of full segment (block number) */ | ||
801 | u64 seg_seq; | ||
802 | __u64 segnum, nextnum = 0; | ||
803 | __u64 cno; | ||
804 | struct nilfs_segment_entry *ent; | ||
805 | LIST_HEAD(segments); | ||
806 | int empty_seg = 0, scan_newer = 0; | ||
807 | int ret; | ||
808 | |||
809 | pseg_start = nilfs->ns_last_pseg; /* resume scan at the pseg recorded in the superblock */ | ||
810 | seg_seq = nilfs->ns_last_seq; | ||
811 | cno = nilfs->ns_last_cno; | ||
812 | segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); | ||
813 | |||
814 | /* Calculate range of segment */ | ||
815 | nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); | ||
816 | |||
817 | for (;;) { /* walk partial segments forward until the log chain ends */ | ||
818 | /* Load segment summary */ | ||
819 | ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); | ||
820 | if (ret) { | ||
821 | if (ret == NILFS_SEG_FAIL_IO) | ||
822 | goto failed; | ||
823 | goto strayed; | ||
824 | } | ||
825 | pseg_end = pseg_start + ssi.nblocks - 1; | ||
826 | if (unlikely(pseg_end > seg_end)) { | ||
827 | ret = NILFS_SEG_FAIL_CONSISTENCY; | ||
828 | goto strayed; | ||
829 | } | ||
830 | |||
831 | /* A valid partial segment */ | ||
832 | ri->ri_pseg_start = pseg_start; | ||
833 | ri->ri_seq = seg_seq; | ||
834 | ri->ri_segnum = segnum; | ||
835 | nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); | ||
836 | ri->ri_nextnum = nextnum; | ||
837 | empty_seg = 0; | ||
838 | |||
839 | if (!NILFS_SEG_HAS_SR(&ssi)) { | ||
840 | if (!scan_newer) { | ||
841 | /* This will never happen because a superblock | ||
842 | (last_segment) always points to a pseg | ||
843 | having a super root. */ | ||
844 | ret = NILFS_SEG_FAIL_CONSISTENCY; | ||
845 | goto failed; | ||
846 | } | ||
847 | if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { | ||
848 | ri->ri_lsegs_start = pseg_start; | ||
849 | ri->ri_lsegs_start_seq = seg_seq; | ||
850 | } | ||
851 | if (NILFS_SEG_LOGEND(&ssi)) | ||
852 | ri->ri_lsegs_end = pseg_start; | ||
853 | goto try_next_pseg; | ||
854 | } | ||
855 | |||
856 | /* A valid super root was found. */ | ||
857 | ri->ri_cno = cno++; | ||
858 | ri->ri_super_root = pseg_end; | ||
859 | ri->ri_lsegs_start = ri->ri_lsegs_end = 0; | ||
860 | |||
861 | nilfs_dispose_segment_list(&segments); | ||
862 | nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start) | ||
863 | + ssi.nblocks - seg_start; | ||
864 | nilfs->ns_seg_seq = seg_seq; | ||
865 | nilfs->ns_segnum = segnum; | ||
866 | nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ | ||
867 | nilfs->ns_ctime = ssi.ctime; | ||
868 | nilfs->ns_nextnum = nextnum; | ||
869 | |||
870 | if (scan_newer) | ||
871 | ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED; | ||
872 | else { | ||
873 | if (nilfs->ns_mount_state & NILFS_VALID_FS) | ||
874 | goto super_root_found; | ||
875 | scan_newer = 1; | ||
876 | } | ||
877 | |||
878 | /* reset region for roll-forward */ | ||
879 | pseg_start += ssi.nblocks; | ||
880 | if (pseg_start < seg_end) | ||
881 | continue; | ||
882 | goto feed_segment; | ||
883 | |||
884 | try_next_pseg: | ||
885 | /* Standing on a course, or met an inconsistent state */ | ||
886 | pseg_start += ssi.nblocks; | ||
887 | if (pseg_start < seg_end) | ||
888 | continue; | ||
889 | goto feed_segment; | ||
890 | |||
891 | strayed: | ||
892 | /* Off the trail */ | ||
893 | if (!scan_newer) | ||
894 | /* | ||
895 | * This can happen if a checkpoint was written without | ||
896 | * barriers, or as a result of an I/O failure. | ||
897 | */ | ||
898 | goto failed; | ||
899 | |||
900 | feed_segment: | ||
901 | /* Looking to the next full segment */ | ||
902 | if (empty_seg++) | ||
903 | goto super_root_found; /* found a valid super root */ | ||
904 | |||
905 | ent = nilfs_alloc_segment_entry(segnum); | ||
906 | if (unlikely(!ent)) { | ||
907 | ret = -ENOMEM; | ||
908 | goto failed; | ||
909 | } | ||
910 | list_add_tail(&ent->list, &segments); | ||
911 | |||
912 | seg_seq++; | ||
913 | segnum = nextnum; | ||
914 | nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); | ||
915 | pseg_start = seg_start; | ||
916 | } | ||
917 | |||
918 | super_root_found: | ||
919 | /* Updating pointers relating to the latest checkpoint */ | ||
920 | list_splice(&segments, ri->ri_used_segments.prev); | ||
921 | nilfs->ns_last_pseg = sr_pseg_start; | ||
922 | nilfs->ns_last_seq = nilfs->ns_seg_seq; | ||
923 | nilfs->ns_last_cno = ri->ri_cno; | ||
924 | return 0; | ||
925 | |||
926 | failed: | ||
927 | nilfs_dispose_segment_list(&segments); | ||
928 | return (ret < 0) ? ret : nilfs_warn_segment_error(ret); /* ret > 0 holds a NILFS_SEG_FAIL_* code */ | ||
929 | } | ||
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h new file mode 100644 index 000000000000..adccd4fc654e --- /dev/null +++ b/fs/nilfs2/sb.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * sb.h - NILFS on-memory super block structure. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef _NILFS_SB | ||
25 | #define _NILFS_SB | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/fs.h> | ||
29 | |||
30 | /* | ||
31 | * Mount options | ||
32 | */ | ||
33 | struct nilfs_mount_options { | ||
34 | unsigned long mount_opt; /* NILFS_MOUNT_* flag bits */ | ||
35 | __u64 snapshot_cno; /* snapshot checkpoint number */ | ||
36 | }; | ||
37 | |||
38 | struct the_nilfs; | ||
39 | struct nilfs_sc_info; | ||
40 | |||
41 | /* | ||
42 | * NILFS super-block data in memory | ||
43 | */ | ||
44 | struct nilfs_sb_info { | ||
45 | /* Snapshot status */ | ||
46 | __u64 s_snapshot_cno; /* Checkpoint number */ | ||
47 | atomic_t s_inodes_count; | ||
48 | atomic_t s_blocks_count; /* Reserved (might be deleted) */ | ||
49 | |||
50 | /* Mount options */ | ||
51 | unsigned long s_mount_opt; | ||
52 | uid_t s_resuid; | ||
53 | gid_t s_resgid; | ||
54 | |||
55 | unsigned long s_interval; /* construction interval */ | ||
56 | unsigned long s_watermark; /* threshold of data amount | ||
57 | for the segment construction */ | ||
58 | |||
59 | /* Fundamental members */ | ||
60 | struct super_block *s_super; /* reverse pointer to super_block */ | ||
61 | struct the_nilfs *s_nilfs; /* the_nilfs object this sb belongs to */ | ||
62 | struct list_head s_list; /* list head for nilfs->ns_supers */ | ||
63 | |||
64 | /* Segment constructor */ | ||
65 | struct list_head s_dirty_files; /* dirty files list */ | ||
66 | struct nilfs_sc_info *s_sc_info; /* segment constructor info */ | ||
67 | spinlock_t s_inode_lock; /* Lock for the nilfs inode. | ||
68 | It covers s_dirty_files list */ | ||
69 | |||
70 | /* Metadata files */ | ||
71 | struct inode *s_ifile; /* index file inode */ | ||
72 | |||
73 | /* Inode allocator */ | ||
74 | spinlock_t s_next_gen_lock; /* guards s_next_generation */ | ||
75 | u32 s_next_generation; /* next inode generation number */ | ||
76 | }; | ||
77 | |||
78 | static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb) | ||
79 | { /* s_fs_info carries the nilfs_sb_info for a mounted nilfs sb */ | ||
80 | return sb->s_fs_info; | ||
81 | } | ||
82 | |||
83 | static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi) | ||
84 | { /* accessor for the segment constructor info attached to sbi */ | ||
85 | return sbi->s_sc_info; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Bit operations for the mount option (manipulate NILFS_MOUNT_* bits | ||
90 | * in sbi->s_mount_opt) | ||
91 | #define nilfs_clear_opt(sbi, opt) \ | ||
92 | do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0) | ||
93 | #define nilfs_set_opt(sbi, opt) \ | ||
94 | do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0) | ||
95 | #define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt) /* nonzero if opt is set */ | ||
96 | #define nilfs_write_opt(sbi, mask, opt) \ | ||
97 | do { (sbi)->s_mount_opt = \ | ||
98 | (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \ | ||
99 | NILFS_MOUNT_##opt); \ | ||
100 | } while (0) | ||
101 | |||
102 | #endif /* _NILFS_SB */ | ||
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c new file mode 100644 index 000000000000..1e68821b4a9b --- /dev/null +++ b/fs/nilfs2/segbuf.c | |||
@@ -0,0 +1,439 @@ | |||
1 | /* | ||
2 | * segbuf.c - NILFS segment buffer | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/writeback.h> | ||
26 | #include <linux/crc32.h> | ||
27 | #include "page.h" | ||
28 | #include "segbuf.h" | ||
29 | #include "seglist.h" | ||
30 | |||
31 | |||
32 | static struct kmem_cache *nilfs_segbuf_cachep; | ||
33 | |||
34 | static void nilfs_segbuf_init_once(void *obj) /* slab constructor: zero-fill a new segbuf */ | ||
35 | { | ||
36 | memset(obj, 0, sizeof(struct nilfs_segment_buffer)); | ||
37 | } | ||
38 | |||
39 | int __init nilfs_init_segbuf_cache(void) /* create the segbuf slab cache; 0 or -ENOMEM */ | ||
40 | { | ||
41 | nilfs_segbuf_cachep = | ||
42 | kmem_cache_create("nilfs2_segbuf_cache", | ||
43 | sizeof(struct nilfs_segment_buffer), | ||
44 | 0, SLAB_RECLAIM_ACCOUNT, | ||
45 | nilfs_segbuf_init_once); | ||
46 | |||
47 | return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0; | ||
48 | } | ||
49 | |||
50 | void nilfs_destroy_segbuf_cache(void) /* tear down the segbuf slab cache */ | ||
51 | { | ||
52 | kmem_cache_destroy(nilfs_segbuf_cachep); | ||
53 | } | ||
54 | |||
55 | struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) /* allocate an empty segbuf (GFP_NOFS); NULL on failure */ | ||
56 | { | ||
57 | struct nilfs_segment_buffer *segbuf; | ||
58 | |||
59 | segbuf = kmem_cache_alloc(nilfs_segbuf_cachep, GFP_NOFS); | ||
60 | if (unlikely(!segbuf)) | ||
61 | return NULL; | ||
62 | |||
63 | segbuf->sb_super = sb; | ||
64 | INIT_LIST_HEAD(&segbuf->sb_list); | ||
65 | INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); | ||
66 | INIT_LIST_HEAD(&segbuf->sb_payload_buffers); | ||
67 | return segbuf; | ||
68 | } | ||
69 | |||
70 | void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf) /* return segbuf to the slab cache */ | ||
71 | { | ||
72 | kmem_cache_free(nilfs_segbuf_cachep, segbuf); | ||
73 | } | ||
74 | |||
75 | void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, | ||
76 | unsigned long offset, struct the_nilfs *nilfs) | ||
77 | { /* bind segbuf to full segment segnum; pseg starts offset blocks in */ | ||
78 | segbuf->sb_segnum = segnum; | ||
79 | nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start, | ||
80 | &segbuf->sb_fseg_end); | ||
81 | |||
82 | segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset; | ||
83 | segbuf->sb_rest_blocks = | ||
84 | segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; | ||
85 | } | ||
86 | |||
87 | void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, | ||
88 | __u64 nextnum, struct the_nilfs *nilfs) | ||
89 | { /* record the successor segment and its start block in the summary */ | ||
90 | segbuf->sb_nextnum = nextnum; | ||
91 | segbuf->sb_sum.next = nilfs_get_segment_start_blocknr(nilfs, nextnum); | ||
92 | } | ||
93 | |||
94 | int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf) /* add one block to the summary area */ | ||
95 | { | ||
96 | struct buffer_head *bh; | ||
97 | |||
98 | bh = sb_getblk(segbuf->sb_super, | ||
99 | segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk); | ||
100 | if (unlikely(!bh)) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | nilfs_segbuf_add_segsum_buffer(segbuf, bh); | ||
104 | return 0; | ||
105 | } | ||
106 | |||
107 | int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf, | ||
108 | struct buffer_head **bhp) | ||
109 | { /* append one payload block; *bhp receives the new buffer_head */ | ||
110 | struct buffer_head *bh; | ||
111 | |||
112 | bh = sb_getblk(segbuf->sb_super, | ||
113 | segbuf->sb_pseg_start + segbuf->sb_sum.nblocks); | ||
114 | if (unlikely(!bh)) | ||
115 | return -ENOMEM; | ||
116 | |||
117 | nilfs_segbuf_add_payload_buffer(segbuf, bh); | ||
118 | *bhp = bh; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, | ||
123 | time_t ctime) | ||
124 | { /* re-initialize the on-memory summary for a new partial segment */ | ||
125 | int err; | ||
126 | |||
127 | segbuf->sb_sum.nblocks = segbuf->sb_sum.nsumblk = 0; | ||
128 | err = nilfs_segbuf_extend_segsum(segbuf); | ||
129 | if (unlikely(err)) | ||
130 | return err; | ||
131 | |||
132 | segbuf->sb_sum.flags = flags; | ||
133 | segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); | ||
134 | segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; | ||
135 | segbuf->sb_sum.ctime = ctime; | ||
136 | |||
137 | segbuf->sb_io_error = 0; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Setup segment summary | ||
143 | */ | ||
144 | void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf) | ||
145 | { /* serialize sb_sum into the first summary block (little endian) */ | ||
146 | struct nilfs_segment_summary *raw_sum; | ||
147 | struct buffer_head *bh_sum; | ||
148 | |||
149 | bh_sum = list_entry(segbuf->sb_segsum_buffers.next, | ||
150 | struct buffer_head, b_assoc_buffers); | ||
151 | raw_sum = (struct nilfs_segment_summary *)bh_sum->b_data; | ||
152 | |||
153 | raw_sum->ss_magic = cpu_to_le32(NILFS_SEGSUM_MAGIC); | ||
154 | raw_sum->ss_bytes = cpu_to_le16(sizeof(*raw_sum)); | ||
155 | raw_sum->ss_flags = cpu_to_le16(segbuf->sb_sum.flags); | ||
156 | raw_sum->ss_seq = cpu_to_le64(segbuf->sb_sum.seg_seq); | ||
157 | raw_sum->ss_create = cpu_to_le64(segbuf->sb_sum.ctime); | ||
158 | raw_sum->ss_next = cpu_to_le64(segbuf->sb_sum.next); | ||
159 | raw_sum->ss_nblocks = cpu_to_le32(segbuf->sb_sum.nblocks); | ||
160 | raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo); | ||
161 | raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes); | ||
162 | raw_sum->ss_pad = 0; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * CRC calculation routines | ||
167 | */ | ||
168 | void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, | ||
169 | u32 seed) | ||
170 | { /* checksum the summary area, skipping the two leading csum fields */ | ||
171 | struct buffer_head *bh; | ||
172 | struct nilfs_segment_summary *raw_sum; | ||
173 | unsigned long size, bytes = segbuf->sb_sum.sumbytes; | ||
174 | u32 crc; | ||
175 | |||
176 | bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, | ||
177 | b_assoc_buffers); | ||
178 | |||
179 | raw_sum = (struct nilfs_segment_summary *)bh->b_data; | ||
180 | size = min_t(unsigned long, bytes, bh->b_size); | ||
181 | crc = crc32_le(seed, | ||
182 | (unsigned char *)raw_sum + | ||
183 | sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum), | ||
184 | size - (sizeof(raw_sum->ss_datasum) + | ||
185 | sizeof(raw_sum->ss_sumsum))); | ||
186 | |||
187 | list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers, | ||
188 | b_assoc_buffers) { | ||
189 | bytes -= size; | ||
190 | size = min_t(unsigned long, bytes, bh->b_size); | ||
191 | crc = crc32_le(crc, bh->b_data, size); | ||
192 | } | ||
193 | raw_sum->ss_sumsum = cpu_to_le32(crc); | ||
194 | } | ||
195 | |||
196 | void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, | ||
197 | u32 seed) | ||
198 | { /* checksum everything after ss_datasum: summary + payload blocks */ | ||
199 | struct buffer_head *bh; | ||
200 | struct nilfs_segment_summary *raw_sum; | ||
201 | void *kaddr; | ||
202 | u32 crc; | ||
203 | |||
204 | bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head, | ||
205 | b_assoc_buffers); | ||
206 | raw_sum = (struct nilfs_segment_summary *)bh->b_data; | ||
207 | crc = crc32_le(seed, | ||
208 | (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum), | ||
209 | bh->b_size - sizeof(raw_sum->ss_datasum)); | ||
210 | |||
211 | list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers, | ||
212 | b_assoc_buffers) { | ||
213 | crc = crc32_le(crc, bh->b_data, bh->b_size); | ||
214 | } | ||
215 | list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { | ||
216 | kaddr = kmap_atomic(bh->b_page, KM_USER0); /* payload pages may be highmem */ | ||
217 | crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); | ||
218 | kunmap_atomic(kaddr, KM_USER0); | ||
219 | } | ||
220 | raw_sum->ss_datasum = cpu_to_le32(crc); | ||
221 | } | ||
222 | |||
223 | void nilfs_release_buffers(struct list_head *list) | ||
224 | { /* unlink every buffer from list and drop its references */ | ||
225 | struct buffer_head *bh, *n; | ||
226 | |||
227 | list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { | ||
228 | list_del_init(&bh->b_assoc_buffers); | ||
229 | if (buffer_nilfs_allocated(bh)) { | ||
230 | struct page *clone_page = bh->b_page; | ||
231 | |||
232 | /* remove clone page */ | ||
233 | brelse(bh); | ||
234 | page_cache_release(clone_page); /* for each bh */ | ||
235 | if (page_count(clone_page) <= 2) { /* presumably only cache-side refs remain -- confirm */ | ||
236 | lock_page(clone_page); | ||
237 | nilfs_free_private_page(clone_page); | ||
238 | } | ||
239 | continue; | ||
240 | } | ||
241 | brelse(bh); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | /* | ||
246 | * BIO operations | ||
247 | */ | ||
248 | static void nilfs_end_bio_write(struct bio *bio, int err) | ||
249 | { /* bio completion: record errors and wake nilfs_segbuf_wait() */ | ||
250 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
251 | struct nilfs_write_info *wi = bio->bi_private; | ||
252 | |||
253 | if (err == -EOPNOTSUPP) { | ||
254 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
255 | /* to be detected by submit_seg_bio(); no bio_put() here: | ||
256 | the single put below drops this path's only reference */ | ||
257 | } | ||
258 | |||
259 | if (!uptodate) | ||
260 | atomic_inc(&wi->err); | ||
261 | |||
262 | bio_put(bio); /* pairs with the reference from bio_alloc() */ | ||
263 | complete(&wi->bio_event); | ||
264 | } | ||
265 | |||
266 | static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) | ||
267 | { /* send the current bio; throttles when the backing device is congested */ | ||
268 | struct bio *bio = wi->bio; | ||
269 | int err; | ||
270 | |||
271 | if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { | ||
272 | wait_for_completion(&wi->bio_event); | ||
273 | wi->nbio--; | ||
274 | if (unlikely(atomic_read(&wi->err))) { | ||
275 | bio_put(bio); | ||
276 | err = -EIO; | ||
277 | goto failed; | ||
278 | } | ||
279 | } | ||
280 | |||
281 | bio->bi_end_io = nilfs_end_bio_write; | ||
282 | bio->bi_private = wi; | ||
283 | bio_get(bio); /* extra ref so BIO_EOPNOTSUPP can be tested after submit */ | ||
284 | submit_bio(mode, bio); | ||
285 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) { | ||
286 | bio_put(bio); | ||
287 | err = -EOPNOTSUPP; | ||
288 | goto failed; | ||
289 | } | ||
290 | wi->nbio++; | ||
291 | bio_put(bio); | ||
292 | |||
293 | wi->bio = NULL; | ||
294 | wi->rest_blocks -= wi->end - wi->start; | ||
295 | wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); | ||
296 | wi->start = wi->end; | ||
297 | return 0; | ||
298 | |||
299 | failed: | ||
300 | wi->bio = NULL; | ||
301 | return err; | ||
302 | } | ||
303 | |||
304 | /** | ||
305 | * nilfs_alloc_seg_bio - allocate a bio for writing segment. | ||
306 | * @sb: super block | ||
307 | * @start: beginning disk block number of this BIO. | ||
308 | * @nr_vecs: request size of page vector. | ||
309 | * | ||
310 | * nilfs_alloc_seg_bio() allocates a new BIO structure and initializes it. | ||
311 | * | ||
312 | * Return Value: On success, pointer to the struct bio is returned. | ||
313 | * On error, NULL is returned. | ||
314 | */ | ||
315 | static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, | ||
316 | int nr_vecs) | ||
317 | { | ||
318 | struct bio *bio; | ||
319 | |||
320 | bio = bio_alloc(GFP_NOWAIT, nr_vecs); | ||
321 | if (bio == NULL) { | ||
322 | while (!bio && (nr_vecs >>= 1)) /* retry with ever smaller vec counts */ | ||
323 | bio = bio_alloc(GFP_NOWAIT, nr_vecs); | ||
324 | } | ||
325 | if (likely(bio)) { | ||
326 | bio->bi_bdev = sb->s_bdev; | ||
327 | bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); /* fs blocks -> 512B sectors */ | ||
328 | } | ||
329 | return bio; | ||
330 | } | ||
331 | |||
332 | void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, | ||
333 | struct nilfs_write_info *wi) | ||
334 | { /* reset write state for segbuf; caller must have set wi->sb and wi->bdi */ | ||
335 | wi->bio = NULL; | ||
336 | wi->rest_blocks = segbuf->sb_sum.nblocks; | ||
337 | wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); | ||
338 | wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); | ||
339 | wi->start = wi->end = 0; | ||
340 | wi->nbio = 0; | ||
341 | wi->blocknr = segbuf->sb_pseg_start; | ||
342 | |||
343 | atomic_set(&wi->err, 0); | ||
344 | init_completion(&wi->bio_event); | ||
345 | } | ||
346 | |||
347 | static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, | ||
348 | int mode) | ||
349 | { /* add bh to the pending bio, flushing the bio first when it is full */ | ||
350 | int len, err; | ||
351 | |||
352 | BUG_ON(wi->nr_vecs <= 0); | ||
353 | repeat: | ||
354 | if (!wi->bio) { | ||
355 | wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, | ||
356 | wi->nr_vecs); | ||
357 | if (unlikely(!wi->bio)) | ||
358 | return -ENOMEM; | ||
359 | } | ||
360 | |||
361 | len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
362 | if (len == bh->b_size) { | ||
363 | wi->end++; | ||
364 | return 0; | ||
365 | } | ||
366 | /* bio is FULL */ | ||
367 | err = nilfs_submit_seg_bio(wi, mode); | ||
368 | /* never submit current bh */ | ||
369 | if (likely(!err)) | ||
370 | goto repeat; | ||
371 | return err; | ||
372 | } | ||
373 | |||
374 | int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, | ||
375 | struct nilfs_write_info *wi) | ||
376 | { /* submit summary blocks first, then payload; errors land in wi->err */ | ||
377 | struct buffer_head *bh; | ||
378 | int res, rw = WRITE; | ||
379 | |||
380 | list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { | ||
381 | res = nilfs_submit_bh(wi, bh, rw); | ||
382 | if (unlikely(res)) | ||
383 | goto failed_bio; | ||
384 | } | ||
385 | |||
386 | list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { | ||
387 | res = nilfs_submit_bh(wi, bh, rw); | ||
388 | if (unlikely(res)) | ||
389 | goto failed_bio; | ||
390 | } | ||
391 | |||
392 | if (wi->bio) { | ||
393 | /* | ||
394 | * Last BIO is always sent through the following | ||
395 | * submission. | ||
396 | */ | ||
397 | rw |= (1 << BIO_RW_SYNCIO); | ||
398 | res = nilfs_submit_seg_bio(wi, rw); | ||
399 | if (unlikely(res)) | ||
400 | goto failed_bio; | ||
401 | } | ||
402 | |||
403 | res = 0; | ||
404 | out: | ||
405 | return res; | ||
406 | |||
407 | failed_bio: | ||
408 | atomic_inc(&wi->err); | ||
409 | goto out; | ||
410 | } | ||
411 | |||
412 | /** | ||
413 | * nilfs_segbuf_wait - wait for completion of requested BIOs | ||
| * @segbuf: segment buffer; sb_io_error is set when a write failed | ||
414 | * @wi: nilfs_write_info | ||
415 | * | ||
416 | * Return Value: On Success, 0 is returned. On Error, one of the following | ||
417 | * negative error code is returned. | ||
418 | * | ||
419 | * %-EIO - I/O error | ||
420 | */ | ||
421 | int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, | ||
422 | struct nilfs_write_info *wi) | ||
423 | { | ||
424 | int err = 0; | ||
425 | |||
426 | if (!wi->nbio) | ||
427 | return 0; | ||
428 | |||
429 | do { | ||
430 | wait_for_completion(&wi->bio_event); | ||
431 | } while (--wi->nbio > 0); | ||
432 | |||
433 | if (unlikely(atomic_read(&wi->err) > 0)) { | ||
434 | printk(KERN_ERR "NILFS: IO error writing segment\n"); | ||
435 | err = -EIO; | ||
436 | segbuf->sb_io_error = 1; | ||
437 | } | ||
438 | return err; | ||
439 | } | ||
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h new file mode 100644 index 000000000000..0c3076f4e592 --- /dev/null +++ b/fs/nilfs2/segbuf.h | |||
@@ -0,0 +1,201 @@ | |||
1 | /* | ||
2 | * segbuf.h - NILFS Segment buffer prototypes and definitions | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | #ifndef _NILFS_SEGBUF_H | ||
24 | #define _NILFS_SEGBUF_H | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/bio.h> | ||
29 | #include <linux/completion.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | |||
32 | /** | ||
33 | * struct nilfs_segsum_info - On-memory segment summary | ||
34 | * @flags: Flags | ||
35 | * @nfinfo: Number of file information structures | ||
36 | * @nblocks: Number of blocks included in the partial segment | ||
37 | * @nsumblk: Number of summary blocks | ||
38 | * @sumbytes: Byte count of segment summary | ||
39 | * @nfileblk: Total number of file blocks | ||
40 | * @seg_seq: Segment sequence number | ||
41 | * @ctime: Creation time | ||
42 | * @next: Block number of the next full segment | ||
43 | */ | ||
44 | struct nilfs_segsum_info { | ||
45 | unsigned int flags; | ||
46 | unsigned long nfinfo; | ||
47 | unsigned long nblocks; | ||
48 | unsigned long nsumblk; | ||
49 | unsigned long sumbytes; | ||
50 | unsigned long nfileblk; | ||
51 | u64 seg_seq; | ||
52 | time_t ctime; | ||
53 | sector_t next; | ||
54 | }; | ||
55 | |||
56 | /* macro for the flags */ | ||
57 | #define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR) | ||
58 | #define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN) | ||
59 | #define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND) | ||
60 | #define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT) | ||
61 | #define NILFS_SEG_SIMPLEX(sum) \ | ||
62 | (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \ | ||
63 | (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) | ||
64 | |||
65 | #define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk) | ||
66 | |||
67 | /** | ||
68 | * struct nilfs_segment_buffer - Segment buffer | ||
69 | * @sb_super: back pointer to a superblock struct | ||
70 | * @sb_list: List head to chain this structure | ||
71 | * @sb_sum: On-memory segment summary | ||
72 | * @sb_segnum: Index number of the full segment | ||
73 | * @sb_nextnum: Index number of the next full segment | ||
74 | * @sb_fseg_start: Start block number of the full segment | ||
75 | * @sb_fseg_end: End block number of the full segment | ||
76 | * @sb_pseg_start: Disk block number of partial segment | ||
77 | * @sb_rest_blocks: Number of residual blocks in the current segment | ||
78 | * @sb_segsum_buffers: List of buffers for segment summaries | ||
79 | * @sb_payload_buffers: List of buffers for segment payload | ||
80 | * @sb_io_error: I/O error status | ||
81 | */ | ||
82 | struct nilfs_segment_buffer { | ||
83 | struct super_block *sb_super; | ||
84 | struct list_head sb_list; | ||
85 | |||
86 | /* Segment information */ | ||
87 | struct nilfs_segsum_info sb_sum; | ||
88 | __u64 sb_segnum; | ||
89 | __u64 sb_nextnum; | ||
90 | sector_t sb_fseg_start, sb_fseg_end; | ||
91 | sector_t sb_pseg_start; | ||
92 | unsigned sb_rest_blocks; | ||
93 | |||
94 | /* Buffers */ | ||
95 | struct list_head sb_segsum_buffers; | ||
96 | struct list_head sb_payload_buffers; /* including super root */ | ||
97 | |||
98 | /* io status */ | ||
99 | int sb_io_error; | ||
100 | }; | ||
101 | |||
102 | #define NILFS_LIST_SEGBUF(head) \ | ||
103 | list_entry((head), struct nilfs_segment_buffer, sb_list) | ||
104 | #define NILFS_NEXT_SEGBUF(segbuf) NILFS_LIST_SEGBUF((segbuf)->sb_list.next) | ||
105 | #define NILFS_PREV_SEGBUF(segbuf) NILFS_LIST_SEGBUF((segbuf)->sb_list.prev) | ||
106 | #define NILFS_LAST_SEGBUF(head) NILFS_LIST_SEGBUF((head)->prev) | ||
107 | #define NILFS_FIRST_SEGBUF(head) NILFS_LIST_SEGBUF((head)->next) | ||
108 | #define NILFS_SEGBUF_IS_LAST(segbuf, head) ((segbuf)->sb_list.next == (head)) | ||
109 | |||
110 | #define nilfs_for_each_segbuf_before(s, t, h) \ | ||
111 | for ((s) = NILFS_FIRST_SEGBUF(h); (s) != (t); \ | ||
112 | (s) = NILFS_NEXT_SEGBUF(s)) | ||
113 | |||
114 | #define NILFS_SEGBUF_FIRST_BH(head) \ | ||
115 | (list_entry((head)->next, struct buffer_head, b_assoc_buffers)) | ||
116 | #define NILFS_SEGBUF_NEXT_BH(bh) \ | ||
117 | (list_entry((bh)->b_assoc_buffers.next, struct buffer_head, \ | ||
118 | b_assoc_buffers)) | ||
119 | #define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head) | ||
120 | |||
121 | |||
122 | int __init nilfs_init_segbuf_cache(void); | ||
123 | void nilfs_destroy_segbuf_cache(void); | ||
124 | struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); | ||
125 | void nilfs_segbuf_free(struct nilfs_segment_buffer *); | ||
126 | void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, | ||
127 | struct the_nilfs *); | ||
128 | void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, | ||
129 | struct the_nilfs *); | ||
130 | int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); | ||
131 | int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); | ||
132 | int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, | ||
133 | struct buffer_head **); | ||
134 | void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); | ||
135 | void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32); | ||
136 | void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32); | ||
137 | |||
138 | static inline void | ||
139 | nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, | ||
140 | struct buffer_head *bh) | ||
141 | { /* chain bh into the summary list; counts in both nblocks and nsumblk */ | ||
142 | list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_segsum_buffers); | ||
143 | segbuf->sb_sum.nblocks++; | ||
144 | segbuf->sb_sum.nsumblk++; | ||
145 | } | ||
146 | |||
147 | static inline void | ||
148 | nilfs_segbuf_add_payload_buffer(struct nilfs_segment_buffer *segbuf, | ||
149 | struct buffer_head *bh) | ||
150 | { /* chain bh into the payload list and count it in nblocks */ | ||
151 | list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_payload_buffers); | ||
152 | segbuf->sb_sum.nblocks++; | ||
153 | } | ||
154 | |||
155 | static inline void | ||
156 | nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf, | ||
157 | struct buffer_head *bh) | ||
158 | { /* takes an extra bh reference (get_bh) before queueing as payload */ | ||
159 | get_bh(bh); | ||
160 | nilfs_segbuf_add_payload_buffer(segbuf, bh); | ||
161 | segbuf->sb_sum.nfileblk++; | ||
162 | } | ||
163 | |||
164 | void nilfs_release_buffers(struct list_head *); | ||
165 | |||
166 | static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) | ||
167 | { /* release every summary and payload buffer attached to the segbuf */ | ||
168 | nilfs_release_buffers(&segbuf->sb_segsum_buffers); | ||
169 | nilfs_release_buffers(&segbuf->sb_payload_buffers); | ||
170 | } | ||
171 | |||
172 | struct nilfs_write_info { | ||
173 | struct bio *bio; /* bio currently under construction */ | ||
174 | int start, end; /* The region to be submitted */ | ||
175 | int rest_blocks; /* blocks not yet submitted */ | ||
176 | int max_pages; /* max vecs per bio for this device */ | ||
177 | int nr_vecs; /* vec count for the next bio */ | ||
178 | sector_t blocknr; /* start block of the partial segment */ | ||
179 | |||
180 | int nbio; /* number of in-flight bios */ | ||
181 | atomic_t err; /* count of failed bios */ | ||
182 | struct completion bio_event; | ||
183 | /* completion event of segment write */ | ||
184 | |||
185 | /* | ||
186 | * The following fields must be set explicitly | ||
187 | */ | ||
188 | struct super_block *sb; | ||
189 | struct backing_dev_info *bdi; /* backing dev info */ | ||
190 | struct buffer_head *bh_sr; /* super root buffer -- not used in segbuf.c; confirm against caller */ | ||
191 | }; | ||
192 | |||
193 | |||
194 | void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, | ||
195 | struct nilfs_write_info *); | ||
196 | int nilfs_segbuf_write(struct nilfs_segment_buffer *, | ||
197 | struct nilfs_write_info *); | ||
198 | int nilfs_segbuf_wait(struct nilfs_segment_buffer *, | ||
199 | struct nilfs_write_info *); | ||
200 | |||
201 | #endif /* _NILFS_SEGBUF_H */ | ||
diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h new file mode 100644 index 000000000000..d39df9144e99 --- /dev/null +++ b/fs/nilfs2/seglist.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * seglist.h - expediential structure and routines to handle list of segments | ||
3 | * (would be removed in a future release) | ||
4 | * | ||
5 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | * | ||
21 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
22 | * | ||
23 | */ | ||
24 | #ifndef _NILFS_SEGLIST_H | ||
25 | #define _NILFS_SEGLIST_H | ||
26 | |||
27 | #include <linux/fs.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "sufile.h" | ||
31 | |||
/*
 * Transient descriptor for one segment on a segment list, pairing a
 * segment number with its (optionally opened) segment usage entry.
 */
struct nilfs_segment_entry {
	__u64 segnum;		/* segment number this entry refers to */

#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally.
				  It must be cancelled if
				  construction aborted */

	unsigned flags;
	struct list_head list;	/* link on the caller's segment list */
	struct buffer_head *bh_su;	/* buffer of the usage entry; NULL
					   until nilfs_open_segment_entry() */
	struct nilfs_segment_usage *raw_su; /* usage record within bh_su */
};
44 | |||
45 | |||
46 | void nilfs_dispose_segment_list(struct list_head *); | ||
47 | |||
48 | static inline struct nilfs_segment_entry * | ||
49 | nilfs_alloc_segment_entry(__u64 segnum) | ||
50 | { | ||
51 | struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS); | ||
52 | |||
53 | if (likely(ent)) { | ||
54 | ent->segnum = segnum; | ||
55 | ent->flags = 0; | ||
56 | ent->bh_su = NULL; | ||
57 | ent->raw_su = NULL; | ||
58 | INIT_LIST_HEAD(&ent->list); | ||
59 | } | ||
60 | return ent; | ||
61 | } | ||
62 | |||
/*
 * Look up the segment usage record for ent->segnum in @sufile and attach
 * it to the entry (fills ent->raw_su and ent->bh_su).  Returns the result
 * of nilfs_sufile_get_segment_usage() (0 or a negative error).
 */
static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent,
					   struct inode *sufile)
{
	return nilfs_sufile_get_segment_usage(sufile, ent->segnum,
					      &ent->raw_su, &ent->bh_su);
}
69 | |||
70 | static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent, | ||
71 | struct inode *sufile) | ||
72 | { | ||
73 | if (!ent->bh_su) | ||
74 | return; | ||
75 | nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su); | ||
76 | ent->bh_su = NULL; | ||
77 | ent->raw_su = NULL; | ||
78 | } | ||
79 | |||
/* Free an entry obtained from nilfs_alloc_segment_entry(). */
static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent)
{
	kfree(ent);
}
84 | |||
85 | #endif /* _NILFS_SEGLIST_H */ | ||
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c new file mode 100644 index 000000000000..fb70ec3be20e --- /dev/null +++ b/fs/nilfs2/segment.c | |||
@@ -0,0 +1,2977 @@ | |||
1 | /* | ||
2 | * segment.c - NILFS segment constructor. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/pagemap.h> | ||
25 | #include <linux/buffer_head.h> | ||
26 | #include <linux/writeback.h> | ||
27 | #include <linux/bio.h> | ||
28 | #include <linux/completion.h> | ||
29 | #include <linux/blkdev.h> | ||
30 | #include <linux/backing-dev.h> | ||
31 | #include <linux/freezer.h> | ||
32 | #include <linux/kthread.h> | ||
33 | #include <linux/crc32.h> | ||
34 | #include <linux/pagevec.h> | ||
35 | #include "nilfs.h" | ||
36 | #include "btnode.h" | ||
37 | #include "page.h" | ||
38 | #include "segment.h" | ||
39 | #include "sufile.h" | ||
40 | #include "cpfile.h" | ||
41 | #include "ifile.h" | ||
42 | #include "seglist.h" | ||
43 | #include "segbuf.h" | ||
44 | |||
45 | |||
46 | /* | ||
47 | * Segment constructor | ||
48 | */ | ||
49 | #define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */ | ||
50 | |||
51 | #define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments | ||
52 | appended in collection retry loop */ | ||
53 | |||
54 | /* Construction mode */ | ||
55 | enum { | ||
56 | SC_LSEG_SR = 1, /* Make a logical segment having a super root */ | ||
57 | SC_LSEG_DSYNC, /* Flush data blocks of a given file and make | ||
58 | a logical segment without a super root */ | ||
59 | SC_FLUSH_FILE, /* Flush data files, leads to segment writes without | ||
60 | creating a checkpoint */ | ||
61 | SC_FLUSH_DAT, /* Flush DAT file. This also creates segments without | ||
62 | a checkpoint */ | ||
63 | }; | ||
64 | |||
65 | /* Stage numbers of dirty block collection */ | ||
66 | enum { | ||
67 | NILFS_ST_INIT = 0, | ||
68 | NILFS_ST_GC, /* Collecting dirty blocks for GC */ | ||
69 | NILFS_ST_FILE, | ||
70 | NILFS_ST_IFILE, | ||
71 | NILFS_ST_CPFILE, | ||
72 | NILFS_ST_SUFILE, | ||
73 | NILFS_ST_DAT, | ||
74 | NILFS_ST_SR, /* Super root */ | ||
75 | NILFS_ST_DSYNC, /* Data sync blocks */ | ||
76 | NILFS_ST_DONE, | ||
77 | }; | ||
78 | |||
79 | /* State flags of collection */ | ||
80 | #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ | ||
81 | #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ | ||
82 | #define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED) | ||
83 | |||
84 | /* Operations depending on the construction mode and file type */ | ||
85 | struct nilfs_sc_operations { | ||
86 | int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *, | ||
87 | struct inode *); | ||
88 | int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *, | ||
89 | struct inode *); | ||
90 | int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *, | ||
91 | struct inode *); | ||
92 | void (*write_data_binfo)(struct nilfs_sc_info *, | ||
93 | struct nilfs_segsum_pointer *, | ||
94 | union nilfs_binfo *); | ||
95 | void (*write_node_binfo)(struct nilfs_sc_info *, | ||
96 | struct nilfs_segsum_pointer *, | ||
97 | union nilfs_binfo *); | ||
98 | }; | ||
99 | |||
100 | /* | ||
101 | * Other definitions | ||
102 | */ | ||
103 | static void nilfs_segctor_start_timer(struct nilfs_sc_info *); | ||
104 | static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); | ||
105 | static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); | ||
106 | static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, | ||
107 | int); | ||
108 | |||
109 | #define nilfs_cnt32_gt(a, b) \ | ||
110 | (typecheck(__u32, a) && typecheck(__u32, b) && \ | ||
111 | ((__s32)(b) - (__s32)(a) < 0)) | ||
112 | #define nilfs_cnt32_ge(a, b) \ | ||
113 | (typecheck(__u32, a) && typecheck(__u32, b) && \ | ||
114 | ((__s32)(a) - (__s32)(b) >= 0)) | ||
115 | #define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) | ||
116 | #define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) | ||
117 | |||
118 | /* | ||
119 | * Transaction | ||
120 | */ | ||
121 | static struct kmem_cache *nilfs_transaction_cachep; | ||
122 | |||
123 | /** | ||
124 | * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info | ||
125 | * | ||
126 | * nilfs_init_transaction_cache() creates a slab cache for the struct | ||
127 | * nilfs_transaction_info. | ||
128 | * | ||
129 | * Return Value: On success, it returns 0. On error, one of the following | ||
130 | * negative error code is returned. | ||
131 | * | ||
132 | * %-ENOMEM - Insufficient memory available. | ||
133 | */ | ||
134 | int nilfs_init_transaction_cache(void) | ||
135 | { | ||
136 | nilfs_transaction_cachep = | ||
137 | kmem_cache_create("nilfs2_transaction_cache", | ||
138 | sizeof(struct nilfs_transaction_info), | ||
139 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
140 | return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0; | ||
141 | } | ||
142 | |||
/**
 * nilfs_destroy_transaction_cache - destroy the cache for transaction info
 *
 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
 * nilfs_transaction_info.
 */
void nilfs_destroy_transaction_cache(void)
{
	kmem_cache_destroy(nilfs_transaction_cachep);
}
153 | |||
/*
 * Set up (or re-enter) the per-task transaction context hooked on
 * current->journal_info.
 *
 * Returns > 0 (the new nesting count) for a nested call, 0 when a fresh
 * context was installed, or -ENOMEM when a dynamic context could not be
 * allocated.  A foreign journal_info value is saved in ti_save and
 * restored when the transaction ends.
 */
static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *save = NULL;

	if (cur_ti) {
		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
			/* Nested transaction: just bump the count */
			return ++cur_ti->ti_count;
		else {
			/*
			 * If journal_info field is occupied by other FS,
			 * it is saved and will be restored on
			 * nilfs_transaction_commit().
			 */
			printk(KERN_WARNING
			       "NILFS warning: journal info from a different "
			       "FS\n");
			save = current->journal_info;
		}
	}
	if (!ti) {
		/* No caller-supplied context; allocate one from the slab */
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	} else {
		ti->ti_flags = 0;
	}
	ti->ti_count = 0;
	ti->ti_save = save;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}
188 | |||
/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make a segment construction and write tasks
 * exclusive. The function is used with nilfs_transaction_commit() in pairs.
 * The region enclosed by these two functions can be nested. To avoid a
 * deadlock, the semaphore is only acquired or released in the outermost call.
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it. It is initialized and hooked onto the current task in
 * the outermost call. If a pre-allocated struct is given to @ti, it is used
 * instead; othewise a new struct is assigned from a slab.
 *
 * When @vacancy_check flag is set, this function will check the amount of
 * free space, and will wait for the GC to reclaim disk space if low capacity.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ENOSPC - No space left on device
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct nilfs_sb_info *sbi;
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(ti);

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0)
		return 0;	/* nested call; semaphore already held */

	sbi = NILFS_SB(sb);
	nilfs = sbi->s_nilfs;
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}
	return 0;

 failed:
	/* Undo nilfs_prepare_segment_lock(): unhook the context and free
	   it if it was slab-allocated there. */
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return ret;
}
246 | |||
/**
 * nilfs_transaction_commit - commit indivisible file operations.
 * @sb: super block
 *
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin(). This is only performed
 * in outermost call of this function. If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor. If a sync flag is set, it starts construction
 * directly.
 */
int nilfs_transaction_commit(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct nilfs_sb_info *sbi;
	struct nilfs_sc_info *sci;
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	ti->ti_flags |= NILFS_TI_COMMIT;
	if (ti->ti_count > 0) {
		/* Nested call: only drop one nesting level */
		ti->ti_count--;
		return 0;
	}
	sbi = NILFS_SB(sb);
	sci = NILFS_SC(sbi);
	if (sci != NULL) {
		/* NOTE(review): NILFS_TI_COMMIT was set unconditionally
		   above, so this test is always true here — confirm whether
		   the flag was meant to be conditional. */
		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
		if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) >
		    sci->sc_watermark)
			nilfs_segctor_do_flush(sci, 0);
	}
	up_read(&sbi->s_nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;

	/* Synchronous transactions build a segment right away */
	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	return err;
}
289 | |||
290 | void nilfs_transaction_abort(struct super_block *sb) | ||
291 | { | ||
292 | struct nilfs_transaction_info *ti = current->journal_info; | ||
293 | |||
294 | BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); | ||
295 | if (ti->ti_count > 0) { | ||
296 | ti->ti_count--; | ||
297 | return; | ||
298 | } | ||
299 | up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); | ||
300 | |||
301 | current->journal_info = ti->ti_save; | ||
302 | if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) | ||
303 | kmem_cache_free(nilfs_transaction_cachep, ti); | ||
304 | } | ||
305 | |||
/*
 * Temporarily give up the read-held segment semaphore so that a pending
 * flush request of the segment constructor can run, then re-acquire it.
 * Called while a transaction holds the semaphore for reading; ends with
 * the semaphore downgraded back to read mode.
 */
void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct the_nilfs *nilfs = sbi->s_nilfs;

	if (!sci || !sci->sc_flush_request)
		return;		/* nothing pending; keep the read lock */

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	/* Upgrade to the writer lock; another task may have serviced the
	   flush in the window, hence the re-check below. */
	down_write(&nilfs->ns_segctor_sem);
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		/* Mark this task as the writer while flushing */
		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}
329 | |||
/*
 * Acquire the segment semaphore for writing and install a writer
 * transaction context.  Any prior-flush request raised by readers is
 * serviced (with the lock dropped in between) before this returns.
 * @gcflag marks the transaction as running on behalf of the GC.
 */
static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;

	WARN_ON(cur_ti);	/* must not nest inside another transaction */
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	INIT_LIST_HEAD(&ti->ti_garbage);
	current->journal_info = ti;

	for (;;) {
		down_write(&sbi->s_nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags))
			break;	/* no pending prior flush; keep the lock */

		nilfs_segctor_do_immediate_flush(NILFS_SC(sbi));

		/* Let other tasks in before retrying the lock */
		up_write(&sbi->s_nilfs->ns_segctor_sem);
		yield();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;
}
357 | |||
/*
 * Counterpart of nilfs_transaction_lock(): release the writer lock,
 * restore the saved journal_info, then drop any inodes queued on
 * ti_garbage (done after unlocking, since iput may block).
 */
static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi)
{
	struct nilfs_transaction_info *ti = current->journal_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);

	up_write(&sbi->s_nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;
	if (!list_empty(&ti->ti_garbage))
		nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
}
370 | |||
/*
 * Reserve @bytes in the segment summary area and return a pointer to the
 * reserved region.  An entry never straddles a block boundary: when it
 * would overflow the current summary block, the cursor moves to the next
 * summary buffer (which must already exist).
 */
static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned blocksize = sci->sc_super->s_blocksize;
	void *p;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		ssp->offset = 0;
		/* Caller must have extended the summary area beforehand */
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}
	p = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;
	return p;
}
389 | |||
/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 *
 * Re-initializes the current segment buffer for a new log and rewinds the
 * finfo/binfo summary cursors to just past the segment summary header.
 * Returns 0 on success or the error from nilfs_segbuf_reset().
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *sumbh;
	unsigned sumbytes;
	unsigned flags = 0;
	int err;

	if (nilfs_doing_gc())
		flags = NILFS_SS_GC;	/* log carries GC-moved blocks */
	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime);
	if (unlikely(err))
		return err;

	/* Point both summary cursors at the end of the header */
	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sumbytes = segbuf->sb_sum.sumbytes;
	sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes;
	sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}
415 | |||
/*
 * Advance to the next segment buffer after accounting the blocks written
 * into the current one.  Returns -E2BIG (internal code, not an error for
 * callers to propagate to userspace) when no further segment buffer is
 * available in this construction pass.
 */
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /* The current segment is filled up
				  (internal code) */
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}
425 | |||
/*
 * Append the super root block to the current segment, moving to the next
 * segment buffer first if the current one has no room left.  On success
 * the segment summary is flagged with NILFS_SS_SR.
 */
static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		/* No room: roll over to the next segment buffer */
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;
	}
	err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root);
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}
442 | |||
443 | /* | ||
444 | * Functions for making segment summary and payloads | ||
445 | */ | ||
446 | static int nilfs_segctor_segsum_block_required( | ||
447 | struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp, | ||
448 | unsigned binfo_size) | ||
449 | { | ||
450 | unsigned blocksize = sci->sc_super->s_blocksize; | ||
451 | /* Size of finfo and binfo is enough small against blocksize */ | ||
452 | |||
453 | return ssp->offset + binfo_size + | ||
454 | (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) > | ||
455 | blocksize; | ||
456 | } | ||
457 | |||
/*
 * Open a new finfo in the segment summary for @inode: count it, and
 * reserve space for the finfo header (filled in later by
 * nilfs_segctor_end_finfo()).
 */
static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

	/* Only on-disk inodes (those with a superblock) count as a delta */
	if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
	/* skip finfo */
}
470 | |||
/*
 * Close the finfo opened by nilfs_segctor_begin_finfo(): write the block
 * counts and checkpoint number into the reserved header, update the
 * summary byte count, and reset the per-file counters.  A no-op when no
 * blocks were collected for the file.
 */
static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);
	/* Back-fill the finfo header reserved at begin_finfo time */
	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
					       sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(ii->i_cno);

	segbuf = sci->sc_curseg;
	/* Total summary bytes = full summary blocks + offset in the last */
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}
495 | |||
/*
 * Add one file buffer to the current segment together with its summary
 * (binfo) entry of @binfo_size bytes.  When the block plus any required
 * extra summary block would not fit, the open finfo is closed and the
 * collection retries on the next segment buffer.
 *
 * Returns 0 on success, -E2BIG (internal: segments exhausted) from
 * feed_segment, or a negative error from extending the summary area.
 */
static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	/* +1 accounts for the data block itself; @required for a possible
	   additional summary block */
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}
530 | |||
531 | static int nilfs_handle_bmap_error(int err, const char *fname, | ||
532 | struct inode *inode, struct super_block *sb) | ||
533 | { | ||
534 | if (err == -EINVAL) { | ||
535 | nilfs_error(sb, fname, "broken bmap (inode=%lu)\n", | ||
536 | inode->i_ino); | ||
537 | err = -EIO; | ||
538 | } | ||
539 | return err; | ||
540 | } | ||
541 | |||
542 | /* | ||
543 | * Callback functions that enumerate, mark, and collect dirty blocks | ||
544 | */ | ||
545 | static int nilfs_collect_file_data(struct nilfs_sc_info *sci, | ||
546 | struct buffer_head *bh, struct inode *inode) | ||
547 | { | ||
548 | int err; | ||
549 | |||
550 | err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); | ||
551 | if (unlikely(err < 0)) | ||
552 | return nilfs_handle_bmap_error(err, __func__, inode, | ||
553 | sci->sc_super); | ||
554 | |||
555 | err = nilfs_segctor_add_file_block(sci, bh, inode, | ||
556 | sizeof(struct nilfs_binfo_v)); | ||
557 | if (!err) | ||
558 | sci->sc_datablk_cnt++; | ||
559 | return err; | ||
560 | } | ||
561 | |||
562 | static int nilfs_collect_file_node(struct nilfs_sc_info *sci, | ||
563 | struct buffer_head *bh, | ||
564 | struct inode *inode) | ||
565 | { | ||
566 | int err; | ||
567 | |||
568 | err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); | ||
569 | if (unlikely(err < 0)) | ||
570 | return nilfs_handle_bmap_error(err, __func__, inode, | ||
571 | sci->sc_super); | ||
572 | return 0; | ||
573 | } | ||
574 | |||
/*
 * Register a dirty bmap (node) buffer of a regular file into the segment;
 * its summary entry is a single virtual block number (__le64).
 */
static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}
582 | |||
/* Write a file data block's binfo_v record into the segment summary. */
static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*binfo_v));
	*binfo_v = binfo->bi_v;
}
591 | |||
/* Write a file node block's virtual block number into the summary. */
static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*vblocknr));
	*vblocknr = binfo->bi_v.bi_vblocknr;
}
600 | |||
/* Collection operations for regular files and most metadata files. */
struct nilfs_sc_operations nilfs_sc_file_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};
608 | |||
609 | static int nilfs_collect_dat_data(struct nilfs_sc_info *sci, | ||
610 | struct buffer_head *bh, struct inode *inode) | ||
611 | { | ||
612 | int err; | ||
613 | |||
614 | err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh); | ||
615 | if (unlikely(err < 0)) | ||
616 | return nilfs_handle_bmap_error(err, __func__, inode, | ||
617 | sci->sc_super); | ||
618 | |||
619 | err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64)); | ||
620 | if (!err) | ||
621 | sci->sc_datablk_cnt++; | ||
622 | return err; | ||
623 | } | ||
624 | |||
/*
 * Register a dirty bmap buffer of the DAT file; its summary entry is a
 * full binfo_dat record.
 */
static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}
632 | |||
/* Write a DAT data block's file offset into the segment summary. */
static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
							sizeof(*blkoff));
	*blkoff = binfo->bi_dat.bi_blkoff;
}
641 | |||
/* Write a DAT node block's binfo_dat record into the segment summary. */
static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *binfo_dat =
		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
	*binfo_dat = binfo->bi_dat;
}
650 | |||
/* Collection operations for the DAT file (no virtual block mapping). */
struct nilfs_sc_operations nilfs_sc_dat_ops = {
	.collect_data = nilfs_collect_dat_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_dat_bmap,
	.write_data_binfo = nilfs_write_dat_data_binfo,
	.write_node_binfo = nilfs_write_dat_node_binfo,
};
658 | |||
/* Collection operations for data-sync logs: data blocks only. */
struct nilfs_sc_operations nilfs_sc_dsync_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = NULL,
};
666 | |||
/*
 * Scan @inode's page cache for dirty data buffers in [@start, @end] and
 * append them (with an extra reference each) to @listp, stopping once
 * @nlimit buffers have been gathered.  Returns the number collected.
 */
static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
					      struct list_head *listp,
					      size_t nlimit,
					      loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = 0, last = ULONG_MAX;
	size_t ndirties = 0;
	int i;

	if (unlikely(start != 0 || end != LLONG_MAX)) {
		/*
		 * A valid range is given for sync-ing data pages. The
		 * range is rounded to per-page; extra dirty buffers
		 * may be included if blocksize < pagesize.
		 */
		index = start >> PAGE_SHIFT;
		last = end >> PAGE_SHIFT;
	}
	pagevec_init(&pvec, 0);
 repeat:
	if (unlikely(index > last) ||
	    !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min_t(pgoff_t, last - index,
				      PAGEVEC_SIZE - 1) + 1))
		return ndirties;	/* range exhausted or no dirty pages */

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct buffer_head *bh, *head;
		struct page *page = pvec.pages[i];

		if (unlikely(page->index > last))
			break;	/* lookup may overshoot the range */

		if (mapping->host) {
			/* Make sure the page has buffers to collect */
			lock_page(page);
			if (!page_has_buffers(page))
				create_empty_buffers(page,
						     1 << inode->i_blkbits, 0);
			unlock_page(page);
		}

		bh = head = page_buffers(page);
		do {
			if (!buffer_dirty(bh))
				continue;
			get_bh(bh);	/* dropped by the consumer of listp */
			list_add_tail(&bh->b_assoc_buffers, listp);
			ndirties++;
			if (unlikely(ndirties >= nlimit)) {
				pagevec_release(&pvec);
				cond_resched();
				return ndirties;
			}
		} while (bh = bh->b_this_page, bh != head);
	}
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;
}
728 | |||
/*
 * Gather every dirty btree-node buffer of @inode from its btnode cache
 * into @listp, taking an extra reference on each.  Unlike the data
 * variant there is no range or count limit.
 */
static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
					    struct list_head *listp)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct address_space *mapping = &ii->i_btnode_cache;
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh)) {
					get_bh(bh);	/* released by consumer */
					list_add_tail(&bh->b_assoc_buffers,
						      listp);
				}
				bh = bh->b_this_page;
			} while (bh != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}
758 | |||
/*
 * Dispose of the inodes queued on @head.  Inodes still flagged
 * NILFS_I_DIRTY are requeued to sbi->s_dirty_files (unless @force); the
 * rest are batched into a small on-stack vector so that iput() — which
 * may block — runs outside s_inode_lock.
 */
static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
			       struct list_head *head, int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&sbi->s_inode_lock);
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			list_del_init(&ii->i_dirty);
			if (force) {
				/* Forced disposal: drop any held inode
				   block buffer as well */
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				/* Still dirty: put it back on the dirty
				   file list instead of releasing it */
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty,
					      &sbi->s_dirty_files);
				continue;
			}
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;	/* flush the batch first */
		}
		spin_unlock(&sbi->s_inode_lock);

		/* iput() outside the spinlock; it may sleep */
		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}
791 | |||
792 | static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) | ||
793 | { | ||
794 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
795 | int ret = 0; | ||
796 | |||
797 | if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) | ||
798 | ret++; | ||
799 | if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) | ||
800 | ret++; | ||
801 | if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile)) | ||
802 | ret++; | ||
803 | if (ret || nilfs_doing_gc()) | ||
804 | if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs))) | ||
805 | ret++; | ||
806 | return ret; | ||
807 | } | ||
808 | |||
809 | static int nilfs_segctor_clean(struct nilfs_sc_info *sci) | ||
810 | { | ||
811 | return list_empty(&sci->sc_dirty_files) && | ||
812 | !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) && | ||
813 | list_empty(&sci->sc_cleaning_segments) && | ||
814 | (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes)); | ||
815 | } | ||
816 | |||
817 | static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) | ||
818 | { | ||
819 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
820 | int ret = 0; | ||
821 | |||
822 | if (nilfs_test_metadata_dirty(sbi)) | ||
823 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); | ||
824 | |||
825 | spin_lock(&sbi->s_inode_lock); | ||
826 | if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) | ||
827 | ret++; | ||
828 | |||
829 | spin_unlock(&sbi->s_inode_lock); | ||
830 | return ret; | ||
831 | } | ||
832 | |||
833 | static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) | ||
834 | { | ||
835 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
836 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
837 | |||
838 | nilfs_mdt_clear_dirty(sbi->s_ifile); | ||
839 | nilfs_mdt_clear_dirty(nilfs->ns_cpfile); | ||
840 | nilfs_mdt_clear_dirty(nilfs->ns_sufile); | ||
841 | nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); | ||
842 | } | ||
843 | |||
844 | static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) | ||
845 | { | ||
846 | struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; | ||
847 | struct buffer_head *bh_cp; | ||
848 | struct nilfs_checkpoint *raw_cp; | ||
849 | int err; | ||
850 | |||
851 | /* XXX: this interface will be changed */ | ||
852 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, | ||
853 | &raw_cp, &bh_cp); | ||
854 | if (likely(!err)) { | ||
855 | /* The following code is duplicated with cpfile. But, it is | ||
856 | needed to collect the checkpoint even if it was not newly | ||
857 | created */ | ||
858 | nilfs_mdt_mark_buffer_dirty(bh_cp); | ||
859 | nilfs_mdt_mark_dirty(nilfs->ns_cpfile); | ||
860 | nilfs_cpfile_put_checkpoint( | ||
861 | nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); | ||
862 | } else | ||
863 | WARN_ON(err == -EINVAL || err == -ENOENT); | ||
864 | |||
865 | return err; | ||
866 | } | ||
867 | |||
868 | static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) | ||
869 | { | ||
870 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
871 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
872 | struct buffer_head *bh_cp; | ||
873 | struct nilfs_checkpoint *raw_cp; | ||
874 | int err; | ||
875 | |||
876 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, | ||
877 | &raw_cp, &bh_cp); | ||
878 | if (unlikely(err)) { | ||
879 | WARN_ON(err == -EINVAL || err == -ENOENT); | ||
880 | goto failed_ibh; | ||
881 | } | ||
882 | raw_cp->cp_snapshot_list.ssl_next = 0; | ||
883 | raw_cp->cp_snapshot_list.ssl_prev = 0; | ||
884 | raw_cp->cp_inodes_count = | ||
885 | cpu_to_le64(atomic_read(&sbi->s_inodes_count)); | ||
886 | raw_cp->cp_blocks_count = | ||
887 | cpu_to_le64(atomic_read(&sbi->s_blocks_count)); | ||
888 | raw_cp->cp_nblk_inc = | ||
889 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); | ||
890 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); | ||
891 | raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); | ||
892 | |||
893 | if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) | ||
894 | nilfs_checkpoint_clear_minor(raw_cp); | ||
895 | else | ||
896 | nilfs_checkpoint_set_minor(raw_cp); | ||
897 | |||
898 | nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); | ||
899 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); | ||
900 | return 0; | ||
901 | |||
902 | failed_ibh: | ||
903 | return err; | ||
904 | } | ||
905 | |||
906 | static void nilfs_fill_in_file_bmap(struct inode *ifile, | ||
907 | struct nilfs_inode_info *ii) | ||
908 | |||
909 | { | ||
910 | struct buffer_head *ibh; | ||
911 | struct nilfs_inode *raw_inode; | ||
912 | |||
913 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) { | ||
914 | ibh = ii->i_bh; | ||
915 | BUG_ON(!ibh); | ||
916 | raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, | ||
917 | ibh); | ||
918 | nilfs_bmap_write(ii->i_bmap, raw_inode); | ||
919 | nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); | ||
920 | } | ||
921 | } | ||
922 | |||
923 | static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, | ||
924 | struct inode *ifile) | ||
925 | { | ||
926 | struct nilfs_inode_info *ii; | ||
927 | |||
928 | list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { | ||
929 | nilfs_fill_in_file_bmap(ifile, ii); | ||
930 | set_bit(NILFS_I_COLLECTED, &ii->i_state); | ||
931 | } | ||
932 | } | ||
933 | |||
934 | /* | ||
935 | * CRC calculation routines | ||
936 | */ | ||
937 | static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed) | ||
938 | { | ||
939 | struct nilfs_super_root *raw_sr = | ||
940 | (struct nilfs_super_root *)bh_sr->b_data; | ||
941 | u32 crc; | ||
942 | |||
943 | crc = crc32_le(seed, | ||
944 | (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), | ||
945 | NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); | ||
946 | raw_sr->sr_sum = cpu_to_le32(crc); | ||
947 | } | ||
948 | |||
949 | static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci, | ||
950 | u32 seed) | ||
951 | { | ||
952 | struct nilfs_segment_buffer *segbuf; | ||
953 | |||
954 | if (sci->sc_super_root) | ||
955 | nilfs_fill_in_super_root_crc(sci->sc_super_root, seed); | ||
956 | |||
957 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
958 | nilfs_segbuf_fill_in_segsum_crc(segbuf, seed); | ||
959 | nilfs_segbuf_fill_in_data_crc(segbuf, seed); | ||
960 | } | ||
961 | } | ||
962 | |||
/*
 * Fill in the super root block held in sci->sc_super_root: the fixed
 * header fields plus the on-disk inodes of the DAT, cpfile and sufile,
 * each written at its fixed offset derived from ns_inode_size.
 */
static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct buffer_head *bh_sr = sci->sc_super_root;
	struct nilfs_super_root *raw_sr =
		(struct nilfs_super_root *)bh_sr->b_data;
	unsigned isz = nilfs->ns_inode_size;

	raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
	/* during GC, keep the ctime of the last non-GC construction */
	raw_sr->sr_nongc_ctime
		= cpu_to_le64(nilfs_doing_gc() ?
			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
	raw_sr->sr_flags = 0;

	nilfs_mdt_write_inode_direct(
		nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz));
	nilfs_mdt_write_inode_direct(
		nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz));
	nilfs_mdt_write_inode_direct(
		nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz));
}
984 | |||
985 | static void nilfs_redirty_inodes(struct list_head *head) | ||
986 | { | ||
987 | struct nilfs_inode_info *ii; | ||
988 | |||
989 | list_for_each_entry(ii, head, i_dirty) { | ||
990 | if (test_bit(NILFS_I_COLLECTED, &ii->i_state)) | ||
991 | clear_bit(NILFS_I_COLLECTED, &ii->i_state); | ||
992 | } | ||
993 | } | ||
994 | |||
995 | static void nilfs_drop_collected_inodes(struct list_head *head) | ||
996 | { | ||
997 | struct nilfs_inode_info *ii; | ||
998 | |||
999 | list_for_each_entry(ii, head, i_dirty) { | ||
1000 | if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state)) | ||
1001 | continue; | ||
1002 | |||
1003 | clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state); | ||
1004 | set_bit(NILFS_I_UPDATED, &ii->i_state); | ||
1005 | } | ||
1006 | } | ||
1007 | |||
1008 | static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci, | ||
1009 | struct inode *sufile) | ||
1010 | |||
1011 | { | ||
1012 | struct list_head *head = &sci->sc_cleaning_segments; | ||
1013 | struct nilfs_segment_entry *ent; | ||
1014 | int err; | ||
1015 | |||
1016 | list_for_each_entry(ent, head, list) { | ||
1017 | if (!(ent->flags & NILFS_SLH_FREED)) | ||
1018 | break; | ||
1019 | err = nilfs_sufile_cancel_free(sufile, ent->segnum); | ||
1020 | WARN_ON(err); /* do not happen */ | ||
1021 | ent->flags &= ~NILFS_SLH_FREED; | ||
1022 | } | ||
1023 | } | ||
1024 | |||
1025 | static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci, | ||
1026 | struct inode *sufile) | ||
1027 | { | ||
1028 | struct list_head *head = &sci->sc_cleaning_segments; | ||
1029 | struct nilfs_segment_entry *ent; | ||
1030 | int err; | ||
1031 | |||
1032 | list_for_each_entry(ent, head, list) { | ||
1033 | err = nilfs_sufile_free(sufile, ent->segnum); | ||
1034 | if (unlikely(err)) | ||
1035 | return err; | ||
1036 | ent->flags |= NILFS_SLH_FREED; | ||
1037 | } | ||
1038 | return 0; | ||
1039 | } | ||
1040 | |||
/* Commit the frees: dispose of the whole cleaning segment list. */
static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci)
{
	nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
}
1045 | |||
/*
 * Feed each buffer on @listp to @collect, dropping the list reference
 * (taken at lookup time) as we go.  On a collect error — or when
 * @collect is NULL — the remaining buffers are released uncollected;
 * note the deliberate fall-through into the dispose loop in the NULL
 * case.  Returns 0 on success or the first error from @collect.
 */
static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
				       struct inode *inode,
				       struct list_head *listp,
				       int (*collect)(struct nilfs_sc_info *,
						      struct buffer_head *,
						      struct inode *))
{
	struct buffer_head *bh, *n;
	int err = 0;

	if (collect) {
		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
			list_del_init(&bh->b_assoc_buffers);
			err = collect(sci, bh, inode);
			brelse(bh);
			if (unlikely(err))
				goto dispose_buffers;
		}
		return 0;
	}

 dispose_buffers:
	while (!list_empty(listp)) {
		bh = list_entry(listp->next, struct buffer_head,
				b_assoc_buffers);
		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	return err;
}
1076 | |||
1077 | static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci) | ||
1078 | { | ||
1079 | /* Remaining number of blocks within segment buffer */ | ||
1080 | return sci->sc_segbuf_nblocks - | ||
1081 | (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks); | ||
1082 | } | ||
1083 | |||
/*
 * Collect the dirty data, node and bmap buffers of one file into the
 * current segment, via the given collection operations.  NILFS_CF_NODE
 * in sc_stage.flags records that the data phase of this file already
 * completed, so a resumed call (after -E2BIG) skips straight to the
 * node phase.  Returns 0 on success, -E2BIG when the segment filled up,
 * or another negative error.
 */
static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
				   struct inode *inode,
				   struct nilfs_sc_operations *sc_ops)
{
	LIST_HEAD(data_buffers);
	LIST_HEAD(node_buffers);
	int err;

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		size_t n, rest = nilfs_segctor_buffer_rest(sci);

		/* look up at most rest+1 buffers; finding more than rest
		   means the data cannot fit this segment */
		n = nilfs_lookup_dirty_data_buffers(
			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
		if (n > rest) {
			err = nilfs_segctor_apply_buffers(
				sci, inode, &data_buffers,
				sc_ops->collect_data);
			BUG_ON(!err); /* always receive -E2BIG or true error */
			goto break_or_fail;
		}
	}
	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		err = nilfs_segctor_apply_buffers(
			sci, inode, &data_buffers, sc_ops->collect_data);
		if (unlikely(err)) {
			/* dispose node list */
			nilfs_segctor_apply_buffers(
				sci, inode, &node_buffers, NULL);
			goto break_or_fail;
		}
		sci->sc_stage.flags |= NILFS_CF_NODE;
	}
	/* Collect node */
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_node);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_bmap);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_segctor_end_finfo(sci, inode);
	sci->sc_stage.flags &= ~NILFS_CF_NODE;

 break_or_fail:
	return err;
}
1136 | |||
/*
 * Data-sync variant of the file scan: collect only the dirty data
 * buffers of @inode within [sc_dsync_start, sc_dsync_end].  No node or
 * bmap buffers are collected in dsync mode.
 */
static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
					 struct inode *inode)
{
	LIST_HEAD(data_buffers);
	size_t n, rest = nilfs_segctor_buffer_rest(sci);
	int err;

	n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
					    sci->sc_dsync_start,
					    sci->sc_dsync_end);

	err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
					  nilfs_collect_file_data);
	if (!err) {
		nilfs_segctor_end_finfo(sci, inode);
		BUG_ON(n > rest);
		/* always receive -E2BIG or true error if n > rest */
	}
	return err;
}
1157 | |||
/*
 * Staged block collector.  sci->sc_stage.scnt records how far collection
 * has proceeded (GC inodes -> dirty files -> ifile -> cpfile -> sufile
 * -> DAT -> super root), so that a run interrupted by a full segment
 * (-E2BIG) can resume where it stopped; the per-stage *_ptr fields
 * likewise remember the inode being scanned.  @mode short-circuits the
 * sequence: SC_LSEG_DSYNC jumps to the dsync stage, SC_FLUSH_DAT jumps
 * to the DAT stage, and SC_FLUSH_FILE / SC_FLUSH_DAT end construction
 * early; only SC_LSEG_SR appends a super root.  The switch cases fall
 * through intentionally.
 */
static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct list_head *head;
	struct nilfs_inode_info *ii;
	int err = 0;

	switch (sci->sc_stage.scnt) {
	case NILFS_ST_INIT:
		/* Pre-processes */
		sci->sc_stage.flags = 0;

		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
			sci->sc_nblk_inc = 0;
			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
			if (mode == SC_LSEG_DSYNC) {
				sci->sc_stage.scnt = NILFS_ST_DSYNC;
				goto dsync_mode;
			}
		}

		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_stage.gc_inode_ptr = NULL;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DAT;
			goto dat_stage;
		}
		sci->sc_stage.scnt++; /* Fall through */
	case NILFS_ST_GC:
		if (nilfs_doing_gc()) {
			head = &sci->sc_gc_inodes;
			/* resume from the inode recorded on the last run */
			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
						head, i_dirty);
			list_for_each_entry_continue(ii, head, i_dirty) {
				err = nilfs_segctor_scan_file(
					sci, &ii->vfs_inode,
					&nilfs_sc_file_ops);
				if (unlikely(err)) {
					sci->sc_stage.gc_inode_ptr = list_entry(
						ii->i_dirty.prev,
						struct nilfs_inode_info,
						i_dirty);
					goto break_or_fail;
				}
				set_bit(NILFS_I_COLLECTED, &ii->i_state);
			}
			sci->sc_stage.gc_inode_ptr = NULL;
		}
		sci->sc_stage.scnt++; /* Fall through */
	case NILFS_ST_FILE:
		head = &sci->sc_dirty_files;
		/* resume from the file recorded on the last run */
		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
					i_dirty);
		list_for_each_entry_continue(ii, head, i_dirty) {
			clear_bit(NILFS_I_DIRTY, &ii->i_state);

			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
						      &nilfs_sc_file_ops);
			if (unlikely(err)) {
				sci->sc_stage.dirty_file_ptr =
					list_entry(ii->i_dirty.prev,
						   struct nilfs_inode_info,
						   i_dirty);
				goto break_or_fail;
			}
			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
			/* XXX: required ? */
		}
		sci->sc_stage.dirty_file_ptr = NULL;
		if (mode == SC_FLUSH_FILE) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++;
		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
		/* Fall through */
	case NILFS_ST_IFILE:
		err = nilfs_segctor_scan_file(sci, sbi->s_ifile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++;
		/* Creating a checkpoint */
		err = nilfs_segctor_create_checkpoint(sci);
		if (unlikely(err))
			break;
		/* Fall through */
	case NILFS_ST_CPFILE:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++; /* Fall through */
	case NILFS_ST_SUFILE:
		err = nilfs_segctor_prepare_free_segments(sci,
							  nilfs->ns_sufile);
		if (unlikely(err))
			break;
		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		sci->sc_stage.scnt++; /* Fall through */
	case NILFS_ST_DAT:
 dat_stage:
		err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs),
					      &nilfs_sc_dat_ops);
		if (unlikely(err))
			break;
		if (mode == SC_FLUSH_DAT) {
			sci->sc_stage.scnt = NILFS_ST_DONE;
			return 0;
		}
		sci->sc_stage.scnt++; /* Fall through */
	case NILFS_ST_SR:
		if (mode == SC_LSEG_SR) {
			/* Appending a super root */
			err = nilfs_segctor_add_super_root(sci);
			if (unlikely(err))
				break;
		}
		/* End of a logical segment */
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DSYNC:
 dsync_mode:
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
		ii = sci->sc_dsync_inode;
		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
			break;

		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
		if (unlikely(err))
			break;
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		sci->sc_stage.scnt = NILFS_ST_DONE;
		return 0;
	case NILFS_ST_DONE:
		return 0;
	default:
		BUG();
	}

 break_or_fail:
	return err;
}
1306 | |||
1307 | static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) | ||
1308 | { | ||
1309 | struct buffer_head *bh_su; | ||
1310 | struct nilfs_segment_usage *raw_su; | ||
1311 | int err; | ||
1312 | |||
1313 | err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); | ||
1314 | if (unlikely(err)) | ||
1315 | return err; | ||
1316 | nilfs_mdt_mark_buffer_dirty(bh_su); | ||
1317 | nilfs_mdt_mark_dirty(sufile); | ||
1318 | nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); | ||
1319 | return 0; | ||
1320 | } | ||
1321 | |||
/*
 * Prepare the first segment buffer for a construction pass: map it to
 * the current write position (shifting to the next full segment when
 * fewer than NILFS_PSEG_MIN_BLOCKS remain), pin its segusage, pick the
 * next segment number (allocating one when the current position is the
 * last allocated segment), and drop any segment buffers left over from
 * a previous pass.
 */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
					    struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *n;
	__u64 nextnum;
	int err;

	if (list_empty(&sci->sc_segbufs)) {
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			return -ENOMEM;
		list_add(&segbuf->sb_list, &sci->sc_segbufs);
	} else
		segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

	nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset,
			 nilfs);

	if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
		/* too little room left: start at the next full segment */
		nilfs_shift_to_next_segment(nilfs);
		nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
	}
	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;

	err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum);
	if (unlikely(err))
		return err;

	if (nilfs->ns_segnum == nilfs->ns_nextnum) {
		/* Start from the head of a new full segment */
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
		if (unlikely(err))
			return err;
	} else
		nextnum = nilfs->ns_nextnum;

	segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);

	/* truncating segment buffers */
	list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
					  sb_list) {
		list_del_init(&segbuf->sb_list);
		nilfs_segbuf_free(segbuf);
	}
	return 0;
}
1369 | |||
/*
 * Append @nadd segment buffers to the construction, each mapped to the
 * previous buffer's next-segment number and given a freshly allocated
 * next-next segment.  On failure the partially built chain is unwound:
 * every allocated segment is returned to the sufile and every new
 * buffer freed.
 */
static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
					 struct the_nilfs *nilfs, int nadd)
{
	struct nilfs_segment_buffer *segbuf, *prev, *n;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 nextnextnum;
	LIST_HEAD(list);
	int err, ret, i;

	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	/*
	 * Since the segment specified with nextnum might be allocated during
	 * the previous construction, the buffer including its segusage may
	 * not be dirty. The following call ensures that the buffer is dirty
	 * and will pin the buffer on memory until the sufile is written.
	 */
	err = nilfs_touch_segusage(sufile, prev->sb_nextnum);
	if (unlikely(err))
		return err;

	for (i = 0; i < nadd; i++) {
		/* extend segment info */
		err = -ENOMEM;
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			goto failed;

		/* map this buffer to region of segment on-disk */
		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;

		/* allocate the next next full segment */
		err = nilfs_sufile_alloc(sufile, &nextnextnum);
		if (unlikely(err))
			goto failed_segbuf;

		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);

		list_add_tail(&segbuf->sb_list, &list);
		prev = segbuf;
	}
	/* splice the new chain onto the tail of sc_segbufs */
	list_splice(&list, sci->sc_segbufs.prev);
	return 0;

 failed_segbuf:
	nilfs_segbuf_free(segbuf);
 failed:
	list_for_each_entry_safe(segbuf, n, &list, sb_list) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
		list_del_init(&segbuf->sb_list);
		nilfs_segbuf_free(segbuf);
	}
	return err;
}
1426 | |||
/*
 * Error-path cleanup after a failed write: return the next-segment
 * allocations of all segment buffers to the sufile, and handle the
 * buffer that actually hit the I/O error (terminate a partially used
 * segment, flag discontinuity for a fresh one, or record a segment
 * error in the sufile for later extended segments).
 */
static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
						   struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf;
	int ret, done = 0;

	segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
		/* this buffer allocated its own next segment; free it */
		ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	if (segbuf->sb_io_error) {
		/* Case 1: The first segment failed */
		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
			/* Case 1a: Partial segment appended into an existing
			   segment */
			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
						segbuf->sb_fseg_end);
		else /* Case 1b: New full segment */
			set_nilfs_discontinued(nilfs);
		done++;
	}

	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
		if (!done && segbuf->sb_io_error) {
			if (segbuf->sb_segnum != nilfs->ns_nextnum)
				/* Case 2: extended segment (!= next) failed */
				nilfs_sufile_set_error(nilfs->ns_sufile,
						       segbuf->sb_segnum);
			done++;
		}
	}
}
1462 | |||
1463 | static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci) | ||
1464 | { | ||
1465 | struct nilfs_segment_buffer *segbuf; | ||
1466 | |||
1467 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) | ||
1468 | nilfs_segbuf_clear(segbuf); | ||
1469 | sci->sc_super_root = NULL; | ||
1470 | } | ||
1471 | |||
1472 | static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) | ||
1473 | { | ||
1474 | struct nilfs_segment_buffer *segbuf; | ||
1475 | |||
1476 | while (!list_empty(&sci->sc_segbufs)) { | ||
1477 | segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); | ||
1478 | list_del_init(&segbuf->sb_list); | ||
1479 | nilfs_segbuf_free(segbuf); | ||
1480 | } | ||
1481 | /* sci->sc_curseg = NULL; */ | ||
1482 | } | ||
1483 | |||
1484 | static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, | ||
1485 | struct the_nilfs *nilfs, int err) | ||
1486 | { | ||
1487 | if (unlikely(err)) { | ||
1488 | nilfs_segctor_free_incomplete_segments(sci, nilfs); | ||
1489 | nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile); | ||
1490 | } | ||
1491 | nilfs_segctor_clear_segment_buffers(sci); | ||
1492 | } | ||
1493 | |||
1494 | static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, | ||
1495 | struct inode *sufile) | ||
1496 | { | ||
1497 | struct nilfs_segment_buffer *segbuf; | ||
1498 | struct buffer_head *bh_su; | ||
1499 | struct nilfs_segment_usage *raw_su; | ||
1500 | unsigned long live_blocks; | ||
1501 | int ret; | ||
1502 | |||
1503 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1504 | ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, | ||
1505 | &raw_su, &bh_su); | ||
1506 | WARN_ON(ret); /* always succeed because bh_su is dirty */ | ||
1507 | live_blocks = segbuf->sb_sum.nblocks + | ||
1508 | (segbuf->sb_pseg_start - segbuf->sb_fseg_start); | ||
1509 | raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); | ||
1510 | raw_su->su_nblocks = cpu_to_le32(live_blocks); | ||
1511 | nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, | ||
1512 | bh_su); | ||
1513 | } | ||
1514 | } | ||
1515 | |||
/*
 * Error-path counterpart of nilfs_segctor_update_segusage(): roll the
 * live block counts back — the first segment keeps only the blocks that
 * preceded this construction, every later segment is reset to zero.
 */
static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci,
					  struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	struct buffer_head *bh_su;
	struct nilfs_segment_usage *raw_su;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
	ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
					     &raw_su, &bh_su);
	WARN_ON(ret); /* always succeed because bh_su is dirty */
	raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start -
					 segbuf->sb_fseg_start);
	nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);

	/* the remaining (extended) segments carried no prior blocks */
	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
						     &raw_su, &bh_su);
		WARN_ON(ret); /* always succeed */
		raw_su->su_nblocks = 0;
		nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
					       bh_su);
	}
}
1541 | |||
/*
 * Drop every segment buffer after @last (the last one actually used):
 * return its allocated next segment to the sufile, subtract its blocks
 * from the construction capacity, and free it.
 */
static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
					    struct nilfs_segment_buffer *last,
					    struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf = last, *n;
	int ret;

	list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
					  sb_list) {
		list_del_init(&segbuf->sb_list);
		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret);
		nilfs_segbuf_free(segbuf);
	}
}
1558 | |||
1559 | |||
/*
 * Collection retry loop.  Runs the staged collector; when it reports
 * -E2BIG (segment filled) during a full SC_LSEG_SR pass that has gone
 * past the cpfile stage, the pass is restarted from the saved stage
 * with an (exponentially growing, capped) number of extra segments.
 * Unused trailing segment buffers are truncated on success.
 */
static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
				 struct the_nilfs *nilfs, int mode)
{
	struct nilfs_cstage prev_stage = sci->sc_stage;
	int err, nadd = 1;

	/* Collection retry loop */
	for (;;) {
		sci->sc_super_root = NULL;
		sci->sc_nblk_this_inc = 0;
		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

		err = nilfs_segctor_reset_segment_buffer(sci);
		if (unlikely(err))
			goto failed;

		err = nilfs_segctor_collect_blocks(sci, mode);
		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
		if (!err)
			break;

		if (unlikely(err != -E2BIG))
			goto failed;

		/* The current segment is filled up */
		if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
			break;

		/* undo this pass's frees and buffers before retrying */
		nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
		nilfs_segctor_clear_segment_buffers(sci);

		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
		if (unlikely(err))
			return err;

		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
		sci->sc_stage = prev_stage;
	}
	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
	return 0;

 failed:
	return err;
}
1604 | |||
/*
 * Substitute @new_bh for @old_bh on whatever b_assoc_buffers list the
 * old buffer was linked into.  @new_bh must not be on a list already.
 */
static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
				      struct buffer_head *new_bh)
{
	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));

	list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
	/* The caller must release old_bh */
}
1613 | |||
/*
 * Walk the payload buffers of @segbuf, assigning each its final disk
 * block number through the owning file's bmap and writing the matching
 * binfo entry into the segment summary.  A finfo header is read from
 * the summary at each file boundary; its fi_nblocks / fi_ndatablk
 * counters drive the per-file data-vs-node accounting.  The walk stops
 * at the super root buffer.  Returns 0 on success or a bmap error
 * translated by nilfs_handle_bmap_error().
 */
static int
nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
				     struct nilfs_segment_buffer *segbuf,
				     int mode)
{
	struct inode *inode = NULL;
	sector_t blocknr;
	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
	unsigned long nblocks = 0, ndatablk = 0;
	struct nilfs_sc_operations *sc_op = NULL;
	struct nilfs_segsum_pointer ssp;
	struct nilfs_finfo *finfo = NULL;
	union nilfs_binfo binfo;
	struct buffer_head *bh, *bh_org;
	ino_t ino = 0;
	int err = 0;

	if (!nfinfo)
		goto out;

	/* payload starts right after the summary blocks */
	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	ssp.offset = sizeof(struct nilfs_segment_summary);

	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		if (bh == sci->sc_super_root)
			break;
		if (!finfo) {
			/* new file: read its finfo header from the summary */
			finfo =	nilfs_segctor_map_segsum_entry(
				sci, &ssp, sizeof(*finfo));
			ino = le64_to_cpu(finfo->fi_ino);
			nblocks = le32_to_cpu(finfo->fi_nblocks);
			ndatablk = le32_to_cpu(finfo->fi_ndatablk);

			if (buffer_nilfs_node(bh))
				inode = NILFS_BTNC_I(bh->b_page->mapping);
			else
				inode = NILFS_AS_I(bh->b_page->mapping);

			if (mode == SC_LSEG_DSYNC)
				sc_op = &nilfs_sc_dsync_ops;
			else if (ino == NILFS_DAT_INO)
				sc_op = &nilfs_sc_dat_ops;
			else /* file blocks */
				sc_op = &nilfs_sc_file_ops;
		}
		bh_org = bh;
		get_bh(bh_org);
		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
					&binfo);
		/* nilfs_bmap_assign may hand back a different buffer */
		if (bh != bh_org)
			nilfs_list_replace_buffer(bh_org, bh);
		brelse(bh_org);
		if (unlikely(err))
			goto failed_bmap;

		if (ndatablk > 0)
			sc_op->write_data_binfo(sci, &ssp, &binfo);
		else
			sc_op->write_node_binfo(sci, &ssp, &binfo);

		blocknr++;
		if (--nblocks == 0) {
			finfo = NULL;
			if (--nfinfo == 0)
				break;
		} else if (ndatablk > 0)
			ndatablk--;
	}
 out:
	return 0;

 failed_bmap:
	err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
	return err;
}
1690 | |||
1691 | static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) | ||
1692 | { | ||
1693 | struct nilfs_segment_buffer *segbuf; | ||
1694 | int err; | ||
1695 | |||
1696 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1697 | err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode); | ||
1698 | if (unlikely(err)) | ||
1699 | return err; | ||
1700 | nilfs_segbuf_fill_in_segsum(segbuf); | ||
1701 | } | ||
1702 | return 0; | ||
1703 | } | ||
1704 | |||
/*
 * nilfs_copy_replace_page_buffers - freeze a page by cloning its buffers
 * @page: page whose collected buffers are to be frozen
 * @out: list to which the replaced original buffers are moved
 *
 * Allocates a private clone page, copies the contents of every buffer
 * that is linked on a segment list (nonempty b_assoc_buffers) into the
 * corresponding clone buffer, and swaps the clone into the segment list
 * in place of the original.  The originals are queued on @out so that
 * nilfs_clear_copied_buffers() can finish them after the write.
 *
 * Returns 0 on success or -ENOMEM if the clone page cannot be allocated.
 */
static int
nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
{
	struct page *clone_page;
	struct buffer_head *bh, *head, *bh2;
	void *kaddr;

	bh = head = page_buffers(page);

	clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
	if (unlikely(!clone_page))
		return -ENOMEM;

	bh2 = page_buffers(clone_page);
	kaddr = kmap_atomic(page, KM_USER0);
	do {
		/* Only buffers on a segment list need to be frozen */
		if (list_empty(&bh->b_assoc_buffers))
			continue;
		get_bh(bh2);
		page_cache_get(clone_page); /* for each bh */
		memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
		bh2->b_blocknr = bh->b_blocknr;
		list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
		list_add_tail(&bh->b_assoc_buffers, out);
	} while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
	kunmap_atomic(kaddr, KM_USER0);

	if (!TestSetPageWriteback(clone_page))
		inc_zone_page_state(clone_page, NR_WRITEBACK);
	unlock_page(clone_page);

	return 0;
}
1738 | |||
1739 | static int nilfs_test_page_to_be_frozen(struct page *page) | ||
1740 | { | ||
1741 | struct address_space *mapping = page->mapping; | ||
1742 | |||
1743 | if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode)) | ||
1744 | return 0; | ||
1745 | |||
1746 | if (page_mapped(page)) { | ||
1747 | ClearPageChecked(page); | ||
1748 | return 1; | ||
1749 | } | ||
1750 | return PageChecked(page); | ||
1751 | } | ||
1752 | |||
/*
 * nilfs_begin_page_io - switch a file page into writeback state
 * @page: page to prepare (may be NULL)
 * @out: list receiving buffers frozen by the copy-replace step
 *
 * Clears the dirty flag and sets writeback on @page, then freezes its
 * buffers when nilfs_test_page_to_be_frozen() says so.
 */
static int nilfs_begin_page_io(struct page *page, struct list_head *out)
{
	int err = 0;

	if (!page || PageWriteback(page))
		/* For split b-tree node pages, this function may be called
		   twice.  We ignore the 2nd or later calls by this check. */
		return 0;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);

	if (nilfs_test_page_to_be_frozen(page))
		err = nilfs_copy_replace_page_buffers(page, out);
	return err;
}
1772 | |||
1773 | static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, | ||
1774 | struct page **failed_page) | ||
1775 | { | ||
1776 | struct nilfs_segment_buffer *segbuf; | ||
1777 | struct page *bd_page = NULL, *fs_page = NULL; | ||
1778 | struct list_head *list = &sci->sc_copied_buffers; | ||
1779 | int err; | ||
1780 | |||
1781 | *failed_page = NULL; | ||
1782 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1783 | struct buffer_head *bh; | ||
1784 | |||
1785 | list_for_each_entry(bh, &segbuf->sb_segsum_buffers, | ||
1786 | b_assoc_buffers) { | ||
1787 | if (bh->b_page != bd_page) { | ||
1788 | if (bd_page) { | ||
1789 | lock_page(bd_page); | ||
1790 | clear_page_dirty_for_io(bd_page); | ||
1791 | set_page_writeback(bd_page); | ||
1792 | unlock_page(bd_page); | ||
1793 | } | ||
1794 | bd_page = bh->b_page; | ||
1795 | } | ||
1796 | } | ||
1797 | |||
1798 | list_for_each_entry(bh, &segbuf->sb_payload_buffers, | ||
1799 | b_assoc_buffers) { | ||
1800 | if (bh == sci->sc_super_root) { | ||
1801 | if (bh->b_page != bd_page) { | ||
1802 | lock_page(bd_page); | ||
1803 | clear_page_dirty_for_io(bd_page); | ||
1804 | set_page_writeback(bd_page); | ||
1805 | unlock_page(bd_page); | ||
1806 | bd_page = bh->b_page; | ||
1807 | } | ||
1808 | break; | ||
1809 | } | ||
1810 | if (bh->b_page != fs_page) { | ||
1811 | err = nilfs_begin_page_io(fs_page, list); | ||
1812 | if (unlikely(err)) { | ||
1813 | *failed_page = fs_page; | ||
1814 | goto out; | ||
1815 | } | ||
1816 | fs_page = bh->b_page; | ||
1817 | } | ||
1818 | } | ||
1819 | } | ||
1820 | if (bd_page) { | ||
1821 | lock_page(bd_page); | ||
1822 | clear_page_dirty_for_io(bd_page); | ||
1823 | set_page_writeback(bd_page); | ||
1824 | unlock_page(bd_page); | ||
1825 | } | ||
1826 | err = nilfs_begin_page_io(fs_page, list); | ||
1827 | if (unlikely(err)) | ||
1828 | *failed_page = fs_page; | ||
1829 | out: | ||
1830 | return err; | ||
1831 | } | ||
1832 | |||
1833 | static int nilfs_segctor_write(struct nilfs_sc_info *sci, | ||
1834 | struct backing_dev_info *bdi) | ||
1835 | { | ||
1836 | struct nilfs_segment_buffer *segbuf; | ||
1837 | struct nilfs_write_info wi; | ||
1838 | int err, res; | ||
1839 | |||
1840 | wi.sb = sci->sc_super; | ||
1841 | wi.bh_sr = sci->sc_super_root; | ||
1842 | wi.bdi = bdi; | ||
1843 | |||
1844 | list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { | ||
1845 | nilfs_segbuf_prepare_write(segbuf, &wi); | ||
1846 | err = nilfs_segbuf_write(segbuf, &wi); | ||
1847 | |||
1848 | res = nilfs_segbuf_wait(segbuf, &wi); | ||
1849 | err = unlikely(err) ? : res; | ||
1850 | if (unlikely(err)) | ||
1851 | return err; | ||
1852 | } | ||
1853 | return 0; | ||
1854 | } | ||
1855 | |||
1856 | static int nilfs_page_has_uncleared_buffer(struct page *page) | ||
1857 | { | ||
1858 | struct buffer_head *head, *bh; | ||
1859 | |||
1860 | head = bh = page_buffers(page); | ||
1861 | do { | ||
1862 | if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers)) | ||
1863 | return 1; | ||
1864 | bh = bh->b_this_page; | ||
1865 | } while (bh != head); | ||
1866 | return 0; | ||
1867 | } | ||
1868 | |||
1869 | static void __nilfs_end_page_io(struct page *page, int err) | ||
1870 | { | ||
1871 | if (!err) { | ||
1872 | if (!nilfs_page_buffers_clean(page)) | ||
1873 | __set_page_dirty_nobuffers(page); | ||
1874 | ClearPageError(page); | ||
1875 | } else { | ||
1876 | __set_page_dirty_nobuffers(page); | ||
1877 | SetPageError(page); | ||
1878 | } | ||
1879 | |||
1880 | if (buffer_nilfs_allocated(page_buffers(page))) { | ||
1881 | if (TestClearPageWriteback(page)) | ||
1882 | dec_zone_page_state(page, NR_WRITEBACK); | ||
1883 | } else | ||
1884 | end_page_writeback(page); | ||
1885 | } | ||
1886 | |||
/*
 * nilfs_end_page_io - finish writeback of a file page if it is complete
 * @page: page whose I/O may have completed (may be NULL)
 * @err: 0 on success, or the error code of the failed write
 *
 * B-tree node pages can be visited several times when their buffers are
 * split across segments; cleanup is deferred until every buffer on the
 * page has been processed.
 */
static void nilfs_end_page_io(struct page *page, int err)
{
	if (!page)
		return;

	if (!buffer_nilfs_node(page_buffers(page)) ||
	    !nilfs_page_has_uncleared_buffer(page))
		__nilfs_end_page_io(page, err);
}
1902 | |||
/*
 * nilfs_clear_copied_buffers - finish buffers frozen before the write
 * @list: list of original buffers replaced by
 *	  nilfs_copy_replace_page_buffers()
 * @err: 0 on success, or the error code of the failed write
 *
 * Walks the pages owning the listed buffers, unlinks every buffer that
 * is still on the list, marks it clean and up to date when the write
 * succeeded, and ends the page-level writeback state.
 */
static void nilfs_clear_copied_buffers(struct list_head *list, int err)
{
	struct buffer_head *bh, *head;
	struct page *page;

	while (!list_empty(list)) {
		bh = list_entry(list->next, struct buffer_head,
				b_assoc_buffers);
		page = bh->b_page;
		page_cache_get(page);
		head = bh = page_buffers(page);
		do {
			/* Handle every listed buffer of this page at once */
			if (!list_empty(&bh->b_assoc_buffers)) {
				list_del_init(&bh->b_assoc_buffers);
				if (!err) {
					set_buffer_uptodate(bh);
					clear_buffer_dirty(bh);
					clear_buffer_nilfs_volatile(bh);
				}
				brelse(bh); /* for b_assoc_buffers */
			}
		} while ((bh = bh->b_this_page) != head);

		__nilfs_end_page_io(page, err);
		page_cache_release(page);
	}
}
1930 | |||
/*
 * nilfs_segctor_abort_write - undo writeback state after a failed write
 * @sci: segment constructor
 * @failed_page: last file page reached by nilfs_segctor_prepare_write()
 * @err: error code propagated to the page/buffer cleanup
 *
 * Failure-path mirror of nilfs_segctor_complete_write(): ends writeback
 * on every block-device page and finishes every file page up to and
 * including @failed_page, then cleans up the frozen (copied) buffers
 * with @err.
 */
static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
				      struct page *failed_page, int err)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			if (bh == sci->sc_super_root) {
				if (bh->b_page != bd_page) {
					/* NOTE(review): bd_page looks
					   guaranteed non-NULL here since
					   segsum buffers precede the super
					   root -- confirm. */
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, err);
				/* Pages past failed_page were never
				   prepared; stop unwinding here */
				if (unlikely(fs_page == failed_page))
					goto done;
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, err);
 done:
	nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
}
1973 | |||
/*
 * nilfs_set_next_segment - advance the log cursor past a written segment
 * @nilfs: nilfs object
 * @segbuf: last written segment buffer
 *
 * Records the segment numbers, the partial-segment offset just after
 * the written blocks, the sequence number, and the creation time of
 * @segbuf as the new write position.
 */
static void nilfs_set_next_segment(struct the_nilfs *nilfs,
				   struct nilfs_segment_buffer *segbuf)
{
	nilfs->ns_segnum = segbuf->sb_segnum;
	nilfs->ns_nextnum = segbuf->sb_nextnum;
	nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
		+ segbuf->sb_sum.nblocks;
	nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
	nilfs->ns_ctime = segbuf->sb_sum.ctime;
}
1984 | |||
/*
 * nilfs_segctor_complete_write - finish a successful write of all segments
 * @sci: segment constructor
 *
 * Called after every segment buffer has been written out without error.
 * Marks all written buffers clean and up to date, ends writeback on
 * their pages, updates the logical-segment state flags and the log
 * cursor in the nilfs object, and, when a super root was written,
 * commits the new checkpoint number and last-segment position.
 */
static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int update_sr = (sci->sc_super_root != NULL);

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}
		/*
		 * We assume that the buffers which belong to the same page
		 * continue over the buffer list.
		 * Under this assumption, the last BHs of pages is
		 * identifiable by the discontinuity of bh->b_page
		 * (page != fs_page).
		 *
		 * For B-tree node blocks, however, this assumption is not
		 * guaranteed.  The cleanup code of B-tree node pages needs
		 * special care.
		 */
		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			clear_buffer_nilfs_volatile(bh);
			if (bh == sci->sc_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, 0);
				fs_page = bh->b_page;
			}
		}

		/* Track whether a multi-segment logical segment is open */
		if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) {
			if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) {
				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				sci->sc_lseg_stime = jiffies;
			}
			if (NILFS_SEG_LOGEND(&segbuf->sb_sum))
				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
		}
	}
	/*
	 * Since pages may continue over multiple segment buffers,
	 * end of the last page must be checked outside of the loop.
	 */
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, 0);

	nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);

	nilfs_drop_collected_inodes(&sci->sc_dirty_files);

	if (nilfs_doing_gc()) {
		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
		if (update_sr)
			nilfs_commit_gcdat_inode(nilfs);
	} else
		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;

	sci->sc_nblk_inc += sci->sc_nblk_this_inc;

	segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	nilfs_set_next_segment(nilfs, segbuf);

	if (update_sr) {
		/* A super root was written: advance the checkpoint number */
		nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
				       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
		sbi->s_super->s_dirt = 1;

		clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
	} else
		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
}
2080 | |||
2081 | static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, | ||
2082 | struct nilfs_sb_info *sbi) | ||
2083 | { | ||
2084 | struct nilfs_inode_info *ii, *n; | ||
2085 | __u64 cno = sbi->s_nilfs->ns_cno; | ||
2086 | |||
2087 | spin_lock(&sbi->s_inode_lock); | ||
2088 | retry: | ||
2089 | list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { | ||
2090 | if (!ii->i_bh) { | ||
2091 | struct buffer_head *ibh; | ||
2092 | int err; | ||
2093 | |||
2094 | spin_unlock(&sbi->s_inode_lock); | ||
2095 | err = nilfs_ifile_get_inode_block( | ||
2096 | sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); | ||
2097 | if (unlikely(err)) { | ||
2098 | nilfs_warning(sbi->s_super, __func__, | ||
2099 | "failed to get inode block.\n"); | ||
2100 | return err; | ||
2101 | } | ||
2102 | nilfs_mdt_mark_buffer_dirty(ibh); | ||
2103 | nilfs_mdt_mark_dirty(sbi->s_ifile); | ||
2104 | spin_lock(&sbi->s_inode_lock); | ||
2105 | if (likely(!ii->i_bh)) | ||
2106 | ii->i_bh = ibh; | ||
2107 | else | ||
2108 | brelse(ibh); | ||
2109 | goto retry; | ||
2110 | } | ||
2111 | ii->i_cno = cno; | ||
2112 | |||
2113 | clear_bit(NILFS_I_QUEUED, &ii->i_state); | ||
2114 | set_bit(NILFS_I_BUSY, &ii->i_state); | ||
2115 | list_del(&ii->i_dirty); | ||
2116 | list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); | ||
2117 | } | ||
2118 | spin_unlock(&sbi->s_inode_lock); | ||
2119 | |||
2120 | NILFS_I(sbi->s_ifile)->i_cno = cno; | ||
2121 | |||
2122 | return 0; | ||
2123 | } | ||
2124 | |||
2125 | static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, | ||
2126 | struct nilfs_sb_info *sbi) | ||
2127 | { | ||
2128 | struct nilfs_transaction_info *ti = current->journal_info; | ||
2129 | struct nilfs_inode_info *ii, *n; | ||
2130 | __u64 cno = sbi->s_nilfs->ns_cno; | ||
2131 | |||
2132 | spin_lock(&sbi->s_inode_lock); | ||
2133 | list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { | ||
2134 | if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || | ||
2135 | test_bit(NILFS_I_DIRTY, &ii->i_state)) { | ||
2136 | /* The current checkpoint number (=nilfs->ns_cno) is | ||
2137 | changed between check-in and check-out only if the | ||
2138 | super root is written out. So, we can update i_cno | ||
2139 | for the inodes that remain in the dirty list. */ | ||
2140 | ii->i_cno = cno; | ||
2141 | continue; | ||
2142 | } | ||
2143 | clear_bit(NILFS_I_BUSY, &ii->i_state); | ||
2144 | brelse(ii->i_bh); | ||
2145 | ii->i_bh = NULL; | ||
2146 | list_del(&ii->i_dirty); | ||
2147 | list_add_tail(&ii->i_dirty, &ti->ti_garbage); | ||
2148 | } | ||
2149 | spin_unlock(&sbi->s_inode_lock); | ||
2150 | } | ||
2151 | |||
2152 | /* | ||
2153 | * Main procedure of segment constructor | ||
2154 | */ | ||
2155 | static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) | ||
2156 | { | ||
2157 | struct nilfs_sb_info *sbi = sci->sc_sbi; | ||
2158 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
2159 | struct page *failed_page; | ||
2160 | int err, has_sr = 0; | ||
2161 | |||
2162 | sci->sc_stage.scnt = NILFS_ST_INIT; | ||
2163 | |||
2164 | err = nilfs_segctor_check_in_files(sci, sbi); | ||
2165 | if (unlikely(err)) | ||
2166 | goto out; | ||
2167 | |||
2168 | if (nilfs_test_metadata_dirty(sbi)) | ||
2169 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); | ||
2170 | |||
2171 | if (nilfs_segctor_clean(sci)) | ||
2172 | goto out; | ||
2173 | |||
2174 | do { | ||
2175 | sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK; | ||
2176 | |||
2177 | err = nilfs_segctor_begin_construction(sci, nilfs); | ||
2178 | if (unlikely(err)) | ||
2179 | goto out; | ||
2180 | |||
2181 | /* Update time stamp */ | ||
2182 | sci->sc_seg_ctime = get_seconds(); | ||
2183 | |||
2184 | err = nilfs_segctor_collect(sci, nilfs, mode); | ||
2185 | if (unlikely(err)) | ||
2186 | goto failed; | ||
2187 | |||
2188 | has_sr = (sci->sc_super_root != NULL); | ||
2189 | |||
2190 | /* Avoid empty segment */ | ||
2191 | if (sci->sc_stage.scnt == NILFS_ST_DONE && | ||
2192 | NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { | ||
2193 | nilfs_segctor_end_construction(sci, nilfs, 1); | ||
2194 | goto out; | ||
2195 | } | ||
2196 | |||
2197 | err = nilfs_segctor_assign(sci, mode); | ||
2198 | if (unlikely(err)) | ||
2199 | goto failed; | ||
2200 | |||
2201 | if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) | ||
2202 | nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); | ||
2203 | |||
2204 | if (has_sr) { | ||
2205 | err = nilfs_segctor_fill_in_checkpoint(sci); | ||
2206 | if (unlikely(err)) | ||
2207 | goto failed_to_make_up; | ||
2208 | |||
2209 | nilfs_segctor_fill_in_super_root(sci, nilfs); | ||
2210 | } | ||
2211 | nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); | ||
2212 | |||
2213 | /* Write partial segments */ | ||
2214 | err = nilfs_segctor_prepare_write(sci, &failed_page); | ||
2215 | if (unlikely(err)) | ||
2216 | goto failed_to_write; | ||
2217 | |||
2218 | nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); | ||
2219 | |||
2220 | err = nilfs_segctor_write(sci, nilfs->ns_bdi); | ||
2221 | if (unlikely(err)) | ||
2222 | goto failed_to_write; | ||
2223 | |||
2224 | nilfs_segctor_complete_write(sci); | ||
2225 | |||
2226 | /* Commit segments */ | ||
2227 | if (has_sr) { | ||
2228 | nilfs_segctor_commit_free_segments(sci); | ||
2229 | nilfs_segctor_clear_metadata_dirty(sci); | ||
2230 | } | ||
2231 | |||
2232 | nilfs_segctor_end_construction(sci, nilfs, 0); | ||
2233 | |||
2234 | } while (sci->sc_stage.scnt != NILFS_ST_DONE); | ||
2235 | |||
2236 | out: | ||
2237 | nilfs_segctor_destroy_segment_buffers(sci); | ||
2238 | nilfs_segctor_check_out_files(sci, sbi); | ||
2239 | return err; | ||
2240 | |||
2241 | failed_to_write: | ||
2242 | nilfs_segctor_abort_write(sci, failed_page, err); | ||
2243 | nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); | ||
2244 | |||
2245 | failed_to_make_up: | ||
2246 | if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) | ||
2247 | nilfs_redirty_inodes(&sci->sc_dirty_files); | ||
2248 | |||
2249 | failed: | ||
2250 | if (nilfs_doing_gc()) | ||
2251 | nilfs_redirty_inodes(&sci->sc_gc_inodes); | ||
2252 | nilfs_segctor_end_construction(sci, nilfs, err); | ||
2253 | goto out; | ||
2254 | } | ||
2255 | |||
2256 | /** | ||
2257 | * nilfs_secgtor_start_timer - set timer of background write | ||
2258 | * @sci: nilfs_sc_info | ||
2259 | * | ||
2260 | * If the timer has already been set, it ignores the new request. | ||
2261 | * This function MUST be called within a section locking the segment | ||
2262 | * semaphore. | ||
2263 | */ | ||
2264 | static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) | ||
2265 | { | ||
2266 | spin_lock(&sci->sc_state_lock); | ||
2267 | if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { | ||
2268 | sci->sc_timer->expires = jiffies + sci->sc_interval; | ||
2269 | add_timer(sci->sc_timer); | ||
2270 | sci->sc_state |= NILFS_SEGCTOR_COMMIT; | ||
2271 | } | ||
2272 | spin_unlock(&sci->sc_state_lock); | ||
2273 | } | ||
2274 | |||
2275 | static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) | ||
2276 | { | ||
2277 | spin_lock(&sci->sc_state_lock); | ||
2278 | if (!(sci->sc_flush_request & (1 << bn))) { | ||
2279 | unsigned long prev_req = sci->sc_flush_request; | ||
2280 | |||
2281 | sci->sc_flush_request |= (1 << bn); | ||
2282 | if (!prev_req) | ||
2283 | wake_up(&sci->sc_wait_daemon); | ||
2284 | } | ||
2285 | spin_unlock(&sci->sc_state_lock); | ||
2286 | } | ||
2287 | |||
2288 | /** | ||
2289 | * nilfs_flush_segment - trigger a segment construction for resource control | ||
2290 | * @sb: super block | ||
2291 | * @ino: inode number of the file to be flushed out. | ||
2292 | */ | ||
2293 | void nilfs_flush_segment(struct super_block *sb, ino_t ino) | ||
2294 | { | ||
2295 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
2296 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | ||
2297 | |||
2298 | if (!sci || nilfs_doing_construction()) | ||
2299 | return; | ||
2300 | nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); | ||
2301 | /* assign bit 0 to data files */ | ||
2302 | } | ||
2303 | |||
/*
 * nilfs_segctor_add_segments_to_be_freed - queue segments for cleaning
 * @sci: segment constructor
 * @segnum: array of segment numbers to be freed
 * @nsegs: number of entries in @segnum
 *
 * Builds a segment-entry list for @segnum, warning about any segment
 * whose usage is not marked dirty (an unused segment requested for
 * cleaning), and splices the list onto sc_cleaning_segments.
 *
 * Returns 0 on success, -ENOMEM if an entry cannot be allocated, or a
 * negative error code from opening a segment entry.  On failure the
 * partially built list is disposed of.
 */
int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
					   __u64 *segnum, size_t nsegs)
{
	struct nilfs_segment_entry *ent;
	struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
	struct inode *sufile = nilfs->ns_sufile;
	LIST_HEAD(list);
	__u64 *pnum;
	size_t i;
	int err;

	for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) {
		ent = nilfs_alloc_segment_entry(*pnum);
		if (unlikely(!ent)) {
			err = -ENOMEM;
			goto failed;
		}
		list_add_tail(&ent->list, &list);

		err = nilfs_open_segment_entry(ent, sufile);
		if (unlikely(err))
			goto failed;

		if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su)))
			printk(KERN_WARNING "NILFS: unused segment is "
			       "requested to be cleaned (segnum=%llu)\n",
			       (unsigned long long)ent->segnum);
		nilfs_close_segment_entry(ent, sufile);
	}
	list_splice(&list, sci->sc_cleaning_segments.prev);
	return 0;

 failed:
	nilfs_dispose_segment_list(&list);
	return err;
}
2340 | |||
/*
 * nilfs_segctor_clear_segments_to_be_freed - drop queued cleaning segments
 * @sci: segment constructor
 *
 * Disposes of every segment entry queued on sc_cleaning_segments.
 */
void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
{
	nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
}
2345 | |||
/* Entry registered on sc_wait_request by a thread waiting for a
   construction to reach its requested sequence number. */
struct nilfs_segctor_wait_request {
	wait_queue_t wq;	/* wait-queue entry embedding the waiter */
	__u32 seq;		/* sequence number of the awaited construction */
	int err;		/* result handed back by the daemon */
	atomic_t done;		/* nonzero once the request has completed */
};
2352 | |||
/*
 * nilfs_segctor_sync - wait for the daemon to finish a full construction
 * @sci: segment constructor
 *
 * Registers a wait request carrying a new sequence number, wakes the
 * construction daemon, and sleeps interruptibly until the daemon marks
 * the request done (see nilfs_segctor_wakeup) or a signal arrives.
 *
 * Returns the construction result, or -ERESTARTSYS when interrupted.
 */
static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
{
	struct nilfs_segctor_wait_request wait_req;
	int err = 0;

	spin_lock(&sci->sc_state_lock);
	init_wait(&wait_req.wq);
	wait_req.err = 0;
	atomic_set(&wait_req.done, 0);
	wait_req.seq = ++sci->sc_seq_request;
	spin_unlock(&sci->sc_state_lock);

	init_waitqueue_entry(&wait_req.wq, current);
	add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
	set_current_state(TASK_INTERRUPTIBLE);
	wake_up(&sci->sc_wait_daemon);

	for (;;) {
		if (atomic_read(&wait_req.done)) {
			err = wait_req.err;
			break;
		}
		if (!signal_pending(current)) {
			schedule();
			continue;
		}
		err = -ERESTARTSYS;
		break;
	}
	finish_wait(&sci->sc_wait_request, &wait_req.wq);
	return err;
}
2385 | |||
/*
 * nilfs_segctor_wakeup - complete pending sync requests
 * @sci: segment constructor
 * @err: result code to hand to the satisfied waiters
 *
 * Scans the sc_wait_request queue under its lock, marks done every
 * request whose sequence number has been reached by sc_seq_done, and
 * invokes each completed entry's wake function to wake the waiter.
 */
static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
{
	struct nilfs_segctor_wait_request *wrq, *n;
	unsigned long flags;

	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
				 wq.task_list) {
		if (!atomic_read(&wrq->done) &&
		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
			wrq->err = err;
			atomic_set(&wrq->done, 1);
		}
		if (atomic_read(&wrq->done)) {
			/* Wake regardless of the waiter's sleep state */
			wrq->wq.func(&wrq->wq,
				     TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
				     0, NULL);
		}
	}
	spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
}
2407 | |||
2408 | /** | ||
2409 | * nilfs_construct_segment - construct a logical segment | ||
2410 | * @sb: super block | ||
2411 | * | ||
2412 | * Return Value: On success, 0 is retured. On errors, one of the following | ||
2413 | * negative error code is returned. | ||
2414 | * | ||
2415 | * %-EROFS - Read only filesystem. | ||
2416 | * | ||
2417 | * %-EIO - I/O error | ||
2418 | * | ||
2419 | * %-ENOSPC - No space left on device (only in a panic state). | ||
2420 | * | ||
2421 | * %-ERESTARTSYS - Interrupted. | ||
2422 | * | ||
2423 | * %-ENOMEM - Insufficient memory available. | ||
2424 | */ | ||
2425 | int nilfs_construct_segment(struct super_block *sb) | ||
2426 | { | ||
2427 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
2428 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | ||
2429 | struct nilfs_transaction_info *ti; | ||
2430 | int err; | ||
2431 | |||
2432 | if (!sci) | ||
2433 | return -EROFS; | ||
2434 | |||
2435 | /* A call inside transactions causes a deadlock. */ | ||
2436 | BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC); | ||
2437 | |||
2438 | err = nilfs_segctor_sync(sci); | ||
2439 | return err; | ||
2440 | } | ||
2441 | |||
2442 | /** | ||
2443 | * nilfs_construct_dsync_segment - construct a data-only logical segment | ||
2444 | * @sb: super block | ||
2445 | * @inode: inode whose data blocks should be written out | ||
2446 | * @start: start byte offset | ||
2447 | * @end: end byte offset (inclusive) | ||
2448 | * | ||
2449 | * Return Value: On success, 0 is retured. On errors, one of the following | ||
2450 | * negative error code is returned. | ||
2451 | * | ||
2452 | * %-EROFS - Read only filesystem. | ||
2453 | * | ||
2454 | * %-EIO - I/O error | ||
2455 | * | ||
2456 | * %-ENOSPC - No space left on device (only in a panic state). | ||
2457 | * | ||
2458 | * %-ERESTARTSYS - Interrupted. | ||
2459 | * | ||
2460 | * %-ENOMEM - Insufficient memory available. | ||
2461 | */ | ||
2462 | int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, | ||
2463 | loff_t start, loff_t end) | ||
2464 | { | ||
2465 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
2466 | struct nilfs_sc_info *sci = NILFS_SC(sbi); | ||
2467 | struct nilfs_inode_info *ii; | ||
2468 | struct nilfs_transaction_info ti; | ||
2469 | int err = 0; | ||
2470 | |||
2471 | if (!sci) | ||
2472 | return -EROFS; | ||
2473 | |||
2474 | nilfs_transaction_lock(sbi, &ti, 0); | ||
2475 | |||
2476 | ii = NILFS_I(inode); | ||
2477 | if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || | ||
2478 | nilfs_test_opt(sbi, STRICT_ORDER) || | ||
2479 | test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || | ||
2480 | nilfs_discontinued(sbi->s_nilfs)) { | ||
2481 | nilfs_transaction_unlock(sbi); | ||
2482 | err = nilfs_segctor_sync(sci); | ||
2483 | return err; | ||
2484 | } | ||
2485 | |||
2486 | spin_lock(&sbi->s_inode_lock); | ||
2487 | if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && | ||
2488 | !test_bit(NILFS_I_BUSY, &ii->i_state)) { | ||
2489 | spin_unlock(&sbi->s_inode_lock); | ||
2490 | nilfs_transaction_unlock(sbi); | ||
2491 | return 0; | ||
2492 | } | ||
2493 | spin_unlock(&sbi->s_inode_lock); | ||
2494 | sci->sc_dsync_inode = ii; | ||
2495 | sci->sc_dsync_start = start; | ||
2496 | sci->sc_dsync_end = end; | ||
2497 | |||
2498 | err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); | ||
2499 | |||
2500 | nilfs_transaction_unlock(sbi); | ||
2501 | return err; | ||
2502 | } | ||
2503 | |||
/* Per-call descriptor for a construction request handled by the daemon */
struct nilfs_segctor_req {
	int mode;		/* construction mode (SC_* constant) */
	__u32 seq_accepted;	/* sequence number accepted for this request */
	int sc_err;		/* construction failure */
	int sb_err;		/* super block writeback failure */
};
2510 | |||
2511 | #define FLUSH_FILE_BIT (0x1) /* data file only */ | ||
2512 | #define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */ | ||
2513 | |||
2514 | static void nilfs_segctor_accept(struct nilfs_sc_info *sci, | ||
2515 | struct nilfs_segctor_req *req) | ||
2516 | { | ||
2517 | req->sc_err = req->sb_err = 0; | ||
2518 | spin_lock(&sci->sc_state_lock); | ||
2519 | req->seq_accepted = sci->sc_seq_request; | ||
2520 | spin_unlock(&sci->sc_state_lock); | ||
2521 | |||
2522 | if (sci->sc_timer) | ||
2523 | del_timer_sync(sci->sc_timer); | ||
2524 | } | ||
2525 | |||
/*
 * nilfs_segctor_notify - publish the result of a construction pass
 * @sci: segment constructor info
 * @req: request previously accepted by nilfs_segctor_accept()
 *
 * Pending request state is cleared even on failure so that waiters are
 * not left blocked forever.
 */
static void nilfs_segctor_notify(struct nilfs_sc_info *sci,
				 struct nilfs_segctor_req *req)
{
	/* Clear requests (even when the construction failed) */
	spin_lock(&sci->sc_state_lock);

	sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;

	if (req->mode == SC_LSEG_SR) {
		/* a full construction completes all accepted requests and
		   supersedes any pending partial flushes */
		sci->sc_seq_done = req->seq_accepted;
		nilfs_segctor_wakeup(sci, req->sc_err ? : req->sb_err);
		sci->sc_flush_request = 0;
	} else if (req->mode == SC_FLUSH_FILE)
		sci->sc_flush_request &= ~FLUSH_FILE_BIT;
	else if (req->mode == SC_FLUSH_DAT)
		sci->sc_flush_request &= ~FLUSH_DAT_BIT;

	spin_unlock(&sci->sc_state_lock);
}
2545 | |||
/*
 * nilfs_segctor_construct - run one construction pass for @req
 * @sci: segment constructor info
 * @req: request describing the construction mode; failures are recorded
 *       in req->sc_err (construction) and req->sb_err (super block)
 *
 * Returns 0 on success or a negative construction error code.
 */
static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
				   struct nilfs_segctor_req *req)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err = 0;

	/* a discontinued log always requires a full segment construction */
	if (nilfs_discontinued(nilfs))
		req->mode = SC_LSEG_SR;
	/* NOTE(review): the pass is skipped when nilfs_segctor_confirm()
	   returns nonzero -- presumably "nothing to write"; confirm */
	if (!nilfs_segctor_confirm(sci)) {
		err = nilfs_segctor_do_construct(sci, req->mode);
		req->sc_err = err;
	}
	if (likely(!err)) {
		if (req->mode != SC_FLUSH_DAT)
			atomic_set(&nilfs->ns_ndirtyblks, 0);
		/* a super root written while the log was discontinued must be
		   made persistent in the super block immediately */
		if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
		    nilfs_discontinued(nilfs)) {
			down_write(&nilfs->ns_sem);
			req->sb_err = nilfs_commit_super(sbi, 0);
			up_write(&nilfs->ns_sem);
		}
	}
	return err;
}
2571 | |||
/*
 * nilfs_construction_timeout - construction timer callback
 * @data: task_struct pointer of the segctord thread, cast to unsigned long
 *
 * Wakes the thread so it re-evaluates its work queue.
 */
static void nilfs_construction_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2577 | |||
2578 | static void | ||
2579 | nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) | ||
2580 | { | ||
2581 | struct nilfs_inode_info *ii, *n; | ||
2582 | |||
2583 | list_for_each_entry_safe(ii, n, head, i_dirty) { | ||
2584 | if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) | ||
2585 | continue; | ||
2586 | hlist_del_init(&ii->vfs_inode.i_hash); | ||
2587 | list_del_init(&ii->i_dirty); | ||
2588 | nilfs_clear_gcinode(&ii->vfs_inode); | ||
2589 | } | ||
2590 | } | ||
2591 | |||
/**
 * nilfs_clean_segments - entry point of the garbage collection ioctl
 * @sb: super block instance
 * @argp: user-space ioctl argument
 *
 * Prepares the GC state from the user-supplied argument, then repeats
 * full segment constructions until one succeeds, sleeping sc_interval
 * jiffies after each failed attempt.
 *
 * Returns 0 on success, %-EROFS when no segment constructor is attached,
 * or a negative error code from the preparation steps.
 */
int nilfs_clean_segments(struct super_block *sb, void __user *argp)
{
	struct nilfs_sb_info *sbi = NILFS_SB(sb);
	struct nilfs_sc_info *sci = NILFS_SC(sbi);
	struct the_nilfs *nilfs = sbi->s_nilfs;
	struct nilfs_transaction_info ti;
	struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
	int err;

	if (unlikely(!sci))
		return -EROFS;

	nilfs_transaction_lock(sbi, &ti, 1);

	err = nilfs_init_gcdat_inode(nilfs);
	if (unlikely(err))
		goto out_unlock;
	err = nilfs_ioctl_prepare_clean_segments(nilfs, argp);
	if (unlikely(err))
		goto out_unlock;

	/* append the GC inodes to the tail of sc_gc_inodes */
	list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);

	for (;;) {
		nilfs_segctor_accept(sci, &req);
		err = nilfs_segctor_construct(sci, &req);
		nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
		nilfs_segctor_notify(sci, &req);

		if (likely(!err))
			break;

		/* failed constructions are retried after a short sleep */
		nilfs_warning(sb, __func__,
			      "segment construction failed. (err=%d)", err);
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(sci->sc_interval);
	}

 out_unlock:
	nilfs_clear_gcdat_inode(nilfs);
	nilfs_transaction_unlock(sbi);
	return err;
}
2635 | |||
/*
 * nilfs_segctor_thread_construct - perform one construction cycle on
 * behalf of the segctord thread
 * @sci: segment constructor info
 * @mode: construction mode (SC_LSEG_SR or an SC_FLUSH_* mode)
 */
static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	struct nilfs_transaction_info ti;
	struct nilfs_segctor_req req = { .mode = mode };

	nilfs_transaction_lock(sbi, &ti, 0);

	nilfs_segctor_accept(sci, &req);
	nilfs_segctor_construct(sci, &req);
	nilfs_segctor_notify(sci, &req);

	/*
	 * Unclosed segment should be retried. We do this using sc_timer.
	 * Timeout of sc_timer will invoke complete construction which leads
	 * to close the current logical segment.
	 */
	if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
		nilfs_segctor_start_timer(sci);

	nilfs_transaction_unlock(sbi);
}
2658 | |||
/*
 * nilfs_segctor_do_immediate_flush - service a prioritized flush request
 * @sci: segment constructor info
 *
 * Chooses SC_FLUSH_DAT when a DAT flush is pending, SC_FLUSH_FILE
 * otherwise, runs the construction, clears the serviced flush bit, and
 * finally clears the NILFS_SC_PRIOR_FLUSH flag.
 *
 * NOTE(review): mode is initialized to 0 but immediately assigned one of
 * the two flush modes, so "if (mode)" only skips work if an SC_FLUSH_*
 * constant equals 0 -- confirm against the mode definitions.
 */
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
{
	int mode = 0;
	int err;

	spin_lock(&sci->sc_state_lock);
	mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
		SC_FLUSH_DAT : SC_FLUSH_FILE;
	spin_unlock(&sci->sc_state_lock);

	if (mode) {
		err = nilfs_segctor_do_construct(sci, mode);

		/* clear only the bit corresponding to the serviced mode */
		spin_lock(&sci->sc_state_lock);
		sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
			~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
		spin_unlock(&sci->sc_state_lock);
	}
	clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
}
2679 | |||
2680 | static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) | ||
2681 | { | ||
2682 | if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || | ||
2683 | time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) { | ||
2684 | if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT)) | ||
2685 | return SC_FLUSH_FILE; | ||
2686 | else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT)) | ||
2687 | return SC_FLUSH_DAT; | ||
2688 | } | ||
2689 | return SC_LSEG_SR; | ||
2690 | } | ||
2691 | |||
/**
 * nilfs_segctor_thread - main loop of the segment constructor thread.
 * @arg: pointer to a struct nilfs_sc_info.
 *
 * nilfs_segctor_thread() initializes a timer and serves as a daemon
 * to execute segment constructions.
 */
static int nilfs_segctor_thread(void *arg)
{
	struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
	struct timer_list timer;
	int timeout = 0;	/* nonzero when the construction timer expired */

	init_timer(&timer);
	timer.data = (unsigned long)current;
	timer.function = nilfs_construction_timeout;
	/* publish the on-stack timer; accept/notify paths may stop it */
	sci->sc_timer = &timer;

	/* start sync. */
	sci->sc_task = current;
	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
	printk(KERN_INFO
	       "segctord starting. Construction interval = %lu seconds, "
	       "CP frequency < %lu seconds\n",
	       sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);

	/* sc_state_lock is held throughout the loop except while
	   constructing or sleeping */
	spin_lock(&sci->sc_state_lock);
 loop:
	for (;;) {
		int mode;

		if (sci->sc_state & NILFS_SEGCTOR_QUIT)
			goto end_thread;

		/* an expired timer or an outstanding request forces a full
		   construction; otherwise service pending flush requests */
		if (timeout || sci->sc_seq_request != sci->sc_seq_done)
			mode = SC_LSEG_SR;
		else if (!sci->sc_flush_request)
			break; /* nothing to do: go to sleep below */
		else
			mode = nilfs_segctor_flush_mode(sci);

		spin_unlock(&sci->sc_state_lock);
		nilfs_segctor_thread_construct(sci, mode);
		spin_lock(&sci->sc_state_lock);
		timeout = 0;
	}


	if (freezing(current)) {
		/* system suspend: enter the refrigerator without the lock */
		spin_unlock(&sci->sc_state_lock);
		refrigerator();
		spin_lock(&sci->sc_state_lock);
	} else {
		DEFINE_WAIT(wait);
		int should_sleep = 1;

		prepare_to_wait(&sci->sc_wait_daemon, &wait,
				TASK_INTERRUPTIBLE);

		/* re-check for work that arrived before prepare_to_wait() */
		if (sci->sc_seq_request != sci->sc_seq_done)
			should_sleep = 0;
		else if (sci->sc_flush_request)
			should_sleep = 0;
		else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
			should_sleep = time_before(jiffies,
					sci->sc_timer->expires);

		if (should_sleep) {
			spin_unlock(&sci->sc_state_lock);
			schedule();
			spin_lock(&sci->sc_state_lock);
		}
		finish_wait(&sci->sc_wait_daemon, &wait);
		/* a committed transaction whose timer has expired triggers
		   a full construction on the next loop iteration */
		timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
			   time_after_eq(jiffies, sci->sc_timer->expires));
	}
	goto loop;

 end_thread:
	spin_unlock(&sci->sc_state_lock);
	del_timer_sync(sci->sc_timer);
	sci->sc_timer = NULL;

	/* end sync. */
	sci->sc_task = NULL;
	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
	return 0;
}
2780 | |||
2781 | static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) | ||
2782 | { | ||
2783 | struct task_struct *t; | ||
2784 | |||
2785 | t = kthread_run(nilfs_segctor_thread, sci, "segctord"); | ||
2786 | if (IS_ERR(t)) { | ||
2787 | int err = PTR_ERR(t); | ||
2788 | |||
2789 | printk(KERN_ERR "NILFS: error %d creating segctord thread\n", | ||
2790 | err); | ||
2791 | return err; | ||
2792 | } | ||
2793 | wait_event(sci->sc_wait_task, sci->sc_task != NULL); | ||
2794 | return 0; | ||
2795 | } | ||
2796 | |||
/*
 * nilfs_segctor_kill_thread - request segctord termination and wait for it
 * @sci: segment constructor info
 *
 * Caller must hold sc_state_lock; the lock is released and re-acquired
 * around the wait so the thread can observe NILFS_SEGCTOR_QUIT and exit.
 */
static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
{
	sci->sc_state |= NILFS_SEGCTOR_QUIT;

	while (sci->sc_task) {
		wake_up(&sci->sc_wait_daemon);
		spin_unlock(&sci->sc_state_lock);
		wait_event(sci->sc_wait_task, sci->sc_task == NULL);
		spin_lock(&sci->sc_state_lock);
	}
}
2808 | |||
/*
 * nilfs_segctor_init - start the segment constructor
 * @sci: segment constructor info
 *
 * Makes the done counter equal to the request counter so that no
 * construction request is initially pending, then starts segctord.
 */
static int nilfs_segctor_init(struct nilfs_sc_info *sci)
{
	sci->sc_seq_done = sci->sc_seq_request;

	return nilfs_segctor_start_thread(sci);
}
2815 | |||
2816 | /* | ||
2817 | * Setup & clean-up functions | ||
2818 | */ | ||
2819 | static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) | ||
2820 | { | ||
2821 | struct nilfs_sc_info *sci; | ||
2822 | |||
2823 | sci = kzalloc(sizeof(*sci), GFP_KERNEL); | ||
2824 | if (!sci) | ||
2825 | return NULL; | ||
2826 | |||
2827 | sci->sc_sbi = sbi; | ||
2828 | sci->sc_super = sbi->s_super; | ||
2829 | |||
2830 | init_waitqueue_head(&sci->sc_wait_request); | ||
2831 | init_waitqueue_head(&sci->sc_wait_daemon); | ||
2832 | init_waitqueue_head(&sci->sc_wait_task); | ||
2833 | spin_lock_init(&sci->sc_state_lock); | ||
2834 | INIT_LIST_HEAD(&sci->sc_dirty_files); | ||
2835 | INIT_LIST_HEAD(&sci->sc_segbufs); | ||
2836 | INIT_LIST_HEAD(&sci->sc_gc_inodes); | ||
2837 | INIT_LIST_HEAD(&sci->sc_cleaning_segments); | ||
2838 | INIT_LIST_HEAD(&sci->sc_copied_buffers); | ||
2839 | |||
2840 | sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; | ||
2841 | sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; | ||
2842 | sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; | ||
2843 | |||
2844 | if (sbi->s_interval) | ||
2845 | sci->sc_interval = sbi->s_interval; | ||
2846 | if (sbi->s_watermark) | ||
2847 | sci->sc_watermark = sbi->s_watermark; | ||
2848 | return sci; | ||
2849 | } | ||
2850 | |||
/*
 * nilfs_segctor_write_out - flush remaining work while shutting down
 * @sci: segment constructor info
 *
 * Runs full segment constructions after the segctord thread has been
 * stopped, retrying a failed attempt up to NILFS_SC_CLEANUP_RETRY more
 * times.
 */
static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
{
	int ret, retrycount = NILFS_SC_CLEANUP_RETRY;

	/* The segctord thread was stopped and its timer was removed.
	   But some tasks remain. */
	do {
		struct nilfs_sb_info *sbi = sci->sc_sbi;
		struct nilfs_transaction_info ti;
		struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };

		nilfs_transaction_lock(sbi, &ti, 0);
		nilfs_segctor_accept(sci, &req);
		ret = nilfs_segctor_construct(sci, &req);
		nilfs_segctor_notify(sci, &req);
		nilfs_transaction_unlock(sbi);

	} while (ret && retrycount-- > 0);
}
2870 | |||
/**
 * nilfs_segctor_destroy - destroy the segment constructor.
 * @sci: nilfs_sc_info
 *
 * nilfs_segctor_destroy() kills the segctord thread and frees
 * the nilfs_sc_info struct.
 * Caller must hold the segment semaphore.
 */
static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
{
	struct nilfs_sb_info *sbi = sci->sc_sbi;
	int flag;

	/* temporarily release the caller's write hold on ns_segctor_sem so
	   that the thread shutdown and final constructions can proceed */
	up_write(&sbi->s_nilfs->ns_segctor_sem);

	spin_lock(&sci->sc_state_lock);
	nilfs_segctor_kill_thread(sci);
	/* is there any committed, flushing, or unfinished request left? */
	flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
		|| sci->sc_seq_request != sci->sc_seq_done);
	spin_unlock(&sci->sc_state_lock);

	if (flag || nilfs_segctor_confirm(sci))
		nilfs_segctor_write_out(sci);

	WARN_ON(!list_empty(&sci->sc_copied_buffers));

	if (!list_empty(&sci->sc_dirty_files)) {
		nilfs_warning(sbi->s_super, __func__,
			      "dirty file(s) after the final construction\n");
		nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1);
	}

	if (!list_empty(&sci->sc_cleaning_segments))
		nilfs_dispose_segment_list(&sci->sc_cleaning_segments);

	WARN_ON(!list_empty(&sci->sc_segbufs));

	/* re-acquire the semaphore for the caller before freeing */
	down_write(&sbi->s_nilfs->ns_segctor_sem);

	kfree(sci);
}
2912 | |||
/**
 * nilfs_attach_segment_constructor - attach a segment constructor
 * @sbi: nilfs_sb_info
 *
 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info,
 * initializes it, and starts the segment constructor.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	int err;

	/* Each field of nilfs_segctor is cleared through the initialization
	   of super-block info */
	sbi->s_sc_info = nilfs_segctor_new(sbi);
	if (!sbi->s_sc_info)
		return -ENOMEM;

	nilfs_attach_writer(nilfs, sbi);
	err = nilfs_segctor_init(NILFS_SC(sbi));
	if (err) {
		/* undo on failure: detach the writer and drop the half-built
		   constructor so no stale pointer is left behind */
		nilfs_detach_writer(nilfs, sbi);
		kfree(sbi->s_sc_info);
		sbi->s_sc_info = NULL;
	}
	return err;
}
2945 | |||
/**
 * nilfs_detach_segment_constructor - destroy the segment constructor
 * @sbi: nilfs_sb_info
 *
 * nilfs_detach_segment_constructor() kills the segment constructor daemon,
 * frees the struct nilfs_sc_info, and destroys the dirty file list.
 */
void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi)
{
	struct the_nilfs *nilfs = sbi->s_nilfs;
	LIST_HEAD(garbage_list);

	down_write(&nilfs->ns_segctor_sem);
	if (NILFS_SC(sbi)) {
		nilfs_segctor_destroy(NILFS_SC(sbi));
		sbi->s_sc_info = NULL;
	}

	/* Force to free the list of dirty files */
	spin_lock(&sbi->s_inode_lock);
	if (!list_empty(&sbi->s_dirty_files)) {
		list_splice_init(&sbi->s_dirty_files, &garbage_list);
		nilfs_warning(sbi->s_super, __func__,
			      "Non empty dirty list after the last "
			      "segment construction\n");
	}
	spin_unlock(&sbi->s_inode_lock);
	up_write(&nilfs->ns_segctor_sem);

	/* dispose outside the locks; may touch each inode */
	nilfs_dispose_list(sbi, &garbage_list, 1);
	nilfs_detach_writer(nilfs, sbi);
}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h new file mode 100644 index 000000000000..a98fc1ed0bbb --- /dev/null +++ b/fs/nilfs2/segment.h | |||
@@ -0,0 +1,243 @@ | |||
1 | /* | ||
2 | * segment.h - NILFS Segment constructor prototypes and definitions | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | #ifndef _NILFS_SEGMENT_H | ||
24 | #define _NILFS_SEGMENT_H | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/nilfs2_fs.h> | ||
30 | #include "sb.h" | ||
31 | |||
/**
 * struct nilfs_recovery_info - Recovery information
 * @ri_need_recovery: Recovery status
 * @ri_super_root: Block number of the last super root
 * @ri_cno: Number of the last checkpoint
 * @ri_lsegs_start: Region for roll-forwarding (start block number)
 * @ri_lsegs_end: Region for roll-forwarding (end block number)
 * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
 * @ri_used_segments: List of segments to be marked active
 * @ri_pseg_start: Block number of the last partial segment
 * @ri_seq: Sequence number on the last partial segment
 * @ri_segnum: Segment number on the last partial segment
 * @ri_nextnum: Next segment number on the last partial segment
 */
struct nilfs_recovery_info {
	int ri_need_recovery;
	sector_t ri_super_root;
	__u64 ri_cno;

	sector_t ri_lsegs_start;
	sector_t ri_lsegs_end;
	u64 ri_lsegs_start_seq;
	struct list_head ri_used_segments;
	sector_t ri_pseg_start;
	u64 ri_seq;
	__u64 ri_segnum;
	__u64 ri_nextnum;
};
60 | |||
61 | /* ri_need_recovery */ | ||
62 | #define NILFS_RECOVERY_SR_UPDATED 1 /* The super root was updated */ | ||
63 | #define NILFS_RECOVERY_ROLLFORWARD_DONE 2 /* Rollforward was carried out */ | ||
64 | |||
/**
 * struct nilfs_cstage - Context of collection stage
 * @scnt: Stage count
 * @flags: State flags
 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
 * @gc_inode_ptr: Pointer on the list of gc-inodes
 *
 * NOTE(review): dirty_file_ptr/gc_inode_ptr appear to record the resume
 * position when collection is suspended mid-list -- confirm in segment.c.
 */
struct nilfs_cstage {
	int scnt;
	unsigned flags;
	struct nilfs_inode_info *dirty_file_ptr;
	struct nilfs_inode_info *gc_inode_ptr;
};
78 | |||
struct nilfs_segment_buffer;

/* Cursor into a segment summary: a buffer head plus a byte offset in it
   (see sc_finfo_ptr/sc_binfo_ptr in struct nilfs_sc_info). */
struct nilfs_segsum_pointer {
	struct buffer_head *bh;
	unsigned offset; /* offset in bytes */
};
85 | |||
/**
 * struct nilfs_sc_info - Segment constructor information
 * @sc_super: Back pointer to super_block struct
 * @sc_sbi: Back pointer to nilfs_sb_info struct
 * @sc_nblk_inc: Block count of current generation
 * @sc_dirty_files: List of files to be written
 * @sc_gc_inodes: List of GC inodes having blocks to be written
 * @sc_cleaning_segments: List of segments to be freed through construction
 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
 * @sc_dsync_inode: inode whose data pages are written for a sync operation
 * @sc_dsync_start: start byte offset of data pages
 * @sc_dsync_end: end byte offset of data pages (inclusive)
 * @sc_segbufs: List of segment buffers
 * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
 * @sc_curseg: Current segment buffer
 * @sc_super_root: Pointer to the super root buffer
 * @sc_stage: Collection stage
 * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
 * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
 * @sc_blk_cnt: Block count of a file
 * @sc_datablk_cnt: Data block count of a file
 * @sc_nblk_this_inc: Number of blocks included in the current logical segment
 * @sc_seg_ctime: Creation time
 * @sc_flags: Internal flags
 * @sc_state_lock: spinlock for sc_state and so on
 * @sc_state: Segctord state flags
 * @sc_flush_request: inode bitmap of metadata files to be flushed
 * @sc_wait_request: Client request queue
 * @sc_wait_daemon: Daemon wait queue
 * @sc_wait_task: Start/end wait queue to control segctord task
 * @sc_seq_request: Request counter
 * @sc_seq_done: Completion counter
 * @sc_sync: Request of explicit sync operation
 * @sc_interval: Timeout value of background construction
 * @sc_mjcp_freq: Frequency of creating checkpoints
 * @sc_lseg_stime: Start time of the latest logical segment
 * @sc_watermark: Watermark for the number of dirty buffers
 * @sc_timer: Timer for segctord
 * @sc_task: current thread of segctord
 */
struct nilfs_sc_info {
	struct super_block *sc_super;
	struct nilfs_sb_info *sc_sbi;

	unsigned long sc_nblk_inc;

	struct list_head sc_dirty_files;
	struct list_head sc_gc_inodes;
	struct list_head sc_cleaning_segments;
	struct list_head sc_copied_buffers;

	struct nilfs_inode_info *sc_dsync_inode;
	loff_t sc_dsync_start;
	loff_t sc_dsync_end;

	/* Segment buffers */
	struct list_head sc_segbufs;
	unsigned long sc_segbuf_nblocks;
	struct nilfs_segment_buffer *sc_curseg;
	struct buffer_head *sc_super_root;

	struct nilfs_cstage sc_stage;

	struct nilfs_segsum_pointer sc_finfo_ptr;
	struct nilfs_segsum_pointer sc_binfo_ptr;
	unsigned long sc_blk_cnt;
	unsigned long sc_datablk_cnt;
	unsigned long sc_nblk_this_inc;
	time_t sc_seg_ctime;

	unsigned long sc_flags;

	spinlock_t sc_state_lock;
	unsigned long sc_state;		/* protected by sc_state_lock */
	unsigned long sc_flush_request;	/* protected by sc_state_lock */

	wait_queue_head_t sc_wait_request;
	wait_queue_head_t sc_wait_daemon;
	wait_queue_head_t sc_wait_task;

	/* request/done counters are compared to detect pending construction
	   requests; both are updated under sc_state_lock */
	__u32 sc_seq_request;
	__u32 sc_seq_done;

	int sc_sync;
	unsigned long sc_interval;
	unsigned long sc_mjcp_freq;
	unsigned long sc_lseg_stime;	/* in 1/HZ seconds */
	unsigned long sc_watermark;

	struct timer_list *sc_timer;
	struct task_struct *sc_task;
};
178 | |||
/* sc_flags: bit numbers used with test_bit()/set_bit() on sc_flags */
enum {
	NILFS_SC_DIRTY,		/* One or more dirty meta-data blocks exist */
	NILFS_SC_UNCLOSED,	/* Logical segment is not closed */
	NILFS_SC_SUPER_ROOT,	/* The latest segment has a super root */
	NILFS_SC_PRIOR_FLUSH,	/* Requesting immediate flush without making a
				   checkpoint */
	NILFS_SC_HAVE_DELTA,	/* Next checkpoint will have update of files
				   other than DAT, cpfile, sufile, or files
				   moved by GC */
};
190 | |||
191 | /* sc_state */ | ||
192 | #define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ | ||
193 | #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ | ||
194 | |||
195 | /* | ||
196 | * Constant parameters | ||
197 | */ | ||
198 | #define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when | ||
199 | destroying segctord */ | ||
200 | |||
201 | /* | ||
202 | * Default values of timeout, in seconds. | ||
203 | */ | ||
204 | #define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks. | ||
205 | It triggers construction of a | ||
206 | logical segment with a super root */ | ||
207 | #define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root | ||
208 | creation */ | ||
209 | |||
210 | /* | ||
211 | * The default threshold amount of data, in block counts. | ||
212 | */ | ||
213 | #define NILFS_SC_DEFAULT_WATERMARK 3600 | ||
214 | |||
215 | |||
216 | /* segment.c */ | ||
217 | extern int nilfs_init_transaction_cache(void); | ||
218 | extern void nilfs_destroy_transaction_cache(void); | ||
219 | extern void nilfs_relax_pressure_in_lock(struct super_block *); | ||
220 | |||
221 | extern int nilfs_construct_segment(struct super_block *); | ||
222 | extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, | ||
223 | loff_t, loff_t); | ||
224 | extern void nilfs_flush_segment(struct super_block *, ino_t); | ||
225 | extern int nilfs_clean_segments(struct super_block *, void __user *); | ||
226 | |||
227 | extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *, | ||
228 | __u64 *, size_t); | ||
229 | extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *); | ||
230 | |||
231 | extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); | ||
232 | extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); | ||
233 | |||
234 | /* recovery.c */ | ||
235 | extern int nilfs_read_super_root_block(struct super_block *, sector_t, | ||
236 | struct buffer_head **, int); | ||
237 | extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, | ||
238 | struct nilfs_recovery_info *); | ||
239 | extern int nilfs_recover_logical_segments(struct the_nilfs *, | ||
240 | struct nilfs_sb_info *, | ||
241 | struct nilfs_recovery_info *); | ||
242 | |||
243 | #endif /* _NILFS_SEGMENT_H */ | ||
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c new file mode 100644 index 000000000000..c774cf397e2f --- /dev/null +++ b/fs/nilfs2/sufile.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /* | ||
2 | * sufile.c - NILFS segment usage file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/fs.h> | ||
25 | #include <linux/string.h> | ||
26 | #include <linux/buffer_head.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "mdt.h" | ||
30 | #include "sufile.h" | ||
31 | |||
32 | |||
/* Number of segment usage entries that fit in one sufile block. */
static inline unsigned long
nilfs_sufile_segment_usages_per_block(const struct inode *sufile)
{
	return NILFS_MDT(sufile)->mi_entries_per_block;
}
38 | |||
39 | static unsigned long | ||
40 | nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) | ||
41 | { | ||
42 | __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; | ||
43 | do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); | ||
44 | return (unsigned long)t; | ||
45 | } | ||
46 | |||
47 | static unsigned long | ||
48 | nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum) | ||
49 | { | ||
50 | __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; | ||
51 | return do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); | ||
52 | } | ||
53 | |||
54 | static unsigned long | ||
55 | nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, | ||
56 | __u64 max) | ||
57 | { | ||
58 | return min_t(unsigned long, | ||
59 | nilfs_sufile_segment_usages_per_block(sufile) - | ||
60 | nilfs_sufile_get_offset(sufile, curr), | ||
61 | max - curr + 1); | ||
62 | } | ||
63 | |||
/* The sufile header lives at the very start of block 0's buffer. */
static inline struct nilfs_sufile_header *
nilfs_sufile_block_get_header(const struct inode *sufile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	return (struct nilfs_sufile_header *)(kaddr + bh_offset(bh));
}
71 | |||
72 | static struct nilfs_segment_usage * | ||
73 | nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, | ||
74 | struct buffer_head *bh, void *kaddr) | ||
75 | { | ||
76 | return kaddr + bh_offset(bh) + | ||
77 | nilfs_sufile_get_offset(sufile, segnum) * | ||
78 | NILFS_MDT(sufile)->mi_entry_size; | ||
79 | } | ||
80 | |||
81 | static inline int nilfs_sufile_get_header_block(struct inode *sufile, | ||
82 | struct buffer_head **bhp) | ||
83 | { | ||
84 | return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); | ||
85 | } | ||
86 | |||
87 | static inline int | ||
88 | nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, | ||
89 | int create, struct buffer_head **bhp) | ||
90 | { | ||
91 | return nilfs_mdt_get_block(sufile, | ||
92 | nilfs_sufile_get_blkoff(sufile, segnum), | ||
93 | create, NULL, bhp); | ||
94 | } | ||
95 | |||
96 | /** | ||
97 | * nilfs_sufile_alloc - allocate a segment | ||
98 | * @sufile: inode of segment usage file | ||
99 | * @segnump: pointer to segment number | ||
100 | * | ||
101 | * Description: nilfs_sufile_alloc() allocates a clean segment. | ||
102 | * | ||
103 | * Return Value: On success, 0 is returned and the segment number of the | ||
104 | * allocated segment is stored in the place pointed by @segnump. On error, one | ||
105 | * of the following negative error codes is returned. | ||
106 | * | ||
107 | * %-EIO - I/O error. | ||
108 | * | ||
109 | * %-ENOMEM - Insufficient amount of memory available. | ||
110 | * | ||
111 | * %-ENOSPC - No clean segment left. | ||
112 | */ | ||
113 | int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) | ||
114 | { | ||
115 | struct buffer_head *header_bh, *su_bh; | ||
116 | struct the_nilfs *nilfs; | ||
117 | struct nilfs_sufile_header *header; | ||
118 | struct nilfs_segment_usage *su; | ||
119 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; | ||
120 | __u64 segnum, maxsegnum, last_alloc; | ||
121 | void *kaddr; | ||
122 | unsigned long nsegments, ncleansegs, nsus; | ||
123 | int ret, i, j; | ||
124 | |||
125 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
126 | |||
127 | nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
128 | |||
129 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
130 | if (ret < 0) | ||
131 | goto out_sem; | ||
132 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
133 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
134 | ncleansegs = le64_to_cpu(header->sh_ncleansegs); | ||
135 | last_alloc = le64_to_cpu(header->sh_last_alloc); | ||
136 | kunmap_atomic(kaddr, KM_USER0); | ||
137 | |||
138 | nsegments = nilfs_sufile_get_nsegments(sufile); | ||
139 | segnum = last_alloc + 1; | ||
140 | maxsegnum = nsegments - 1; | ||
141 | for (i = 0; i < nsegments; i += nsus) { | ||
142 | if (segnum >= nsegments) { | ||
143 | /* wrap around */ | ||
144 | segnum = 0; | ||
145 | maxsegnum = last_alloc; | ||
146 | } | ||
147 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, | ||
148 | &su_bh); | ||
149 | if (ret < 0) | ||
150 | goto out_header; | ||
151 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | ||
152 | su = nilfs_sufile_block_get_segment_usage( | ||
153 | sufile, segnum, su_bh, kaddr); | ||
154 | |||
155 | nsus = nilfs_sufile_segment_usages_in_block( | ||
156 | sufile, segnum, maxsegnum); | ||
157 | for (j = 0; j < nsus; j++, su = (void *)su + susz, segnum++) { | ||
158 | if (!nilfs_segment_usage_clean(su)) | ||
159 | continue; | ||
160 | /* found a clean segment */ | ||
161 | nilfs_segment_usage_set_dirty(su); | ||
162 | kunmap_atomic(kaddr, KM_USER0); | ||
163 | |||
164 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
165 | header = nilfs_sufile_block_get_header( | ||
166 | sufile, header_bh, kaddr); | ||
167 | le64_add_cpu(&header->sh_ncleansegs, -1); | ||
168 | le64_add_cpu(&header->sh_ndirtysegs, 1); | ||
169 | header->sh_last_alloc = cpu_to_le64(segnum); | ||
170 | kunmap_atomic(kaddr, KM_USER0); | ||
171 | |||
172 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
173 | nilfs_mdt_mark_buffer_dirty(su_bh); | ||
174 | nilfs_mdt_mark_dirty(sufile); | ||
175 | brelse(su_bh); | ||
176 | *segnump = segnum; | ||
177 | goto out_header; | ||
178 | } | ||
179 | |||
180 | kunmap_atomic(kaddr, KM_USER0); | ||
181 | brelse(su_bh); | ||
182 | } | ||
183 | |||
184 | /* no segments left */ | ||
185 | ret = -ENOSPC; | ||
186 | |||
187 | out_header: | ||
188 | brelse(header_bh); | ||
189 | |||
190 | out_sem: | ||
191 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
192 | return ret; | ||
193 | } | ||
194 | |||
195 | /** | ||
196 | * nilfs_sufile_cancel_free - | ||
197 | * @sufile: inode of segment usage file | ||
198 | * @segnum: segment number | ||
199 | * | ||
200 | * Description: | ||
201 | * | ||
202 | * Return Value: On success, 0 is returned. On error, one of the following | ||
203 | * negative error codes is returned. | ||
204 | * | ||
205 | * %-EIO - I/O error. | ||
206 | * | ||
207 | * %-ENOMEM - Insufficient amount of memory available. | ||
208 | */ | ||
209 | int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) | ||
210 | { | ||
211 | struct buffer_head *header_bh, *su_bh; | ||
212 | struct the_nilfs *nilfs; | ||
213 | struct nilfs_sufile_header *header; | ||
214 | struct nilfs_segment_usage *su; | ||
215 | void *kaddr; | ||
216 | int ret; | ||
217 | |||
218 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
219 | |||
220 | nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
221 | |||
222 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
223 | if (ret < 0) | ||
224 | goto out_sem; | ||
225 | |||
226 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); | ||
227 | if (ret < 0) | ||
228 | goto out_header; | ||
229 | |||
230 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | ||
231 | su = nilfs_sufile_block_get_segment_usage( | ||
232 | sufile, segnum, su_bh, kaddr); | ||
233 | if (unlikely(!nilfs_segment_usage_clean(su))) { | ||
234 | printk(KERN_WARNING "%s: segment %llu must be clean\n", | ||
235 | __func__, (unsigned long long)segnum); | ||
236 | kunmap_atomic(kaddr, KM_USER0); | ||
237 | goto out_su_bh; | ||
238 | } | ||
239 | nilfs_segment_usage_set_dirty(su); | ||
240 | kunmap_atomic(kaddr, KM_USER0); | ||
241 | |||
242 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
243 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
244 | le64_add_cpu(&header->sh_ncleansegs, -1); | ||
245 | le64_add_cpu(&header->sh_ndirtysegs, 1); | ||
246 | kunmap_atomic(kaddr, KM_USER0); | ||
247 | |||
248 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
249 | nilfs_mdt_mark_buffer_dirty(su_bh); | ||
250 | nilfs_mdt_mark_dirty(sufile); | ||
251 | |||
252 | out_su_bh: | ||
253 | brelse(su_bh); | ||
254 | out_header: | ||
255 | brelse(header_bh); | ||
256 | out_sem: | ||
257 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
258 | return ret; | ||
259 | } | ||
260 | |||
261 | /** | ||
262 | * nilfs_sufile_freev - free segments | ||
263 | * @sufile: inode of segment usage file | ||
264 | * @segnum: array of segment numbers | ||
265 | * @nsegs: number of segments | ||
266 | * | ||
267 | * Description: nilfs_sufile_freev() frees segments specified by @segnum and | ||
268 | * @nsegs, which must have been returned by a previous call to | ||
269 | * nilfs_sufile_alloc(). | ||
270 | * | ||
271 | * Return Value: On success, 0 is returned. On error, one of the following | ||
272 | * negative error codes is returned. | ||
273 | * | ||
274 | * %-EIO - I/O error. | ||
275 | * | ||
276 | * %-ENOMEM - Insufficient amount of memory available. | ||
277 | */ | ||
278 | #define NILFS_SUFILE_FREEV_PREALLOC 16 | ||
279 | int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) | ||
280 | { | ||
281 | struct buffer_head *header_bh, **su_bh, | ||
282 | *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC]; | ||
283 | struct the_nilfs *nilfs; | ||
284 | struct nilfs_sufile_header *header; | ||
285 | struct nilfs_segment_usage *su; | ||
286 | void *kaddr; | ||
287 | int ret, i; | ||
288 | |||
289 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
290 | |||
291 | nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
292 | |||
293 | /* prepare resources */ | ||
294 | if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC) | ||
295 | su_bh = su_bh_prealloc; | ||
296 | else { | ||
297 | su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS); | ||
298 | if (su_bh == NULL) { | ||
299 | ret = -ENOMEM; | ||
300 | goto out_sem; | ||
301 | } | ||
302 | } | ||
303 | |||
304 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
305 | if (ret < 0) | ||
306 | goto out_su_bh; | ||
307 | for (i = 0; i < nsegs; i++) { | ||
308 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i], | ||
309 | 0, &su_bh[i]); | ||
310 | if (ret < 0) | ||
311 | goto out_bh; | ||
312 | } | ||
313 | |||
314 | /* free segments */ | ||
315 | for (i = 0; i < nsegs; i++) { | ||
316 | kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); | ||
317 | su = nilfs_sufile_block_get_segment_usage( | ||
318 | sufile, segnum[i], su_bh[i], kaddr); | ||
319 | WARN_ON(nilfs_segment_usage_error(su)); | ||
320 | nilfs_segment_usage_set_clean(su); | ||
321 | kunmap_atomic(kaddr, KM_USER0); | ||
322 | nilfs_mdt_mark_buffer_dirty(su_bh[i]); | ||
323 | } | ||
324 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
325 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
326 | le64_add_cpu(&header->sh_ncleansegs, nsegs); | ||
327 | le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); | ||
328 | kunmap_atomic(kaddr, KM_USER0); | ||
329 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
330 | nilfs_mdt_mark_dirty(sufile); | ||
331 | |||
332 | out_bh: | ||
333 | for (i--; i >= 0; i--) | ||
334 | brelse(su_bh[i]); | ||
335 | brelse(header_bh); | ||
336 | |||
337 | out_su_bh: | ||
338 | if (su_bh != su_bh_prealloc) | ||
339 | kfree(su_bh); | ||
340 | |||
341 | out_sem: | ||
342 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
343 | return ret; | ||
344 | } | ||
345 | |||
346 | /** | ||
347 | * nilfs_sufile_free - | ||
348 | * @sufile: | ||
349 | * @segnum: | ||
350 | */ | ||
351 | int nilfs_sufile_free(struct inode *sufile, __u64 segnum) | ||
352 | { | ||
353 | return nilfs_sufile_freev(sufile, &segnum, 1); | ||
354 | } | ||
355 | |||
356 | /** | ||
357 | * nilfs_sufile_get_segment_usage - get a segment usage | ||
358 | * @sufile: inode of segment usage file | ||
359 | * @segnum: segment number | ||
360 | * @sup: pointer to segment usage | ||
361 | * @bhp: pointer to buffer head | ||
362 | * | ||
363 | * Description: nilfs_sufile_get_segment_usage() acquires the segment usage | ||
364 | * specified by @segnum. | ||
365 | * | ||
366 | * Return Value: On success, 0 is returned, and the segment usage and the | ||
367 | * buffer head of the buffer on which the segment usage is located are stored | ||
368 | * in the place pointed by @sup and @bhp, respectively. On error, one of the | ||
369 | * following negative error codes is returned. | ||
370 | * | ||
371 | * %-EIO - I/O error. | ||
372 | * | ||
373 | * %-ENOMEM - Insufficient amount of memory available. | ||
374 | * | ||
375 | * %-EINVAL - Invalid segment usage number. | ||
376 | */ | ||
377 | int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, | ||
378 | struct nilfs_segment_usage **sup, | ||
379 | struct buffer_head **bhp) | ||
380 | { | ||
381 | struct buffer_head *bh; | ||
382 | struct nilfs_segment_usage *su; | ||
383 | void *kaddr; | ||
384 | int ret; | ||
385 | |||
386 | /* segnum is 0 origin */ | ||
387 | if (segnum >= nilfs_sufile_get_nsegments(sufile)) | ||
388 | return -EINVAL; | ||
389 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
390 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); | ||
391 | if (ret < 0) | ||
392 | goto out_sem; | ||
393 | kaddr = kmap(bh->b_page); | ||
394 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); | ||
395 | if (nilfs_segment_usage_error(su)) { | ||
396 | kunmap(bh->b_page); | ||
397 | brelse(bh); | ||
398 | ret = -EINVAL; | ||
399 | goto out_sem; | ||
400 | } | ||
401 | |||
402 | if (sup != NULL) | ||
403 | *sup = su; | ||
404 | *bhp = bh; | ||
405 | |||
406 | out_sem: | ||
407 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
408 | return ret; | ||
409 | } | ||
410 | |||
411 | /** | ||
412 | * nilfs_sufile_put_segment_usage - put a segment usage | ||
413 | * @sufile: inode of segment usage file | ||
414 | * @segnum: segment number | ||
415 | * @bh: buffer head | ||
416 | * | ||
417 | * Description: nilfs_sufile_put_segment_usage() releases the segment usage | ||
418 | * specified by @segnum. @bh must be the buffer head which have been returned | ||
419 | * by a previous call to nilfs_sufile_get_segment_usage() with @segnum. | ||
420 | */ | ||
421 | void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, | ||
422 | struct buffer_head *bh) | ||
423 | { | ||
424 | kunmap(bh->b_page); | ||
425 | brelse(bh); | ||
426 | } | ||
427 | |||
428 | /** | ||
429 | * nilfs_sufile_get_stat - get segment usage statistics | ||
430 | * @sufile: inode of segment usage file | ||
431 | * @stat: pointer to a structure of segment usage statistics | ||
432 | * | ||
433 | * Description: nilfs_sufile_get_stat() returns information about segment | ||
434 | * usage. | ||
435 | * | ||
436 | * Return Value: On success, 0 is returned, and segment usage information is | ||
437 | * stored in the place pointed by @stat. On error, one of the following | ||
438 | * negative error codes is returned. | ||
439 | * | ||
440 | * %-EIO - I/O error. | ||
441 | * | ||
442 | * %-ENOMEM - Insufficient amount of memory available. | ||
443 | */ | ||
444 | int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) | ||
445 | { | ||
446 | struct buffer_head *header_bh; | ||
447 | struct nilfs_sufile_header *header; | ||
448 | struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
449 | void *kaddr; | ||
450 | int ret; | ||
451 | |||
452 | down_read(&NILFS_MDT(sufile)->mi_sem); | ||
453 | |||
454 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
455 | if (ret < 0) | ||
456 | goto out_sem; | ||
457 | |||
458 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
459 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
460 | sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); | ||
461 | sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); | ||
462 | sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); | ||
463 | sustat->ss_ctime = nilfs->ns_ctime; | ||
464 | sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime; | ||
465 | spin_lock(&nilfs->ns_last_segment_lock); | ||
466 | sustat->ss_prot_seq = nilfs->ns_prot_seq; | ||
467 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
468 | kunmap_atomic(kaddr, KM_USER0); | ||
469 | brelse(header_bh); | ||
470 | |||
471 | out_sem: | ||
472 | up_read(&NILFS_MDT(sufile)->mi_sem); | ||
473 | return ret; | ||
474 | } | ||
475 | |||
476 | /** | ||
477 | * nilfs_sufile_get_ncleansegs - get the number of clean segments | ||
478 | * @sufile: inode of segment usage file | ||
479 | * @nsegsp: pointer to the number of clean segments | ||
480 | * | ||
481 | * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean | ||
482 | * segments. | ||
483 | * | ||
484 | * Return Value: On success, 0 is returned and the number of clean segments is | ||
485 | * stored in the place pointed by @nsegsp. On error, one of the following | ||
486 | * negative error codes is returned. | ||
487 | * | ||
488 | * %-EIO - I/O error. | ||
489 | * | ||
490 | * %-ENOMEM - Insufficient amount of memory available. | ||
491 | */ | ||
492 | int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) | ||
493 | { | ||
494 | struct nilfs_sustat sustat; | ||
495 | int ret; | ||
496 | |||
497 | ret = nilfs_sufile_get_stat(sufile, &sustat); | ||
498 | if (ret == 0) | ||
499 | *nsegsp = sustat.ss_ncleansegs; | ||
500 | return ret; | ||
501 | } | ||
502 | |||
503 | /** | ||
504 | * nilfs_sufile_set_error - mark a segment as erroneous | ||
505 | * @sufile: inode of segment usage file | ||
506 | * @segnum: segment number | ||
507 | * | ||
508 | * Description: nilfs_sufile_set_error() marks the segment specified by | ||
509 | * @segnum as erroneous. The error segment will never be used again. | ||
510 | * | ||
511 | * Return Value: On success, 0 is returned. On error, one of the following | ||
512 | * negative error codes is returned. | ||
513 | * | ||
514 | * %-EIO - I/O error. | ||
515 | * | ||
516 | * %-ENOMEM - Insufficient amount of memory available. | ||
517 | * | ||
518 | * %-EINVAL - Invalid segment usage number. | ||
519 | */ | ||
520 | int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) | ||
521 | { | ||
522 | struct buffer_head *header_bh, *su_bh; | ||
523 | struct nilfs_segment_usage *su; | ||
524 | struct nilfs_sufile_header *header; | ||
525 | void *kaddr; | ||
526 | int ret; | ||
527 | |||
528 | if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { | ||
529 | printk(KERN_WARNING "%s: invalid segment number: %llu\n", | ||
530 | __func__, (unsigned long long)segnum); | ||
531 | return -EINVAL; | ||
532 | } | ||
533 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
534 | |||
535 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
536 | if (ret < 0) | ||
537 | goto out_sem; | ||
538 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); | ||
539 | if (ret < 0) | ||
540 | goto out_header; | ||
541 | |||
542 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | ||
543 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); | ||
544 | if (nilfs_segment_usage_error(su)) { | ||
545 | kunmap_atomic(kaddr, KM_USER0); | ||
546 | brelse(su_bh); | ||
547 | goto out_header; | ||
548 | } | ||
549 | |||
550 | nilfs_segment_usage_set_error(su); | ||
551 | kunmap_atomic(kaddr, KM_USER0); | ||
552 | brelse(su_bh); | ||
553 | |||
554 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
555 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
556 | le64_add_cpu(&header->sh_ndirtysegs, -1); | ||
557 | kunmap_atomic(kaddr, KM_USER0); | ||
558 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
559 | nilfs_mdt_mark_buffer_dirty(su_bh); | ||
560 | nilfs_mdt_mark_dirty(sufile); | ||
561 | brelse(su_bh); | ||
562 | |||
563 | out_header: | ||
564 | brelse(header_bh); | ||
565 | |||
566 | out_sem: | ||
567 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | /** | ||
572 | * nilfs_sufile_get_suinfo - | ||
573 | * @sufile: inode of segment usage file | ||
574 | * @segnum: segment number to start looking | ||
575 | * @si: array of suinfo | ||
576 | * @nsi: size of suinfo array | ||
577 | * | ||
578 | * Description: | ||
579 | * | ||
580 | * Return Value: On success, 0 is returned and .... On error, one of the | ||
581 | * following negative error codes is returned. | ||
582 | * | ||
583 | * %-EIO - I/O error. | ||
584 | * | ||
585 | * %-ENOMEM - Insufficient amount of memory available. | ||
586 | */ | ||
587 | ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, | ||
588 | struct nilfs_suinfo *si, size_t nsi) | ||
589 | { | ||
590 | struct buffer_head *su_bh; | ||
591 | struct nilfs_segment_usage *su; | ||
592 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; | ||
593 | struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
594 | void *kaddr; | ||
595 | unsigned long nsegs, segusages_per_block; | ||
596 | ssize_t n; | ||
597 | int ret, i, j; | ||
598 | |||
599 | down_read(&NILFS_MDT(sufile)->mi_sem); | ||
600 | |||
601 | segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile); | ||
602 | nsegs = min_t(unsigned long, | ||
603 | nilfs_sufile_get_nsegments(sufile) - segnum, | ||
604 | nsi); | ||
605 | for (i = 0; i < nsegs; i += n, segnum += n) { | ||
606 | n = min_t(unsigned long, | ||
607 | segusages_per_block - | ||
608 | nilfs_sufile_get_offset(sufile, segnum), | ||
609 | nsegs - i); | ||
610 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, | ||
611 | &su_bh); | ||
612 | if (ret < 0) { | ||
613 | if (ret != -ENOENT) | ||
614 | goto out; | ||
615 | /* hole */ | ||
616 | memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n); | ||
617 | continue; | ||
618 | } | ||
619 | |||
620 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | ||
621 | su = nilfs_sufile_block_get_segment_usage( | ||
622 | sufile, segnum, su_bh, kaddr); | ||
623 | for (j = 0; j < n; j++, su = (void *)su + susz) { | ||
624 | si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod); | ||
625 | si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); | ||
626 | si[i + j].sui_flags = le32_to_cpu(su->su_flags) & | ||
627 | ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); | ||
628 | if (nilfs_segment_is_active(nilfs, segnum + i + j)) | ||
629 | si[i + j].sui_flags |= | ||
630 | (1UL << NILFS_SEGMENT_USAGE_ACTIVE); | ||
631 | } | ||
632 | kunmap_atomic(kaddr, KM_USER0); | ||
633 | brelse(su_bh); | ||
634 | } | ||
635 | ret = nsegs; | ||
636 | |||
637 | out: | ||
638 | up_read(&NILFS_MDT(sufile)->mi_sem); | ||
639 | return ret; | ||
640 | } | ||
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h new file mode 100644 index 000000000000..d595f33a768d --- /dev/null +++ b/fs/nilfs2/sufile.h | |||
@@ -0,0 +1,54 @@ | |||
1 | /* | ||
2 | * sufile.h - NILFS segment usage file. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_SUFILE_H | ||
24 | #define _NILFS_SUFILE_H | ||
25 | |||
26 | #include <linux/fs.h> | ||
27 | #include <linux/buffer_head.h> | ||
28 | #include <linux/nilfs2_fs.h> | ||
29 | #include "mdt.h" | ||
30 | |||
31 | #define NILFS_SUFILE_GFP NILFS_MDT_GFP | ||
32 | |||
33 | static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) | ||
34 | { | ||
35 | return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; | ||
36 | } | ||
37 | |||
38 | int nilfs_sufile_alloc(struct inode *, __u64 *); | ||
39 | int nilfs_sufile_cancel_free(struct inode *, __u64); | ||
40 | int nilfs_sufile_freev(struct inode *, __u64 *, size_t); | ||
41 | int nilfs_sufile_free(struct inode *, __u64); | ||
42 | int nilfs_sufile_get_segment_usage(struct inode *, __u64, | ||
43 | struct nilfs_segment_usage **, | ||
44 | struct buffer_head **); | ||
45 | void nilfs_sufile_put_segment_usage(struct inode *, __u64, | ||
46 | struct buffer_head *); | ||
47 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); | ||
48 | int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); | ||
49 | int nilfs_sufile_set_error(struct inode *, __u64); | ||
50 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, | ||
51 | size_t); | ||
52 | |||
53 | |||
54 | #endif /* _NILFS_SUFILE_H */ | ||
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c new file mode 100644 index 000000000000..e117e1ea9bff --- /dev/null +++ b/fs/nilfs2/super.c | |||
@@ -0,0 +1,1323 @@ | |||
1 | /* | ||
2 | * super.c - NILFS module and super block management. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | */ | ||
22 | /* | ||
23 | * linux/fs/ext2/super.c | ||
24 | * | ||
25 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
26 | * Remy Card (card@masi.ibp.fr) | ||
27 | * Laboratoire MASI - Institut Blaise Pascal | ||
28 | * Universite Pierre et Marie Curie (Paris VI) | ||
29 | * | ||
30 | * from | ||
31 | * | ||
32 | * linux/fs/minix/inode.c | ||
33 | * | ||
34 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
35 | * | ||
36 | * Big-endian to little-endian byte-swapping/bitmaps by | ||
37 | * David S. Miller (davem@caip.rutgers.edu), 1995 | ||
38 | */ | ||
39 | |||
40 | #include <linux/module.h> | ||
41 | #include <linux/string.h> | ||
42 | #include <linux/slab.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/blkdev.h> | ||
45 | #include <linux/parser.h> | ||
46 | #include <linux/random.h> | ||
47 | #include <linux/crc32.h> | ||
48 | #include <linux/smp_lock.h> | ||
49 | #include <linux/vfs.h> | ||
50 | #include <linux/writeback.h> | ||
51 | #include <linux/kobject.h> | ||
52 | #include <linux/exportfs.h> | ||
53 | #include "nilfs.h" | ||
54 | #include "mdt.h" | ||
55 | #include "alloc.h" | ||
56 | #include "page.h" | ||
57 | #include "cpfile.h" | ||
58 | #include "ifile.h" | ||
59 | #include "dat.h" | ||
60 | #include "segment.h" | ||
61 | #include "segbuf.h" | ||
62 | |||
63 | MODULE_AUTHOR("NTT Corp."); | ||
64 | MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " | ||
65 | "(NILFS)"); | ||
66 | MODULE_VERSION(NILFS_VERSION); | ||
67 | MODULE_LICENSE("GPL"); | ||
68 | |||
69 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | ||
70 | static int test_exclusive_mount(struct file_system_type *fs_type, | ||
71 | struct block_device *bdev, int flags); | ||
72 | |||
73 | /** | ||
74 | * nilfs_error() - report failure condition on a filesystem | ||
75 | * | ||
76 | * nilfs_error() sets an ERROR_FS flag on the superblock as well as | ||
77 | * reporting an error message. It should be called when NILFS detects | ||
78 | * incoherences or defects of meta data on disk. As for sustainable | ||
79 | * errors such as a single-shot I/O error, nilfs_warning() or the printk() | ||
80 | * function should be used instead. | ||
81 | * | ||
82 | * The segment constructor must not call this function because it can | ||
83 | * kill itself. | ||
84 | */ | ||
85 | void nilfs_error(struct super_block *sb, const char *function, | ||
86 | const char *fmt, ...) | ||
87 | { | ||
88 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
89 | va_list args; | ||
90 | |||
91 | va_start(args, fmt); | ||
92 | printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function); | ||
93 | vprintk(fmt, args); | ||
94 | printk("\n"); | ||
95 | va_end(args); | ||
96 | |||
97 | if (!(sb->s_flags & MS_RDONLY)) { | ||
98 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
99 | |||
100 | if (!nilfs_test_opt(sbi, ERRORS_CONT)) | ||
101 | nilfs_detach_segment_constructor(sbi); | ||
102 | |||
103 | down_write(&nilfs->ns_sem); | ||
104 | if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { | ||
105 | nilfs->ns_mount_state |= NILFS_ERROR_FS; | ||
106 | nilfs->ns_sbp[0]->s_state |= | ||
107 | cpu_to_le16(NILFS_ERROR_FS); | ||
108 | nilfs_commit_super(sbi, 1); | ||
109 | } | ||
110 | up_write(&nilfs->ns_sem); | ||
111 | |||
112 | if (nilfs_test_opt(sbi, ERRORS_RO)) { | ||
113 | printk(KERN_CRIT "Remounting filesystem read-only\n"); | ||
114 | sb->s_flags |= MS_RDONLY; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | if (nilfs_test_opt(sbi, ERRORS_PANIC)) | ||
119 | panic("NILFS (device %s): panic forced after error\n", | ||
120 | sb->s_id); | ||
121 | } | ||
122 | |||
123 | void nilfs_warning(struct super_block *sb, const char *function, | ||
124 | const char *fmt, ...) | ||
125 | { | ||
126 | va_list args; | ||
127 | |||
128 | va_start(args, fmt); | ||
129 | printk(KERN_WARNING "NILFS warning (device %s): %s: ", | ||
130 | sb->s_id, function); | ||
131 | vprintk(fmt, args); | ||
132 | printk("\n"); | ||
133 | va_end(args); | ||
134 | } | ||
135 | |||
136 | static struct kmem_cache *nilfs_inode_cachep; | ||
137 | |||
138 | struct inode *nilfs_alloc_inode(struct super_block *sb) | ||
139 | { | ||
140 | struct nilfs_inode_info *ii; | ||
141 | |||
142 | ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS); | ||
143 | if (!ii) | ||
144 | return NULL; | ||
145 | ii->i_bh = NULL; | ||
146 | ii->i_state = 0; | ||
147 | ii->vfs_inode.i_version = 1; | ||
148 | nilfs_btnode_cache_init(&ii->i_btnode_cache); | ||
149 | return &ii->vfs_inode; | ||
150 | } | ||
151 | |||
152 | void nilfs_destroy_inode(struct inode *inode) | ||
153 | { | ||
154 | kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); | ||
155 | } | ||
156 | |||
157 | static void init_once(void *obj) | ||
158 | { | ||
159 | struct nilfs_inode_info *ii = obj; | ||
160 | |||
161 | INIT_LIST_HEAD(&ii->i_dirty); | ||
162 | #ifdef CONFIG_NILFS_XATTR | ||
163 | init_rwsem(&ii->xattr_sem); | ||
164 | #endif | ||
165 | nilfs_btnode_cache_init_once(&ii->i_btnode_cache); | ||
166 | ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; | ||
167 | inode_init_once(&ii->vfs_inode); | ||
168 | } | ||
169 | |||
170 | static int nilfs_init_inode_cache(void) | ||
171 | { | ||
172 | nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache", | ||
173 | sizeof(struct nilfs_inode_info), | ||
174 | 0, SLAB_RECLAIM_ACCOUNT, | ||
175 | init_once); | ||
176 | |||
177 | return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0; | ||
178 | } | ||
179 | |||
180 | static inline void nilfs_destroy_inode_cache(void) | ||
181 | { | ||
182 | kmem_cache_destroy(nilfs_inode_cachep); | ||
183 | } | ||
184 | |||
185 | static void nilfs_clear_inode(struct inode *inode) | ||
186 | { | ||
187 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
188 | |||
189 | #ifdef CONFIG_NILFS_POSIX_ACL | ||
190 | if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { | ||
191 | posix_acl_release(ii->i_acl); | ||
192 | ii->i_acl = NILFS_ACL_NOT_CACHED; | ||
193 | } | ||
194 | if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) { | ||
195 | posix_acl_release(ii->i_default_acl); | ||
196 | ii->i_default_acl = NILFS_ACL_NOT_CACHED; | ||
197 | } | ||
198 | #endif | ||
199 | /* | ||
200 | * Free resources allocated in nilfs_read_inode(), here. | ||
201 | */ | ||
202 | BUG_ON(!list_empty(&ii->i_dirty)); | ||
203 | brelse(ii->i_bh); | ||
204 | ii->i_bh = NULL; | ||
205 | |||
206 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) | ||
207 | nilfs_bmap_clear(ii->i_bmap); | ||
208 | |||
209 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); | ||
210 | } | ||
211 | |||
212 | static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) | ||
213 | { | ||
214 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
215 | int err; | ||
216 | int barrier_done = 0; | ||
217 | |||
218 | if (nilfs_test_opt(sbi, BARRIER)) { | ||
219 | set_buffer_ordered(nilfs->ns_sbh[0]); | ||
220 | barrier_done = 1; | ||
221 | } | ||
222 | retry: | ||
223 | set_buffer_dirty(nilfs->ns_sbh[0]); | ||
224 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | ||
225 | if (err == -EOPNOTSUPP && barrier_done) { | ||
226 | nilfs_warning(sbi->s_super, __func__, | ||
227 | "barrier-based sync failed. " | ||
228 | "disabling barriers\n"); | ||
229 | nilfs_clear_opt(sbi, BARRIER); | ||
230 | barrier_done = 0; | ||
231 | clear_buffer_ordered(nilfs->ns_sbh[0]); | ||
232 | goto retry; | ||
233 | } | ||
234 | if (unlikely(err)) { | ||
235 | printk(KERN_ERR | ||
236 | "NILFS: unable to write superblock (err=%d)\n", err); | ||
237 | if (err == -EIO && nilfs->ns_sbh[1]) { | ||
238 | nilfs_fall_back_super_block(nilfs); | ||
239 | goto retry; | ||
240 | } | ||
241 | } else { | ||
242 | struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; | ||
243 | |||
244 | /* | ||
245 | * The latest segment becomes trailable from the position | ||
246 | * written in superblock. | ||
247 | */ | ||
248 | clear_nilfs_discontinued(nilfs); | ||
249 | |||
250 | /* update GC protection for recent segments */ | ||
251 | if (nilfs->ns_sbh[1]) { | ||
252 | sbp = NULL; | ||
253 | if (dupsb) { | ||
254 | set_buffer_dirty(nilfs->ns_sbh[1]); | ||
255 | if (!sync_dirty_buffer(nilfs->ns_sbh[1])) | ||
256 | sbp = nilfs->ns_sbp[1]; | ||
257 | } | ||
258 | } | ||
259 | if (sbp) { | ||
260 | spin_lock(&nilfs->ns_last_segment_lock); | ||
261 | nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); | ||
262 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | return err; | ||
267 | } | ||
268 | |||
269 | int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) | ||
270 | { | ||
271 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
272 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | ||
273 | sector_t nfreeblocks; | ||
274 | time_t t; | ||
275 | int err; | ||
276 | |||
277 | /* nilfs->sem must be locked by the caller. */ | ||
278 | if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { | ||
279 | if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) | ||
280 | nilfs_swap_super_block(nilfs); | ||
281 | else { | ||
282 | printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", | ||
283 | sbi->s_super->s_id); | ||
284 | return -EIO; | ||
285 | } | ||
286 | } | ||
287 | err = nilfs_count_free_blocks(nilfs, &nfreeblocks); | ||
288 | if (unlikely(err)) { | ||
289 | printk(KERN_ERR "NILFS: failed to count free blocks\n"); | ||
290 | return err; | ||
291 | } | ||
292 | spin_lock(&nilfs->ns_last_segment_lock); | ||
293 | sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); | ||
294 | sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); | ||
295 | sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); | ||
296 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
297 | |||
298 | t = get_seconds(); | ||
299 | nilfs->ns_sbwtime[0] = t; | ||
300 | sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); | ||
301 | sbp[0]->s_wtime = cpu_to_le64(t); | ||
302 | sbp[0]->s_sum = 0; | ||
303 | sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, | ||
304 | (unsigned char *)sbp[0], | ||
305 | nilfs->ns_sbsize)); | ||
306 | if (dupsb && sbp[1]) { | ||
307 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | ||
308 | nilfs->ns_sbwtime[1] = t; | ||
309 | } | ||
310 | sbi->s_super->s_dirt = 0; | ||
311 | return nilfs_sync_super(sbi, dupsb); | ||
312 | } | ||
313 | |||
314 | static void nilfs_put_super(struct super_block *sb) | ||
315 | { | ||
316 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
317 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
318 | |||
319 | nilfs_detach_segment_constructor(sbi); | ||
320 | |||
321 | if (!(sb->s_flags & MS_RDONLY)) { | ||
322 | down_write(&nilfs->ns_sem); | ||
323 | nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); | ||
324 | nilfs_commit_super(sbi, 1); | ||
325 | up_write(&nilfs->ns_sem); | ||
326 | } | ||
327 | |||
328 | nilfs_detach_checkpoint(sbi); | ||
329 | put_nilfs(sbi->s_nilfs); | ||
330 | sbi->s_super = NULL; | ||
331 | sb->s_fs_info = NULL; | ||
332 | kfree(sbi); | ||
333 | } | ||
334 | |||
335 | /** | ||
336 | * nilfs_write_super - write super block(s) of NILFS | ||
337 | * @sb: super_block | ||
338 | * | ||
339 | * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and | ||
340 | * clears s_dirt. This function is called in the section protected by | ||
341 | * lock_super(). | ||
342 | * | ||
343 | * The s_dirt flag is managed by each filesystem and we protect it by ns_sem | ||
344 | * of the struct the_nilfs. Lock order must be as follows: | ||
345 | * | ||
346 | * 1. lock_super() | ||
347 | * 2. down_write(&nilfs->ns_sem) | ||
348 | * | ||
349 | * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer | ||
350 | * of the super block (nilfs->ns_sbp[]). | ||
351 | * | ||
352 | * In most cases, VFS functions call lock_super() before calling these | ||
353 | * methods. So we must be careful not to bring on deadlocks when using | ||
354 | * lock_super(); see generic_shutdown_super(), write_super(), and so on. | ||
355 | * | ||
356 | * Note that order of lock_kernel() and lock_super() depends on contexts | ||
357 | * of VFS. We should also note that lock_kernel() can be used in its | ||
358 | * protective section and only the outermost one has an effect. | ||
359 | */ | ||
360 | static void nilfs_write_super(struct super_block *sb) | ||
361 | { | ||
362 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
363 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
364 | |||
365 | down_write(&nilfs->ns_sem); | ||
366 | if (!(sb->s_flags & MS_RDONLY)) { | ||
367 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | ||
368 | u64 t = get_seconds(); | ||
369 | int dupsb; | ||
370 | |||
371 | if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] && | ||
372 | t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) { | ||
373 | up_write(&nilfs->ns_sem); | ||
374 | return; | ||
375 | } | ||
376 | dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; | ||
377 | nilfs_commit_super(sbi, dupsb); | ||
378 | } | ||
379 | sb->s_dirt = 0; | ||
380 | up_write(&nilfs->ns_sem); | ||
381 | } | ||
382 | |||
/* VFS sync_fs: flush the log into a new segment only for a waiting sync. */
static int nilfs_sync_fs(struct super_block *sb, int wait)
{
	if (!wait)
		return 0;
	return nilfs_construct_segment(sb);
}
392 | |||
393 | int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | ||
394 | { | ||
395 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
396 | struct nilfs_checkpoint *raw_cp; | ||
397 | struct buffer_head *bh_cp; | ||
398 | int err; | ||
399 | |||
400 | down_write(&nilfs->ns_sem); | ||
401 | list_add(&sbi->s_list, &nilfs->ns_supers); | ||
402 | up_write(&nilfs->ns_sem); | ||
403 | |||
404 | sbi->s_ifile = nilfs_mdt_new( | ||
405 | nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP); | ||
406 | if (!sbi->s_ifile) | ||
407 | return -ENOMEM; | ||
408 | |||
409 | err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size); | ||
410 | if (unlikely(err)) | ||
411 | goto failed; | ||
412 | |||
413 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, | ||
414 | &bh_cp); | ||
415 | if (unlikely(err)) { | ||
416 | if (err == -ENOENT || err == -EINVAL) { | ||
417 | printk(KERN_ERR | ||
418 | "NILFS: Invalid checkpoint " | ||
419 | "(checkpoint number=%llu)\n", | ||
420 | (unsigned long long)cno); | ||
421 | err = -EINVAL; | ||
422 | } | ||
423 | goto failed; | ||
424 | } | ||
425 | err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); | ||
426 | if (unlikely(err)) | ||
427 | goto failed_bh; | ||
428 | atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); | ||
429 | atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); | ||
430 | |||
431 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); | ||
432 | return 0; | ||
433 | |||
434 | failed_bh: | ||
435 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); | ||
436 | failed: | ||
437 | nilfs_mdt_destroy(sbi->s_ifile); | ||
438 | sbi->s_ifile = NULL; | ||
439 | |||
440 | down_write(&nilfs->ns_sem); | ||
441 | list_del_init(&sbi->s_list); | ||
442 | up_write(&nilfs->ns_sem); | ||
443 | |||
444 | return err; | ||
445 | } | ||
446 | |||
447 | void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) | ||
448 | { | ||
449 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
450 | |||
451 | nilfs_mdt_clear(sbi->s_ifile); | ||
452 | nilfs_mdt_destroy(sbi->s_ifile); | ||
453 | sbi->s_ifile = NULL; | ||
454 | down_write(&nilfs->ns_sem); | ||
455 | list_del_init(&sbi->s_list); | ||
456 | up_write(&nilfs->ns_sem); | ||
457 | } | ||
458 | |||
459 | static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) | ||
460 | { | ||
461 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
462 | int err = 0; | ||
463 | |||
464 | down_write(&nilfs->ns_sem); | ||
465 | if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { | ||
466 | nilfs->ns_mount_state |= NILFS_VALID_FS; | ||
467 | err = nilfs_commit_super(sbi, 1); | ||
468 | if (likely(!err)) | ||
469 | printk(KERN_INFO "NILFS: recovery complete.\n"); | ||
470 | } | ||
471 | up_write(&nilfs->ns_sem); | ||
472 | return err; | ||
473 | } | ||
474 | |||
475 | static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | ||
476 | { | ||
477 | struct super_block *sb = dentry->d_sb; | ||
478 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
479 | unsigned long long blocks; | ||
480 | unsigned long overhead; | ||
481 | unsigned long nrsvblocks; | ||
482 | sector_t nfreeblocks; | ||
483 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
484 | int err; | ||
485 | |||
486 | /* | ||
487 | * Compute all of the segment blocks | ||
488 | * | ||
489 | * The blocks before first segment and after last segment | ||
490 | * are excluded. | ||
491 | */ | ||
492 | blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments | ||
493 | - nilfs->ns_first_data_block; | ||
494 | nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment; | ||
495 | |||
496 | /* | ||
497 | * Compute the overhead | ||
498 | * | ||
499 | * When distributing meta data blocks outside semgent structure, | ||
500 | * We must count them as the overhead. | ||
501 | */ | ||
502 | overhead = 0; | ||
503 | |||
504 | err = nilfs_count_free_blocks(nilfs, &nfreeblocks); | ||
505 | if (unlikely(err)) | ||
506 | return err; | ||
507 | |||
508 | buf->f_type = NILFS_SUPER_MAGIC; | ||
509 | buf->f_bsize = sb->s_blocksize; | ||
510 | buf->f_blocks = blocks - overhead; | ||
511 | buf->f_bfree = nfreeblocks; | ||
512 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? | ||
513 | (buf->f_bfree - nrsvblocks) : 0; | ||
514 | buf->f_files = atomic_read(&sbi->s_inodes_count); | ||
515 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ | ||
516 | buf->f_namelen = NILFS_NAME_LEN; | ||
517 | return 0; | ||
518 | } | ||
519 | |||
520 | static struct super_operations nilfs_sops = { | ||
521 | .alloc_inode = nilfs_alloc_inode, | ||
522 | .destroy_inode = nilfs_destroy_inode, | ||
523 | .dirty_inode = nilfs_dirty_inode, | ||
524 | /* .write_inode = nilfs_write_inode, */ | ||
525 | /* .put_inode = nilfs_put_inode, */ | ||
526 | /* .drop_inode = nilfs_drop_inode, */ | ||
527 | .delete_inode = nilfs_delete_inode, | ||
528 | .put_super = nilfs_put_super, | ||
529 | .write_super = nilfs_write_super, | ||
530 | .sync_fs = nilfs_sync_fs, | ||
531 | /* .write_super_lockfs */ | ||
532 | /* .unlockfs */ | ||
533 | .statfs = nilfs_statfs, | ||
534 | .remount_fs = nilfs_remount, | ||
535 | .clear_inode = nilfs_clear_inode, | ||
536 | /* .umount_begin */ | ||
537 | /* .show_options */ | ||
538 | }; | ||
539 | |||
540 | static struct inode * | ||
541 | nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) | ||
542 | { | ||
543 | struct inode *inode; | ||
544 | |||
545 | if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && | ||
546 | ino != NILFS_SKETCH_INO) | ||
547 | return ERR_PTR(-ESTALE); | ||
548 | |||
549 | inode = nilfs_iget(sb, ino); | ||
550 | if (IS_ERR(inode)) | ||
551 | return ERR_CAST(inode); | ||
552 | if (generation && inode->i_generation != generation) { | ||
553 | iput(inode); | ||
554 | return ERR_PTR(-ESTALE); | ||
555 | } | ||
556 | |||
557 | return inode; | ||
558 | } | ||
559 | |||
560 | static struct dentry * | ||
561 | nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, | ||
562 | int fh_type) | ||
563 | { | ||
564 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
565 | nilfs_nfs_get_inode); | ||
566 | } | ||
567 | |||
568 | static struct dentry * | ||
569 | nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, | ||
570 | int fh_type) | ||
571 | { | ||
572 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
573 | nilfs_nfs_get_inode); | ||
574 | } | ||
575 | |||
576 | static struct export_operations nilfs_export_ops = { | ||
577 | .fh_to_dentry = nilfs_fh_to_dentry, | ||
578 | .fh_to_parent = nilfs_fh_to_parent, | ||
579 | .get_parent = nilfs_get_parent, | ||
580 | }; | ||
581 | |||
/* Token identifiers for mount option parsing (order must match tokens[]
 * usage; Opt_err is the catch-all). */
enum {
	Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_barrier, Opt_snapshot, Opt_order,
	Opt_err,
};
587 | |||
588 | static match_table_t tokens = { | ||
589 | {Opt_err_cont, "errors=continue"}, | ||
590 | {Opt_err_panic, "errors=panic"}, | ||
591 | {Opt_err_ro, "errors=remount-ro"}, | ||
592 | {Opt_barrier, "barrier=%s"}, | ||
593 | {Opt_snapshot, "cp=%u"}, | ||
594 | {Opt_order, "order=%s"}, | ||
595 | {Opt_err, NULL} | ||
596 | }; | ||
597 | |||
598 | static int match_bool(substring_t *s, int *result) | ||
599 | { | ||
600 | int len = s->to - s->from; | ||
601 | |||
602 | if (strncmp(s->from, "on", len) == 0) | ||
603 | *result = 1; | ||
604 | else if (strncmp(s->from, "off", len) == 0) | ||
605 | *result = 0; | ||
606 | else | ||
607 | return 1; | ||
608 | return 0; | ||
609 | } | ||
610 | |||
611 | static int parse_options(char *options, struct super_block *sb) | ||
612 | { | ||
613 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
614 | char *p; | ||
615 | substring_t args[MAX_OPT_ARGS]; | ||
616 | int option; | ||
617 | |||
618 | if (!options) | ||
619 | return 1; | ||
620 | |||
621 | while ((p = strsep(&options, ",")) != NULL) { | ||
622 | int token; | ||
623 | if (!*p) | ||
624 | continue; | ||
625 | |||
626 | token = match_token(p, tokens, args); | ||
627 | switch (token) { | ||
628 | case Opt_barrier: | ||
629 | if (match_bool(&args[0], &option)) | ||
630 | return 0; | ||
631 | if (option) | ||
632 | nilfs_set_opt(sbi, BARRIER); | ||
633 | else | ||
634 | nilfs_clear_opt(sbi, BARRIER); | ||
635 | break; | ||
636 | case Opt_order: | ||
637 | if (strcmp(args[0].from, "relaxed") == 0) | ||
638 | /* Ordered data semantics */ | ||
639 | nilfs_clear_opt(sbi, STRICT_ORDER); | ||
640 | else if (strcmp(args[0].from, "strict") == 0) | ||
641 | /* Strict in-order semantics */ | ||
642 | nilfs_set_opt(sbi, STRICT_ORDER); | ||
643 | else | ||
644 | return 0; | ||
645 | break; | ||
646 | case Opt_err_panic: | ||
647 | nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); | ||
648 | break; | ||
649 | case Opt_err_ro: | ||
650 | nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); | ||
651 | break; | ||
652 | case Opt_err_cont: | ||
653 | nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); | ||
654 | break; | ||
655 | case Opt_snapshot: | ||
656 | if (match_int(&args[0], &option) || option <= 0) | ||
657 | return 0; | ||
658 | if (!(sb->s_flags & MS_RDONLY)) | ||
659 | return 0; | ||
660 | sbi->s_snapshot_cno = option; | ||
661 | nilfs_set_opt(sbi, SNAPSHOT); | ||
662 | break; | ||
663 | default: | ||
664 | printk(KERN_ERR | ||
665 | "NILFS: Unrecognized mount option \"%s\"\n", p); | ||
666 | return 0; | ||
667 | } | ||
668 | } | ||
669 | return 1; | ||
670 | } | ||
671 | |||
672 | static inline void | ||
673 | nilfs_set_default_options(struct nilfs_sb_info *sbi, | ||
674 | struct nilfs_super_block *sbp) | ||
675 | { | ||
676 | sbi->s_mount_opt = | ||
677 | NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER; | ||
678 | } | ||
679 | |||
680 | static int nilfs_setup_super(struct nilfs_sb_info *sbi) | ||
681 | { | ||
682 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
683 | struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; | ||
684 | int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); | ||
685 | int mnt_count = le16_to_cpu(sbp->s_mnt_count); | ||
686 | |||
687 | /* nilfs->sem must be locked by the caller. */ | ||
688 | if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { | ||
689 | printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); | ||
690 | } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) { | ||
691 | printk(KERN_WARNING | ||
692 | "NILFS warning: mounting fs with errors\n"); | ||
693 | #if 0 | ||
694 | } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) { | ||
695 | printk(KERN_WARNING | ||
696 | "NILFS warning: maximal mount count reached\n"); | ||
697 | #endif | ||
698 | } | ||
699 | if (!max_mnt_count) | ||
700 | sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); | ||
701 | |||
702 | sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); | ||
703 | sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); | ||
704 | sbp->s_mtime = cpu_to_le64(get_seconds()); | ||
705 | return nilfs_commit_super(sbi, 1); | ||
706 | } | ||
707 | |||
708 | struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, | ||
709 | u64 pos, int blocksize, | ||
710 | struct buffer_head **pbh) | ||
711 | { | ||
712 | unsigned long long sb_index = pos; | ||
713 | unsigned long offset; | ||
714 | |||
715 | offset = do_div(sb_index, blocksize); | ||
716 | *pbh = sb_bread(sb, sb_index); | ||
717 | if (!*pbh) | ||
718 | return NULL; | ||
719 | return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); | ||
720 | } | ||
721 | |||
722 | int nilfs_store_magic_and_option(struct super_block *sb, | ||
723 | struct nilfs_super_block *sbp, | ||
724 | char *data) | ||
725 | { | ||
726 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
727 | |||
728 | sb->s_magic = le16_to_cpu(sbp->s_magic); | ||
729 | |||
730 | /* FS independent flags */ | ||
731 | #ifdef NILFS_ATIME_DISABLE | ||
732 | sb->s_flags |= MS_NOATIME; | ||
733 | #endif | ||
734 | |||
735 | nilfs_set_default_options(sbi, sbp); | ||
736 | |||
737 | sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); | ||
738 | sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); | ||
739 | sbi->s_interval = le32_to_cpu(sbp->s_c_interval); | ||
740 | sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); | ||
741 | |||
742 | return !parse_options(data, sb) ? -EINVAL : 0 ; | ||
743 | } | ||
744 | |||
745 | /** | ||
746 | * nilfs_fill_super() - initialize a super block instance | ||
747 | * @sb: super_block | ||
748 | * @data: mount options | ||
749 | * @silent: silent mode flag | ||
750 | * @nilfs: the_nilfs struct | ||
751 | * | ||
752 | * This function is called exclusively by bd_mount_mutex. | ||
753 | * So, the recovery process is protected from other simultaneous mounts. | ||
754 | */ | ||
755 | static int | ||
756 | nilfs_fill_super(struct super_block *sb, void *data, int silent, | ||
757 | struct the_nilfs *nilfs) | ||
758 | { | ||
759 | struct nilfs_sb_info *sbi; | ||
760 | struct inode *root; | ||
761 | __u64 cno; | ||
762 | int err; | ||
763 | |||
764 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | ||
765 | if (!sbi) | ||
766 | return -ENOMEM; | ||
767 | |||
768 | sb->s_fs_info = sbi; | ||
769 | |||
770 | get_nilfs(nilfs); | ||
771 | sbi->s_nilfs = nilfs; | ||
772 | sbi->s_super = sb; | ||
773 | |||
774 | err = init_nilfs(nilfs, sbi, (char *)data); | ||
775 | if (err) | ||
776 | goto failed_sbi; | ||
777 | |||
778 | spin_lock_init(&sbi->s_inode_lock); | ||
779 | INIT_LIST_HEAD(&sbi->s_dirty_files); | ||
780 | INIT_LIST_HEAD(&sbi->s_list); | ||
781 | |||
782 | /* | ||
783 | * Following initialization is overlapped because | ||
784 | * nilfs_sb_info structure has been cleared at the beginning. | ||
785 | * But we reserve them to keep our interest and make ready | ||
786 | * for the future change. | ||
787 | */ | ||
788 | get_random_bytes(&sbi->s_next_generation, | ||
789 | sizeof(sbi->s_next_generation)); | ||
790 | spin_lock_init(&sbi->s_next_gen_lock); | ||
791 | |||
792 | sb->s_op = &nilfs_sops; | ||
793 | sb->s_export_op = &nilfs_export_ops; | ||
794 | sb->s_root = NULL; | ||
795 | sb->s_time_gran = 1; | ||
796 | |||
797 | if (!nilfs_loaded(nilfs)) { | ||
798 | err = load_nilfs(nilfs, sbi); | ||
799 | if (err) | ||
800 | goto failed_sbi; | ||
801 | } | ||
802 | cno = nilfs_last_cno(nilfs); | ||
803 | |||
804 | if (sb->s_flags & MS_RDONLY) { | ||
805 | if (nilfs_test_opt(sbi, SNAPSHOT)) { | ||
806 | err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, | ||
807 | sbi->s_snapshot_cno); | ||
808 | if (err < 0) | ||
809 | goto failed_sbi; | ||
810 | if (!err) { | ||
811 | printk(KERN_ERR | ||
812 | "NILFS: The specified checkpoint is " | ||
813 | "not a snapshot " | ||
814 | "(checkpoint number=%llu).\n", | ||
815 | (unsigned long long)sbi->s_snapshot_cno); | ||
816 | err = -EINVAL; | ||
817 | goto failed_sbi; | ||
818 | } | ||
819 | cno = sbi->s_snapshot_cno; | ||
820 | } else | ||
821 | /* Read-only mount */ | ||
822 | sbi->s_snapshot_cno = cno; | ||
823 | } | ||
824 | |||
825 | err = nilfs_attach_checkpoint(sbi, cno); | ||
826 | if (err) { | ||
827 | printk(KERN_ERR "NILFS: error loading a checkpoint" | ||
828 | " (checkpoint number=%llu).\n", (unsigned long long)cno); | ||
829 | goto failed_sbi; | ||
830 | } | ||
831 | |||
832 | if (!(sb->s_flags & MS_RDONLY)) { | ||
833 | err = nilfs_attach_segment_constructor(sbi); | ||
834 | if (err) | ||
835 | goto failed_checkpoint; | ||
836 | } | ||
837 | |||
838 | root = nilfs_iget(sb, NILFS_ROOT_INO); | ||
839 | if (IS_ERR(root)) { | ||
840 | printk(KERN_ERR "NILFS: get root inode failed\n"); | ||
841 | err = PTR_ERR(root); | ||
842 | goto failed_segctor; | ||
843 | } | ||
844 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | ||
845 | iput(root); | ||
846 | printk(KERN_ERR "NILFS: corrupt root inode.\n"); | ||
847 | err = -EINVAL; | ||
848 | goto failed_segctor; | ||
849 | } | ||
850 | sb->s_root = d_alloc_root(root); | ||
851 | if (!sb->s_root) { | ||
852 | iput(root); | ||
853 | printk(KERN_ERR "NILFS: get root dentry failed\n"); | ||
854 | err = -ENOMEM; | ||
855 | goto failed_segctor; | ||
856 | } | ||
857 | |||
858 | if (!(sb->s_flags & MS_RDONLY)) { | ||
859 | down_write(&nilfs->ns_sem); | ||
860 | nilfs_setup_super(sbi); | ||
861 | up_write(&nilfs->ns_sem); | ||
862 | } | ||
863 | |||
864 | err = nilfs_mark_recovery_complete(sbi); | ||
865 | if (unlikely(err)) { | ||
866 | printk(KERN_ERR "NILFS: recovery failed.\n"); | ||
867 | goto failed_root; | ||
868 | } | ||
869 | |||
870 | return 0; | ||
871 | |||
872 | failed_root: | ||
873 | dput(sb->s_root); | ||
874 | sb->s_root = NULL; | ||
875 | |||
876 | failed_segctor: | ||
877 | nilfs_detach_segment_constructor(sbi); | ||
878 | |||
879 | failed_checkpoint: | ||
880 | nilfs_detach_checkpoint(sbi); | ||
881 | |||
882 | failed_sbi: | ||
883 | put_nilfs(nilfs); | ||
884 | sb->s_fs_info = NULL; | ||
885 | kfree(sbi); | ||
886 | return err; | ||
887 | } | ||
888 | |||
889 | static int nilfs_remount(struct super_block *sb, int *flags, char *data) | ||
890 | { | ||
891 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
892 | struct nilfs_super_block *sbp; | ||
893 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
894 | unsigned long old_sb_flags; | ||
895 | struct nilfs_mount_options old_opts; | ||
896 | int err; | ||
897 | |||
898 | old_sb_flags = sb->s_flags; | ||
899 | old_opts.mount_opt = sbi->s_mount_opt; | ||
900 | old_opts.snapshot_cno = sbi->s_snapshot_cno; | ||
901 | |||
902 | if (!parse_options(data, sb)) { | ||
903 | err = -EINVAL; | ||
904 | goto restore_opts; | ||
905 | } | ||
906 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL); | ||
907 | |||
908 | if ((*flags & MS_RDONLY) && | ||
909 | sbi->s_snapshot_cno != old_opts.snapshot_cno) { | ||
910 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
911 | "remount to a different snapshot. \n", | ||
912 | sb->s_id); | ||
913 | err = -EINVAL; | ||
914 | goto restore_opts; | ||
915 | } | ||
916 | |||
917 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | ||
918 | goto out; | ||
919 | if (*flags & MS_RDONLY) { | ||
920 | /* Shutting down the segment constructor */ | ||
921 | nilfs_detach_segment_constructor(sbi); | ||
922 | sb->s_flags |= MS_RDONLY; | ||
923 | |||
924 | sbi->s_snapshot_cno = nilfs_last_cno(nilfs); | ||
925 | /* nilfs_set_opt(sbi, SNAPSHOT); */ | ||
926 | |||
927 | /* | ||
928 | * Remounting a valid RW partition RDONLY, so set | ||
929 | * the RDONLY flag and then mark the partition as valid again. | ||
930 | */ | ||
931 | down_write(&nilfs->ns_sem); | ||
932 | sbp = nilfs->ns_sbp[0]; | ||
933 | if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && | ||
934 | (nilfs->ns_mount_state & NILFS_VALID_FS)) | ||
935 | sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); | ||
936 | sbp->s_mtime = cpu_to_le64(get_seconds()); | ||
937 | nilfs_commit_super(sbi, 1); | ||
938 | up_write(&nilfs->ns_sem); | ||
939 | } else { | ||
940 | /* | ||
941 | * Mounting a RDONLY partition read-write, so reread and | ||
942 | * store the current valid flag. (It may have been changed | ||
943 | * by fsck since we originally mounted the partition.) | ||
944 | */ | ||
945 | down(&sb->s_bdev->bd_mount_sem); | ||
946 | /* Check existing RW-mount */ | ||
947 | if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) { | ||
948 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
949 | "remount because a RW-mount exists.\n", | ||
950 | sb->s_id); | ||
951 | err = -EBUSY; | ||
952 | goto rw_remount_failed; | ||
953 | } | ||
954 | if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) { | ||
955 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
956 | "remount because the current RO-mount is not " | ||
957 | "the latest one.\n", | ||
958 | sb->s_id); | ||
959 | err = -EINVAL; | ||
960 | goto rw_remount_failed; | ||
961 | } | ||
962 | sb->s_flags &= ~MS_RDONLY; | ||
963 | nilfs_clear_opt(sbi, SNAPSHOT); | ||
964 | sbi->s_snapshot_cno = 0; | ||
965 | |||
966 | err = nilfs_attach_segment_constructor(sbi); | ||
967 | if (err) | ||
968 | goto rw_remount_failed; | ||
969 | |||
970 | down_write(&nilfs->ns_sem); | ||
971 | nilfs_setup_super(sbi); | ||
972 | up_write(&nilfs->ns_sem); | ||
973 | |||
974 | up(&sb->s_bdev->bd_mount_sem); | ||
975 | } | ||
976 | out: | ||
977 | return 0; | ||
978 | |||
979 | rw_remount_failed: | ||
980 | up(&sb->s_bdev->bd_mount_sem); | ||
981 | restore_opts: | ||
982 | sb->s_flags = old_sb_flags; | ||
983 | sbi->s_mount_opt = old_opts.mount_opt; | ||
984 | sbi->s_snapshot_cno = old_opts.snapshot_cno; | ||
985 | return err; | ||
986 | } | ||
987 | |||
/*
 * Mount-identification data handed from nilfs_get_sb() to the sget()
 * test/set callbacks through their opaque data pointer.
 */
struct nilfs_super_data {
	struct block_device *bdev;	/* device being mounted */
	__u64 cno;		/* checkpoint number; 0 means "latest" */
	int flags;		/* requested mount flags (MS_RDONLY etc.) */
};
993 | |||
/**
 * nilfs_identify - pre-read mount options needed to identify mount instance
 * @data: mount options
 * @sd: nilfs_super_data
 *
 * Scans the option string for the "snapshot" option only, storing the
 * requested checkpoint number in @sd->cno.  The string is restored to
 * its original form afterwards so the full option parsing can run on
 * it later.  Returns 0 on success, nonzero on an invalid option.
 */
static int nilfs_identify(char *data, struct nilfs_super_data *sd)
{
	char *p, *options = data;
	substring_t args[MAX_OPT_ARGS];
	int option, token;
	int ret = 0;

	do {
		p = strsep(&options, ",");
		if (p != NULL && *p) {
			token = match_token(p, tokens, args);
			if (token == Opt_snapshot) {
				/* snapshot mounts must be read-only */
				if (!(sd->flags & MS_RDONLY))
					ret++;
				else {
					ret = match_int(&args[0], &option);
					if (!ret) {
						/* checkpoint numbers start at 1 */
						if (option > 0)
							sd->cno = option;
						else
							ret++;
					}
				}
			}
			if (ret)
				printk(KERN_ERR
				       "NILFS: invalid mount option: %s\n", p);
		}
		if (!options)
			break;
		BUG_ON(options == data);
		/* put back the comma that strsep() overwrote with '\0' */
		*(options - 1) = ',';
	} while (!ret);
	return ret;
}
1034 | |||
1035 | static int nilfs_set_bdev_super(struct super_block *s, void *data) | ||
1036 | { | ||
1037 | struct nilfs_super_data *sd = data; | ||
1038 | |||
1039 | s->s_bdev = sd->bdev; | ||
1040 | s->s_dev = s->s_bdev->bd_dev; | ||
1041 | return 0; | ||
1042 | } | ||
1043 | |||
1044 | static int nilfs_test_bdev_super(struct super_block *s, void *data) | ||
1045 | { | ||
1046 | struct nilfs_super_data *sd = data; | ||
1047 | |||
1048 | return s->s_bdev == sd->bdev; | ||
1049 | } | ||
1050 | |||
/*
 * sget() "test" callback for the phase-2 lookup: match an existing
 * super_block on the same device with a compatible mount mode.  An
 * R/W-mode super_block is always reusable; a read-only one matches
 * only when it refers to the same snapshot checkpoint.
 */
static int nilfs_test_bdev_super2(struct super_block *s, void *data)
{
	struct nilfs_super_data *sd = data;
	int ret;

	if (s->s_bdev != sd->bdev)
		return 0;

	/* neither side is read-only: reuse the R/W super_block */
	if (!((s->s_flags | sd->flags) & MS_RDONLY))
		return 1; /* Reuse an old R/W-mode super_block */

	if (s->s_flags & sd->flags & MS_RDONLY) {
		/* trylock: if the lock is contended, simply report no match */
		if (down_read_trylock(&s->s_umount)) {
			ret = s->s_root &&
				(sd->cno == NILFS_SB(s)->s_snapshot_cno);
			up_read(&s->s_umount);
			/*
			 * This path is locked with sb_lock by sget().
			 * So, drop_super() causes deadlock.
			 */
			return ret;
		}
	}
	return 0;
}
1076 | |||
/*
 * nilfs_get_sb - mount entry point for the nilfs2 filesystem type
 * @fs_type: filesystem type descriptor
 * @flags: mount flags
 * @dev_name: device path
 * @data: mount option string
 * @mnt: vfsmount to attach the resulting super_block to
 *
 * Performs a two-phase sget() lookup: phase 1 finds any instance on
 * the device (to share the_nilfs), phase 2 finds or creates the
 * super_block for the specific mount mode / snapshot.  The whole
 * sequence runs under bd_mount_sem to serialize concurrent mounts.
 * Returns 0 on success or a negative error code.
 */
static int
nilfs_get_sb(struct file_system_type *fs_type, int flags,
	     const char *dev_name, void *data, struct vfsmount *mnt)
{
	struct nilfs_super_data sd;
	struct super_block *s, *s2;
	struct the_nilfs *nilfs = NULL;
	int err, need_to_close = 1;

	sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
	if (IS_ERR(sd.bdev))
		return PTR_ERR(sd.bdev);

	/*
	 * To get mount instance using sget() vfs-routine, NILFS needs
	 * much more information than normal filesystems to identify mount
	 * instance. For snapshot mounts, not only a mount type (ro-mount
	 * or rw-mount) but also a checkpoint number is required.
	 * The results are passed in sget() using nilfs_super_data.
	 */
	sd.cno = 0;
	sd.flags = flags;
	if (nilfs_identify((char *)data, &sd)) {
		err = -EINVAL;
		goto failed;
	}

	/*
	 * once the super is inserted into the list by sget, s_umount
	 * will protect the lockfs code from trying to start a snapshot
	 * while we are mounting
	 */
	down(&sd.bdev->bd_mount_sem);
	/* non-snapshot mounts are exclusive with the opposite mount mode */
	if (!sd.cno &&
	    (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
		err = (err < 0) ? : -EBUSY;
		goto failed_unlock;
	}

	/*
	 * Phase-1: search any existent instance and get the_nilfs
	 */
	s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
	if (IS_ERR(s))
		goto error_s;

	if (!s->s_root) {
		/* no instance on this device yet: allocate a fresh the_nilfs */
		err = -ENOMEM;
		nilfs = alloc_nilfs(sd.bdev);
		if (!nilfs)
			goto cancel_new;
	} else {
		struct nilfs_sb_info *sbi = NILFS_SB(s);

		/*
		 * s_umount protects super_block from unmount process;
		 * It covers pointers of nilfs_sb_info and the_nilfs.
		 */
		nilfs = sbi->s_nilfs;
		get_nilfs(nilfs);
		up_write(&s->s_umount);

		/*
		 * Phase-2: search specified snapshot or R/W mode super_block
		 */
		if (!sd.cno)
			/* trying to get the latest checkpoint. */
			sd.cno = nilfs_last_cno(nilfs);

		s2 = sget(fs_type, nilfs_test_bdev_super2,
			  nilfs_set_bdev_super, &sd);
		deactivate_super(s);
		/*
		 * Although deactivate_super() invokes close_bdev_exclusive() at
		 * kill_block_super().  Here, s is an existent mount; we need
		 * one more close_bdev_exclusive() call.
		 */
		s = s2;
		if (IS_ERR(s))
			goto error_s;
	}

	if (!s->s_root) {
		/* newly created super_block: fill it in */
		char b[BDEVNAME_SIZE];

		s->s_flags = flags;
		strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
		sb_set_blocksize(s, block_size(sd.bdev));

		err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs);
		if (err)
			goto cancel_new;

		s->s_flags |= MS_ACTIVE;
		/* the new super_block now owns the bdev reference */
		need_to_close = 0;
	} else if (!(s->s_flags & MS_RDONLY)) {
		err = -EBUSY;
	}

	up(&sd.bdev->bd_mount_sem);
	put_nilfs(nilfs);
	if (need_to_close)
		close_bdev_exclusive(sd.bdev, flags);
	simple_set_mnt(mnt, s);
	return 0;

 error_s:
	up(&sd.bdev->bd_mount_sem);
	if (nilfs)
		put_nilfs(nilfs);
	close_bdev_exclusive(sd.bdev, flags);
	return PTR_ERR(s);

 failed_unlock:
	up(&sd.bdev->bd_mount_sem);
 failed:
	close_bdev_exclusive(sd.bdev, flags);

	return err;

 cancel_new:
	/* Abandoning the newly allocated superblock */
	up(&sd.bdev->bd_mount_sem);
	if (nilfs)
		put_nilfs(nilfs);
	up_write(&s->s_umount);
	deactivate_super(s);
	/*
	 * deactivate_super() invokes close_bdev_exclusive().
	 * We must finish all post-cleaning before this call;
	 * put_nilfs() and unlocking bd_mount_sem need the block device.
	 */
	return err;
}
1211 | |||
/*
 * sget() "test" callback used by test_exclusive_mount(): match a
 * non-snapshot mount on the same device whose read-only state equals
 * the one requested in sd->flags.  Snapshot mounts are ignored.
 */
static int nilfs_test_bdev_super3(struct super_block *s, void *data)
{
	struct nilfs_super_data *sd = data;
	int ret;

	if (s->s_bdev != sd->bdev)
		return 0;
	if (down_read_trylock(&s->s_umount)) {
		ret = (s->s_flags & MS_RDONLY) && s->s_root &&
			nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
		up_read(&s->s_umount);
		if (ret)
			return 0; /* ignore snapshot mounts */
	}
	/* match when the RDONLY bits agree */
	return !((sd->flags ^ s->s_flags) & MS_RDONLY);
}
1228 | |||
/*
 * sget() "set" callback used by test_exclusive_mount(): always fails,
 * so sget() can never create a new super_block during the probe — it
 * may only find an existing one.
 */
static int __false_bdev_super(struct super_block *s, void *data)
{
#if 0 /* XXX: workaround for lock debug. This is not good idea */
	up_write(&s->s_umount);
#endif
	return -EFAULT;
}
1236 | |||
/**
 * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
 * @fs_type: filesystem type
 * @bdev: block device
 * @flags: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
 *
 * This function must be called within a section protected by bd_mount_sem.
 *
 * Returns 1 if such a mount exists, 0 if not, or a negative error code
 * propagated from sget().
 */
static int test_exclusive_mount(struct file_system_type *fs_type,
				struct block_device *bdev, int flags)
{
	struct super_block *s;
	struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };

	/* __false_bdev_super guarantees no super_block is ever created */
	s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
	if (IS_ERR(s)) {
		if (PTR_ERR(s) != -EFAULT)
			return PTR_ERR(s);
		return 0; /* Not found */
	}
	/* drop the reference sget() took on the matched super_block */
	up_write(&s->s_umount);
	deactivate_super(s);
	return 1; /* Found */
}
1262 | |||
/* Filesystem type registration; mount entry point is nilfs_get_sb(). */
struct file_system_type nilfs_fs_type = {
	.owner = THIS_MODULE,
	.name = "nilfs2",
	.get_sb = nilfs_get_sb,
	.kill_sb = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV,
};
1270 | |||
/*
 * Module init: create the slab caches used by nilfs2, then register
 * the filesystem type.  On any failure, everything created so far is
 * torn down in reverse order via the goto chain.
 */
static int __init init_nilfs_fs(void)
{
	int err;

	err = nilfs_init_inode_cache();
	if (err)
		goto failed;

	err = nilfs_init_transaction_cache();
	if (err)
		goto failed_inode_cache;

	err = nilfs_init_segbuf_cache();
	if (err)
		goto failed_transaction_cache;

	err = nilfs_btree_path_cache_init();
	if (err)
		goto failed_segbuf_cache;

	/* registration goes last so mounts only see fully set-up caches */
	err = register_filesystem(&nilfs_fs_type);
	if (err)
		goto failed_btree_path_cache;

	return 0;

 failed_btree_path_cache:
	nilfs_btree_path_cache_destroy();

 failed_segbuf_cache:
	nilfs_destroy_segbuf_cache();

 failed_transaction_cache:
	nilfs_destroy_transaction_cache();

 failed_inode_cache:
	nilfs_destroy_inode_cache();

 failed:
	return err;
}
1312 | |||
1313 | static void __exit exit_nilfs_fs(void) | ||
1314 | { | ||
1315 | nilfs_destroy_segbuf_cache(); | ||
1316 | nilfs_destroy_transaction_cache(); | ||
1317 | nilfs_destroy_inode_cache(); | ||
1318 | nilfs_btree_path_cache_destroy(); | ||
1319 | unregister_filesystem(&nilfs_fs_type); | ||
1320 | } | ||
1321 | |||
/* Module entry and exit points. */
module_init(init_nilfs_fs)
module_exit(exit_nilfs_fs)
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c new file mode 100644 index 000000000000..33400cf0bbe2 --- /dev/null +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -0,0 +1,637 @@ | |||
1 | /* | ||
2 | * the_nilfs.c - the_nilfs shared structure. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/buffer_head.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/blkdev.h> | ||
27 | #include <linux/backing-dev.h> | ||
28 | #include <linux/crc32.h> | ||
29 | #include "nilfs.h" | ||
30 | #include "segment.h" | ||
31 | #include "alloc.h" | ||
32 | #include "cpfile.h" | ||
33 | #include "sufile.h" | ||
34 | #include "dat.h" | ||
35 | #include "seglist.h" | ||
36 | #include "segbuf.h" | ||
37 | |||
/*
 * Record the position of the latest log under ns_last_segment_lock:
 * the start block of the last partial segment, its sequence number,
 * and the last checkpoint number.
 */
void nilfs_set_last_segment(struct the_nilfs *nilfs,
			    sector_t start_blocknr, u64 seq, __u64 cno)
{
	spin_lock(&nilfs->ns_last_segment_lock);
	nilfs->ns_last_pseg = start_blocknr;
	nilfs->ns_last_seq = seq;
	nilfs->ns_last_cno = cno;
	spin_unlock(&nilfs->ns_last_segment_lock);
}
47 | |||
/**
 * alloc_nilfs - allocate the_nilfs structure
 * @bdev: block device to which the_nilfs is related
 *
 * alloc_nilfs() allocates memory for the_nilfs and
 * initializes its reference count and locks.
 *
 * Return Value: On success, pointer to the_nilfs is returned.
 * On error, NULL is returned.
 */
struct the_nilfs *alloc_nilfs(struct block_device *bdev)
{
	struct the_nilfs *nilfs;

	/* kzalloc leaves all fields not set below zero-initialized */
	nilfs = kzalloc(sizeof(*nilfs), GFP_KERNEL);
	if (!nilfs)
		return NULL;

	nilfs->ns_bdev = bdev;
	atomic_set(&nilfs->ns_count, 1);
	/* NOTE(review): -1 appears to mean "no writer attached yet" —
	 * confirm against the ns_writer_mutex users */
	atomic_set(&nilfs->ns_writer_refcount, -1);
	atomic_set(&nilfs->ns_ndirtyblks, 0);
	init_rwsem(&nilfs->ns_sem);
	mutex_init(&nilfs->ns_writer_mutex);
	INIT_LIST_HEAD(&nilfs->ns_supers);
	spin_lock_init(&nilfs->ns_last_segment_lock);
	nilfs->ns_gc_inodes_h = NULL;
	init_rwsem(&nilfs->ns_segctor_sem);

	return nilfs;
}
79 | |||
/**
 * put_nilfs - release a reference to the_nilfs
 * @nilfs: the_nilfs structure to be released
 *
 * put_nilfs() decrements a reference counter of the_nilfs.
 * If the reference count reaches zero, the_nilfs is freed.
 */
void put_nilfs(struct the_nilfs *nilfs)
{
	if (!atomic_dec_and_test(&nilfs->ns_count))
		return;
	/*
	 * Increment of ns_count never occur below because the caller
	 * of get_nilfs() holds at least one reference to the_nilfs.
	 * Thus its exclusion control is not required here.
	 */
	might_sleep();
	/* metadata files exist only once load_nilfs() has succeeded */
	if (nilfs_loaded(nilfs)) {
		nilfs_mdt_clear(nilfs->ns_sufile);
		nilfs_mdt_destroy(nilfs->ns_sufile);
		nilfs_mdt_clear(nilfs->ns_cpfile);
		nilfs_mdt_destroy(nilfs->ns_cpfile);
		nilfs_mdt_clear(nilfs->ns_dat);
		nilfs_mdt_destroy(nilfs->ns_dat);
		/* XXX: how and when to clear nilfs->ns_gc_dat? */
		nilfs_mdt_destroy(nilfs->ns_gc_dat);
	}
	/* super block buffers exist only once init_nilfs() has succeeded */
	if (nilfs_init(nilfs)) {
		nilfs_destroy_gccache(nilfs);
		brelse(nilfs->ns_sbh[0]);
		brelse(nilfs->ns_sbh[1]);
	}
	kfree(nilfs);
}
114 | |||
/*
 * nilfs_load_super_root - construct the DAT, cpfile and sufile metadata
 * files from an on-disk super root block.
 * @nilfs: the_nilfs being loaded
 * @sbi: superblock info used to read the super root block
 * @sr_block: block address of the super root
 *
 * Note: the success path intentionally falls through the "failed"
 * label, which only releases bh_sr and returns err (0 at that point).
 * Returns 0 on success or a negative error code.
 */
static int nilfs_load_super_root(struct the_nilfs *nilfs,
				 struct nilfs_sb_info *sbi, sector_t sr_block)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *raw_sr;
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	unsigned dat_entry_size, segment_usage_size, checkpoint_size;
	unsigned inode_size;
	int err;

	err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1);
	if (unlikely(err))
		return err;

	/* entry sizes come from the primary super block, under ns_sem */
	down_read(&nilfs->ns_sem);
	dat_entry_size = le16_to_cpu(sbp[0]->s_dat_entry_size);
	checkpoint_size = le16_to_cpu(sbp[0]->s_checkpoint_size);
	segment_usage_size = le16_to_cpu(sbp[0]->s_segment_usage_size);
	up_read(&nilfs->ns_sem);

	inode_size = nilfs->ns_inode_size;

	err = -ENOMEM;
	nilfs->ns_dat = nilfs_mdt_new(
		nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
	if (unlikely(!nilfs->ns_dat))
		goto failed;

	/* ns_gc_dat is a shadow of ns_dat used during garbage collection */
	nilfs->ns_gc_dat = nilfs_mdt_new(
		nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
	if (unlikely(!nilfs->ns_gc_dat))
		goto failed_dat;

	nilfs->ns_cpfile = nilfs_mdt_new(
		nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP);
	if (unlikely(!nilfs->ns_cpfile))
		goto failed_gc_dat;

	nilfs->ns_sufile = nilfs_mdt_new(
		nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP);
	if (unlikely(!nilfs->ns_sufile))
		goto failed_cpfile;

	err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size);
	if (unlikely(err))
		goto failed_sufile;

	err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size);
	if (unlikely(err))
		goto failed_sufile;

	nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
	nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
				 sizeof(struct nilfs_cpfile_header));
	nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size,
				 sizeof(struct nilfs_sufile_header));

	/* read the three metadata inodes stored inside the super root */
	err = nilfs_mdt_read_inode_direct(
		nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size));
	if (unlikely(err))
		goto failed_sufile;

	err = nilfs_mdt_read_inode_direct(
		nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size));
	if (unlikely(err))
		goto failed_sufile;

	err = nilfs_mdt_read_inode_direct(
		nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size));
	if (unlikely(err))
		goto failed_sufile;

	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
	nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime);

	/* success also exits through here (err == 0) */
 failed:
	brelse(bh_sr);
	return err;

 failed_sufile:
	nilfs_mdt_destroy(nilfs->ns_sufile);

 failed_cpfile:
	nilfs_mdt_destroy(nilfs->ns_cpfile);

 failed_gc_dat:
	nilfs_mdt_destroy(nilfs->ns_gc_dat);

 failed_dat:
	nilfs_mdt_destroy(nilfs->ns_dat);
	goto failed;
}
207 | |||
/* Reset a recovery descriptor to a pristine, empty state. */
static void nilfs_init_recovery_info(struct nilfs_recovery_info *ri)
{
	memset(ri, 0, sizeof(*ri));
	INIT_LIST_HEAD(&ri->ri_used_segments);
}
213 | |||
/* Release the segment list accumulated during recovery. */
static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
{
	nilfs_dispose_segment_list(&ri->ri_used_segments);
}
218 | |||
219 | /** | ||
220 | * load_nilfs - load and recover the nilfs | ||
221 | * @nilfs: the_nilfs structure to be released | ||
222 | * @sbi: nilfs_sb_info used to recover past segment | ||
223 | * | ||
224 | * load_nilfs() searches and load the latest super root, | ||
225 | * attaches the last segment, and does recovery if needed. | ||
226 | * The caller must call this exclusively for simultaneous mounts. | ||
227 | */ | ||
228 | int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | ||
229 | { | ||
230 | struct nilfs_recovery_info ri; | ||
231 | unsigned int s_flags = sbi->s_super->s_flags; | ||
232 | int really_read_only = bdev_read_only(nilfs->ns_bdev); | ||
233 | unsigned valid_fs; | ||
234 | int err = 0; | ||
235 | |||
236 | nilfs_init_recovery_info(&ri); | ||
237 | |||
238 | down_write(&nilfs->ns_sem); | ||
239 | valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); | ||
240 | up_write(&nilfs->ns_sem); | ||
241 | |||
242 | if (!valid_fs && (s_flags & MS_RDONLY)) { | ||
243 | printk(KERN_INFO "NILFS: INFO: recovery " | ||
244 | "required for readonly filesystem.\n"); | ||
245 | if (really_read_only) { | ||
246 | printk(KERN_ERR "NILFS: write access " | ||
247 | "unavailable, cannot proceed.\n"); | ||
248 | err = -EROFS; | ||
249 | goto failed; | ||
250 | } | ||
251 | printk(KERN_INFO "NILFS: write access will " | ||
252 | "be enabled during recovery.\n"); | ||
253 | sbi->s_super->s_flags &= ~MS_RDONLY; | ||
254 | } | ||
255 | |||
256 | err = nilfs_search_super_root(nilfs, sbi, &ri); | ||
257 | if (unlikely(err)) { | ||
258 | printk(KERN_ERR "NILFS: error searching super root.\n"); | ||
259 | goto failed; | ||
260 | } | ||
261 | |||
262 | err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); | ||
263 | if (unlikely(err)) { | ||
264 | printk(KERN_ERR "NILFS: error loading super root.\n"); | ||
265 | goto failed; | ||
266 | } | ||
267 | |||
268 | if (!valid_fs) { | ||
269 | err = nilfs_recover_logical_segments(nilfs, sbi, &ri); | ||
270 | if (unlikely(err)) { | ||
271 | nilfs_mdt_destroy(nilfs->ns_cpfile); | ||
272 | nilfs_mdt_destroy(nilfs->ns_sufile); | ||
273 | nilfs_mdt_destroy(nilfs->ns_dat); | ||
274 | goto failed; | ||
275 | } | ||
276 | if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) | ||
277 | sbi->s_super->s_dirt = 1; | ||
278 | } | ||
279 | |||
280 | set_nilfs_loaded(nilfs); | ||
281 | |||
282 | failed: | ||
283 | nilfs_clear_recovery_info(&ri); | ||
284 | sbi->s_super->s_flags = s_flags; | ||
285 | return err; | ||
286 | } | ||
287 | |||
288 | static unsigned long long nilfs_max_size(unsigned int blkbits) | ||
289 | { | ||
290 | unsigned int max_bits; | ||
291 | unsigned long long res = MAX_LFS_FILESIZE; /* page cache limit */ | ||
292 | |||
293 | max_bits = blkbits + NILFS_BMAP_KEY_BIT; /* bmap size limit */ | ||
294 | if (max_bits < 64) | ||
295 | res = min_t(unsigned long long, res, (1ULL << max_bits) - 1); | ||
296 | return res; | ||
297 | } | ||
298 | |||
/*
 * nilfs_store_disk_layout - cache disk layout parameters from a super block
 * @nilfs: the_nilfs being initialized
 * @sbp: validated on-disk super block (little-endian fields)
 *
 * Copies the layout fields into the_nilfs after checking the revision
 * level, super block size, and minimum segment length.
 * Returns 0 on success or -EINVAL on an unusable super block.
 */
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
				   struct nilfs_super_block *sbp)
{
	if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) {
		printk(KERN_ERR "NILFS: revision mismatch "
		       "(superblock rev.=%d.%d, current rev.=%d.%d). "
		       "Please check the version of mkfs.nilfs.\n",
		       le32_to_cpu(sbp->s_rev_level),
		       le16_to_cpu(sbp->s_minor_rev_level),
		       NILFS_CURRENT_REV, NILFS_MINOR_REV);
		return -EINVAL;
	}
	nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes);
	if (nilfs->ns_sbsize > BLOCK_SIZE)
		return -EINVAL;

	nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size);
	nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);

	nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
	if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
		printk(KERN_ERR "NILFS: too short segment. \n");
		return -EINVAL;
	}

	nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
	nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
	nilfs->ns_r_segments_percentage =
		le32_to_cpu(sbp->s_r_segments_percentage);
	/* reserved segments: the configured percentage, but no fewer
	 * than NILFS_MIN_NRSVSEGS */
	nilfs->ns_nrsvsegs =
		max_t(unsigned long, NILFS_MIN_NRSVSEGS,
		      DIV_ROUND_UP(nilfs->ns_nsegments *
				   nilfs->ns_r_segments_percentage, 100));
	nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
	return 0;
}
335 | |||
/*
 * nilfs_valid_sb - verify a candidate super block
 * @sbp: candidate super block (may be NULL)
 *
 * Checks the magic number, a sane s_bytes, and the CRC32 over the
 * first s_bytes of the block.  The stored checksum field itself is
 * substituted with four zero bytes (the static "sum" array) while the
 * CRC is recomputed, matching how it was written.
 * Returns nonzero iff the super block is valid.
 */
static int nilfs_valid_sb(struct nilfs_super_block *sbp)
{
	static unsigned char sum[4];
	const int sumoff = offsetof(struct nilfs_super_block, s_sum);
	size_t bytes;
	u32 crc;

	if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
		return 0;
	bytes = le16_to_cpu(sbp->s_bytes);
	if (bytes > BLOCK_SIZE)
		return 0;
	/* CRC of [0, sumoff) + 4 zero bytes + (sumoff+4, bytes) */
	crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
		       sumoff);
	crc = crc32_le(crc, sum, 4);
	crc = crc32_le(crc, (unsigned char *)sbp + sumoff + 4,
		       bytes - sumoff - 4);
	return crc == le32_to_cpu(sbp->s_sum);
}
355 | |||
356 | static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) | ||
357 | { | ||
358 | return offset < ((le64_to_cpu(sbp->s_nsegments) * | ||
359 | le32_to_cpu(sbp->s_blocks_per_segment)) << | ||
360 | (le32_to_cpu(sbp->s_log_block_size) + 10)); | ||
361 | } | ||
362 | |||
363 | static void nilfs_release_super_block(struct the_nilfs *nilfs) | ||
364 | { | ||
365 | int i; | ||
366 | |||
367 | for (i = 0; i < 2; i++) { | ||
368 | if (nilfs->ns_sbp[i]) { | ||
369 | brelse(nilfs->ns_sbh[i]); | ||
370 | nilfs->ns_sbh[i] = NULL; | ||
371 | nilfs->ns_sbp[i] = NULL; | ||
372 | } | ||
373 | } | ||
374 | } | ||
375 | |||
376 | void nilfs_fall_back_super_block(struct the_nilfs *nilfs) | ||
377 | { | ||
378 | brelse(nilfs->ns_sbh[0]); | ||
379 | nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; | ||
380 | nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; | ||
381 | nilfs->ns_sbh[1] = NULL; | ||
382 | nilfs->ns_sbp[1] = NULL; | ||
383 | } | ||
384 | |||
385 | void nilfs_swap_super_block(struct the_nilfs *nilfs) | ||
386 | { | ||
387 | struct buffer_head *tsbh = nilfs->ns_sbh[0]; | ||
388 | struct nilfs_super_block *tsbp = nilfs->ns_sbp[0]; | ||
389 | |||
390 | nilfs->ns_sbh[0] = nilfs->ns_sbh[1]; | ||
391 | nilfs->ns_sbp[0] = nilfs->ns_sbp[1]; | ||
392 | nilfs->ns_sbh[1] = tsbh; | ||
393 | nilfs->ns_sbp[1] = tsbp; | ||
394 | } | ||
395 | |||
/*
 * nilfs_load_super_block - read and select the active super block
 * @nilfs: the_nilfs being initialized
 * @sb: VFS super block (used for device reads and messages)
 * @blocksize: block size to read with
 * @sbpp: out parameter, set to the selected (primary) super block
 *
 * Reads both the primary and secondary super block copies, validates
 * them, and if the secondary one is valid and newer (or the only valid
 * one) swaps it into the primary slot.  Returns 0 on success, -EIO
 * when neither copy can be read, or -EINVAL when neither is valid.
 */
static int nilfs_load_super_block(struct the_nilfs *nilfs,
				  struct super_block *sb, int blocksize,
				  struct nilfs_super_block **sbpp)
{
	struct nilfs_super_block **sbp = nilfs->ns_sbp;
	struct buffer_head **sbh = nilfs->ns_sbh;
	u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
	int valid[2], swp = 0;

	sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
					&sbh[0]);
	sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);

	if (!sbp[0]) {
		if (!sbp[1]) {
			printk(KERN_ERR "NILFS: unable to read superblock\n");
			return -EIO;
		}
		printk(KERN_WARNING
		       "NILFS warning: unable to read primary superblock\n");
	} else if (!sbp[1])
		printk(KERN_WARNING
		       "NILFS warning: unable to read secondary superblock\n");

	valid[0] = nilfs_valid_sb(sbp[0]);
	valid[1] = nilfs_valid_sb(sbp[1]);
	/* prefer the secondary copy when it is valid and strictly newer,
	 * or when it is the only valid one */
	swp = valid[1] &&
		(!valid[0] ||
		 le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime));

	/* a "secondary" block located inside the fs area is bogus; drop it */
	if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) {
		brelse(sbh[1]);
		sbh[1] = NULL;
		sbp[1] = NULL;
		swp = 0;
	}
	if (!valid[swp]) {
		nilfs_release_super_block(nilfs);
		printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n",
		       sb->s_id);
		return -EINVAL;
	}

	if (swp) {
		printk(KERN_WARNING "NILFS warning: broken superblock. "
		       "using spare superblock.\n");
		nilfs_swap_super_block(nilfs);
	}

	nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime);
	nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0;
	/* index 1 only when the secondary is valid and was NOT swapped in */
	nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
	*sbpp = sbp[0];
	return 0;
}
451 | |||
452 | /** | ||
453 | * init_nilfs - initialize a NILFS instance. | ||
454 | * @nilfs: the_nilfs structure | ||
455 | * @sbi: nilfs_sb_info | ||
456 | * @sb: super block | ||
457 | * @data: mount options | ||
458 | * | ||
459 | * init_nilfs() performs common initialization per block device (e.g. | ||
460 | * reading the super block, getting disk layout information, initializing | ||
461 | * shared fields in the_nilfs). It takes on some portion of the jobs | ||
462 | * typically done by a fill_super() routine. This division arises from | ||
463 | * the nature that multiple NILFS instances may be simultaneously | ||
464 | * mounted on a device. | ||
465 | * For multiple mounts on the same device, only the first mount | ||
466 | * invokes these tasks. | ||
467 | * | ||
468 | * Return Value: On success, 0 is returned. On error, a negative error | ||
469 | * code is returned. | ||
470 | */ | ||
471 | int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | ||
472 | { | ||
473 | struct super_block *sb = sbi->s_super; | ||
474 | struct nilfs_super_block *sbp; | ||
475 | struct backing_dev_info *bdi; | ||
476 | int blocksize; | ||
477 | int err; | ||
478 | |||
479 | down_write(&nilfs->ns_sem); | ||
480 | if (nilfs_init(nilfs)) { | ||
481 | /* Load values from existing the_nilfs */ | ||
482 | sbp = nilfs->ns_sbp[0]; | ||
483 | err = nilfs_store_magic_and_option(sb, sbp, data); | ||
484 | if (err) | ||
485 | goto out; | ||
486 | |||
487 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | ||
488 | if (sb->s_blocksize != blocksize && | ||
489 | !sb_set_blocksize(sb, blocksize)) { | ||
490 | printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", | ||
491 | blocksize); | ||
492 | err = -EINVAL; | ||
493 | } | ||
494 | sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); | ||
495 | goto out; | ||
496 | } | ||
497 | |||
498 | blocksize = sb_min_blocksize(sb, BLOCK_SIZE); | ||
499 | if (!blocksize) { | ||
500 | printk(KERN_ERR "NILFS: unable to set blocksize\n"); | ||
501 | err = -EINVAL; | ||
502 | goto out; | ||
503 | } | ||
504 | err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); | ||
505 | if (err) | ||
506 | goto out; | ||
507 | |||
508 | err = nilfs_store_magic_and_option(sb, sbp, data); | ||
509 | if (err) | ||
510 | goto failed_sbh; | ||
511 | |||
512 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | ||
513 | if (sb->s_blocksize != blocksize) { | ||
514 | int hw_blocksize = bdev_hardsect_size(sb->s_bdev); | ||
515 | |||
516 | if (blocksize < hw_blocksize) { | ||
517 | printk(KERN_ERR | ||
518 | "NILFS: blocksize %d too small for device " | ||
519 | "(sector-size = %d).\n", | ||
520 | blocksize, hw_blocksize); | ||
521 | err = -EINVAL; | ||
522 | goto failed_sbh; | ||
523 | } | ||
524 | nilfs_release_super_block(nilfs); | ||
525 | sb_set_blocksize(sb, blocksize); | ||
526 | |||
527 | err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp); | ||
528 | if (err) | ||
529 | goto out; | ||
530 | /* not failed_sbh; sbh is released automatically | ||
531 | when reloading fails. */ | ||
532 | } | ||
533 | nilfs->ns_blocksize_bits = sb->s_blocksize_bits; | ||
534 | |||
535 | err = nilfs_store_disk_layout(nilfs, sbp); | ||
536 | if (err) | ||
537 | goto failed_sbh; | ||
538 | |||
539 | sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); | ||
540 | |||
541 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); | ||
542 | |||
543 | bdi = nilfs->ns_bdev->bd_inode_backing_dev_info; | ||
544 | if (!bdi) | ||
545 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; | ||
546 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; | ||
547 | |||
548 | /* Finding last segment */ | ||
549 | nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); | ||
550 | nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); | ||
551 | nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); | ||
552 | |||
553 | nilfs->ns_seg_seq = nilfs->ns_last_seq; | ||
554 | nilfs->ns_segnum = | ||
555 | nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); | ||
556 | nilfs->ns_cno = nilfs->ns_last_cno + 1; | ||
557 | if (nilfs->ns_segnum >= nilfs->ns_nsegments) { | ||
558 | printk(KERN_ERR "NILFS invalid last segment number.\n"); | ||
559 | err = -EINVAL; | ||
560 | goto failed_sbh; | ||
561 | } | ||
562 | /* Dummy values */ | ||
563 | nilfs->ns_free_segments_count = | ||
564 | nilfs->ns_nsegments - (nilfs->ns_segnum + 1); | ||
565 | |||
566 | /* Initialize gcinode cache */ | ||
567 | err = nilfs_init_gccache(nilfs); | ||
568 | if (err) | ||
569 | goto failed_sbh; | ||
570 | |||
571 | set_nilfs_init(nilfs); | ||
572 | err = 0; | ||
573 | out: | ||
574 | up_write(&nilfs->ns_sem); | ||
575 | return err; | ||
576 | |||
577 | failed_sbh: | ||
578 | nilfs_release_super_block(nilfs); | ||
579 | goto out; | ||
580 | } | ||
581 | |||
582 | int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) | ||
583 | { | ||
584 | struct inode *dat = nilfs_dat_inode(nilfs); | ||
585 | unsigned long ncleansegs; | ||
586 | int err; | ||
587 | |||
588 | down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
589 | err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); | ||
590 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | ||
591 | if (likely(!err)) | ||
592 | *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; | ||
593 | return err; | ||
594 | } | ||
595 | |||
596 | int nilfs_near_disk_full(struct the_nilfs *nilfs) | ||
597 | { | ||
598 | struct inode *sufile = nilfs->ns_sufile; | ||
599 | unsigned long ncleansegs, nincsegs; | ||
600 | int ret; | ||
601 | |||
602 | ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); | ||
603 | if (likely(!ret)) { | ||
604 | nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / | ||
605 | nilfs->ns_blocks_per_segment + 1; | ||
606 | if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) | ||
607 | ret++; | ||
608 | } | ||
609 | return ret; | ||
610 | } | ||
611 | |||
612 | int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, | ||
613 | int snapshot_mount) | ||
614 | { | ||
615 | struct nilfs_sb_info *sbi; | ||
616 | int ret = 0; | ||
617 | |||
618 | down_read(&nilfs->ns_sem); | ||
619 | if (cno == 0 || cno > nilfs->ns_cno) | ||
620 | goto out_unlock; | ||
621 | |||
622 | list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { | ||
623 | if (sbi->s_snapshot_cno == cno && | ||
624 | (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { | ||
625 | /* exclude read-only mounts */ | ||
626 | ret++; | ||
627 | break; | ||
628 | } | ||
629 | } | ||
630 | /* for protecting recent checkpoints */ | ||
631 | if (cno >= nilfs_last_cno(nilfs)) | ||
632 | ret++; | ||
633 | |||
634 | out_unlock: | ||
635 | up_read(&nilfs->ns_sem); | ||
636 | return ret; | ||
637 | } | ||
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h new file mode 100644 index 000000000000..30fe58778d05 --- /dev/null +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * the_nilfs.h - the_nilfs shared structure. | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #ifndef _THE_NILFS_H | ||
25 | #define _THE_NILFS_H | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/blkdev.h> | ||
31 | #include <linux/backing-dev.h> | ||
32 | #include "sb.h" | ||
33 | |||
/* Bit numbers for the_nilfs.ns_flags (manipulated via THE_NILFS_FNS below) */
enum {
	THE_NILFS_INIT = 0,     /* Information from super_block is set */
	THE_NILFS_LOADED,       /* Roll-back/roll-forward has done and
				   the latest checkpoint was loaded */
	THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
};
41 | |||
42 | /** | ||
43 | * struct the_nilfs - struct to supervise multiple nilfs mount points | ||
44 | * @ns_flags: flags | ||
45 | * @ns_count: reference count | ||
46 | * @ns_bdev: block device | ||
47 | * @ns_bdi: backing dev info | ||
48 | * @ns_writer: back pointer to writable nilfs_sb_info | ||
49 | * @ns_sem: semaphore for shared states | ||
50 | * @ns_writer_mutex: mutex protecting ns_writer attach/detach | ||
51 | * @ns_writer_refcount: number of referrers on ns_writer | ||
52 | * @ns_sbh: buffer heads of on-disk super blocks | ||
53 | * @ns_sbp: pointers to super block data | ||
54 | * @ns_sbwtime: previous write time of super blocks | ||
55 | * @ns_sbsize: size of valid data in super block | ||
56 | * @ns_supers: list of nilfs super block structs | ||
57 | * @ns_seg_seq: segment sequence counter | ||
58 | * @ns_segnum: index number of the latest full segment. | ||
59 | * @ns_nextnum: index number of the full segment index to be used next | ||
60 | * @ns_pseg_offset: offset of next partial segment in the current full segment | ||
61 | * @ns_cno: next checkpoint number | ||
62 | * @ns_ctime: write time of the last segment | ||
63 | * @ns_nongc_ctime: write time of the last segment not for cleaner operation | ||
64 | * @ns_ndirtyblks: Number of dirty data blocks | ||
65 | * @ns_last_segment_lock: lock protecting fields for the latest segment | ||
66 | * @ns_last_pseg: start block number of the latest segment | ||
67 | * @ns_last_seq: sequence value of the latest segment | ||
68 | * @ns_last_cno: checkpoint number of the latest segment | ||
69 | * @ns_prot_seq: least sequence number of segments which must not be reclaimed | ||
70 | * @ns_free_segments_count: counter of free segments | ||
71 | * @ns_segctor_sem: segment constructor semaphore | ||
72 | * @ns_dat: DAT file inode | ||
73 | * @ns_cpfile: checkpoint file inode | ||
74 | * @ns_sufile: segusage file inode | ||
75 | * @ns_gc_dat: shadow inode of the DAT file inode for GC | ||
76 | * @ns_gc_inodes: dummy inodes to keep live blocks | ||
77 | * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks | ||
78 | * @ns_blocksize_bits: bit length of block size | ||
79 | * @ns_nsegments: number of segments in filesystem | ||
80 | * @ns_blocks_per_segment: number of blocks per segment | ||
81 | * @ns_r_segments_percentage: reserved segments percentage | ||
82 | * @ns_nrsvsegs: number of reserved segments | ||
83 | * @ns_first_data_block: block number of first data block | ||
84 | * @ns_inode_size: size of on-disk inode | ||
85 | * @ns_first_ino: first not-special inode number | ||
86 | * @ns_crc_seed: seed value of CRC32 calculation | ||
87 | */ | ||
88 | struct the_nilfs { | ||
89 | unsigned long ns_flags; | ||
90 | atomic_t ns_count; | ||
91 | |||
92 | struct block_device *ns_bdev; | ||
93 | struct backing_dev_info *ns_bdi; | ||
94 | struct nilfs_sb_info *ns_writer; | ||
95 | struct rw_semaphore ns_sem; | ||
96 | struct mutex ns_writer_mutex; | ||
97 | atomic_t ns_writer_refcount; | ||
98 | |||
99 | /* | ||
100 | * used for | ||
101 | * - loading the latest checkpoint exclusively. | ||
102 | * - allocating a new full segment. | ||
103 | * - protecting s_dirt in the super_block struct | ||
104 | * (see nilfs_write_super) and the following fields. | ||
105 | */ | ||
106 | struct buffer_head *ns_sbh[2]; | ||
107 | struct nilfs_super_block *ns_sbp[2]; | ||
108 | time_t ns_sbwtime[2]; | ||
109 | unsigned ns_sbsize; | ||
110 | unsigned ns_mount_state; | ||
111 | struct list_head ns_supers; | ||
112 | |||
113 | /* | ||
114 | * Following fields are dedicated to a writable FS-instance. | ||
115 | * Except for the period seeking checkpoint, code outside the segment | ||
116 | * constructor must lock a segment semaphore while accessing these | ||
117 | * fields. | ||
118 | * The writable FS-instance is sole during a lifetime of the_nilfs. | ||
119 | */ | ||
120 | u64 ns_seg_seq; | ||
121 | __u64 ns_segnum; | ||
122 | __u64 ns_nextnum; | ||
123 | unsigned long ns_pseg_offset; | ||
124 | __u64 ns_cno; | ||
125 | time_t ns_ctime; | ||
126 | time_t ns_nongc_ctime; | ||
127 | atomic_t ns_ndirtyblks; | ||
128 | |||
129 | /* | ||
130 | * The following fields hold information on the latest partial segment | ||
131 | * written to disk with a super root. These fields are protected by | ||
132 | * ns_last_segment_lock. | ||
133 | */ | ||
134 | spinlock_t ns_last_segment_lock; | ||
135 | sector_t ns_last_pseg; | ||
136 | u64 ns_last_seq; | ||
137 | __u64 ns_last_cno; | ||
138 | u64 ns_prot_seq; | ||
139 | unsigned long ns_free_segments_count; | ||
140 | |||
141 | struct rw_semaphore ns_segctor_sem; | ||
142 | |||
143 | /* | ||
144 | * Following fields are lock free except for the period before | ||
145 | * the_nilfs is initialized. | ||
146 | */ | ||
147 | struct inode *ns_dat; | ||
148 | struct inode *ns_cpfile; | ||
149 | struct inode *ns_sufile; | ||
150 | struct inode *ns_gc_dat; | ||
151 | |||
152 | /* GC inode list and hash table head */ | ||
153 | struct list_head ns_gc_inodes; | ||
154 | struct hlist_head *ns_gc_inodes_h; | ||
155 | |||
156 | /* Disk layout information (static) */ | ||
157 | unsigned int ns_blocksize_bits; | ||
158 | unsigned long ns_nsegments; | ||
159 | unsigned long ns_blocks_per_segment; | ||
160 | unsigned long ns_r_segments_percentage; | ||
161 | unsigned long ns_nrsvsegs; | ||
162 | unsigned long ns_first_data_block; | ||
163 | int ns_inode_size; | ||
164 | int ns_first_ino; | ||
165 | u32 ns_crc_seed; | ||
166 | }; | ||
167 | |||
/* Size of the GC inode hash table (see ns_gc_inodes_h above) */
#define NILFS_GCINODE_HASH_BITS		8
#define NILFS_GCINODE_HASH_SIZE		(1<<NILFS_GCINODE_HASH_BITS)

/*
 * THE_NILFS_FNS(bit, name) generates atomic accessors for one ns_flags
 * bit: set_nilfs_<name>(), clear_nilfs_<name>() and the test predicate
 * nilfs_<name>().
 */
#define THE_NILFS_FNS(bit, name)					\
static inline void set_nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	set_bit(THE_NILFS_##bit, &(nilfs)->ns_flags);			\
}									\
static inline void clear_nilfs_##name(struct the_nilfs *nilfs)		\
{									\
	clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags);			\
}									\
static inline int nilfs_##name(struct the_nilfs *nilfs)			\
{									\
	return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags);		\
}

/* set_nilfs_init()/nilfs_init(), ..._loaded(), ..._discontinued() */
THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(LOADED, loaded)
THE_NILFS_FNS(DISCONTINUED, discontinued)
188 | |||
189 | /* Minimum interval of periodical update of superblocks (in seconds) */ | ||
190 | #define NILFS_SB_FREQ 10 | ||
191 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ | ||
192 | |||
193 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); | ||
194 | struct the_nilfs *alloc_nilfs(struct block_device *); | ||
195 | void put_nilfs(struct the_nilfs *); | ||
196 | int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); | ||
197 | int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); | ||
198 | int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); | ||
199 | int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); | ||
200 | int nilfs_near_disk_full(struct the_nilfs *); | ||
201 | void nilfs_fall_back_super_block(struct the_nilfs *); | ||
202 | void nilfs_swap_super_block(struct the_nilfs *); | ||
203 | |||
204 | |||
/* Take an additional reference on @nilfs (released with put_nilfs()). */
static inline void get_nilfs(struct the_nilfs *nilfs)
{
	/* Caller must have at least one reference of the_nilfs. */
	atomic_inc(&nilfs->ns_count);
}
210 | |||
/*
 * Get a reference to the writable FS-instance back pointer.
 * atomic_inc_and_test() fires when the refcount reaches zero, so the
 * first referrer takes ns_writer_mutex (which implies the counter is
 * initialized to -1 in alloc_nilfs -- NOTE(review): confirm there).
 * Pair with nilfs_put_writer().  May return NULL if no writer attached.
 */
static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
{
	if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
		mutex_lock(&nilfs->ns_writer_mutex);
	return nilfs->ns_writer;
}
217 | |||
/*
 * Drop a reference taken by nilfs_get_writer(); the last referrer
 * (counter falling back below zero) releases ns_writer_mutex.
 */
static inline void nilfs_put_writer(struct the_nilfs *nilfs)
{
	if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
		mutex_unlock(&nilfs->ns_writer_mutex);
}
223 | |||
/* Install @sbi as the writable FS-instance under ns_writer_mutex. */
static inline void
nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
	mutex_lock(&nilfs->ns_writer_mutex);
	nilfs->ns_writer = sbi;
	mutex_unlock(&nilfs->ns_writer_mutex);
}
231 | |||
/*
 * Clear the writer back pointer, but only if it still points at @sbi;
 * a concurrent attach of a different writer is left untouched.
 */
static inline void
nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
{
	mutex_lock(&nilfs->ns_writer_mutex);
	if (sbi == nilfs->ns_writer)
		nilfs->ns_writer = NULL;
	mutex_unlock(&nilfs->ns_writer_mutex);
}
240 | |||
241 | static inline void | ||
242 | nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, | ||
243 | sector_t *seg_start, sector_t *seg_end) | ||
244 | { | ||
245 | *seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum; | ||
246 | *seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1; | ||
247 | if (segnum == 0) | ||
248 | *seg_start = nilfs->ns_first_data_block; | ||
249 | } | ||
250 | |||
251 | static inline sector_t | ||
252 | nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum) | ||
253 | { | ||
254 | return (segnum == 0) ? nilfs->ns_first_data_block : | ||
255 | (sector_t)nilfs->ns_blocks_per_segment * segnum; | ||
256 | } | ||
257 | |||
/*
 * Map a block number to the index of the segment containing it.
 * sector_div() divides segnum in place by the per-segment block count
 * (64-bit-safe on 32-bit hosts); the quotient left in segnum is the
 * segment index.
 */
static inline __u64
nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
{
	sector_t segnum = blocknr;

	sector_div(segnum, nilfs->ns_blocks_per_segment);
	return segnum;
}
266 | |||
/*
 * Mark the current full segment as exhausted by pushing the partial
 * segment offset past its last block.
 */
static inline void
nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
			sector_t seg_end)
{
	/* terminate the current full segment (used in case of I/O-error) */
	nilfs->ns_pseg_offset = seg_end - seg_start + 1;
}
274 | |||
/*
 * Advance the write position to the pre-selected next full segment:
 * ns_nextnum becomes the current segment, the partial-segment offset
 * restarts at 0, and the segment sequence number is bumped.
 */
static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
{
	/* move forward with a full segment */
	nilfs->ns_segnum = nilfs->ns_nextnum;
	nilfs->ns_pseg_offset = 0;
	nilfs->ns_seg_seq++;
}
282 | |||
/*
 * Return a consistent snapshot of the checkpoint number of the latest
 * segment, taken under ns_last_segment_lock.
 */
static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
{
	__u64 cno;

	spin_lock(&nilfs->ns_last_segment_lock);
	cno = nilfs->ns_last_cno;
	spin_unlock(&nilfs->ns_last_segment_lock);
	return cno;
}
292 | |||
/* A segment is active if it is the current one or the one selected next. */
static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
{
	return n == nilfs->ns_segnum || n == nilfs->ns_nextnum;
}
297 | |||
298 | #endif /* _THE_NILFS_H */ | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a5887df2cd8a..8672b9536039 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1926,7 +1926,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1926 | out->f_path.dentry->d_name.len, | 1926 | out->f_path.dentry->d_name.len, |
1927 | out->f_path.dentry->d_name.name); | 1927 | out->f_path.dentry->d_name.name); |
1928 | 1928 | ||
1929 | inode_double_lock(inode, pipe->inode); | 1929 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); |
1930 | 1930 | ||
1931 | ret = ocfs2_rw_lock(inode, 1); | 1931 | ret = ocfs2_rw_lock(inode, 1); |
1932 | if (ret < 0) { | 1932 | if (ret < 0) { |
@@ -1941,12 +1941,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1941 | goto out_unlock; | 1941 | goto out_unlock; |
1942 | } | 1942 | } |
1943 | 1943 | ||
1944 | if (pipe->inode) | ||
1945 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1944 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); | 1946 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); |
1947 | if (pipe->inode) | ||
1948 | mutex_unlock(&pipe->inode->i_mutex); | ||
1945 | 1949 | ||
1946 | out_unlock: | 1950 | out_unlock: |
1947 | ocfs2_rw_unlock(inode, 1); | 1951 | ocfs2_rw_unlock(inode, 1); |
1948 | out: | 1952 | out: |
1949 | inode_double_unlock(inode, pipe->inode); | 1953 | mutex_unlock(&inode->i_mutex); |
1950 | 1954 | ||
1951 | mlog_exit(ret); | 1955 | mlog_exit(ret); |
1952 | return ret; | 1956 | return ret; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b0ae0be4801f..39e4ad4f59f4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -204,6 +204,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
204 | struct file *file = vma->vm_file; | 204 | struct file *file = vma->vm_file; |
205 | int flags = vma->vm_flags; | 205 | int flags = vma->vm_flags; |
206 | unsigned long ino = 0; | 206 | unsigned long ino = 0; |
207 | unsigned long long pgoff = 0; | ||
207 | dev_t dev = 0; | 208 | dev_t dev = 0; |
208 | int len; | 209 | int len; |
209 | 210 | ||
@@ -211,6 +212,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
211 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 212 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
212 | dev = inode->i_sb->s_dev; | 213 | dev = inode->i_sb->s_dev; |
213 | ino = inode->i_ino; | 214 | ino = inode->i_ino; |
215 | pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT; | ||
214 | } | 216 | } |
215 | 217 | ||
216 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", | 218 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
@@ -220,7 +222,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
220 | flags & VM_WRITE ? 'w' : '-', | 222 | flags & VM_WRITE ? 'w' : '-', |
221 | flags & VM_EXEC ? 'x' : '-', | 223 | flags & VM_EXEC ? 'x' : '-', |
222 | flags & VM_MAYSHARE ? 's' : 'p', | 224 | flags & VM_MAYSHARE ? 's' : 'p', |
223 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, | 225 | pgoff, |
224 | MAJOR(dev), MINOR(dev), ino, &len); | 226 | MAJOR(dev), MINOR(dev), ino, &len); |
225 | 227 | ||
226 | /* | 228 | /* |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 863464d5519c..64a72e2e7650 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -126,6 +126,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
126 | struct file *file; | 126 | struct file *file; |
127 | dev_t dev = 0; | 127 | dev_t dev = 0; |
128 | int flags, len; | 128 | int flags, len; |
129 | unsigned long long pgoff = 0; | ||
129 | 130 | ||
130 | flags = vma->vm_flags; | 131 | flags = vma->vm_flags; |
131 | file = vma->vm_file; | 132 | file = vma->vm_file; |
@@ -134,6 +135,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
134 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 135 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
135 | dev = inode->i_sb->s_dev; | 136 | dev = inode->i_sb->s_dev; |
136 | ino = inode->i_ino; | 137 | ino = inode->i_ino; |
138 | pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; | ||
137 | } | 139 | } |
138 | 140 | ||
139 | seq_printf(m, | 141 | seq_printf(m, |
@@ -144,7 +146,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
144 | flags & VM_WRITE ? 'w' : '-', | 146 | flags & VM_WRITE ? 'w' : '-', |
145 | flags & VM_EXEC ? 'x' : '-', | 147 | flags & VM_EXEC ? 'x' : '-', |
146 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | 148 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
147 | (unsigned long long) vma->vm_pgoff << PAGE_SHIFT, | 149 | pgoff, |
148 | MAJOR(dev), MINOR(dev), ino, &len); | 150 | MAJOR(dev), MINOR(dev), ino, &len); |
149 | 151 | ||
150 | if (file) { | 152 | if (file) { |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a404fb88e456..3a6b193d8444 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -221,22 +221,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) | |||
221 | save_mount_options(sb, data); | 221 | save_mount_options(sb, data); |
222 | 222 | ||
223 | fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); | 223 | fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); |
224 | sb->s_fs_info = fsi; | ||
224 | if (!fsi) { | 225 | if (!fsi) { |
225 | err = -ENOMEM; | 226 | err = -ENOMEM; |
226 | goto fail; | 227 | goto fail; |
227 | } | 228 | } |
228 | sb->s_fs_info = fsi; | ||
229 | 229 | ||
230 | err = ramfs_parse_options(data, &fsi->mount_opts); | 230 | err = ramfs_parse_options(data, &fsi->mount_opts); |
231 | if (err) | 231 | if (err) |
232 | goto fail; | 232 | goto fail; |
233 | 233 | ||
234 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 234 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
235 | sb->s_blocksize = PAGE_CACHE_SIZE; | 235 | sb->s_blocksize = PAGE_CACHE_SIZE; |
236 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 236 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
237 | sb->s_magic = RAMFS_MAGIC; | 237 | sb->s_magic = RAMFS_MAGIC; |
238 | sb->s_op = &ramfs_ops; | 238 | sb->s_op = &ramfs_ops; |
239 | sb->s_time_gran = 1; | 239 | sb->s_time_gran = 1; |
240 | |||
240 | inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); | 241 | inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); |
241 | if (!inode) { | 242 | if (!inode) { |
242 | err = -ENOMEM; | 243 | err = -ENOMEM; |
@@ -244,14 +245,16 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) | |||
244 | } | 245 | } |
245 | 246 | ||
246 | root = d_alloc_root(inode); | 247 | root = d_alloc_root(inode); |
248 | sb->s_root = root; | ||
247 | if (!root) { | 249 | if (!root) { |
248 | err = -ENOMEM; | 250 | err = -ENOMEM; |
249 | goto fail; | 251 | goto fail; |
250 | } | 252 | } |
251 | sb->s_root = root; | 253 | |
252 | return 0; | 254 | return 0; |
253 | fail: | 255 | fail: |
254 | kfree(fsi); | 256 | kfree(fsi); |
257 | sb->s_fs_info = NULL; | ||
255 | iput(inode); | 258 | iput(inode); |
256 | return err; | 259 | return err; |
257 | } | 260 | } |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 1e548a4975ba..10ca7d984a8b 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -408,12 +408,17 @@ static void romfs_destroy_inode(struct inode *inode) | |||
408 | */ | 408 | */ |
409 | static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 409 | static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
410 | { | 410 | { |
411 | struct super_block *sb = dentry->d_sb; | ||
412 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
413 | |||
411 | buf->f_type = ROMFS_MAGIC; | 414 | buf->f_type = ROMFS_MAGIC; |
412 | buf->f_namelen = ROMFS_MAXFN; | 415 | buf->f_namelen = ROMFS_MAXFN; |
413 | buf->f_bsize = ROMBSIZE; | 416 | buf->f_bsize = ROMBSIZE; |
414 | buf->f_bfree = buf->f_bavail = buf->f_ffree; | 417 | buf->f_bfree = buf->f_bavail = buf->f_ffree; |
415 | buf->f_blocks = | 418 | buf->f_blocks = |
416 | (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; | 419 | (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; |
420 | buf->f_fsid.val[0] = (u32)id; | ||
421 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
417 | return 0; | 422 | return 0; |
418 | } | 423 | } |
419 | 424 | ||
diff --git a/fs/splice.c b/fs/splice.c index dd727d43e5b7..c18aa7e03e2b 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -737,10 +737,19 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
737 | * ->write_end. Most of the time, these expect i_mutex to | 737 | * ->write_end. Most of the time, these expect i_mutex to |
738 | * be held. Since this may result in an ABBA deadlock with | 738 | * be held. Since this may result in an ABBA deadlock with |
739 | * pipe->inode, we have to order lock acquiry here. | 739 | * pipe->inode, we have to order lock acquiry here. |
740 | * | ||
741 | * Outer lock must be inode->i_mutex, as pipe_wait() will | ||
742 | * release and reacquire pipe->inode->i_mutex, AND inode must | ||
743 | * never be a pipe. | ||
740 | */ | 744 | */ |
741 | inode_double_lock(inode, pipe->inode); | 745 | WARN_ON(S_ISFIFO(inode->i_mode)); |
746 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
747 | if (pipe->inode) | ||
748 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
742 | ret = __splice_from_pipe(pipe, &sd, actor); | 749 | ret = __splice_from_pipe(pipe, &sd, actor); |
743 | inode_double_unlock(inode, pipe->inode); | 750 | if (pipe->inode) |
751 | mutex_unlock(&pipe->inode->i_mutex); | ||
752 | mutex_unlock(&inode->i_mutex); | ||
744 | 753 | ||
745 | return ret; | 754 | return ret; |
746 | } | 755 | } |
@@ -831,11 +840,17 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
831 | }; | 840 | }; |
832 | ssize_t ret; | 841 | ssize_t ret; |
833 | 842 | ||
834 | inode_double_lock(inode, pipe->inode); | 843 | WARN_ON(S_ISFIFO(inode->i_mode)); |
844 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
835 | ret = file_remove_suid(out); | 845 | ret = file_remove_suid(out); |
836 | if (likely(!ret)) | 846 | if (likely(!ret)) { |
847 | if (pipe->inode) | ||
848 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
837 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | 849 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); |
838 | inode_double_unlock(inode, pipe->inode); | 850 | if (pipe->inode) |
851 | mutex_unlock(&pipe->inode->i_mutex); | ||
852 | } | ||
853 | mutex_unlock(&inode->i_mutex); | ||
839 | if (ret > 0) { | 854 | if (ret > 0) { |
840 | unsigned long nr_pages; | 855 | unsigned long nr_pages; |
841 | 856 | ||
diff --git a/fs/super.c b/fs/super.c index 77cb4ec919b9..786fe7d72790 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -771,6 +771,46 @@ void kill_litter_super(struct super_block *sb) | |||
771 | 771 | ||
772 | EXPORT_SYMBOL(kill_litter_super); | 772 | EXPORT_SYMBOL(kill_litter_super); |
773 | 773 | ||
/* sget() comparator: match a super block by its namespace (s_fs_info). */
static int ns_test_super(struct super_block *sb, void *data)
{
	return sb->s_fs_info == data;
}
778 | |||
/* sget() initializer: record the namespace and assign an anonymous dev. */
static int ns_set_super(struct super_block *sb, void *data)
{
	sb->s_fs_info = data;
	return set_anon_super(sb, NULL);
}
784 | |||
/*
 * get_sb_ns - get or create a super block keyed by a namespace pointer
 * @fs_type: file system type
 * @flags: mount flags
 * @data: namespace pointer used as the sget() key (stored in s_fs_info)
 * @fill_super: callback that populates a newly created super block
 * @mnt: vfsmount to attach the super block to
 *
 * Reuses an existing super block whose s_fs_info matches @data; only a
 * freshly created one (no root dentry yet) is filled and activated.
 * Returns 0 on success or a negative error code.
 */
int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int),
	struct vfsmount *mnt)
{
	struct super_block *sb;

	sb = sget(fs_type, ns_test_super, ns_set_super, data);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	if (!sb->s_root) {
		int err;
		sb->s_flags = flags;
		err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
		if (err) {
			/* drop the s_umount write lock taken by sget()
			   before tearing the half-built sb down */
			up_write(&sb->s_umount);
			deactivate_super(sb);
			return err;
		}

		sb->s_flags |= MS_ACTIVE;
	}

	simple_set_mnt(mnt, sb);
	return 0;
}

EXPORT_SYMBOL(get_sb_ns);
813 | |||
774 | #ifdef CONFIG_BLOCK | 814 | #ifdef CONFIG_BLOCK |
775 | static int set_bdev_super(struct super_block *s, void *data) | 815 | static int set_bdev_super(struct super_block *s, void *data) |
776 | { | 816 | { |