aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig27
-rw-r--r--fs/Makefile1
-rw-r--r--fs/afs/netdevices.c3
-rw-r--r--fs/befs/super.c1
-rw-r--r--fs/buffer.c13
-rw-r--r--fs/ext3/Kconfig19
-rw-r--r--fs/ext3/inode.c23
-rw-r--r--fs/ext3/super.c8
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nilfs2/Makefile5
-rw-r--r--fs/nilfs2/alloc.c504
-rw-r--r--fs/nilfs2/alloc.h72
-rw-r--r--fs/nilfs2/bmap.c783
-rw-r--r--fs/nilfs2/bmap.h244
-rw-r--r--fs/nilfs2/bmap_union.h42
-rw-r--r--fs/nilfs2/btnode.c316
-rw-r--r--fs/nilfs2/btnode.h58
-rw-r--r--fs/nilfs2/btree.c2269
-rw-r--r--fs/nilfs2/btree.h117
-rw-r--r--fs/nilfs2/cpfile.c925
-rw-r--r--fs/nilfs2/cpfile.h45
-rw-r--r--fs/nilfs2/dat.c430
-rw-r--r--fs/nilfs2/dat.h52
-rw-r--r--fs/nilfs2/dir.c711
-rw-r--r--fs/nilfs2/direct.c436
-rw-r--r--fs/nilfs2/direct.h78
-rw-r--r--fs/nilfs2/file.c160
-rw-r--r--fs/nilfs2/gcdat.c84
-rw-r--r--fs/nilfs2/gcinode.c288
-rw-r--r--fs/nilfs2/ifile.c150
-rw-r--r--fs/nilfs2/ifile.h53
-rw-r--r--fs/nilfs2/inode.c785
-rw-r--r--fs/nilfs2/ioctl.c654
-rw-r--r--fs/nilfs2/mdt.c563
-rw-r--r--fs/nilfs2/mdt.h125
-rw-r--r--fs/nilfs2/namei.c474
-rw-r--r--fs/nilfs2/nilfs.h318
-rw-r--r--fs/nilfs2/page.c540
-rw-r--r--fs/nilfs2/page.h76
-rw-r--r--fs/nilfs2/recovery.c929
-rw-r--r--fs/nilfs2/sb.h102
-rw-r--r--fs/nilfs2/segbuf.c439
-rw-r--r--fs/nilfs2/segbuf.h201
-rw-r--r--fs/nilfs2/seglist.h85
-rw-r--r--fs/nilfs2/segment.c2977
-rw-r--r--fs/nilfs2/segment.h243
-rw-r--r--fs/nilfs2/sufile.c640
-rw-r--r--fs/nilfs2/sufile.h54
-rw-r--r--fs/nilfs2/super.c1323
-rw-r--r--fs/nilfs2/the_nilfs.c637
-rw-r--r--fs/nilfs2/the_nilfs.h298
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/task_nommu.c4
-rw-r--r--fs/ramfs/inode.c19
-rw-r--r--fs/romfs/super.c5
-rw-r--r--fs/splice.c25
-rw-r--r--fs/super.c40
59 files changed, 19459 insertions, 30 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 86b203fc3c56..9f7270f36b2a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -175,9 +175,34 @@ source "fs/qnx4/Kconfig"
175source "fs/romfs/Kconfig" 175source "fs/romfs/Kconfig"
176source "fs/sysv/Kconfig" 176source "fs/sysv/Kconfig"
177source "fs/ufs/Kconfig" 177source "fs/ufs/Kconfig"
178
179source "fs/exofs/Kconfig" 178source "fs/exofs/Kconfig"
180 179
180config NILFS2_FS
181 tristate "NILFS2 file system support (EXPERIMENTAL)"
182 depends on BLOCK && EXPERIMENTAL
183 select CRC32
184 help
185 NILFS2 is a log-structured file system (LFS) supporting continuous
186 snapshotting. In addition to versioning capability of the entire
187 file system, users can even restore files mistakenly overwritten or
188 destroyed just a few seconds ago. Since this file system can keep
189 consistency like conventional LFS, it achieves quick recovery after
190 system crashes.
191
192 NILFS2 creates a number of checkpoints every few seconds or per
193 synchronous write basis (unless there is no change). Users can
194 select significant versions among continuously created checkpoints,
195 and can change them into snapshots which will be preserved for long
196 periods until they are changed back to checkpoints. Each
197 snapshot is mountable as a read-only file system concurrently with
198 its writable mount, and this feature is convenient for online backup.
199
200 Some features including atime, extended attributes, and POSIX ACLs,
201 are not supported yet.
202
203 To compile this file system support as a module, choose M here: the
204 module will be called nilfs2. If unsure, say N.
205
181endif # MISC_FILESYSTEMS 206endif # MISC_FILESYSTEMS
182 207
183menuconfig NETWORK_FILESYSTEMS 208menuconfig NETWORK_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
index 70b2aed87133..af6d04700d9c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_JFS_FS) += jfs/
114obj-$(CONFIG_XFS_FS) += xfs/ 114obj-$(CONFIG_XFS_FS) += xfs/
115obj-$(CONFIG_9P_FS) += 9p/ 115obj-$(CONFIG_9P_FS) += 9p/
116obj-$(CONFIG_AFS_FS) += afs/ 116obj-$(CONFIG_AFS_FS) += afs/
117obj-$(CONFIG_NILFS2_FS) += nilfs2/
117obj-$(CONFIG_BEFS_FS) += befs/ 118obj-$(CONFIG_BEFS_FS) += befs/
118obj-$(CONFIG_HOSTFS) += hostfs/ 119obj-$(CONFIG_HOSTFS) += hostfs/
119obj-$(CONFIG_HPPFS) += hppfs/ 120obj-$(CONFIG_HPPFS) += hppfs/
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
index 49f189423063..7ad36506c256 100644
--- a/fs/afs/netdevices.c
+++ b/fs/afs/netdevices.c
@@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen)
20 struct net_device *dev; 20 struct net_device *dev;
21 int ret = -ENODEV; 21 int ret = -ENODEV;
22 22
23 if (maclen != ETH_ALEN) 23 BUG_ON(maclen != ETH_ALEN);
24 BUG();
25 24
26 rtnl_lock(); 25 rtnl_lock();
27 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); 26 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
diff --git a/fs/befs/super.c b/fs/befs/super.c
index 41f2b4d0093e..ca40f828f64d 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -8,6 +8,7 @@
8 */ 8 */
9 9
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <asm/page.h> /* for PAGE_SIZE */
11 12
12#include "befs.h" 13#include "befs.h"
13#include "super.h" 14#include "super.h"
diff --git a/fs/buffer.c b/fs/buffer.c
index 6e35762b6169..13edf7ad3ff1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1596,6 +1596,16 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
1596 * locked buffer. This only can happen if someone has written the buffer 1596 * locked buffer. This only can happen if someone has written the buffer
1597 * directly, with submit_bh(). At the address_space level PageWriteback 1597 * directly, with submit_bh(). At the address_space level PageWriteback
1598 * prevents this contention from occurring. 1598 * prevents this contention from occurring.
1599 *
1600 * If block_write_full_page() is called with wbc->sync_mode ==
1601 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
1602 * causes the writes to be flagged as synchronous writes, but the
1603 * block device queue will NOT be unplugged, since usually many pages
1604 * will be pushed out before the higher-level caller actually
1605 * waits for the writes to be completed. The various wait functions,
1606 * such as wait_on_writeback_range() will ultimately call sync_page()
1607 * which will ultimately call blk_run_backing_dev(), which will end up
1608 * unplugging the device queue.
1599 */ 1609 */
1600static int __block_write_full_page(struct inode *inode, struct page *page, 1610static int __block_write_full_page(struct inode *inode, struct page *page,
1601 get_block_t *get_block, struct writeback_control *wbc) 1611 get_block_t *get_block, struct writeback_control *wbc)
@@ -1606,7 +1616,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1606 struct buffer_head *bh, *head; 1616 struct buffer_head *bh, *head;
1607 const unsigned blocksize = 1 << inode->i_blkbits; 1617 const unsigned blocksize = 1 << inode->i_blkbits;
1608 int nr_underway = 0; 1618 int nr_underway = 0;
1609 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); 1619 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1620 WRITE_SYNC_PLUG : WRITE);
1610 1621
1611 BUG_ON(!PageLocked(page)); 1622 BUG_ON(!PageLocked(page));
1612 1623
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig
index 8e0cfe44b0fc..fb3c1a21b135 100644
--- a/fs/ext3/Kconfig
+++ b/fs/ext3/Kconfig
@@ -28,6 +28,25 @@ config EXT3_FS
28 To compile this file system support as a module, choose M here: the 28 To compile this file system support as a module, choose M here: the
29 module will be called ext3. 29 module will be called ext3.
30 30
31config EXT3_DEFAULTS_TO_ORDERED
32 bool "Default to 'data=ordered' in ext3 (legacy option)"
33 depends on EXT3_FS
34 help
35 If a filesystem does not explicitly specify a data ordering
36 mode, and the journal capability allowed it, ext3 used to
37 historically default to 'data=ordered'.
38
39 That was a rather unfortunate choice, because it leads to all
40 kinds of latency problems, and the 'data=writeback' mode is more
41 appropriate these days.
42
43 You should probably always answer 'n' here, and if you really
44 want to use 'data=ordered' mode, set it in the filesystem itself
45 with 'tune2fs -o journal_data_ordered'.
46
47 But if you really want to enable the legacy default, you can do
48 so by answering 'y' to this question.
49
31config EXT3_FS_XATTR 50config EXT3_FS_XATTR
32 bool "Ext3 extended attributes" 51 bool "Ext3 extended attributes"
33 depends on EXT3_FS 52 depends on EXT3_FS
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 466a332e0bd1..fcfa24361856 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1521,12 +1521,16 @@ static int ext3_ordered_writepage(struct page *page,
1521 if (!page_has_buffers(page)) { 1521 if (!page_has_buffers(page)) {
1522 create_empty_buffers(page, inode->i_sb->s_blocksize, 1522 create_empty_buffers(page, inode->i_sb->s_blocksize,
1523 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1523 (1 << BH_Dirty)|(1 << BH_Uptodate));
1524 } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { 1524 page_bufs = page_buffers(page);
1525 /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ 1525 } else {
1526 return block_write_full_page(page, NULL, wbc); 1526 page_bufs = page_buffers(page);
1527 if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE,
1528 NULL, buffer_unmapped)) {
1529 /* Provide NULL get_block() to catch bugs if buffers
1530 * weren't really mapped */
1531 return block_write_full_page(page, NULL, wbc);
1532 }
1527 } 1533 }
1528 page_bufs = page_buffers(page);
1529
1530 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1534 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1531 1535
1532 if (IS_ERR(handle)) { 1536 if (IS_ERR(handle)) {
@@ -1581,6 +1585,15 @@ static int ext3_writeback_writepage(struct page *page,
1581 if (ext3_journal_current_handle()) 1585 if (ext3_journal_current_handle())
1582 goto out_fail; 1586 goto out_fail;
1583 1587
1588 if (page_has_buffers(page)) {
1589 if (!walk_page_buffers(NULL, page_buffers(page), 0,
1590 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
1591 /* Provide NULL get_block() to catch bugs if buffers
1592 * weren't really mapped */
1593 return block_write_full_page(page, NULL, wbc);
1594 }
1595 }
1596
1584 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1597 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1585 if (IS_ERR(handle)) { 1598 if (IS_ERR(handle)) {
1586 ret = PTR_ERR(handle); 1599 ret = PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9e5b8e387e1e..599dbfe504c3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,12 @@
44#include "acl.h" 44#include "acl.h"
45#include "namei.h" 45#include "namei.h"
46 46
47#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
48 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
49#else
50 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
51#endif
52
47static int ext3_load_journal(struct super_block *, struct ext3_super_block *, 53static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
48 unsigned long journal_devnum); 54 unsigned long journal_devnum);
49static int ext3_create_journal(struct super_block *, struct ext3_super_block *, 55static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
@@ -1919,7 +1925,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1919 cope, else JOURNAL_DATA */ 1925 cope, else JOURNAL_DATA */
1920 if (journal_check_available_features 1926 if (journal_check_available_features
1921 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) 1927 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
1922 set_opt(sbi->s_mount_opt, ORDERED_DATA); 1928 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
1923 else 1929 else
1924 set_opt(sbi->s_mount_opt, JOURNAL_DATA); 1930 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1925 break; 1931 break;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3523b895eb4b..5a97bcfe03e5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -516,8 +516,6 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
516 goto out_unlock; 516 goto out_unlock;
517 517
518 ret = nfs_updatepage(filp, page, 0, pagelen); 518 ret = nfs_updatepage(filp, page, 0, pagelen);
519 if (ret == 0)
520 ret = pagelen;
521out_unlock: 519out_unlock:
522 unlock_page(page); 520 unlock_page(page);
523 if (ret) 521 if (ret)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 82eaadbff408..6717200923fe 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1228,7 +1228,6 @@ static int nfs_parse_mount_options(char *raw,
1228 goto out_nomem; 1228 goto out_nomem;
1229 token = match_token(string, 1229 token = match_token(string,
1230 nfs_xprt_protocol_tokens, args); 1230 nfs_xprt_protocol_tokens, args);
1231 kfree(string);
1232 1231
1233 switch (token) { 1232 switch (token) {
1234 case Opt_xprt_udp: 1233 case Opt_xprt_udp:
@@ -1258,6 +1257,7 @@ static int nfs_parse_mount_options(char *raw,
1258 goto out_nomem; 1257 goto out_nomem;
1259 token = match_token(string, 1258 token = match_token(string,
1260 nfs_xprt_protocol_tokens, args); 1259 nfs_xprt_protocol_tokens, args);
1260 kfree(string);
1261 1261
1262 switch (token) { 1262 switch (token) {
1263 case Opt_xprt_udp: 1263 case Opt_xprt_udp:
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile
new file mode 100644
index 000000000000..df3e62c1ddc5
--- /dev/null
+++ b/fs/nilfs2/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_NILFS2_FS) += nilfs2.o
2nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \
3 btnode.o bmap.o btree.o direct.o dat.o recovery.o \
4 the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \
5 ifile.o alloc.o gcinode.o ioctl.o gcdat.o
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
new file mode 100644
index 000000000000..d69e6ae59251
--- /dev/null
+++ b/fs/nilfs2/alloc.c
@@ -0,0 +1,504 @@
1/*
2 * alloc.c - NILFS dat/inode allocator
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Original code was written by Koji Sato <koji@osrg.net>.
21 * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
22 * Amagai Yoshiji <amagai@osrg.net>.
23 */
24
25#include <linux/types.h>
26#include <linux/buffer_head.h>
27#include <linux/fs.h>
28#include <linux/bitops.h>
29#include "mdt.h"
30#include "alloc.h"
31
32
33static inline unsigned long
34nilfs_palloc_groups_per_desc_block(const struct inode *inode)
35{
36 return (1UL << inode->i_blkbits) /
37 sizeof(struct nilfs_palloc_group_desc);
38}
39
40static inline unsigned long
41nilfs_palloc_groups_count(const struct inode *inode)
42{
43 return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */));
44}
45
46int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
47{
48 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
49
50 mi->mi_bgl = kmalloc(sizeof(*mi->mi_bgl), GFP_NOFS);
51 if (!mi->mi_bgl)
52 return -ENOMEM;
53
54 bgl_lock_init(mi->mi_bgl);
55
56 nilfs_mdt_set_entry_size(inode, entry_size, 0);
57
58 mi->mi_blocks_per_group =
59 DIV_ROUND_UP(nilfs_palloc_entries_per_group(inode),
60 mi->mi_entries_per_block) + 1;
61 /* Number of blocks in a group including entry blocks and
62 a bitmap block */
63 mi->mi_blocks_per_desc_block =
64 nilfs_palloc_groups_per_desc_block(inode) *
65 mi->mi_blocks_per_group + 1;
66 /* Number of blocks per descriptor including the
67 descriptor block */
68 return 0;
69}
70
71static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
72 unsigned long *offset)
73{
74 __u64 group = nr;
75
76 *offset = do_div(group, nilfs_palloc_entries_per_group(inode));
77 return group;
78}
79
80static unsigned long
81nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
82{
83 unsigned long desc_block =
84 group / nilfs_palloc_groups_per_desc_block(inode);
85 return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block;
86}
87
88static unsigned long
89nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
90{
91 unsigned long desc_offset =
92 group % nilfs_palloc_groups_per_desc_block(inode);
93 return nilfs_palloc_desc_blkoff(inode, group) + 1 +
94 desc_offset * NILFS_MDT(inode)->mi_blocks_per_group;
95}
96
97static unsigned long
98nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
99 const struct nilfs_palloc_group_desc *desc)
100{
101 unsigned long nfree;
102
103 spin_lock(nilfs_mdt_bgl_lock(inode, group));
104 nfree = le32_to_cpu(desc->pg_nfrees);
105 spin_unlock(nilfs_mdt_bgl_lock(inode, group));
106 return nfree;
107}
108
109static void
110nilfs_palloc_group_desc_add_entries(struct inode *inode,
111 unsigned long group,
112 struct nilfs_palloc_group_desc *desc,
113 u32 n)
114{
115 spin_lock(nilfs_mdt_bgl_lock(inode, group));
116 le32_add_cpu(&desc->pg_nfrees, n);
117 spin_unlock(nilfs_mdt_bgl_lock(inode, group));
118}
119
120static unsigned long
121nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
122{
123 unsigned long group, group_offset;
124
125 group = nilfs_palloc_group(inode, nr, &group_offset);
126
127 return nilfs_palloc_bitmap_blkoff(inode, group) + 1 +
128 group_offset / NILFS_MDT(inode)->mi_entries_per_block;
129}
130
131static void nilfs_palloc_desc_block_init(struct inode *inode,
132 struct buffer_head *bh, void *kaddr)
133{
134 struct nilfs_palloc_group_desc *desc = kaddr + bh_offset(bh);
135 unsigned long n = nilfs_palloc_groups_per_desc_block(inode);
136 __le32 nfrees;
137
138 nfrees = cpu_to_le32(nilfs_palloc_entries_per_group(inode));
139 while (n-- > 0) {
140 desc->pg_nfrees = nfrees;
141 desc++;
142 }
143}
144
145static int nilfs_palloc_get_desc_block(struct inode *inode,
146 unsigned long group,
147 int create, struct buffer_head **bhp)
148{
149 return nilfs_mdt_get_block(inode,
150 nilfs_palloc_desc_blkoff(inode, group),
151 create, nilfs_palloc_desc_block_init, bhp);
152}
153
154static int nilfs_palloc_get_bitmap_block(struct inode *inode,
155 unsigned long group,
156 int create, struct buffer_head **bhp)
157{
158 return nilfs_mdt_get_block(inode,
159 nilfs_palloc_bitmap_blkoff(inode, group),
160 create, NULL, bhp);
161}
162
163int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
164 int create, struct buffer_head **bhp)
165{
166 return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr),
167 create, NULL, bhp);
168}
169
170static struct nilfs_palloc_group_desc *
171nilfs_palloc_block_get_group_desc(const struct inode *inode,
172 unsigned long group,
173 const struct buffer_head *bh, void *kaddr)
174{
175 return (struct nilfs_palloc_group_desc *)(kaddr + bh_offset(bh)) +
176 group % nilfs_palloc_groups_per_desc_block(inode);
177}
178
/*
 * nilfs_palloc_block_get_bitmap - address of the bitmap inside a mapped page
 * @inode: inode of the metadata file (not used here)
 * @bh: buffer head of the bitmap block
 * @kaddr: mapped address of the page containing @bh
 */
static unsigned char *
nilfs_palloc_block_get_bitmap(const struct inode *inode,
			      const struct buffer_head *bh, void *kaddr)
{
	unsigned char *page_base = kaddr;

	return page_base + bh_offset(bh);
}
185
186void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
187 const struct buffer_head *bh, void *kaddr)
188{
189 unsigned long entry_offset, group_offset;
190
191 nilfs_palloc_group(inode, nr, &group_offset);
192 entry_offset = group_offset % NILFS_MDT(inode)->mi_entries_per_block;
193
194 return kaddr + bh_offset(bh) +
195 entry_offset * NILFS_MDT(inode)->mi_entry_size;
196}
197
198static int nilfs_palloc_find_available_slot(struct inode *inode,
199 unsigned long group,
200 unsigned long target,
201 unsigned char *bitmap,
202 int bsize) /* size in bits */
203{
204 int curr, pos, end, i;
205
206 if (target > 0) {
207 end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1);
208 if (end > bsize)
209 end = bsize;
210 pos = nilfs_find_next_zero_bit(bitmap, end, target);
211 if (pos < end &&
212 !nilfs_set_bit_atomic(
213 nilfs_mdt_bgl_lock(inode, group), pos, bitmap))
214 return pos;
215 } else
216 end = 0;
217
218 for (i = 0, curr = end;
219 i < bsize;
220 i += BITS_PER_LONG, curr += BITS_PER_LONG) {
221 /* wrap around */
222 if (curr >= bsize)
223 curr = 0;
224 while (*((unsigned long *)bitmap + curr / BITS_PER_LONG)
225 != ~0UL) {
226 end = curr + BITS_PER_LONG;
227 if (end > bsize)
228 end = bsize;
229 pos = nilfs_find_next_zero_bit(bitmap, end, curr);
230 if ((pos < end) &&
231 !nilfs_set_bit_atomic(
232 nilfs_mdt_bgl_lock(inode, group), pos,
233 bitmap))
234 return pos;
235 }
236 }
237 return -ENOSPC;
238}
239
240static unsigned long
241nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
242 unsigned long curr, unsigned long max)
243{
244 return min_t(unsigned long,
245 nilfs_palloc_groups_per_desc_block(inode) -
246 curr % nilfs_palloc_groups_per_desc_block(inode),
247 max - curr + 1);
248}
249
250int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
251 struct nilfs_palloc_req *req)
252{
253 struct buffer_head *desc_bh, *bitmap_bh;
254 struct nilfs_palloc_group_desc *desc;
255 unsigned char *bitmap;
256 void *desc_kaddr, *bitmap_kaddr;
257 unsigned long group, maxgroup, ngroups;
258 unsigned long group_offset, maxgroup_offset;
259 unsigned long n, entries_per_group, groups_per_desc_block;
260 unsigned long i, j;
261 int pos, ret;
262
263 ngroups = nilfs_palloc_groups_count(inode);
264 maxgroup = ngroups - 1;
265 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
266 entries_per_group = nilfs_palloc_entries_per_group(inode);
267 groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode);
268
269 for (i = 0; i < ngroups; i += n) {
270 if (group >= ngroups) {
271 /* wrap around */
272 group = 0;
273 maxgroup = nilfs_palloc_group(inode, req->pr_entry_nr,
274 &maxgroup_offset) - 1;
275 }
276 ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
277 if (ret < 0)
278 return ret;
279 desc_kaddr = kmap(desc_bh->b_page);
280 desc = nilfs_palloc_block_get_group_desc(
281 inode, group, desc_bh, desc_kaddr);
282 n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
283 maxgroup);
284 for (j = 0; j < n; j++, desc++, group++) {
285 if (nilfs_palloc_group_desc_nfrees(inode, group, desc)
286 > 0) {
287 ret = nilfs_palloc_get_bitmap_block(
288 inode, group, 1, &bitmap_bh);
289 if (ret < 0)
290 goto out_desc;
291 bitmap_kaddr = kmap(bitmap_bh->b_page);
292 bitmap = nilfs_palloc_block_get_bitmap(
293 inode, bitmap_bh, bitmap_kaddr);
294 pos = nilfs_palloc_find_available_slot(
295 inode, group, group_offset, bitmap,
296 entries_per_group);
297 if (pos >= 0) {
298 /* found a free entry */
299 nilfs_palloc_group_desc_add_entries(
300 inode, group, desc, -1);
301 req->pr_entry_nr =
302 entries_per_group * group + pos;
303 kunmap(desc_bh->b_page);
304 kunmap(bitmap_bh->b_page);
305
306 req->pr_desc_bh = desc_bh;
307 req->pr_bitmap_bh = bitmap_bh;
308 return 0;
309 }
310 kunmap(bitmap_bh->b_page);
311 brelse(bitmap_bh);
312 }
313
314 group_offset = 0;
315 }
316
317 kunmap(desc_bh->b_page);
318 brelse(desc_bh);
319 }
320
321 /* no entries left */
322 return -ENOSPC;
323
324 out_desc:
325 kunmap(desc_bh->b_page);
326 brelse(desc_bh);
327 return ret;
328}
329
330void nilfs_palloc_commit_alloc_entry(struct inode *inode,
331 struct nilfs_palloc_req *req)
332{
333 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
334 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
335 nilfs_mdt_mark_dirty(inode);
336
337 brelse(req->pr_bitmap_bh);
338 brelse(req->pr_desc_bh);
339}
340
341void nilfs_palloc_commit_free_entry(struct inode *inode,
342 struct nilfs_palloc_req *req)
343{
344 struct nilfs_palloc_group_desc *desc;
345 unsigned long group, group_offset;
346 unsigned char *bitmap;
347 void *desc_kaddr, *bitmap_kaddr;
348
349 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
350 desc_kaddr = kmap(req->pr_desc_bh->b_page);
351 desc = nilfs_palloc_block_get_group_desc(inode, group,
352 req->pr_desc_bh, desc_kaddr);
353 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
354 bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh,
355 bitmap_kaddr);
356
357 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
358 group_offset, bitmap))
359 printk(KERN_WARNING "%s: entry number %llu already freed\n",
360 __func__, (unsigned long long)req->pr_entry_nr);
361
362 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
363
364 kunmap(req->pr_bitmap_bh->b_page);
365 kunmap(req->pr_desc_bh->b_page);
366
367 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh);
368 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh);
369 nilfs_mdt_mark_dirty(inode);
370
371 brelse(req->pr_bitmap_bh);
372 brelse(req->pr_desc_bh);
373}
374
375void nilfs_palloc_abort_alloc_entry(struct inode *inode,
376 struct nilfs_palloc_req *req)
377{
378 struct nilfs_palloc_group_desc *desc;
379 void *desc_kaddr, *bitmap_kaddr;
380 unsigned char *bitmap;
381 unsigned long group, group_offset;
382
383 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
384 desc_kaddr = kmap(req->pr_desc_bh->b_page);
385 desc = nilfs_palloc_block_get_group_desc(inode, group,
386 req->pr_desc_bh, desc_kaddr);
387 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
388 bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh,
389 bitmap_kaddr);
390 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
391 group_offset, bitmap))
392 printk(KERN_WARNING "%s: entry numer %llu already freed\n",
393 __func__, (unsigned long long)req->pr_entry_nr);
394
395 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
396
397 kunmap(req->pr_bitmap_bh->b_page);
398 kunmap(req->pr_desc_bh->b_page);
399
400 brelse(req->pr_bitmap_bh);
401 brelse(req->pr_desc_bh);
402
403 req->pr_entry_nr = 0;
404 req->pr_bitmap_bh = NULL;
405 req->pr_desc_bh = NULL;
406}
407
408int nilfs_palloc_prepare_free_entry(struct inode *inode,
409 struct nilfs_palloc_req *req)
410{
411 struct buffer_head *desc_bh, *bitmap_bh;
412 unsigned long group, group_offset;
413 int ret;
414
415 group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
416 ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
417 if (ret < 0)
418 return ret;
419 ret = nilfs_palloc_get_bitmap_block(inode, group, 1, &bitmap_bh);
420 if (ret < 0) {
421 brelse(desc_bh);
422 return ret;
423 }
424
425 req->pr_desc_bh = desc_bh;
426 req->pr_bitmap_bh = bitmap_bh;
427 return 0;
428}
429
430void nilfs_palloc_abort_free_entry(struct inode *inode,
431 struct nilfs_palloc_req *req)
432{
433 brelse(req->pr_bitmap_bh);
434 brelse(req->pr_desc_bh);
435
436 req->pr_entry_nr = 0;
437 req->pr_bitmap_bh = NULL;
438 req->pr_desc_bh = NULL;
439}
440
441static int
442nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
443{
444 __u64 first, last;
445
446 first = group * nilfs_palloc_entries_per_group(inode);
447 last = first + nilfs_palloc_entries_per_group(inode) - 1;
448 return (nr >= first) && (nr <= last);
449}
450
451int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
452{
453 struct buffer_head *desc_bh, *bitmap_bh;
454 struct nilfs_palloc_group_desc *desc;
455 unsigned char *bitmap;
456 void *desc_kaddr, *bitmap_kaddr;
457 unsigned long group, group_offset;
458 int i, j, n, ret;
459
460 for (i = 0; i < nitems; i += n) {
461 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
462 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
463 if (ret < 0)
464 return ret;
465 ret = nilfs_palloc_get_bitmap_block(inode, group, 0,
466 &bitmap_bh);
467 if (ret < 0) {
468 brelse(desc_bh);
469 return ret;
470 }
471 desc_kaddr = kmap(desc_bh->b_page);
472 desc = nilfs_palloc_block_get_group_desc(
473 inode, group, desc_bh, desc_kaddr);
474 bitmap_kaddr = kmap(bitmap_bh->b_page);
475 bitmap = nilfs_palloc_block_get_bitmap(
476 inode, bitmap_bh, bitmap_kaddr);
477 for (j = i, n = 0;
478 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
479 entry_nrs[j]);
480 j++, n++) {
481 nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
482 if (!nilfs_clear_bit_atomic(
483 nilfs_mdt_bgl_lock(inode, group),
484 group_offset, bitmap)) {
485 printk(KERN_WARNING
486 "%s: entry number %llu already freed\n",
487 __func__,
488 (unsigned long long)entry_nrs[j]);
489 }
490 }
491 nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
492
493 kunmap(bitmap_bh->b_page);
494 kunmap(desc_bh->b_page);
495
496 nilfs_mdt_mark_buffer_dirty(desc_bh);
497 nilfs_mdt_mark_buffer_dirty(bitmap_bh);
498 nilfs_mdt_mark_dirty(inode);
499
500 brelse(bitmap_bh);
501 brelse(desc_bh);
502 }
503 return 0;
504}
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
new file mode 100644
index 000000000000..4ace5475c2c7
--- /dev/null
+++ b/fs/nilfs2/alloc.h
@@ -0,0 +1,72 @@
1/*
2 * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Original code was written by Koji Sato <koji@osrg.net>.
21 * Two allocators were unified by Ryusuke Konishi <ryusuke@osrg.net>,
22 * Amagai Yoshiji <amagai@osrg.net>.
23 */
24
25#ifndef _NILFS_ALLOC_H
26#define _NILFS_ALLOC_H
27
28#include <linux/types.h>
29#include <linux/buffer_head.h>
30#include <linux/fs.h>
31
32static inline unsigned long
33nilfs_palloc_entries_per_group(const struct inode *inode)
34{
35 return 1UL << (inode->i_blkbits + 3 /* log2(8 = CHAR_BITS) */);
36}
37
38int nilfs_palloc_init_blockgroup(struct inode *, unsigned);
39int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
40 struct buffer_head **);
41void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
42 const struct buffer_head *, void *);
43
44/**
45 * struct nilfs_palloc_req - persistent allocator request and reply
46 * @pr_entry_nr: entry number (vblocknr or inode number)
47 * @pr_desc_bh: buffer head of the buffer containing block group descriptors
48 * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap
49 * @pr_entry_bh: buffer head of the buffer containing the entry itself
50 */
51struct nilfs_palloc_req {
52 __u64 pr_entry_nr;
53 struct buffer_head *pr_desc_bh;
54 struct buffer_head *pr_bitmap_bh;
55 struct buffer_head *pr_entry_bh;
56};
57
58int nilfs_palloc_prepare_alloc_entry(struct inode *,
59 struct nilfs_palloc_req *);
60void nilfs_palloc_commit_alloc_entry(struct inode *,
61 struct nilfs_palloc_req *);
62void nilfs_palloc_abort_alloc_entry(struct inode *, struct nilfs_palloc_req *);
63void nilfs_palloc_commit_free_entry(struct inode *, struct nilfs_palloc_req *);
64int nilfs_palloc_prepare_free_entry(struct inode *, struct nilfs_palloc_req *);
65void nilfs_palloc_abort_free_entry(struct inode *, struct nilfs_palloc_req *);
66int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
67
68#define nilfs_set_bit_atomic ext2_set_bit_atomic
69#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
70#define nilfs_find_next_zero_bit ext2_find_next_zero_bit
71
72#endif /* _NILFS_ALLOC_H */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
new file mode 100644
index 000000000000..24638e059bf3
--- /dev/null
+++ b/fs/nilfs2/bmap.c
@@ -0,0 +1,783 @@
1/*
2 * bmap.c - NILFS block mapping.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/fs.h>
24#include <linux/string.h>
25#include <linux/errno.h>
26#include "nilfs.h"
27#include "bmap.h"
28#include "sb.h"
29#include "btnode.h"
30#include "mdt.h"
31#include "dat.h"
32#include "alloc.h"
33
34int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
35 __u64 *ptrp)
36{
37 __u64 ptr;
38 int ret;
39
40 down_read(&bmap->b_sem);
41 ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
42 if (ret < 0)
43 goto out;
44 if (bmap->b_pops->bpop_translate != NULL) {
45 ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr);
46 if (ret < 0)
47 goto out;
48 *ptrp = ptr;
49 }
50
51 out:
52 up_read(&bmap->b_sem);
53 return ret;
54}
55
56
57/**
58 * nilfs_bmap_lookup - find a record
59 * @bmap: bmap
60 * @key: key
61 * @recp: pointer to record
62 *
63 * Description: nilfs_bmap_lookup() finds a record whose key matches @key in
64 * @bmap.
65 *
66 * Return Value: On success, 0 is returned and the record associated with @key
67 * is stored in the place pointed by @recp. On error, one of the following
68 * negative error codes is returned.
69 *
70 * %-EIO - I/O error.
71 *
72 * %-ENOMEM - Insufficient amount of memory available.
73 *
74 * %-ENOENT - A record associated with @key does not exist.
75 */
76int nilfs_bmap_lookup(struct nilfs_bmap *bmap,
77 unsigned long key,
78 unsigned long *recp)
79{
80 __u64 ptr;
81 int ret;
82
83 /* XXX: use macro for level 1 */
84 ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr);
85 if (recp != NULL)
86 *recp = ptr;
87 return ret;
88}
89
90static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
91{
92 __u64 keys[NILFS_BMAP_SMALL_HIGH + 1];
93 __u64 ptrs[NILFS_BMAP_SMALL_HIGH + 1];
94 int ret, n;
95
96 if (bmap->b_ops->bop_check_insert != NULL) {
97 ret = bmap->b_ops->bop_check_insert(bmap, key);
98 if (ret > 0) {
99 n = bmap->b_ops->bop_gather_data(
100 bmap, keys, ptrs, NILFS_BMAP_SMALL_HIGH + 1);
101 if (n < 0)
102 return n;
103 ret = nilfs_btree_convert_and_insert(
104 bmap, key, ptr, keys, ptrs, n,
105 NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH);
106 if (ret == 0)
107 bmap->b_u.u_flags |= NILFS_BMAP_LARGE;
108
109 return ret;
110 } else if (ret < 0)
111 return ret;
112 }
113
114 return bmap->b_ops->bop_insert(bmap, key, ptr);
115}
116
117/**
118 * nilfs_bmap_insert - insert a new key-record pair into a bmap
119 * @bmap: bmap
120 * @key: key
121 * @rec: record
122 *
123 * Description: nilfs_bmap_insert() inserts the new key-record pair specified
124 * by @key and @rec into @bmap.
125 *
126 * Return Value: On success, 0 is returned. On error, one of the following
127 * negative error codes is returned.
128 *
129 * %-EIO - I/O error.
130 *
131 * %-ENOMEM - Insufficient amount of memory available.
132 *
133 * %-EEXIST - A record associated with @key already exist.
134 */
135int nilfs_bmap_insert(struct nilfs_bmap *bmap,
136 unsigned long key,
137 unsigned long rec)
138{
139 int ret;
140
141 down_write(&bmap->b_sem);
142 ret = nilfs_bmap_do_insert(bmap, key, rec);
143 up_write(&bmap->b_sem);
144 return ret;
145}
146
147static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
148{
149 __u64 keys[NILFS_BMAP_LARGE_LOW + 1];
150 __u64 ptrs[NILFS_BMAP_LARGE_LOW + 1];
151 int ret, n;
152
153 if (bmap->b_ops->bop_check_delete != NULL) {
154 ret = bmap->b_ops->bop_check_delete(bmap, key);
155 if (ret > 0) {
156 n = bmap->b_ops->bop_gather_data(
157 bmap, keys, ptrs, NILFS_BMAP_LARGE_LOW + 1);
158 if (n < 0)
159 return n;
160 ret = nilfs_direct_delete_and_convert(
161 bmap, key, keys, ptrs, n,
162 NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH);
163 if (ret == 0)
164 bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE;
165
166 return ret;
167 } else if (ret < 0)
168 return ret;
169 }
170
171 return bmap->b_ops->bop_delete(bmap, key);
172}
173
174int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
175{
176 __u64 lastkey;
177 int ret;
178
179 down_read(&bmap->b_sem);
180 ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
181 if (!ret)
182 *key = lastkey;
183 up_read(&bmap->b_sem);
184 return ret;
185}
186
187/**
188 * nilfs_bmap_delete - delete a key-record pair from a bmap
189 * @bmap: bmap
190 * @key: key
191 *
192 * Description: nilfs_bmap_delete() deletes the key-record pair specified by
193 * @key from @bmap.
194 *
195 * Return Value: On success, 0 is returned. On error, one of the following
196 * negative error codes is returned.
197 *
198 * %-EIO - I/O error.
199 *
200 * %-ENOMEM - Insufficient amount of memory available.
201 *
202 * %-ENOENT - A record associated with @key does not exist.
203 */
204int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
205{
206 int ret;
207
208 down_write(&bmap->b_sem);
209 ret = nilfs_bmap_do_delete(bmap, key);
210 up_write(&bmap->b_sem);
211 return ret;
212}
213
214static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
215{
216 __u64 lastkey;
217 int ret;
218
219 ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
220 if (ret < 0) {
221 if (ret == -ENOENT)
222 ret = 0;
223 return ret;
224 }
225
226 while (key <= lastkey) {
227 ret = nilfs_bmap_do_delete(bmap, lastkey);
228 if (ret < 0)
229 return ret;
230 ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
231 if (ret < 0) {
232 if (ret == -ENOENT)
233 ret = 0;
234 return ret;
235 }
236 }
237 return 0;
238}
239
240/**
241 * nilfs_bmap_truncate - truncate a bmap to a specified key
242 * @bmap: bmap
243 * @key: key
244 *
245 * Description: nilfs_bmap_truncate() removes key-record pairs whose keys are
246 * greater than or equal to @key from @bmap.
247 *
248 * Return Value: On success, 0 is returned. On error, one of the following
249 * negative error codes is returned.
250 *
251 * %-EIO - I/O error.
252 *
253 * %-ENOMEM - Insufficient amount of memory available.
254 */
255int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
256{
257 int ret;
258
259 down_write(&bmap->b_sem);
260 ret = nilfs_bmap_do_truncate(bmap, key);
261 up_write(&bmap->b_sem);
262 return ret;
263}
264
265/**
266 * nilfs_bmap_clear - free resources a bmap holds
267 * @bmap: bmap
268 *
269 * Description: nilfs_bmap_clear() frees resources associated with @bmap.
270 */
271void nilfs_bmap_clear(struct nilfs_bmap *bmap)
272{
273 down_write(&bmap->b_sem);
274 if (bmap->b_ops->bop_clear != NULL)
275 bmap->b_ops->bop_clear(bmap);
276 up_write(&bmap->b_sem);
277}
278
279/**
280 * nilfs_bmap_propagate - propagate dirty state
281 * @bmap: bmap
282 * @bh: buffer head
283 *
284 * Description: nilfs_bmap_propagate() marks the buffers that directly or
285 * indirectly refer to the block specified by @bh dirty.
286 *
287 * Return Value: On success, 0 is returned. On error, one of the following
288 * negative error codes is returned.
289 *
290 * %-EIO - I/O error.
291 *
292 * %-ENOMEM - Insufficient amount of memory available.
293 */
294int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
295{
296 int ret;
297
298 down_write(&bmap->b_sem);
299 ret = bmap->b_ops->bop_propagate(bmap, bh);
300 up_write(&bmap->b_sem);
301 return ret;
302}
303
304/**
305 * nilfs_bmap_lookup_dirty_buffers -
306 * @bmap: bmap
307 * @listp: pointer to buffer head list
308 */
309void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
310 struct list_head *listp)
311{
312 if (bmap->b_ops->bop_lookup_dirty_buffers != NULL)
313 bmap->b_ops->bop_lookup_dirty_buffers(bmap, listp);
314}
315
316/**
317 * nilfs_bmap_assign - assign a new block number to a block
318 * @bmap: bmap
319 * @bhp: pointer to buffer head
320 * @blocknr: block number
321 * @binfo: block information
322 *
323 * Description: nilfs_bmap_assign() assigns the block number @blocknr to the
324 * buffer specified by @bh.
325 *
326 * Return Value: On success, 0 is returned and the buffer head of a newly
327 * create buffer and the block information associated with the buffer are
328 * stored in the place pointed by @bh and @binfo, respectively. On error, one
329 * of the following negative error codes is returned.
330 *
331 * %-EIO - I/O error.
332 *
333 * %-ENOMEM - Insufficient amount of memory available.
334 */
335int nilfs_bmap_assign(struct nilfs_bmap *bmap,
336 struct buffer_head **bh,
337 unsigned long blocknr,
338 union nilfs_binfo *binfo)
339{
340 int ret;
341
342 down_write(&bmap->b_sem);
343 ret = bmap->b_ops->bop_assign(bmap, bh, blocknr, binfo);
344 up_write(&bmap->b_sem);
345 return ret;
346}
347
348/**
349 * nilfs_bmap_mark - mark block dirty
350 * @bmap: bmap
351 * @key: key
352 * @level: level
353 *
354 * Description: nilfs_bmap_mark() marks the block specified by @key and @level
355 * as dirty.
356 *
357 * Return Value: On success, 0 is returned. On error, one of the following
358 * negative error codes is returned.
359 *
360 * %-EIO - I/O error.
361 *
362 * %-ENOMEM - Insufficient amount of memory available.
363 */
364int nilfs_bmap_mark(struct nilfs_bmap *bmap, __u64 key, int level)
365{
366 int ret;
367
368 if (bmap->b_ops->bop_mark == NULL)
369 return 0;
370
371 down_write(&bmap->b_sem);
372 ret = bmap->b_ops->bop_mark(bmap, key, level);
373 up_write(&bmap->b_sem);
374 return ret;
375}
376
377/**
378 * nilfs_bmap_test_and_clear_dirty - test and clear a bmap dirty state
379 * @bmap: bmap
380 *
381 * Description: nilfs_test_and_clear() is the atomic operation to test and
382 * clear the dirty state of @bmap.
383 *
384 * Return Value: 1 is returned if @bmap is dirty, or 0 if clear.
385 */
386int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
387{
388 int ret;
389
390 down_write(&bmap->b_sem);
391 ret = nilfs_bmap_dirty(bmap);
392 nilfs_bmap_clear_dirty(bmap);
393 up_write(&bmap->b_sem);
394 return ret;
395}
396
397
398/*
399 * Internal use only
400 */
401
402void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
403{
404 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
405 if (NILFS_MDT(bmap->b_inode))
406 nilfs_mdt_mark_dirty(bmap->b_inode);
407 else
408 mark_inode_dirty(bmap->b_inode);
409}
410
411void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
412{
413 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
414 if (NILFS_MDT(bmap->b_inode))
415 nilfs_mdt_mark_dirty(bmap->b_inode);
416 else
417 mark_inode_dirty(bmap->b_inode);
418}
419
420int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr,
421 struct buffer_head **bhp)
422{
423 return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
424 ptr, 0, bhp, 0);
425}
426
/* Release @bh with brelse(); @bmap is not used. */
void nilfs_bmap_put_block(const struct nilfs_bmap *bmap,
			  struct buffer_head *bh)
{
	brelse(bh);
}
432
433int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr,
434 struct buffer_head **bhp)
435{
436 int ret;
437
438 ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
439 ptr, 0, bhp, 1);
440 if (ret < 0)
441 return ret;
442 set_buffer_nilfs_volatile(*bhp);
443 return 0;
444}
445
/* Pass @bh to nilfs_btnode_delete(); @bmap is not used. */
void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap,
			     struct buffer_head *bh)
{
	nilfs_btnode_delete(bh);
}
451
452__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
453 const struct buffer_head *bh)
454{
455 struct buffer_head *pbh;
456 __u64 key;
457
458 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
459 bmap->b_inode->i_blkbits);
460 for (pbh = page_buffers(bh->b_page); pbh != bh;
461 pbh = pbh->b_this_page, key++);
462
463 return key;
464}
465
466__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
467{
468 __s64 diff;
469
470 diff = key - bmap->b_last_allocated_key;
471 if ((nilfs_bmap_keydiff_abs(diff) < NILFS_INODE_BMAP_SIZE) &&
472 (bmap->b_last_allocated_ptr != NILFS_BMAP_INVALID_PTR) &&
473 (bmap->b_last_allocated_ptr + diff > 0))
474 return bmap->b_last_allocated_ptr + diff;
475 else
476 return NILFS_BMAP_INVALID_PTR;
477}
478
479static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
480{
481 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
482}
483
484#define NILFS_BMAP_GROUP_DIV 8
485__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
486{
487 struct inode *dat = nilfs_bmap_get_dat(bmap);
488 unsigned long entries_per_group = nilfs_palloc_entries_per_group(dat);
489 unsigned long group = bmap->b_inode->i_ino / entries_per_group;
490
491 return group * entries_per_group +
492 (bmap->b_inode->i_ino % NILFS_BMAP_GROUP_DIV) *
493 (entries_per_group / NILFS_BMAP_GROUP_DIV);
494}
495
496static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
497 union nilfs_bmap_ptr_req *req)
498{
499 return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
500}
501
502static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
503 union nilfs_bmap_ptr_req *req)
504{
505 nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
506}
507
508static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
509 union nilfs_bmap_ptr_req *req)
510{
511 nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
512}
513
514static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap,
515 union nilfs_bmap_ptr_req *req)
516{
517 return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req);
518}
519
520static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap,
521 union nilfs_bmap_ptr_req *req,
522 sector_t blocknr)
523{
524 nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req,
525 blocknr);
526}
527
528static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap,
529 union nilfs_bmap_ptr_req *req)
530{
531 nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req);
532}
533
534static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
535 union nilfs_bmap_ptr_req *req)
536{
537 return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
538}
539
540static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
541 union nilfs_bmap_ptr_req *req)
542{
543 nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0);
544}
545
546static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap,
547 union nilfs_bmap_ptr_req *req)
548{
549 nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1);
550}
551
552static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
553 union nilfs_bmap_ptr_req *req)
554{
555 nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
556}
557
558int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr,
559 sector_t blocknr)
560{
561 return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr);
562}
563
564int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
565{
566 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
567}
568
569int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap,
570 union nilfs_bmap_ptr_req *oldreq,
571 union nilfs_bmap_ptr_req *newreq)
572{
573 int ret;
574
575 ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq);
576 if (ret < 0)
577 return ret;
578 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq);
579 if (ret < 0)
580 bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq);
581
582 return ret;
583}
584
585void nilfs_bmap_commit_update(struct nilfs_bmap *bmap,
586 union nilfs_bmap_ptr_req *oldreq,
587 union nilfs_bmap_ptr_req *newreq)
588{
589 bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq);
590 bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq);
591}
592
593void nilfs_bmap_abort_update(struct nilfs_bmap *bmap,
594 union nilfs_bmap_ptr_req *oldreq,
595 union nilfs_bmap_ptr_req *newreq)
596{
597 bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq);
598 bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq);
599}
600
601static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr,
602 __u64 *ptrp)
603{
604 sector_t blocknr;
605 int ret;
606
607 ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr);
608 if (ret < 0)
609 return ret;
610 if (ptrp != NULL)
611 *ptrp = blocknr;
612 return 0;
613}
614
615static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap,
616 union nilfs_bmap_ptr_req *req)
617{
618 /* ignore target ptr */
619 req->bpr_ptr = bmap->b_last_allocated_ptr++;
620 return 0;
621}
622
/* Committing needs no work: nothing is done with @bmap or @req. */
static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap,
				      union nilfs_bmap_ptr_req *req)
{
	/* intentionally empty */
}
628
629static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap,
630 union nilfs_bmap_ptr_req *req)
631{
632 bmap->b_last_allocated_ptr--;
633}
634
635static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = {
636 .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v,
637 .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v,
638 .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v,
639 .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v,
640 .bpop_commit_start_ptr = nilfs_bmap_commit_start_v,
641 .bpop_abort_start_ptr = nilfs_bmap_abort_start_v,
642 .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v,
643 .bpop_commit_end_ptr = nilfs_bmap_commit_end_v,
644 .bpop_abort_end_ptr = nilfs_bmap_abort_end_v,
645
646 .bpop_translate = nilfs_bmap_translate_v,
647};
648
649static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = {
650 .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v,
651 .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v,
652 .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v,
653 .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v,
654 .bpop_commit_start_ptr = nilfs_bmap_commit_start_v,
655 .bpop_abort_start_ptr = nilfs_bmap_abort_start_v,
656 .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v,
657 .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt,
658 .bpop_abort_end_ptr = nilfs_bmap_abort_end_v,
659
660 .bpop_translate = nilfs_bmap_translate_v,
661};
662
663static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = {
664 .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p,
665 .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p,
666 .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p,
667 .bpop_prepare_start_ptr = NULL,
668 .bpop_commit_start_ptr = NULL,
669 .bpop_abort_start_ptr = NULL,
670 .bpop_prepare_end_ptr = NULL,
671 .bpop_commit_end_ptr = NULL,
672 .bpop_abort_end_ptr = NULL,
673
674 .bpop_translate = NULL,
675};
676
677static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
678 .bpop_prepare_alloc_ptr = NULL,
679 .bpop_commit_alloc_ptr = NULL,
680 .bpop_abort_alloc_ptr = NULL,
681 .bpop_prepare_start_ptr = NULL,
682 .bpop_commit_start_ptr = NULL,
683 .bpop_abort_start_ptr = NULL,
684 .bpop_prepare_end_ptr = NULL,
685 .bpop_commit_end_ptr = NULL,
686 .bpop_abort_end_ptr = NULL,
687
688 .bpop_translate = NULL,
689};
690
691/**
692 * nilfs_bmap_read - read a bmap from an inode
693 * @bmap: bmap
694 * @raw_inode: on-disk inode
695 *
696 * Description: nilfs_bmap_read() initializes the bmap @bmap.
697 *
698 * Return Value: On success, 0 is returned. On error, the following negative
699 * error code is returned.
700 *
701 * %-ENOMEM - Insufficient amount of memory available.
702 */
703int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
704{
705 if (raw_inode == NULL)
706 memset(bmap->b_u.u_data, 0, NILFS_BMAP_SIZE);
707 else
708 memcpy(bmap->b_u.u_data, raw_inode->i_bmap, NILFS_BMAP_SIZE);
709
710 init_rwsem(&bmap->b_sem);
711 bmap->b_state = 0;
712 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
713 switch (bmap->b_inode->i_ino) {
714 case NILFS_DAT_INO:
715 bmap->b_pops = &nilfs_bmap_ptr_ops_p;
716 bmap->b_last_allocated_key = 0; /* XXX: use macro */
717 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
718 break;
719 case NILFS_CPFILE_INO:
720 case NILFS_SUFILE_INO:
721 bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt;
722 bmap->b_last_allocated_key = 0; /* XXX: use macro */
723 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
724 break;
725 default:
726 bmap->b_pops = &nilfs_bmap_ptr_ops_v;
727 bmap->b_last_allocated_key = 0; /* XXX: use macro */
728 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
729 break;
730 }
731
732 return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ?
733 nilfs_btree_init(bmap,
734 NILFS_BMAP_LARGE_LOW,
735 NILFS_BMAP_LARGE_HIGH) :
736 nilfs_direct_init(bmap,
737 NILFS_BMAP_SMALL_LOW,
738 NILFS_BMAP_SMALL_HIGH);
739}
740
741/**
742 * nilfs_bmap_write - write back a bmap to an inode
743 * @bmap: bmap
744 * @raw_inode: on-disk inode
745 *
746 * Description: nilfs_bmap_write() stores @bmap in @raw_inode.
747 */
748void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
749{
750 down_write(&bmap->b_sem);
751 memcpy(raw_inode->i_bmap, bmap->b_u.u_data,
752 NILFS_INODE_BMAP_SIZE * sizeof(__le64));
753 if (bmap->b_inode->i_ino == NILFS_DAT_INO)
754 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
755
756 up_write(&bmap->b_sem);
757}
758
759void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
760{
761 memset(&bmap->b_u, 0, NILFS_BMAP_SIZE);
762 init_rwsem(&bmap->b_sem);
763 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
764 bmap->b_pops = &nilfs_bmap_ptr_ops_gc;
765 bmap->b_last_allocated_key = 0;
766 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
767 bmap->b_state = 0;
768 nilfs_btree_init_gc(bmap);
769}
770
771void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
772{
773 memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union));
774 init_rwsem(&gcbmap->b_sem);
775 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
776}
777
778void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
779{
780 memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union));
781 init_rwsem(&bmap->b_sem);
782 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
783}
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
new file mode 100644
index 000000000000..4f2708abb1ba
--- /dev/null
+++ b/fs/nilfs2/bmap.h
@@ -0,0 +1,244 @@
1/*
2 * bmap.h - NILFS block mapping.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_BMAP_H
24#define _NILFS_BMAP_H
25
26#include <linux/types.h>
27#include <linux/fs.h>
28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h>
30#include "alloc.h"
31
32#define NILFS_BMAP_INVALID_PTR 0
33
34#define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey)
35#define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key)
36#define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr)
37#define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr)
38
39#define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff))
40
41
42struct nilfs_bmap;
43
44/**
45 * union nilfs_bmap_ptr_req - request for bmap ptr
46 * @bpr_ptr: bmap pointer
47 * @bpr_req: request for persistent allocator
 *
 * In bmap.c the *_p pointer operations fill in @bpr_ptr directly, while the
 * *_v operations pass @bpr_req on to the nilfs_dat_*() routines.
48 */
49union nilfs_bmap_ptr_req {
50 __u64 bpr_ptr;
51 struct nilfs_palloc_req bpr_req;
52};
53
54/**
55 * struct nilfs_bmap_stats - bmap statistics
56 * @bs_nblocks: number of blocks created or deleted
 *
 * NOTE(review): no user of this structure is visible in bmap.c; confirm
 * which operations fill it in.
57 */
58struct nilfs_bmap_stats {
59 unsigned int bs_nblocks;
60};
61
62/**
63 * struct nilfs_bmap_operations - bmap operation table
 *
 * bmap.c tests bop_clear, bop_lookup_dirty_buffers, bop_mark,
 * bop_check_insert and bop_check_delete against NULL before calling them,
 * so an implementation may leave those unset.  The remaining operations are
 * called unconditionally.
 *
 * A positive return value from bop_check_insert / bop_check_delete makes
 * bmap.c gather the current entries with bop_gather_data and convert the
 * mapping (to a B-tree on insert, to a direct mapping on delete).
64 */
65struct nilfs_bmap_operations {
66 int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *);
67 int (*bop_insert)(struct nilfs_bmap *, __u64, __u64);
68 int (*bop_delete)(struct nilfs_bmap *, __u64);
69 void (*bop_clear)(struct nilfs_bmap *);
70
71 int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *);
72 void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *,
73 struct list_head *);
74
75 int (*bop_assign)(struct nilfs_bmap *,
76 struct buffer_head **,
77 sector_t,
78 union nilfs_binfo *);
79 int (*bop_mark)(struct nilfs_bmap *, __u64, int);
80
81 /* The following functions are internal use only. */
82 int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
83 int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
84 int (*bop_check_delete)(struct nilfs_bmap *, __u64);
85 int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
86};
87
88
89/**
90 * struct nilfs_bmap_ptr_operations - bmap ptr operation table
 *
 * The operations come in prepare/commit/abort triples for three steps:
 * alloc, start and end.  bpop_translate converts a pointer found by a lookup;
 * nilfs_bmap_lookup_at_level() skips the conversion when it is NULL.
 * The tables defined in bmap.c for the DAT inode and for GC leave several
 * (or all) of these operations NULL.
91 */
92struct nilfs_bmap_ptr_operations {
93 int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *,
94 union nilfs_bmap_ptr_req *);
95 void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *,
96 union nilfs_bmap_ptr_req *);
97 void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *,
98 union nilfs_bmap_ptr_req *);
99 int (*bpop_prepare_start_ptr)(struct nilfs_bmap *,
100 union nilfs_bmap_ptr_req *);
101 void (*bpop_commit_start_ptr)(struct nilfs_bmap *,
102 union nilfs_bmap_ptr_req *,
103 sector_t);
104 void (*bpop_abort_start_ptr)(struct nilfs_bmap *,
105 union nilfs_bmap_ptr_req *);
106 int (*bpop_prepare_end_ptr)(struct nilfs_bmap *,
107 union nilfs_bmap_ptr_req *);
108 void (*bpop_commit_end_ptr)(struct nilfs_bmap *,
109 union nilfs_bmap_ptr_req *);
110 void (*bpop_abort_end_ptr)(struct nilfs_bmap *,
111 union nilfs_bmap_ptr_req *);
112
113 int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *);
114};
115
116
117#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64))
118#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */)
119#define NILFS_BMAP_NEW_PTR_INIT \
120 (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1))
121
122static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
123{
124 return !!(ptr & NILFS_BMAP_NEW_PTR_INIT);
125}
126
127
128/**
129 * struct nilfs_bmap - bmap structure
130 * @b_u: raw data
131 * @b_sem: semaphore
132 * @b_inode: owner of bmap
133 * @b_ops: bmap operation table
134 * @b_pops: bmap ptr operation table
135 * @b_low: low watermark of conversion
136 * @b_high: high watermark of conversion
137 * @b_last_allocated_key: last allocated key for data block
138 * @b_last_allocated_ptr: last allocated ptr for data block
139 * @b_state: state
 *
 * @b_u.u_flags shares storage with the start of @b_u.u_data; its
 * NILFS_BMAP_LARGE bit is what nilfs_bmap_read() tests to choose between the
 * B-tree and the direct mapping.  @b_state holds the NILFS_BMAP_DIRTY bit
 * used by the nilfs_bmap_*dirty() helpers.
140 */
141struct nilfs_bmap {
142 union {
143 __u8 u_flags;
144 __le64 u_data[NILFS_BMAP_SIZE / sizeof(__le64)];
145 } b_u;
146 struct rw_semaphore b_sem;
147 struct inode *b_inode;
148 const struct nilfs_bmap_operations *b_ops;
149 const struct nilfs_bmap_ptr_operations *b_pops;
150 __u64 b_low;
151 __u64 b_high;
152 __u64 b_last_allocated_key;
153 __u64 b_last_allocated_ptr;
154 int b_state;
155};
156
157/* state */
158#define NILFS_BMAP_DIRTY 0x00000001
159
160
161int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
162int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
163void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
164int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *);
165int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
166int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
167int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *);
168int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long);
169void nilfs_bmap_clear(struct nilfs_bmap *);
170int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *);
171void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *);
172int nilfs_bmap_assign(struct nilfs_bmap *, struct buffer_head **,
173 unsigned long, union nilfs_binfo *);
174int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *);
175int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int);
176
177void nilfs_bmap_init_gc(struct nilfs_bmap *);
178void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
179void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
180
181
182/*
183 * Internal use only
184 */
185
186int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t);
187int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64);
188
189
190__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
191 const struct buffer_head *);
192
193__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
194__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
195
196int nilfs_bmap_prepare_update(struct nilfs_bmap *,
197 union nilfs_bmap_ptr_req *,
198 union nilfs_bmap_ptr_req *);
199void nilfs_bmap_commit_update(struct nilfs_bmap *,
200 union nilfs_bmap_ptr_req *,
201 union nilfs_bmap_ptr_req *);
202void nilfs_bmap_abort_update(struct nilfs_bmap *,
203 union nilfs_bmap_ptr_req *,
204 union nilfs_bmap_ptr_req *);
205
206void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
207void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
208
209
210int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64,
211 struct buffer_head **);
212void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *);
213int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64,
214 struct buffer_head **);
215void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *);
216
217
218/* Assume that bmap semaphore is locked. */
219static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
220{
221 return !!(bmap->b_state & NILFS_BMAP_DIRTY);
222}
223
224/* Assume that bmap semaphore is locked. */
225static inline void nilfs_bmap_set_dirty(struct nilfs_bmap *bmap)
226{
227 bmap->b_state |= NILFS_BMAP_DIRTY;
228}
229
230/* Assume that bmap semaphore is locked. */
231static inline void nilfs_bmap_clear_dirty(struct nilfs_bmap *bmap)
232{
233 bmap->b_state &= ~NILFS_BMAP_DIRTY;
234}
235
236
237#define NILFS_BMAP_LARGE 0x1
238
239#define NILFS_BMAP_SMALL_LOW NILFS_DIRECT_KEY_MIN
240#define NILFS_BMAP_SMALL_HIGH NILFS_DIRECT_KEY_MAX
241#define NILFS_BMAP_LARGE_LOW NILFS_BTREE_ROOT_NCHILDREN_MAX
242#define NILFS_BMAP_LARGE_HIGH NILFS_BTREE_KEY_MAX
243
244#endif /* _NILFS_BMAP_H */
diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h
new file mode 100644
index 000000000000..d41509bff47b
--- /dev/null
+++ b/fs/nilfs2/bmap_union.h
@@ -0,0 +1,42 @@
1/*
2 * bmap_union.h - NILFS block mapping.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_BMAP_UNION_H
24#define _NILFS_BMAP_UNION_H
25
26#include "bmap.h"
27#include "direct.h"
28#include "btree.h"
29
30/**
31 * union nilfs_bmap_union - overlay of the generic bmap and its two forms
32 * @bi_bmap: bmap structure
33 * @bi_direct: direct map structure
34 * @bi_btree: B-tree structure
35 */
36union nilfs_bmap_union {
37 struct nilfs_bmap bi_bmap;
38 struct nilfs_direct bi_direct;
39 struct nilfs_btree bi_btree;
40};
41
42#endif /* _NILFS_BMAP_UNION_H */
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
new file mode 100644
index 000000000000..4cc07b2c30e0
--- /dev/null
+++ b/fs/nilfs2/btnode.c
@@ -0,0 +1,316 @@
1/*
2 * btnode.c - NILFS B-tree node cache
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * This file was originally written by Seiji Kihara <kihara@osrg.net>
21 * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
22 * stabilization and simplification.
23 *
24 */
25
26#include <linux/types.h>
27#include <linux/buffer_head.h>
28#include <linux/mm.h>
29#include <linux/backing-dev.h>
30#include "nilfs.h"
31#include "mdt.h"
32#include "dat.h"
33#include "page.h"
34#include "btnode.h"
35
36
37void nilfs_btnode_cache_init_once(struct address_space *btnc)
38{
39 INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC);
40 spin_lock_init(&btnc->tree_lock);
41 INIT_LIST_HEAD(&btnc->private_list);
42 spin_lock_init(&btnc->private_lock);
43
44 spin_lock_init(&btnc->i_mmap_lock);
45 INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
46 INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
47}
48
49static struct address_space_operations def_btnode_aops;
50
51void nilfs_btnode_cache_init(struct address_space *btnc)
52{
53 btnc->host = NULL; /* can safely set to host inode ? */
54 btnc->flags = 0;
55 mapping_set_gfp_mask(btnc, GFP_NOFS);
56 btnc->assoc_mapping = NULL;
57 btnc->backing_dev_info = &default_backing_dev_info;
58 btnc->a_ops = &def_btnode_aops;
59}
60
61void nilfs_btnode_cache_clear(struct address_space *btnc)
62{
63 invalidate_mapping_pages(btnc, 0, -1);
64 truncate_inode_pages(btnc, 0);
65}
66
67int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
68 sector_t pblocknr, struct buffer_head **pbh,
69 int newblk)
70{
71 struct buffer_head *bh;
72 struct inode *inode = NILFS_BTNC_I(btnc);
73 int err;
74
75 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
76 if (unlikely(!bh))
77 return -ENOMEM;
78
79 err = -EEXIST; /* internal code */
80 if (newblk) {
81 if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
82 buffer_dirty(bh))) {
83 brelse(bh);
84 BUG();
85 }
86 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
87 bh->b_blocknr = blocknr;
88 set_buffer_mapped(bh);
89 set_buffer_uptodate(bh);
90 goto found;
91 }
92
93 if (buffer_uptodate(bh) || buffer_dirty(bh))
94 goto found;
95
96 if (pblocknr == 0) {
97 pblocknr = blocknr;
98 if (inode->i_ino != NILFS_DAT_INO) {
99 struct inode *dat =
100 nilfs_dat_inode(NILFS_I_NILFS(inode));
101
102 /* blocknr is a virtual block number */
103 err = nilfs_dat_translate(dat, blocknr, &pblocknr);
104 if (unlikely(err)) {
105 brelse(bh);
106 goto out_locked;
107 }
108 }
109 }
110 lock_buffer(bh);
111 if (buffer_uptodate(bh)) {
112 unlock_buffer(bh);
113 err = -EEXIST; /* internal code */
114 goto found;
115 }
116 set_buffer_mapped(bh);
117 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
118 bh->b_blocknr = pblocknr; /* set block address for read */
119 bh->b_end_io = end_buffer_read_sync;
120 get_bh(bh);
121 submit_bh(READ, bh);
122 bh->b_blocknr = blocknr; /* set back to the given block address */
123 err = 0;
124found:
125 *pbh = bh;
126
127out_locked:
128 unlock_page(bh->b_page);
129 page_cache_release(bh->b_page);
130 return err;
131}
132
133int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
134 sector_t pblocknr, struct buffer_head **pbh, int newblk)
135{
136 struct buffer_head *bh;
137 int err;
138
139 err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
140 if (err == -EEXIST) /* internal code (cache hit) */
141 return 0;
142 if (unlikely(err))
143 return err;
144
145 bh = *pbh;
146 wait_on_buffer(bh);
147 if (!buffer_uptodate(bh)) {
148 brelse(bh);
149 return -EIO;
150 }
151 return 0;
152}
153
154/**
155 * nilfs_btnode_delete - delete B-tree node buffer
156 * @bh: buffer to be deleted
157 *
158 * nilfs_btnode_delete() invalidates the specified buffer and delete the page
159 * including the buffer if the page gets unbusy.
160 */
161void nilfs_btnode_delete(struct buffer_head *bh)
162{
163 struct address_space *mapping;
164 struct page *page = bh->b_page;
165 pgoff_t index = page_index(page);
166 int still_dirty;
167
168 page_cache_get(page);
169 lock_page(page);
170 wait_on_page_writeback(page);
171
172 nilfs_forget_buffer(bh);
173 still_dirty = PageDirty(page);
174 mapping = page->mapping;
175 unlock_page(page);
176 page_cache_release(page);
177
178 if (!still_dirty && mapping)
179 invalidate_inode_pages2_range(mapping, index, index);
180}
181
182/**
183 * nilfs_btnode_prepare_change_key
184 * prepare to move contents of the block for old key to one of new key.
185 * the old buffer will not be removed, but might be reused for new buffer.
186 * it might return -ENOMEM because of memory allocation errors,
187 * and might return -EIO because of disk read errors.
188 */
189int nilfs_btnode_prepare_change_key(struct address_space *btnc,
190 struct nilfs_btnode_chkey_ctxt *ctxt)
191{
192 struct buffer_head *obh, *nbh;
193 struct inode *inode = NILFS_BTNC_I(btnc);
194 __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
195 int err;
196
197 if (oldkey == newkey)
198 return 0;
199
200 obh = ctxt->bh;
201 ctxt->newbh = NULL;
202
203 if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
204 lock_page(obh->b_page);
205 /*
206 * We cannot call radix_tree_preload for the kernels older
207 * than 2.6.23, because it is not exported for modules.
208 */
209 err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
210 if (err)
211 goto failed_unlock;
212 /* BUG_ON(oldkey != obh->b_page->index); */
213 if (unlikely(oldkey != obh->b_page->index))
214 NILFS_PAGE_BUG(obh->b_page,
215 "invalid oldkey %lld (newkey=%lld)",
216 (unsigned long long)oldkey,
217 (unsigned long long)newkey);
218
219retry:
220 spin_lock_irq(&btnc->tree_lock);
221 err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
222 spin_unlock_irq(&btnc->tree_lock);
223 /*
224 * Note: page->index will not change to newkey until
225 * nilfs_btnode_commit_change_key() will be called.
226 * To protect the page in intermediate state, the page lock
227 * is held.
228 */
229 radix_tree_preload_end();
230 if (!err)
231 return 0;
232 else if (err != -EEXIST)
233 goto failed_unlock;
234
235 err = invalidate_inode_pages2_range(btnc, newkey, newkey);
236 if (!err)
237 goto retry;
238 /* fallback to copy mode */
239 unlock_page(obh->b_page);
240 }
241
242 err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1);
243 if (likely(!err)) {
244 BUG_ON(nbh == obh);
245 ctxt->newbh = nbh;
246 }
247 return err;
248
249 failed_unlock:
250 unlock_page(obh->b_page);
251 return err;
252}
253
254/**
255 * nilfs_btnode_commit_change_key
256 * commit the change_key operation prepared by prepare_change_key().
257 */
258void nilfs_btnode_commit_change_key(struct address_space *btnc,
259 struct nilfs_btnode_chkey_ctxt *ctxt)
260{
261 struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
262 __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
263 struct page *opage;
264
265 if (oldkey == newkey)
266 return;
267
268 if (nbh == NULL) { /* blocksize == pagesize */
269 opage = obh->b_page;
270 if (unlikely(oldkey != opage->index))
271 NILFS_PAGE_BUG(opage,
272 "invalid oldkey %lld (newkey=%lld)",
273 (unsigned long long)oldkey,
274 (unsigned long long)newkey);
275 if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage))
276 BUG();
277
278 spin_lock_irq(&btnc->tree_lock);
279 radix_tree_delete(&btnc->page_tree, oldkey);
280 radix_tree_tag_set(&btnc->page_tree, newkey,
281 PAGECACHE_TAG_DIRTY);
282 spin_unlock_irq(&btnc->tree_lock);
283
284 opage->index = obh->b_blocknr = newkey;
285 unlock_page(opage);
286 } else {
287 nilfs_copy_buffer(nbh, obh);
288 nilfs_btnode_mark_dirty(nbh);
289
290 nbh->b_blocknr = newkey;
291 ctxt->bh = nbh;
292 nilfs_btnode_delete(obh); /* will decrement bh->b_count */
293 }
294}
295
296/**
297 * nilfs_btnode_abort_change_key
298 * abort the change_key operation prepared by prepare_change_key().
299 */
300void nilfs_btnode_abort_change_key(struct address_space *btnc,
301 struct nilfs_btnode_chkey_ctxt *ctxt)
302{
303 struct buffer_head *nbh = ctxt->newbh;
304 __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
305
306 if (oldkey == newkey)
307 return;
308
309 if (nbh == NULL) { /* blocksize == pagesize */
310 spin_lock_irq(&btnc->tree_lock);
311 radix_tree_delete(&btnc->page_tree, newkey);
312 spin_unlock_irq(&btnc->tree_lock);
313 unlock_page(ctxt->bh->b_page);
314 } else
315 brelse(nbh);
316}
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
new file mode 100644
index 000000000000..35faa86444a7
--- /dev/null
+++ b/fs/nilfs2/btnode.h
@@ -0,0 +1,58 @@
1/*
2 * btnode.h - NILFS B-tree node cache
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Seiji Kihara <kihara@osrg.net>
21 * Revised by Ryusuke Konishi <ryusuke@osrg.net>
22 */
23
24#ifndef _NILFS_BTNODE_H
25#define _NILFS_BTNODE_H
26
27#include <linux/types.h>
28#include <linux/buffer_head.h>
29#include <linux/fs.h>
30#include <linux/backing-dev.h>
31
32
/**
 * struct nilfs_btnode_chkey_ctxt - context of a B-tree node key change
 * @oldkey: key the node buffer is currently cached under
 * @newkey: key the node buffer is to be moved to
 * @bh: buffer head of the node whose key changes; replaced by @newbh
 *	when nilfs_btnode_commit_change_key() runs in copy mode
 * @newbh: buffer head obtained for @newkey by
 *	nilfs_btnode_prepare_change_key(), or NULL when no separate
 *	buffer is used
 */
33struct nilfs_btnode_chkey_ctxt {
34 __u64 oldkey;
35 __u64 newkey;
36 struct buffer_head *bh;
37 struct buffer_head *newbh;
38};
39
40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *);
42void nilfs_btnode_cache_clear(struct address_space *);
43int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
44 struct buffer_head **, int);
45int nilfs_btnode_get(struct address_space *, __u64, sector_t,
46 struct buffer_head **, int);
47void nilfs_btnode_delete(struct buffer_head *);
48int nilfs_btnode_prepare_change_key(struct address_space *,
49 struct nilfs_btnode_chkey_ctxt *);
50void nilfs_btnode_commit_change_key(struct address_space *,
51 struct nilfs_btnode_chkey_ctxt *);
52void nilfs_btnode_abort_change_key(struct address_space *,
53 struct nilfs_btnode_chkey_ctxt *);
54
55#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
56
57
58#endif /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
new file mode 100644
index 000000000000..6b37a2767293
--- /dev/null
+++ b/fs/nilfs2/btree.c
@@ -0,0 +1,2269 @@
1/*
2 * btree.c - NILFS B-tree.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/slab.h>
24#include <linux/string.h>
25#include <linux/errno.h>
26#include <linux/pagevec.h>
27#include "nilfs.h"
28#include "page.h"
29#include "btnode.h"
30#include "btree.h"
31#include "alloc.h"
32
33/**
34 * struct nilfs_btree_path - A path on which B-tree operations are executed
35 * @bp_bh: buffer head of node block
36 * @bp_sib_bh: buffer head of sibling node block
37 * @bp_index: index of child node
38 * @bp_oldreq: ptr end request for old ptr
39 * @bp_newreq: ptr alloc request for new ptr
 * @bp_ctxt: context for changing the key of a B-tree node buffer
40 * @bp_op: rebalance operation
41 */
42struct nilfs_btree_path {
43 struct buffer_head *bp_bh;
44 struct buffer_head *bp_sib_bh;
45 int bp_index;
46 union nilfs_bmap_ptr_req bp_oldreq;
47 union nilfs_bmap_ptr_req bp_newreq;
48 struct nilfs_btnode_chkey_ctxt bp_ctxt;
49 void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50 int, __u64 *, __u64 *);
51};
52
53/*
54 * B-tree path operations
55 */
56
57static struct kmem_cache *nilfs_btree_path_cache;
58
59int __init nilfs_btree_path_cache_init(void)
60{
61 nilfs_btree_path_cache =
62 kmem_cache_create("nilfs2_btree_path_cache",
63 sizeof(struct nilfs_btree_path) *
64 NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65 return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66}
67
68void nilfs_btree_path_cache_destroy(void)
69{
70 kmem_cache_destroy(nilfs_btree_path_cache);
71}
72
73static inline struct nilfs_btree_path *
74nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75{
76 return (struct nilfs_btree_path *)
77 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78}
79
80static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81 struct nilfs_btree_path *path)
82{
83 kmem_cache_free(nilfs_btree_path_cache, path);
84}
85
86static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87 struct nilfs_btree_path *path)
88{
89 int level;
90
91 for (level = NILFS_BTREE_LEVEL_DATA;
92 level < NILFS_BTREE_LEVEL_MAX;
93 level++) {
94 path[level].bp_bh = NULL;
95 path[level].bp_sib_bh = NULL;
96 path[level].bp_index = 0;
97 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99 path[level].bp_op = NULL;
100 }
101}
102
103static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104 struct nilfs_btree_path *path)
105{
106 int level;
107
108 for (level = NILFS_BTREE_LEVEL_DATA;
109 level < NILFS_BTREE_LEVEL_MAX;
110 level++) {
111 if (path[level].bp_bh != NULL) {
112 nilfs_bmap_put_block(&btree->bt_bmap,
113 path[level].bp_bh);
114 path[level].bp_bh = NULL;
115 }
116 /* sib_bh is released or deleted by prepare or commit
117 * operations. */
118 path[level].bp_sib_bh = NULL;
119 path[level].bp_index = 0;
120 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122 path[level].bp_op = NULL;
123 }
124}
125
126
127/*
128 * B-tree node operations
129 */
130
131static inline int
132nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
133 const struct nilfs_btree_node *node)
134{
135 return node->bn_flags;
136}
137
138static inline void
139nilfs_btree_node_set_flags(struct nilfs_btree *btree,
140 struct nilfs_btree_node *node,
141 int flags)
142{
143 node->bn_flags = flags;
144}
145
146static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
147 const struct nilfs_btree_node *node)
148{
149 return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
150}
151
152static inline int
153nilfs_btree_node_get_level(const struct nilfs_btree *btree,
154 const struct nilfs_btree_node *node)
155{
156 return node->bn_level;
157}
158
159static inline void
160nilfs_btree_node_set_level(struct nilfs_btree *btree,
161 struct nilfs_btree_node *node,
162 int level)
163{
164 node->bn_level = level;
165}
166
167static inline int
168nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
169 const struct nilfs_btree_node *node)
170{
171 return le16_to_cpu(node->bn_nchildren);
172}
173
174static inline void
175nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
176 struct nilfs_btree_node *node,
177 int nchildren)
178{
179 node->bn_nchildren = cpu_to_le16(nchildren);
180}
181
182static inline int
183nilfs_btree_node_size(const struct nilfs_btree *btree)
184{
185 return 1 << btree->bt_bmap.b_inode->i_blkbits;
186}
187
188static inline int
189nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
190 const struct nilfs_btree_node *node)
191{
192 return nilfs_btree_node_root(btree, node) ?
193 NILFS_BTREE_ROOT_NCHILDREN_MIN :
194 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
195}
196
197static inline int
198nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
199 const struct nilfs_btree_node *node)
200{
201 return nilfs_btree_node_root(btree, node) ?
202 NILFS_BTREE_ROOT_NCHILDREN_MAX :
203 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
204}
205
206static inline __le64 *
207nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
208 const struct nilfs_btree_node *node)
209{
210 return (__le64 *)((char *)(node + 1) +
211 (nilfs_btree_node_root(btree, node) ?
212 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
213}
214
215static inline __le64 *
216nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
217 const struct nilfs_btree_node *node)
218{
219 return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
220 nilfs_btree_node_nchildren_max(btree, node));
221}
222
223static inline __u64
224nilfs_btree_node_get_key(const struct nilfs_btree *btree,
225 const struct nilfs_btree_node *node, int index)
226{
227 return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
228 index));
229}
230
231static inline void
232nilfs_btree_node_set_key(struct nilfs_btree *btree,
233 struct nilfs_btree_node *node, int index, __u64 key)
234{
235 *(nilfs_btree_node_dkeys(btree, node) + index) =
236 nilfs_bmap_key_to_dkey(key);
237}
238
239static inline __u64
240nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
241 const struct nilfs_btree_node *node,
242 int index)
243{
244 return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
245 index));
246}
247
248static inline void
249nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
250 struct nilfs_btree_node *node,
251 int index,
252 __u64 ptr)
253{
254 *(nilfs_btree_node_dptrs(btree, node) + index) =
255 nilfs_bmap_ptr_to_dptr(ptr);
256}
257
258static void nilfs_btree_node_init(struct nilfs_btree *btree,
259 struct nilfs_btree_node *node,
260 int flags, int level, int nchildren,
261 const __u64 *keys, const __u64 *ptrs)
262{
263 __le64 *dkeys;
264 __le64 *dptrs;
265 int i;
266
267 nilfs_btree_node_set_flags(btree, node, flags);
268 nilfs_btree_node_set_level(btree, node, level);
269 nilfs_btree_node_set_nchildren(btree, node, nchildren);
270
271 dkeys = nilfs_btree_node_dkeys(btree, node);
272 dptrs = nilfs_btree_node_dptrs(btree, node);
273 for (i = 0; i < nchildren; i++) {
274 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
275 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
276 }
277}
278
279/* Assume the buffer heads corresponding to left and right are locked. */
280static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
281 struct nilfs_btree_node *left,
282 struct nilfs_btree_node *right,
283 int n)
284{
285 __le64 *ldkeys, *rdkeys;
286 __le64 *ldptrs, *rdptrs;
287 int lnchildren, rnchildren;
288
289 ldkeys = nilfs_btree_node_dkeys(btree, left);
290 ldptrs = nilfs_btree_node_dptrs(btree, left);
291 lnchildren = nilfs_btree_node_get_nchildren(btree, left);
292
293 rdkeys = nilfs_btree_node_dkeys(btree, right);
294 rdptrs = nilfs_btree_node_dptrs(btree, right);
295 rnchildren = nilfs_btree_node_get_nchildren(btree, right);
296
297 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
298 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
299 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
300 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
301
302 lnchildren += n;
303 rnchildren -= n;
304 nilfs_btree_node_set_nchildren(btree, left, lnchildren);
305 nilfs_btree_node_set_nchildren(btree, right, rnchildren);
306}
307
308/* Assume that the buffer heads corresponding to left and right are locked. */
309static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
310 struct nilfs_btree_node *left,
311 struct nilfs_btree_node *right,
312 int n)
313{
314 __le64 *ldkeys, *rdkeys;
315 __le64 *ldptrs, *rdptrs;
316 int lnchildren, rnchildren;
317
318 ldkeys = nilfs_btree_node_dkeys(btree, left);
319 ldptrs = nilfs_btree_node_dptrs(btree, left);
320 lnchildren = nilfs_btree_node_get_nchildren(btree, left);
321
322 rdkeys = nilfs_btree_node_dkeys(btree, right);
323 rdptrs = nilfs_btree_node_dptrs(btree, right);
324 rnchildren = nilfs_btree_node_get_nchildren(btree, right);
325
326 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
327 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
328 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
329 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
330
331 lnchildren -= n;
332 rnchildren += n;
333 nilfs_btree_node_set_nchildren(btree, left, lnchildren);
334 nilfs_btree_node_set_nchildren(btree, right, rnchildren);
335}
336
337/* Assume that the buffer head corresponding to node is locked. */
338static void nilfs_btree_node_insert(struct nilfs_btree *btree,
339 struct nilfs_btree_node *node,
340 __u64 key, __u64 ptr, int index)
341{
342 __le64 *dkeys;
343 __le64 *dptrs;
344 int nchildren;
345
346 dkeys = nilfs_btree_node_dkeys(btree, node);
347 dptrs = nilfs_btree_node_dptrs(btree, node);
348 nchildren = nilfs_btree_node_get_nchildren(btree, node);
349 if (index < nchildren) {
350 memmove(dkeys + index + 1, dkeys + index,
351 (nchildren - index) * sizeof(*dkeys));
352 memmove(dptrs + index + 1, dptrs + index,
353 (nchildren - index) * sizeof(*dptrs));
354 }
355 dkeys[index] = nilfs_bmap_key_to_dkey(key);
356 dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
357 nchildren++;
358 nilfs_btree_node_set_nchildren(btree, node, nchildren);
359}
360
361/* Assume that the buffer head corresponding to node is locked. */
362static void nilfs_btree_node_delete(struct nilfs_btree *btree,
363 struct nilfs_btree_node *node,
364 __u64 *keyp, __u64 *ptrp, int index)
365{
366 __u64 key;
367 __u64 ptr;
368 __le64 *dkeys;
369 __le64 *dptrs;
370 int nchildren;
371
372 dkeys = nilfs_btree_node_dkeys(btree, node);
373 dptrs = nilfs_btree_node_dptrs(btree, node);
374 key = nilfs_bmap_dkey_to_key(dkeys[index]);
375 ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
376 nchildren = nilfs_btree_node_get_nchildren(btree, node);
377 if (keyp != NULL)
378 *keyp = key;
379 if (ptrp != NULL)
380 *ptrp = ptr;
381
382 if (index < nchildren - 1) {
383 memmove(dkeys + index, dkeys + index + 1,
384 (nchildren - index - 1) * sizeof(*dkeys));
385 memmove(dptrs + index, dptrs + index + 1,
386 (nchildren - index - 1) * sizeof(*dptrs));
387 }
388 nchildren--;
389 nilfs_btree_node_set_nchildren(btree, node, nchildren);
390}
391
392static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
393 const struct nilfs_btree_node *node,
394 __u64 key, int *indexp)
395{
396 __u64 nkey;
397 int index, low, high, s;
398
399 /* binary search */
400 low = 0;
401 high = nilfs_btree_node_get_nchildren(btree, node) - 1;
402 index = 0;
403 s = 0;
404 while (low <= high) {
405 index = (low + high) / 2;
406 nkey = nilfs_btree_node_get_key(btree, node, index);
407 if (nkey == key) {
408 s = 0;
409 goto out;
410 } else if (nkey < key) {
411 low = index + 1;
412 s = -1;
413 } else {
414 high = index - 1;
415 s = 1;
416 }
417 }
418
419 /* adjust index */
420 if (nilfs_btree_node_get_level(btree, node) >
421 NILFS_BTREE_LEVEL_NODE_MIN) {
422 if ((s > 0) && (index > 0))
423 index--;
424 } else if (s < 0)
425 index++;
426
427 out:
428 *indexp = index;
429
430 return s == 0;
431}
432
433static inline struct nilfs_btree_node *
434nilfs_btree_get_root(const struct nilfs_btree *btree)
435{
436 return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
437}
438
439static inline struct nilfs_btree_node *
440nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
441 const struct nilfs_btree_path *path,
442 int level)
443{
444 return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
445}
446
447static inline struct nilfs_btree_node *
448nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
449 const struct nilfs_btree_path *path,
450 int level)
451{
452 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
453}
454
/* Return the height of the tree: the level of the root node plus one. */
static inline int nilfs_btree_height(const struct nilfs_btree *btree)
{
	const struct nilfs_btree_node *root = nilfs_btree_get_root(btree);

	return nilfs_btree_node_get_level(btree, root) + 1;
}
460
/*
 * Return the node at @level: the root when @level is the top level of the
 * tree, otherwise the node held in the buffer of @path at @level.
 */
static inline struct nilfs_btree_node *
nilfs_btree_get_node(const struct nilfs_btree *btree,
		     const struct nilfs_btree_path *path,
		     int level)
{
	if (level == nilfs_btree_height(btree) - 1)
		return nilfs_btree_get_root(btree);

	return nilfs_btree_get_nonroot_node(btree, path, level);
}
470
471static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
472 struct nilfs_btree_path *path,
473 __u64 key, __u64 *ptrp, int minlevel)
474{
475 struct nilfs_btree_node *node;
476 __u64 ptr;
477 int level, index, found, ret;
478
479 node = nilfs_btree_get_root(btree);
480 level = nilfs_btree_node_get_level(btree, node);
481 if ((level < minlevel) ||
482 (nilfs_btree_node_get_nchildren(btree, node) <= 0))
483 return -ENOENT;
484
485 found = nilfs_btree_node_lookup(btree, node, key, &index);
486 ptr = nilfs_btree_node_get_ptr(btree, node, index);
487 path[level].bp_bh = NULL;
488 path[level].bp_index = index;
489
490 for (level--; level >= minlevel; level--) {
491 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
492 &path[level].bp_bh);
493 if (ret < 0)
494 return ret;
495 node = nilfs_btree_get_nonroot_node(btree, path, level);
496 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
497 if (!found)
498 found = nilfs_btree_node_lookup(btree, node, key,
499 &index);
500 else
501 index = 0;
502 if (index < nilfs_btree_node_nchildren_max(btree, node))
503 ptr = nilfs_btree_node_get_ptr(btree, node, index);
504 else {
505 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
506 /* insert */
507 ptr = NILFS_BMAP_INVALID_PTR;
508 }
509 path[level].bp_index = index;
510 }
511 if (!found)
512 return -ENOENT;
513
514 if (ptrp != NULL)
515 *ptrp = ptr;
516
517 return 0;
518}
519
520static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
521 struct nilfs_btree_path *path,
522 __u64 *keyp, __u64 *ptrp)
523{
524 struct nilfs_btree_node *node;
525 __u64 ptr;
526 int index, level, ret;
527
528 node = nilfs_btree_get_root(btree);
529 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
530 if (index < 0)
531 return -ENOENT;
532 level = nilfs_btree_node_get_level(btree, node);
533 ptr = nilfs_btree_node_get_ptr(btree, node, index);
534 path[level].bp_bh = NULL;
535 path[level].bp_index = index;
536
537 for (level--; level > 0; level--) {
538 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
539 &path[level].bp_bh);
540 if (ret < 0)
541 return ret;
542 node = nilfs_btree_get_nonroot_node(btree, path, level);
543 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
544 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
545 ptr = nilfs_btree_node_get_ptr(btree, node, index);
546 path[level].bp_index = index;
547 }
548
549 if (keyp != NULL)
550 *keyp = nilfs_btree_node_get_key(btree, node, index);
551 if (ptrp != NULL)
552 *ptrp = ptr;
553
554 return 0;
555}
556
557static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
558 __u64 key, int level, __u64 *ptrp)
559{
560 struct nilfs_btree *btree;
561 struct nilfs_btree_path *path;
562 __u64 ptr;
563 int ret;
564
565 btree = (struct nilfs_btree *)bmap;
566 path = nilfs_btree_alloc_path(btree);
567 if (path == NULL)
568 return -ENOMEM;
569 nilfs_btree_init_path(btree, path);
570
571 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572
573 if (ptrp != NULL)
574 *ptrp = ptr;
575
576 nilfs_btree_clear_path(btree, path);
577 nilfs_btree_free_path(btree, path);
578
579 return ret;
580}
581
582static void nilfs_btree_promote_key(struct nilfs_btree *btree,
583 struct nilfs_btree_path *path,
584 int level, __u64 key)
585{
586 if (level < nilfs_btree_height(btree) - 1) {
587 do {
588 lock_buffer(path[level].bp_bh);
589 nilfs_btree_node_set_key(
590 btree,
591 nilfs_btree_get_nonroot_node(
592 btree, path, level),
593 path[level].bp_index, key);
594 if (!buffer_dirty(path[level].bp_bh))
595 nilfs_btnode_mark_dirty(path[level].bp_bh);
596 unlock_buffer(path[level].bp_bh);
597 } while ((path[level].bp_index == 0) &&
598 (++level < nilfs_btree_height(btree) - 1));
599 }
600
601 /* root */
602 if (level == nilfs_btree_height(btree) - 1) {
603 nilfs_btree_node_set_key(btree,
604 nilfs_btree_get_root(btree),
605 path[level].bp_index, key);
606 }
607}
608
609static void nilfs_btree_do_insert(struct nilfs_btree *btree,
610 struct nilfs_btree_path *path,
611 int level, __u64 *keyp, __u64 *ptrp)
612{
613 struct nilfs_btree_node *node;
614
615 if (level < nilfs_btree_height(btree) - 1) {
616 lock_buffer(path[level].bp_bh);
617 node = nilfs_btree_get_nonroot_node(btree, path, level);
618 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
619 path[level].bp_index);
620 if (!buffer_dirty(path[level].bp_bh))
621 nilfs_btnode_mark_dirty(path[level].bp_bh);
622 unlock_buffer(path[level].bp_bh);
623
624 if (path[level].bp_index == 0)
625 nilfs_btree_promote_key(btree, path, level + 1,
626 nilfs_btree_node_get_key(
627 btree, node, 0));
628 } else {
629 node = nilfs_btree_get_root(btree);
630 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
631 path[level].bp_index);
632 }
633}
634
635static void nilfs_btree_carry_left(struct nilfs_btree *btree,
636 struct nilfs_btree_path *path,
637 int level, __u64 *keyp, __u64 *ptrp)
638{
639 struct nilfs_btree_node *node, *left;
640 int nchildren, lnchildren, n, move;
641
642 lock_buffer(path[level].bp_bh);
643 lock_buffer(path[level].bp_sib_bh);
644
645 node = nilfs_btree_get_nonroot_node(btree, path, level);
646 left = nilfs_btree_get_sib_node(btree, path, level);
647 nchildren = nilfs_btree_node_get_nchildren(btree, node);
648 lnchildren = nilfs_btree_node_get_nchildren(btree, left);
649 move = 0;
650
651 n = (nchildren + lnchildren + 1) / 2 - lnchildren;
652 if (n > path[level].bp_index) {
653 /* move insert point */
654 n--;
655 move = 1;
656 }
657
658 nilfs_btree_node_move_left(btree, left, node, n);
659
660 if (!buffer_dirty(path[level].bp_bh))
661 nilfs_btnode_mark_dirty(path[level].bp_bh);
662 if (!buffer_dirty(path[level].bp_sib_bh))
663 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
664
665 unlock_buffer(path[level].bp_bh);
666 unlock_buffer(path[level].bp_sib_bh);
667
668 nilfs_btree_promote_key(btree, path, level + 1,
669 nilfs_btree_node_get_key(btree, node, 0));
670
671 if (move) {
672 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
673 path[level].bp_bh = path[level].bp_sib_bh;
674 path[level].bp_sib_bh = NULL;
675 path[level].bp_index += lnchildren;
676 path[level + 1].bp_index--;
677 } else {
678 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
679 path[level].bp_sib_bh = NULL;
680 path[level].bp_index -= n;
681 }
682
683 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
684}
685
686static void nilfs_btree_carry_right(struct nilfs_btree *btree,
687 struct nilfs_btree_path *path,
688 int level, __u64 *keyp, __u64 *ptrp)
689{
690 struct nilfs_btree_node *node, *right;
691 int nchildren, rnchildren, n, move;
692
693 lock_buffer(path[level].bp_bh);
694 lock_buffer(path[level].bp_sib_bh);
695
696 node = nilfs_btree_get_nonroot_node(btree, path, level);
697 right = nilfs_btree_get_sib_node(btree, path, level);
698 nchildren = nilfs_btree_node_get_nchildren(btree, node);
699 rnchildren = nilfs_btree_node_get_nchildren(btree, right);
700 move = 0;
701
702 n = (nchildren + rnchildren + 1) / 2 - rnchildren;
703 if (n > nchildren - path[level].bp_index) {
704 /* move insert point */
705 n--;
706 move = 1;
707 }
708
709 nilfs_btree_node_move_right(btree, node, right, n);
710
711 if (!buffer_dirty(path[level].bp_bh))
712 nilfs_btnode_mark_dirty(path[level].bp_bh);
713 if (!buffer_dirty(path[level].bp_sib_bh))
714 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
715
716 unlock_buffer(path[level].bp_bh);
717 unlock_buffer(path[level].bp_sib_bh);
718
719 path[level + 1].bp_index++;
720 nilfs_btree_promote_key(btree, path, level + 1,
721 nilfs_btree_node_get_key(btree, right, 0));
722 path[level + 1].bp_index--;
723
724 if (move) {
725 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
726 path[level].bp_bh = path[level].bp_sib_bh;
727 path[level].bp_sib_bh = NULL;
728 path[level].bp_index -=
729 nilfs_btree_node_get_nchildren(btree, node);
730 path[level + 1].bp_index++;
731 } else {
732 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
733 path[level].bp_sib_bh = NULL;
734 }
735
736 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
737}
738
739static void nilfs_btree_split(struct nilfs_btree *btree,
740 struct nilfs_btree_path *path,
741 int level, __u64 *keyp, __u64 *ptrp)
742{
743 struct nilfs_btree_node *node, *right;
744 __u64 newkey;
745 __u64 newptr;
746 int nchildren, n, move;
747
748 lock_buffer(path[level].bp_bh);
749 lock_buffer(path[level].bp_sib_bh);
750
751 node = nilfs_btree_get_nonroot_node(btree, path, level);
752 right = nilfs_btree_get_sib_node(btree, path, level);
753 nchildren = nilfs_btree_node_get_nchildren(btree, node);
754 move = 0;
755
756 n = (nchildren + 1) / 2;
757 if (n > nchildren - path[level].bp_index) {
758 n--;
759 move = 1;
760 }
761
762 nilfs_btree_node_move_right(btree, node, right, n);
763
764 if (!buffer_dirty(path[level].bp_bh))
765 nilfs_btnode_mark_dirty(path[level].bp_bh);
766 if (!buffer_dirty(path[level].bp_sib_bh))
767 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
768
769 unlock_buffer(path[level].bp_bh);
770 unlock_buffer(path[level].bp_sib_bh);
771
772 newkey = nilfs_btree_node_get_key(btree, right, 0);
773 newptr = path[level].bp_newreq.bpr_ptr;
774
775 if (move) {
776 path[level].bp_index -=
777 nilfs_btree_node_get_nchildren(btree, node);
778 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
779 path[level].bp_index);
780
781 *keyp = nilfs_btree_node_get_key(btree, right, 0);
782 *ptrp = path[level].bp_newreq.bpr_ptr;
783
784 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
785 path[level].bp_bh = path[level].bp_sib_bh;
786 path[level].bp_sib_bh = NULL;
787 } else {
788 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
789
790 *keyp = nilfs_btree_node_get_key(btree, right, 0);
791 *ptrp = path[level].bp_newreq.bpr_ptr;
792
793 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
794 path[level].bp_sib_bh = NULL;
795 }
796
797 path[level + 1].bp_index++;
798}
799
800static void nilfs_btree_grow(struct nilfs_btree *btree,
801 struct nilfs_btree_path *path,
802 int level, __u64 *keyp, __u64 *ptrp)
803{
804 struct nilfs_btree_node *root, *child;
805 int n;
806
807 lock_buffer(path[level].bp_sib_bh);
808
809 root = nilfs_btree_get_root(btree);
810 child = nilfs_btree_get_sib_node(btree, path, level);
811
812 n = nilfs_btree_node_get_nchildren(btree, root);
813
814 nilfs_btree_node_move_right(btree, root, child, n);
815 nilfs_btree_node_set_level(btree, root, level + 1);
816
817 if (!buffer_dirty(path[level].bp_sib_bh))
818 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
819
820 unlock_buffer(path[level].bp_sib_bh);
821
822 path[level].bp_bh = path[level].bp_sib_bh;
823 path[level].bp_sib_bh = NULL;
824
825 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
826
827 *keyp = nilfs_btree_node_get_key(btree, child, 0);
828 *ptrp = path[level].bp_newreq.bpr_ptr;
829}
830
831static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
832 const struct nilfs_btree_path *path)
833{
834 struct nilfs_btree_node *node;
835 int level;
836
837 if (path == NULL)
838 return NILFS_BMAP_INVALID_PTR;
839
840 /* left sibling */
841 level = NILFS_BTREE_LEVEL_NODE_MIN;
842 if (path[level].bp_index > 0) {
843 node = nilfs_btree_get_node(btree, path, level);
844 return nilfs_btree_node_get_ptr(btree, node,
845 path[level].bp_index - 1);
846 }
847
848 /* parent */
849 level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
850 if (level <= nilfs_btree_height(btree) - 1) {
851 node = nilfs_btree_get_node(btree, path, level);
852 return nilfs_btree_node_get_ptr(btree, node,
853 path[level].bp_index);
854 }
855
856 return NILFS_BMAP_INVALID_PTR;
857}
858
859static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
860 const struct nilfs_btree_path *path,
861 __u64 key)
862{
863 __u64 ptr;
864
865 ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
866 if (ptr != NILFS_BMAP_INVALID_PTR)
867 /* sequential access */
868 return ptr;
869 else {
870 ptr = nilfs_btree_find_near(btree, path);
871 if (ptr != NILFS_BMAP_INVALID_PTR)
872 /* near */
873 return ptr;
874 }
875 /* block group */
876 return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
877}
878
879static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
880 __u64 ptr)
881{
882 btree->bt_bmap.b_last_allocated_key = key;
883 btree->bt_bmap.b_last_allocated_ptr = ptr;
884}
885
886static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
887 struct nilfs_btree_path *path,
888 int *levelp, __u64 key, __u64 ptr,
889 struct nilfs_bmap_stats *stats)
890{
891 struct buffer_head *bh;
892 struct nilfs_btree_node *node, *parent, *sib;
893 __u64 sibptr;
894 int pindex, level, ret;
895
896 stats->bs_nblocks = 0;
897 level = NILFS_BTREE_LEVEL_DATA;
898
899 /* allocate a new ptr for data block */
900 if (btree->bt_ops->btop_find_target != NULL)
901 path[level].bp_newreq.bpr_ptr =
902 btree->bt_ops->btop_find_target(btree, path, key);
903
904 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
905 &btree->bt_bmap, &path[level].bp_newreq);
906 if (ret < 0)
907 goto err_out_data;
908
909 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
910 level < nilfs_btree_height(btree) - 1;
911 level++) {
912 node = nilfs_btree_get_nonroot_node(btree, path, level);
913 if (nilfs_btree_node_get_nchildren(btree, node) <
914 nilfs_btree_node_nchildren_max(btree, node)) {
915 path[level].bp_op = nilfs_btree_do_insert;
916 stats->bs_nblocks++;
917 goto out;
918 }
919
920 parent = nilfs_btree_get_node(btree, path, level + 1);
921 pindex = path[level + 1].bp_index;
922
923 /* left sibling */
924 if (pindex > 0) {
925 sibptr = nilfs_btree_node_get_ptr(btree, parent,
926 pindex - 1);
927 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
928 &bh);
929 if (ret < 0)
930 goto err_out_child_node;
931 sib = (struct nilfs_btree_node *)bh->b_data;
932 if (nilfs_btree_node_get_nchildren(btree, sib) <
933 nilfs_btree_node_nchildren_max(btree, sib)) {
934 path[level].bp_sib_bh = bh;
935 path[level].bp_op = nilfs_btree_carry_left;
936 stats->bs_nblocks++;
937 goto out;
938 } else
939 nilfs_bmap_put_block(&btree->bt_bmap, bh);
940 }
941
942 /* right sibling */
943 if (pindex <
944 nilfs_btree_node_get_nchildren(btree, parent) - 1) {
945 sibptr = nilfs_btree_node_get_ptr(btree, parent,
946 pindex + 1);
947 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
948 &bh);
949 if (ret < 0)
950 goto err_out_child_node;
951 sib = (struct nilfs_btree_node *)bh->b_data;
952 if (nilfs_btree_node_get_nchildren(btree, sib) <
953 nilfs_btree_node_nchildren_max(btree, sib)) {
954 path[level].bp_sib_bh = bh;
955 path[level].bp_op = nilfs_btree_carry_right;
956 stats->bs_nblocks++;
957 goto out;
958 } else
959 nilfs_bmap_put_block(&btree->bt_bmap, bh);
960 }
961
962 /* split */
963 path[level].bp_newreq.bpr_ptr =
964 path[level - 1].bp_newreq.bpr_ptr + 1;
965 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
966 &btree->bt_bmap, &path[level].bp_newreq);
967 if (ret < 0)
968 goto err_out_child_node;
969 ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
970 path[level].bp_newreq.bpr_ptr,
971 &bh);
972 if (ret < 0)
973 goto err_out_curr_node;
974
975 stats->bs_nblocks++;
976
977 lock_buffer(bh);
978 nilfs_btree_node_init(btree,
979 (struct nilfs_btree_node *)bh->b_data,
980 0, level, 0, NULL, NULL);
981 unlock_buffer(bh);
982 path[level].bp_sib_bh = bh;
983 path[level].bp_op = nilfs_btree_split;
984 }
985
986 /* root */
987 node = nilfs_btree_get_root(btree);
988 if (nilfs_btree_node_get_nchildren(btree, node) <
989 nilfs_btree_node_nchildren_max(btree, node)) {
990 path[level].bp_op = nilfs_btree_do_insert;
991 stats->bs_nblocks++;
992 goto out;
993 }
994
995 /* grow */
996 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
997 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
998 &btree->bt_bmap, &path[level].bp_newreq);
999 if (ret < 0)
1000 goto err_out_child_node;
1001 ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
1002 path[level].bp_newreq.bpr_ptr, &bh);
1003 if (ret < 0)
1004 goto err_out_curr_node;
1005
1006 lock_buffer(bh);
1007 nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1008 0, level, 0, NULL, NULL);
1009 unlock_buffer(bh);
1010 path[level].bp_sib_bh = bh;
1011 path[level].bp_op = nilfs_btree_grow;
1012
1013 level++;
1014 path[level].bp_op = nilfs_btree_do_insert;
1015
1016 /* a newly-created node block and a data block are added */
1017 stats->bs_nblocks += 2;
1018
1019 /* success */
1020 out:
1021 *levelp = level;
1022 return ret;
1023
1024 /* error */
1025 err_out_curr_node:
1026 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1027 &path[level].bp_newreq);
1028 err_out_child_node:
1029 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1030 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1031 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1032 &btree->bt_bmap, &path[level].bp_newreq);
1033
1034 }
1035
1036 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1037 &path[level].bp_newreq);
1038 err_out_data:
1039 *levelp = level;
1040 stats->bs_nblocks = 0;
1041 return ret;
1042}
1043
1044static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1045 struct nilfs_btree_path *path,
1046 int maxlevel, __u64 key, __u64 ptr)
1047{
1048 int level;
1049
1050 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1051 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1052 if (btree->bt_ops->btop_set_target != NULL)
1053 btree->bt_ops->btop_set_target(btree, key, ptr);
1054
1055 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1056 if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
1057 btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1058 &btree->bt_bmap, &path[level - 1].bp_newreq);
1059 }
1060 path[level].bp_op(btree, path, level, &key, &ptr);
1061 }
1062
1063 if (!nilfs_bmap_dirty(&btree->bt_bmap))
1064 nilfs_bmap_set_dirty(&btree->bt_bmap);
1065}
1066
1067static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1068{
1069 struct nilfs_btree *btree;
1070 struct nilfs_btree_path *path;
1071 struct nilfs_bmap_stats stats;
1072 int level, ret;
1073
1074 btree = (struct nilfs_btree *)bmap;
1075 path = nilfs_btree_alloc_path(btree);
1076 if (path == NULL)
1077 return -ENOMEM;
1078 nilfs_btree_init_path(btree, path);
1079
1080 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1081 NILFS_BTREE_LEVEL_NODE_MIN);
1082 if (ret != -ENOENT) {
1083 if (ret == 0)
1084 ret = -EEXIST;
1085 goto out;
1086 }
1087
1088 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1089 if (ret < 0)
1090 goto out;
1091 nilfs_btree_commit_insert(btree, path, level, key, ptr);
1092 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1093
1094 out:
1095 nilfs_btree_clear_path(btree, path);
1096 nilfs_btree_free_path(btree, path);
1097 return ret;
1098}
1099
1100static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1101 struct nilfs_btree_path *path,
1102 int level, __u64 *keyp, __u64 *ptrp)
1103{
1104 struct nilfs_btree_node *node;
1105
1106 if (level < nilfs_btree_height(btree) - 1) {
1107 lock_buffer(path[level].bp_bh);
1108 node = nilfs_btree_get_nonroot_node(btree, path, level);
1109 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1110 path[level].bp_index);
1111 if (!buffer_dirty(path[level].bp_bh))
1112 nilfs_btnode_mark_dirty(path[level].bp_bh);
1113 unlock_buffer(path[level].bp_bh);
1114 if (path[level].bp_index == 0)
1115 nilfs_btree_promote_key(btree, path, level + 1,
1116 nilfs_btree_node_get_key(btree, node, 0));
1117 } else {
1118 node = nilfs_btree_get_root(btree);
1119 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1120 path[level].bp_index);
1121 }
1122}
1123
1124static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1125 struct nilfs_btree_path *path,
1126 int level, __u64 *keyp, __u64 *ptrp)
1127{
1128 struct nilfs_btree_node *node, *left;
1129 int nchildren, lnchildren, n;
1130
1131 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1132
1133 lock_buffer(path[level].bp_bh);
1134 lock_buffer(path[level].bp_sib_bh);
1135
1136 node = nilfs_btree_get_nonroot_node(btree, path, level);
1137 left = nilfs_btree_get_sib_node(btree, path, level);
1138 nchildren = nilfs_btree_node_get_nchildren(btree, node);
1139 lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1140
1141 n = (nchildren + lnchildren) / 2 - nchildren;
1142
1143 nilfs_btree_node_move_right(btree, left, node, n);
1144
1145 if (!buffer_dirty(path[level].bp_bh))
1146 nilfs_btnode_mark_dirty(path[level].bp_bh);
1147 if (!buffer_dirty(path[level].bp_sib_bh))
1148 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1149
1150 unlock_buffer(path[level].bp_bh);
1151 unlock_buffer(path[level].bp_sib_bh);
1152
1153 nilfs_btree_promote_key(btree, path, level + 1,
1154 nilfs_btree_node_get_key(btree, node, 0));
1155
1156 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1157 path[level].bp_sib_bh = NULL;
1158 path[level].bp_index += n;
1159}
1160
1161static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1162 struct nilfs_btree_path *path,
1163 int level, __u64 *keyp, __u64 *ptrp)
1164{
1165 struct nilfs_btree_node *node, *right;
1166 int nchildren, rnchildren, n;
1167
1168 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1169
1170 lock_buffer(path[level].bp_bh);
1171 lock_buffer(path[level].bp_sib_bh);
1172
1173 node = nilfs_btree_get_nonroot_node(btree, path, level);
1174 right = nilfs_btree_get_sib_node(btree, path, level);
1175 nchildren = nilfs_btree_node_get_nchildren(btree, node);
1176 rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1177
1178 n = (nchildren + rnchildren) / 2 - nchildren;
1179
1180 nilfs_btree_node_move_left(btree, node, right, n);
1181
1182 if (!buffer_dirty(path[level].bp_bh))
1183 nilfs_btnode_mark_dirty(path[level].bp_bh);
1184 if (!buffer_dirty(path[level].bp_sib_bh))
1185 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1186
1187 unlock_buffer(path[level].bp_bh);
1188 unlock_buffer(path[level].bp_sib_bh);
1189
1190 path[level + 1].bp_index++;
1191 nilfs_btree_promote_key(btree, path, level + 1,
1192 nilfs_btree_node_get_key(btree, right, 0));
1193 path[level + 1].bp_index--;
1194
1195 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1196 path[level].bp_sib_bh = NULL;
1197}
1198
1199static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1200 struct nilfs_btree_path *path,
1201 int level, __u64 *keyp, __u64 *ptrp)
1202{
1203 struct nilfs_btree_node *node, *left;
1204 int n;
1205
1206 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1207
1208 lock_buffer(path[level].bp_bh);
1209 lock_buffer(path[level].bp_sib_bh);
1210
1211 node = nilfs_btree_get_nonroot_node(btree, path, level);
1212 left = nilfs_btree_get_sib_node(btree, path, level);
1213
1214 n = nilfs_btree_node_get_nchildren(btree, node);
1215
1216 nilfs_btree_node_move_left(btree, left, node, n);
1217
1218 if (!buffer_dirty(path[level].bp_sib_bh))
1219 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1220
1221 unlock_buffer(path[level].bp_bh);
1222 unlock_buffer(path[level].bp_sib_bh);
1223
1224 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1225 path[level].bp_bh = path[level].bp_sib_bh;
1226 path[level].bp_sib_bh = NULL;
1227 path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1228}
1229
1230static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1231 struct nilfs_btree_path *path,
1232 int level, __u64 *keyp, __u64 *ptrp)
1233{
1234 struct nilfs_btree_node *node, *right;
1235 int n;
1236
1237 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1238
1239 lock_buffer(path[level].bp_bh);
1240 lock_buffer(path[level].bp_sib_bh);
1241
1242 node = nilfs_btree_get_nonroot_node(btree, path, level);
1243 right = nilfs_btree_get_sib_node(btree, path, level);
1244
1245 n = nilfs_btree_node_get_nchildren(btree, right);
1246
1247 nilfs_btree_node_move_left(btree, node, right, n);
1248
1249 if (!buffer_dirty(path[level].bp_bh))
1250 nilfs_btnode_mark_dirty(path[level].bp_bh);
1251
1252 unlock_buffer(path[level].bp_bh);
1253 unlock_buffer(path[level].bp_sib_bh);
1254
1255 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1256 path[level].bp_sib_bh = NULL;
1257 path[level + 1].bp_index++;
1258}
1259
1260static void nilfs_btree_shrink(struct nilfs_btree *btree,
1261 struct nilfs_btree_path *path,
1262 int level, __u64 *keyp, __u64 *ptrp)
1263{
1264 struct nilfs_btree_node *root, *child;
1265 int n;
1266
1267 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1268
1269 lock_buffer(path[level].bp_bh);
1270 root = nilfs_btree_get_root(btree);
1271 child = nilfs_btree_get_nonroot_node(btree, path, level);
1272
1273 nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1274 nilfs_btree_node_set_level(btree, root, level);
1275 n = nilfs_btree_node_get_nchildren(btree, child);
1276 nilfs_btree_node_move_left(btree, root, child, n);
1277 unlock_buffer(path[level].bp_bh);
1278
1279 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1280 path[level].bp_bh = NULL;
1281}
1282
1283
1284static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1285 struct nilfs_btree_path *path,
1286 int *levelp,
1287 struct nilfs_bmap_stats *stats)
1288{
1289 struct buffer_head *bh;
1290 struct nilfs_btree_node *node, *parent, *sib;
1291 __u64 sibptr;
1292 int pindex, level, ret;
1293
1294 ret = 0;
1295 stats->bs_nblocks = 0;
1296 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1297 level < nilfs_btree_height(btree) - 1;
1298 level++) {
1299 node = nilfs_btree_get_nonroot_node(btree, path, level);
1300 path[level].bp_oldreq.bpr_ptr =
1301 nilfs_btree_node_get_ptr(btree, node,
1302 path[level].bp_index);
1303 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1304 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1305 &btree->bt_bmap, &path[level].bp_oldreq);
1306 if (ret < 0)
1307 goto err_out_child_node;
1308 }
1309
1310 if (nilfs_btree_node_get_nchildren(btree, node) >
1311 nilfs_btree_node_nchildren_min(btree, node)) {
1312 path[level].bp_op = nilfs_btree_do_delete;
1313 stats->bs_nblocks++;
1314 goto out;
1315 }
1316
1317 parent = nilfs_btree_get_node(btree, path, level + 1);
1318 pindex = path[level + 1].bp_index;
1319
1320 if (pindex > 0) {
1321 /* left sibling */
1322 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1323 pindex - 1);
1324 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1325 &bh);
1326 if (ret < 0)
1327 goto err_out_curr_node;
1328 sib = (struct nilfs_btree_node *)bh->b_data;
1329 if (nilfs_btree_node_get_nchildren(btree, sib) >
1330 nilfs_btree_node_nchildren_min(btree, sib)) {
1331 path[level].bp_sib_bh = bh;
1332 path[level].bp_op = nilfs_btree_borrow_left;
1333 stats->bs_nblocks++;
1334 goto out;
1335 } else {
1336 path[level].bp_sib_bh = bh;
1337 path[level].bp_op = nilfs_btree_concat_left;
1338 stats->bs_nblocks++;
1339 /* continue; */
1340 }
1341 } else if (pindex <
1342 nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1343 /* right sibling */
1344 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1345 pindex + 1);
1346 ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1347 &bh);
1348 if (ret < 0)
1349 goto err_out_curr_node;
1350 sib = (struct nilfs_btree_node *)bh->b_data;
1351 if (nilfs_btree_node_get_nchildren(btree, sib) >
1352 nilfs_btree_node_nchildren_min(btree, sib)) {
1353 path[level].bp_sib_bh = bh;
1354 path[level].bp_op = nilfs_btree_borrow_right;
1355 stats->bs_nblocks++;
1356 goto out;
1357 } else {
1358 path[level].bp_sib_bh = bh;
1359 path[level].bp_op = nilfs_btree_concat_right;
1360 stats->bs_nblocks++;
1361 /* continue; */
1362 }
1363 } else {
1364 /* no siblings */
1365 /* the only child of the root node */
1366 WARN_ON(level != nilfs_btree_height(btree) - 2);
1367 if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1368 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1369 path[level].bp_op = nilfs_btree_shrink;
1370 stats->bs_nblocks += 2;
1371 } else {
1372 path[level].bp_op = nilfs_btree_do_delete;
1373 stats->bs_nblocks++;
1374 }
1375
1376 goto out;
1377
1378 }
1379 }
1380
1381 node = nilfs_btree_get_root(btree);
1382 path[level].bp_oldreq.bpr_ptr =
1383 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1384 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1385 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1386 &btree->bt_bmap, &path[level].bp_oldreq);
1387 if (ret < 0)
1388 goto err_out_child_node;
1389 }
1390 /* child of the root node is deleted */
1391 path[level].bp_op = nilfs_btree_do_delete;
1392 stats->bs_nblocks++;
1393
1394 /* success */
1395 out:
1396 *levelp = level;
1397 return ret;
1398
1399 /* error */
1400 err_out_curr_node:
1401 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1402 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1403 &btree->bt_bmap, &path[level].bp_oldreq);
1404 err_out_child_node:
1405 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1406 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1407 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1408 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1409 &btree->bt_bmap, &path[level].bp_oldreq);
1410 }
1411 *levelp = level;
1412 stats->bs_nblocks = 0;
1413 return ret;
1414}
1415
1416static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1417 struct nilfs_btree_path *path,
1418 int maxlevel)
1419{
1420 int level;
1421
1422 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1423 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1424 btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1425 &btree->bt_bmap, &path[level].bp_oldreq);
1426 path[level].bp_op(btree, path, level, NULL, NULL);
1427 }
1428
1429 if (!nilfs_bmap_dirty(&btree->bt_bmap))
1430 nilfs_bmap_set_dirty(&btree->bt_bmap);
1431}
1432
1433static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1434
1435{
1436 struct nilfs_btree *btree;
1437 struct nilfs_btree_path *path;
1438 struct nilfs_bmap_stats stats;
1439 int level, ret;
1440
1441 btree = (struct nilfs_btree *)bmap;
1442 path = nilfs_btree_alloc_path(btree);
1443 if (path == NULL)
1444 return -ENOMEM;
1445 nilfs_btree_init_path(btree, path);
1446 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1447 NILFS_BTREE_LEVEL_NODE_MIN);
1448 if (ret < 0)
1449 goto out;
1450
1451 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1452 if (ret < 0)
1453 goto out;
1454 nilfs_btree_commit_delete(btree, path, level);
1455 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1456
1457out:
1458 nilfs_btree_clear_path(btree, path);
1459 nilfs_btree_free_path(btree, path);
1460 return ret;
1461}
1462
1463static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1464{
1465 struct nilfs_btree *btree;
1466 struct nilfs_btree_path *path;
1467 int ret;
1468
1469 btree = (struct nilfs_btree *)bmap;
1470 path = nilfs_btree_alloc_path(btree);
1471 if (path == NULL)
1472 return -ENOMEM;
1473 nilfs_btree_init_path(btree, path);
1474
1475 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1476
1477 nilfs_btree_clear_path(btree, path);
1478 nilfs_btree_free_path(btree, path);
1479
1480 return ret;
1481}
1482
1483static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1484{
1485 struct buffer_head *bh;
1486 struct nilfs_btree *btree;
1487 struct nilfs_btree_node *root, *node;
1488 __u64 maxkey, nextmaxkey;
1489 __u64 ptr;
1490 int nchildren, ret;
1491
1492 btree = (struct nilfs_btree *)bmap;
1493 root = nilfs_btree_get_root(btree);
1494 switch (nilfs_btree_height(btree)) {
1495 case 2:
1496 bh = NULL;
1497 node = root;
1498 break;
1499 case 3:
1500 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1501 if (nchildren > 1)
1502 return 0;
1503 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1504 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1505 if (ret < 0)
1506 return ret;
1507 node = (struct nilfs_btree_node *)bh->b_data;
1508 break;
1509 default:
1510 return 0;
1511 }
1512
1513 nchildren = nilfs_btree_node_get_nchildren(btree, node);
1514 maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1515 nextmaxkey = (nchildren > 1) ?
1516 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1517 if (bh != NULL)
1518 nilfs_bmap_put_block(bmap, bh);
1519
1520 return (maxkey == key) && (nextmaxkey < bmap->b_low);
1521}
1522
1523static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1524 __u64 *keys, __u64 *ptrs, int nitems)
1525{
1526 struct buffer_head *bh;
1527 struct nilfs_btree *btree;
1528 struct nilfs_btree_node *node, *root;
1529 __le64 *dkeys;
1530 __le64 *dptrs;
1531 __u64 ptr;
1532 int nchildren, i, ret;
1533
1534 btree = (struct nilfs_btree *)bmap;
1535 root = nilfs_btree_get_root(btree);
1536 switch (nilfs_btree_height(btree)) {
1537 case 2:
1538 bh = NULL;
1539 node = root;
1540 break;
1541 case 3:
1542 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1543 WARN_ON(nchildren > 1);
1544 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1545 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1546 if (ret < 0)
1547 return ret;
1548 node = (struct nilfs_btree_node *)bh->b_data;
1549 break;
1550 default:
1551 node = NULL;
1552 return -EINVAL;
1553 }
1554
1555 nchildren = nilfs_btree_node_get_nchildren(btree, node);
1556 if (nchildren < nitems)
1557 nitems = nchildren;
1558 dkeys = nilfs_btree_node_dkeys(btree, node);
1559 dptrs = nilfs_btree_node_dptrs(btree, node);
1560 for (i = 0; i < nitems; i++) {
1561 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1562 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1563 }
1564
1565 if (bh != NULL)
1566 nilfs_bmap_put_block(bmap, bh);
1567
1568 return nitems;
1569}
1570
1571static int
1572nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1573 union nilfs_bmap_ptr_req *dreq,
1574 union nilfs_bmap_ptr_req *nreq,
1575 struct buffer_head **bhp,
1576 struct nilfs_bmap_stats *stats)
1577{
1578 struct buffer_head *bh;
1579 struct nilfs_btree *btree;
1580 int ret;
1581
1582 btree = (struct nilfs_btree *)bmap;
1583 stats->bs_nblocks = 0;
1584
1585 /* for data */
1586 /* cannot find near ptr */
1587 if (btree->bt_ops->btop_find_target != NULL)
1588 dreq->bpr_ptr
1589 = btree->bt_ops->btop_find_target(btree, NULL, key);
1590 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1591 if (ret < 0)
1592 return ret;
1593
1594 *bhp = NULL;
1595 stats->bs_nblocks++;
1596 if (nreq != NULL) {
1597 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1598 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1599 if (ret < 0)
1600 goto err_out_dreq;
1601
1602 ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
1603 if (ret < 0)
1604 goto err_out_nreq;
1605
1606 *bhp = bh;
1607 stats->bs_nblocks++;
1608 }
1609
1610 /* success */
1611 return 0;
1612
1613 /* error */
1614 err_out_nreq:
1615 bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1616 err_out_dreq:
1617 bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1618 stats->bs_nblocks = 0;
1619 return ret;
1620
1621}
1622
1623static void
1624nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1625 __u64 key, __u64 ptr,
1626 const __u64 *keys, const __u64 *ptrs,
1627 int n, __u64 low, __u64 high,
1628 union nilfs_bmap_ptr_req *dreq,
1629 union nilfs_bmap_ptr_req *nreq,
1630 struct buffer_head *bh)
1631{
1632 struct nilfs_btree *btree;
1633 struct nilfs_btree_node *node;
1634 __u64 tmpptr;
1635
1636 /* free resources */
1637 if (bmap->b_ops->bop_clear != NULL)
1638 bmap->b_ops->bop_clear(bmap);
1639
1640 /* ptr must be a pointer to a buffer head. */
1641 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1642
1643 /* convert and insert */
1644 btree = (struct nilfs_btree *)bmap;
1645 nilfs_btree_init(bmap, low, high);
1646 if (nreq != NULL) {
1647 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
1648 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1649 bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1650 }
1651
1652 /* create child node at level 1 */
1653 lock_buffer(bh);
1654 node = (struct nilfs_btree_node *)bh->b_data;
1655 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1656 nilfs_btree_node_insert(btree, node,
1657 key, dreq->bpr_ptr, n);
1658 if (!buffer_dirty(bh))
1659 nilfs_btnode_mark_dirty(bh);
1660 if (!nilfs_bmap_dirty(bmap))
1661 nilfs_bmap_set_dirty(bmap);
1662
1663 unlock_buffer(bh);
1664 nilfs_bmap_put_block(bmap, bh);
1665
1666 /* create root node at level 2 */
1667 node = nilfs_btree_get_root(btree);
1668 tmpptr = nreq->bpr_ptr;
1669 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1670 2, 1, &keys[0], &tmpptr);
1671 } else {
1672 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
1673 bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1674
1675 /* create root node at level 1 */
1676 node = nilfs_btree_get_root(btree);
1677 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1678 1, n, keys, ptrs);
1679 nilfs_btree_node_insert(btree, node,
1680 key, dreq->bpr_ptr, n);
1681 if (!nilfs_bmap_dirty(bmap))
1682 nilfs_bmap_set_dirty(bmap);
1683 }
1684
1685 if (btree->bt_ops->btop_set_target != NULL)
1686 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1687}
1688
1689/**
1690 * nilfs_btree_convert_and_insert -
1691 * @bmap:
1692 * @key:
1693 * @ptr:
1694 * @keys:
1695 * @ptrs:
1696 * @n:
1697 * @low:
1698 * @high:
1699 */
1700int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1701 __u64 key, __u64 ptr,
1702 const __u64 *keys, const __u64 *ptrs,
1703 int n, __u64 low, __u64 high)
1704{
1705 struct buffer_head *bh;
1706 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1707 struct nilfs_bmap_stats stats;
1708 int ret;
1709
1710 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1711 di = &dreq;
1712 ni = NULL;
1713 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1714 1 << bmap->b_inode->i_blkbits)) {
1715 di = &dreq;
1716 ni = &nreq;
1717 } else {
1718 di = NULL;
1719 ni = NULL;
1720 BUG();
1721 }
1722
1723 ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1724 &stats);
1725 if (ret < 0)
1726 return ret;
1727 nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1728 low, high, di, ni, bh);
1729 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1730 return 0;
1731}
1732
1733static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1734 struct nilfs_btree_path *path,
1735 int level,
1736 struct buffer_head *bh)
1737{
1738 while ((++level < nilfs_btree_height(btree) - 1) &&
1739 !buffer_dirty(path[level].bp_bh))
1740 nilfs_btnode_mark_dirty(path[level].bp_bh);
1741
1742 return 0;
1743}
1744
1745static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1746 struct nilfs_btree_path *path,
1747 int level)
1748{
1749 struct nilfs_btree_node *parent;
1750 int ret;
1751
1752 parent = nilfs_btree_get_node(btree, path, level + 1);
1753 path[level].bp_oldreq.bpr_ptr =
1754 nilfs_btree_node_get_ptr(btree, parent,
1755 path[level + 1].bp_index);
1756 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1757 ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1758 &path[level].bp_oldreq,
1759 &path[level].bp_newreq);
1760 if (ret < 0)
1761 return ret;
1762
1763 if (buffer_nilfs_node(path[level].bp_bh)) {
1764 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1765 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1766 path[level].bp_ctxt.bh = path[level].bp_bh;
1767 ret = nilfs_btnode_prepare_change_key(
1768 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1769 &path[level].bp_ctxt);
1770 if (ret < 0) {
1771 nilfs_bmap_abort_update(&btree->bt_bmap,
1772 &path[level].bp_oldreq,
1773 &path[level].bp_newreq);
1774 return ret;
1775 }
1776 }
1777
1778 return 0;
1779}
1780
1781static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1782 struct nilfs_btree_path *path,
1783 int level)
1784{
1785 struct nilfs_btree_node *parent;
1786
1787 nilfs_bmap_commit_update(&btree->bt_bmap,
1788 &path[level].bp_oldreq,
1789 &path[level].bp_newreq);
1790
1791 if (buffer_nilfs_node(path[level].bp_bh)) {
1792 nilfs_btnode_commit_change_key(
1793 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1794 &path[level].bp_ctxt);
1795 path[level].bp_bh = path[level].bp_ctxt.bh;
1796 }
1797 set_buffer_nilfs_volatile(path[level].bp_bh);
1798
1799 parent = nilfs_btree_get_node(btree, path, level + 1);
1800 nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1801 path[level].bp_newreq.bpr_ptr);
1802}
1803
1804static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1805 struct nilfs_btree_path *path,
1806 int level)
1807{
1808 nilfs_bmap_abort_update(&btree->bt_bmap,
1809 &path[level].bp_oldreq,
1810 &path[level].bp_newreq);
1811 if (buffer_nilfs_node(path[level].bp_bh))
1812 nilfs_btnode_abort_change_key(
1813 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1814 &path[level].bp_ctxt);
1815}
1816
1817static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1818 struct nilfs_btree_path *path,
1819 int minlevel,
1820 int *maxlevelp)
1821{
1822 int level, ret;
1823
1824 level = minlevel;
1825 if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1826 ret = nilfs_btree_prepare_update_v(btree, path, level);
1827 if (ret < 0)
1828 return ret;
1829 }
1830 while ((++level < nilfs_btree_height(btree) - 1) &&
1831 !buffer_dirty(path[level].bp_bh)) {
1832
1833 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1834 ret = nilfs_btree_prepare_update_v(btree, path, level);
1835 if (ret < 0)
1836 goto out;
1837 }
1838
1839 /* success */
1840 *maxlevelp = level - 1;
1841 return 0;
1842
1843 /* error */
1844 out:
1845 while (--level > minlevel)
1846 nilfs_btree_abort_update_v(btree, path, level);
1847 if (!buffer_nilfs_volatile(path[level].bp_bh))
1848 nilfs_btree_abort_update_v(btree, path, level);
1849 return ret;
1850}
1851
1852static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1853 struct nilfs_btree_path *path,
1854 int minlevel,
1855 int maxlevel,
1856 struct buffer_head *bh)
1857{
1858 int level;
1859
1860 if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1861 nilfs_btree_commit_update_v(btree, path, minlevel);
1862
1863 for (level = minlevel + 1; level <= maxlevel; level++)
1864 nilfs_btree_commit_update_v(btree, path, level);
1865}
1866
1867static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1868 struct nilfs_btree_path *path,
1869 int level,
1870 struct buffer_head *bh)
1871{
1872 int maxlevel, ret;
1873 struct nilfs_btree_node *parent;
1874 __u64 ptr;
1875
1876 get_bh(bh);
1877 path[level].bp_bh = bh;
1878 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1879 if (ret < 0)
1880 goto out;
1881
1882 if (buffer_nilfs_volatile(path[level].bp_bh)) {
1883 parent = nilfs_btree_get_node(btree, path, level + 1);
1884 ptr = nilfs_btree_node_get_ptr(btree, parent,
1885 path[level + 1].bp_index);
1886 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1887 if (ret < 0)
1888 goto out;
1889 }
1890
1891 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1892
1893 out:
1894 brelse(path[level].bp_bh);
1895 path[level].bp_bh = NULL;
1896 return ret;
1897}
1898
1899static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1900 struct buffer_head *bh)
1901{
1902 struct nilfs_btree *btree;
1903 struct nilfs_btree_path *path;
1904 struct nilfs_btree_node *node;
1905 __u64 key;
1906 int level, ret;
1907
1908 WARN_ON(!buffer_dirty(bh));
1909
1910 btree = (struct nilfs_btree *)bmap;
1911 path = nilfs_btree_alloc_path(btree);
1912 if (path == NULL)
1913 return -ENOMEM;
1914 nilfs_btree_init_path(btree, path);
1915
1916 if (buffer_nilfs_node(bh)) {
1917 node = (struct nilfs_btree_node *)bh->b_data;
1918 key = nilfs_btree_node_get_key(btree, node, 0);
1919 level = nilfs_btree_node_get_level(btree, node);
1920 } else {
1921 key = nilfs_bmap_data_get_key(bmap, bh);
1922 level = NILFS_BTREE_LEVEL_DATA;
1923 }
1924
1925 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1926 if (ret < 0) {
1927 if (unlikely(ret == -ENOENT))
1928 printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1929 __func__, (unsigned long long)key, level);
1930 goto out;
1931 }
1932
1933 ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1934
1935 out:
1936 nilfs_btree_clear_path(btree, path);
1937 nilfs_btree_free_path(btree, path);
1938
1939 return ret;
1940}
1941
1942static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1943 struct buffer_head *bh)
1944{
1945 return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1946}
1947
1948static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1949 struct list_head *lists,
1950 struct buffer_head *bh)
1951{
1952 struct list_head *head;
1953 struct buffer_head *cbh;
1954 struct nilfs_btree_node *node, *cnode;
1955 __u64 key, ckey;
1956 int level;
1957
1958 get_bh(bh);
1959 node = (struct nilfs_btree_node *)bh->b_data;
1960 key = nilfs_btree_node_get_key(btree, node, 0);
1961 level = nilfs_btree_node_get_level(btree, node);
1962 list_for_each(head, &lists[level]) {
1963 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1964 cnode = (struct nilfs_btree_node *)cbh->b_data;
1965 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1966 if (key < ckey)
1967 break;
1968 }
1969 list_add_tail(&bh->b_assoc_buffers, head);
1970}
1971
1972static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1973 struct list_head *listp)
1974{
1975 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1976 struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1977 struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1978 struct pagevec pvec;
1979 struct buffer_head *bh, *head;
1980 pgoff_t index = 0;
1981 int level, i;
1982
1983 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1984 level < NILFS_BTREE_LEVEL_MAX;
1985 level++)
1986 INIT_LIST_HEAD(&lists[level]);
1987
1988 pagevec_init(&pvec, 0);
1989
1990 while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1991 PAGEVEC_SIZE)) {
1992 for (i = 0; i < pagevec_count(&pvec); i++) {
1993 bh = head = page_buffers(pvec.pages[i]);
1994 do {
1995 if (buffer_dirty(bh))
1996 nilfs_btree_add_dirty_buffer(btree,
1997 lists, bh);
1998 } while ((bh = bh->b_this_page) != head);
1999 }
2000 pagevec_release(&pvec);
2001 cond_resched();
2002 }
2003
2004 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2005 level < NILFS_BTREE_LEVEL_MAX;
2006 level++)
2007 list_splice(&lists[level], listp->prev);
2008}
2009
2010static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2011 struct nilfs_btree_path *path,
2012 int level,
2013 struct buffer_head **bh,
2014 sector_t blocknr,
2015 union nilfs_binfo *binfo)
2016{
2017 struct nilfs_btree_node *parent;
2018 __u64 key;
2019 __u64 ptr;
2020 int ret;
2021
2022 parent = nilfs_btree_get_node(btree, path, level + 1);
2023 ptr = nilfs_btree_node_get_ptr(btree, parent,
2024 path[level + 1].bp_index);
2025 if (buffer_nilfs_node(*bh)) {
2026 path[level].bp_ctxt.oldkey = ptr;
2027 path[level].bp_ctxt.newkey = blocknr;
2028 path[level].bp_ctxt.bh = *bh;
2029 ret = nilfs_btnode_prepare_change_key(
2030 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2031 &path[level].bp_ctxt);
2032 if (ret < 0)
2033 return ret;
2034 nilfs_btnode_commit_change_key(
2035 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2036 &path[level].bp_ctxt);
2037 *bh = path[level].bp_ctxt.bh;
2038 }
2039
2040 nilfs_btree_node_set_ptr(btree, parent,
2041 path[level + 1].bp_index, blocknr);
2042
2043 key = nilfs_btree_node_get_key(btree, parent,
2044 path[level + 1].bp_index);
2045 /* on-disk format */
2046 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2047 binfo->bi_dat.bi_level = level;
2048
2049 return 0;
2050}
2051
2052static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2053 struct nilfs_btree_path *path,
2054 int level,
2055 struct buffer_head **bh,
2056 sector_t blocknr,
2057 union nilfs_binfo *binfo)
2058{
2059 struct nilfs_btree_node *parent;
2060 __u64 key;
2061 __u64 ptr;
2062 union nilfs_bmap_ptr_req req;
2063 int ret;
2064
2065 parent = nilfs_btree_get_node(btree, path, level + 1);
2066 ptr = nilfs_btree_node_get_ptr(btree, parent,
2067 path[level + 1].bp_index);
2068 req.bpr_ptr = ptr;
2069 ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
2070 &req);
2071 if (ret < 0)
2072 return ret;
2073 btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
2074 &req, blocknr);
2075
2076 key = nilfs_btree_node_get_key(btree, parent,
2077 path[level + 1].bp_index);
2078 /* on-disk format */
2079 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2080 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2081
2082 return 0;
2083}
2084
2085static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2086 struct buffer_head **bh,
2087 sector_t blocknr,
2088 union nilfs_binfo *binfo)
2089{
2090 struct nilfs_btree *btree;
2091 struct nilfs_btree_path *path;
2092 struct nilfs_btree_node *node;
2093 __u64 key;
2094 int level, ret;
2095
2096 btree = (struct nilfs_btree *)bmap;
2097 path = nilfs_btree_alloc_path(btree);
2098 if (path == NULL)
2099 return -ENOMEM;
2100 nilfs_btree_init_path(btree, path);
2101
2102 if (buffer_nilfs_node(*bh)) {
2103 node = (struct nilfs_btree_node *)(*bh)->b_data;
2104 key = nilfs_btree_node_get_key(btree, node, 0);
2105 level = nilfs_btree_node_get_level(btree, node);
2106 } else {
2107 key = nilfs_bmap_data_get_key(bmap, *bh);
2108 level = NILFS_BTREE_LEVEL_DATA;
2109 }
2110
2111 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2112 if (ret < 0) {
2113 WARN_ON(ret == -ENOENT);
2114 goto out;
2115 }
2116
2117 ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2118 blocknr, binfo);
2119
2120 out:
2121 nilfs_btree_clear_path(btree, path);
2122 nilfs_btree_free_path(btree, path);
2123
2124 return ret;
2125}
2126
2127static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2128 struct buffer_head **bh,
2129 sector_t blocknr,
2130 union nilfs_binfo *binfo)
2131{
2132 struct nilfs_btree *btree;
2133 struct nilfs_btree_node *node;
2134 __u64 key;
2135 int ret;
2136
2137 btree = (struct nilfs_btree *)bmap;
2138 ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2139 if (ret < 0)
2140 return ret;
2141
2142 if (buffer_nilfs_node(*bh)) {
2143 node = (struct nilfs_btree_node *)(*bh)->b_data;
2144 key = nilfs_btree_node_get_key(btree, node, 0);
2145 } else
2146 key = nilfs_bmap_data_get_key(bmap, *bh);
2147
2148 /* on-disk format */
2149 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2150 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2151
2152 return 0;
2153}
2154
2155static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2156{
2157 struct buffer_head *bh;
2158 struct nilfs_btree *btree;
2159 struct nilfs_btree_path *path;
2160 __u64 ptr;
2161 int ret;
2162
2163 btree = (struct nilfs_btree *)bmap;
2164 path = nilfs_btree_alloc_path(btree);
2165 if (path == NULL)
2166 return -ENOMEM;
2167 nilfs_btree_init_path(btree, path);
2168
2169 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2170 if (ret < 0) {
2171 WARN_ON(ret == -ENOENT);
2172 goto out;
2173 }
2174 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
2175 if (ret < 0) {
2176 WARN_ON(ret == -ENOENT);
2177 goto out;
2178 }
2179
2180 if (!buffer_dirty(bh))
2181 nilfs_btnode_mark_dirty(bh);
2182 nilfs_bmap_put_block(&btree->bt_bmap, bh);
2183 if (!nilfs_bmap_dirty(&btree->bt_bmap))
2184 nilfs_bmap_set_dirty(&btree->bt_bmap);
2185
2186 out:
2187 nilfs_btree_clear_path(btree, path);
2188 nilfs_btree_free_path(btree, path);
2189 return ret;
2190}
2191
2192static const struct nilfs_bmap_operations nilfs_btree_ops = {
2193 .bop_lookup = nilfs_btree_lookup,
2194 .bop_insert = nilfs_btree_insert,
2195 .bop_delete = nilfs_btree_delete,
2196 .bop_clear = NULL,
2197
2198 .bop_propagate = nilfs_btree_propagate,
2199
2200 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,
2201
2202 .bop_assign = nilfs_btree_assign,
2203 .bop_mark = nilfs_btree_mark,
2204
2205 .bop_last_key = nilfs_btree_last_key,
2206 .bop_check_insert = NULL,
2207 .bop_check_delete = nilfs_btree_check_delete,
2208 .bop_gather_data = nilfs_btree_gather_data,
2209};
2210
2211static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2212 .bop_lookup = NULL,
2213 .bop_insert = NULL,
2214 .bop_delete = NULL,
2215 .bop_clear = NULL,
2216
2217 .bop_propagate = nilfs_btree_propagate_gc,
2218
2219 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,
2220
2221 .bop_assign = nilfs_btree_assign_gc,
2222 .bop_mark = NULL,
2223
2224 .bop_last_key = NULL,
2225 .bop_check_insert = NULL,
2226 .bop_check_delete = NULL,
2227 .bop_gather_data = NULL,
2228};
2229
2230static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2231 .btop_find_target = nilfs_btree_find_target_v,
2232 .btop_set_target = nilfs_btree_set_target_v,
2233 .btop_propagate = nilfs_btree_propagate_v,
2234 .btop_assign = nilfs_btree_assign_v,
2235};
2236
2237static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2238 .btop_find_target = NULL,
2239 .btop_set_target = NULL,
2240 .btop_propagate = nilfs_btree_propagate_p,
2241 .btop_assign = nilfs_btree_assign_p,
2242};
2243
2244int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2245{
2246 struct nilfs_btree *btree;
2247
2248 btree = (struct nilfs_btree *)bmap;
2249 bmap->b_ops = &nilfs_btree_ops;
2250 bmap->b_low = low;
2251 bmap->b_high = high;
2252 switch (bmap->b_inode->i_ino) {
2253 case NILFS_DAT_INO:
2254 btree->bt_ops = &nilfs_btree_ops_p;
2255 break;
2256 default:
2257 btree->bt_ops = &nilfs_btree_ops_v;
2258 break;
2259 }
2260
2261 return 0;
2262}
2263
2264void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2265{
2266 bmap->b_low = NILFS_BMAP_LARGE_LOW;
2267 bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2268 bmap->b_ops = &nilfs_btree_ops_gc;
2269}
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
new file mode 100644
index 000000000000..4766deb52fb1
--- /dev/null
+++ b/fs/nilfs2/btree.h
@@ -0,0 +1,117 @@
1/*
2 * btree.h - NILFS B-tree.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_BTREE_H
24#define _NILFS_BTREE_H
25
26#include <linux/types.h>
27#include <linux/buffer_head.h>
28#include <linux/list.h>
29#include <linux/nilfs2_fs.h>
30#include "btnode.h"
31#include "bmap.h"
32
33struct nilfs_btree;
34struct nilfs_btree_path;
35
36/**
37 * struct nilfs_btree_operations - B-tree operation table
38 */
39struct nilfs_btree_operations {
40 __u64 (*btop_find_target)(const struct nilfs_btree *,
41 const struct nilfs_btree_path *, __u64);
42 void (*btop_set_target)(struct nilfs_btree *, __u64, __u64);
43
44 struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *);
45
46 int (*btop_propagate)(struct nilfs_btree *,
47 struct nilfs_btree_path *,
48 int,
49 struct buffer_head *);
50 int (*btop_assign)(struct nilfs_btree *,
51 struct nilfs_btree_path *,
52 int,
53 struct buffer_head **,
54 sector_t,
55 union nilfs_binfo *);
56};
57
58/**
59 * struct nilfs_btree_node - B-tree node
60 * @bn_flags: flags
61 * @bn_level: level
62 * @bn_nchildren: number of children
63 * @bn_pad: padding
64 */
65struct nilfs_btree_node {
66 __u8 bn_flags;
67 __u8 bn_level;
68 __le16 bn_nchildren;
69 __le32 bn_pad;
70};
71
/* node flags (bn_flags) */
#define NILFS_BTREE_NODE_ROOT		0x01

/*
 * levels: data blocks sit at level 0, the lowest node level is one above,
 * and NILFS_BTREE_LEVEL_MAX is the exclusive upper bound of node levels
 */
#define NILFS_BTREE_LEVEL_DATA		0
#define NILFS_BTREE_LEVEL_NODE_MIN	(NILFS_BTREE_LEVEL_DATA + 1)
#define NILFS_BTREE_LEVEL_MAX		14
79
80/**
81 * struct nilfs_btree - B-tree structure
82 * @bt_bmap: bmap base structure
83 * @bt_ops: B-tree operation table
84 */
85struct nilfs_btree {
86 struct nilfs_bmap bt_bmap;
87
88 /* B-tree-specific members */
89 const struct nilfs_btree_operations *bt_ops;
90};
91
92
/*
 * Capacity limits.  Every child occupies one 64-bit on-disk key (dkey) plus
 * one 64-bit on-disk pointer (dptr); both follow the node header.
 */
#define NILFS_BTREE_ROOT_SIZE		NILFS_BMAP_SIZE
#define NILFS_BTREE_ROOT_NCHILDREN_MAX					\
	((NILFS_BTREE_ROOT_SIZE - sizeof(struct nilfs_btree_node)) /	\
	 (2 * sizeof(__le64)))
#define NILFS_BTREE_ROOT_NCHILDREN_MIN	0

/* non-root nodes reserve an extra 64-bit pad after the header */
#define NILFS_BTREE_NODE_EXTRA_PAD_SIZE	(sizeof(__le64))
#define NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize)			\
	(((nodesize) - sizeof(struct nilfs_btree_node) -		\
	  NILFS_BTREE_NODE_EXTRA_PAD_SIZE) /				\
	 (2 * sizeof(__le64)))
#define NILFS_BTREE_NODE_NCHILDREN_MIN(nodesize)			\
	((NILFS_BTREE_NODE_NCHILDREN_MAX(nodesize) - 1) / 2 + 1)

/* smallest and largest key values */
#define NILFS_BTREE_KEY_MIN	((__u64)0)
#define NILFS_BTREE_KEY_MAX	(~(__u64)0)
107
108
109int nilfs_btree_path_cache_init(void);
110void nilfs_btree_path_cache_destroy(void);
111int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64);
112int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
113 const __u64 *, const __u64 *,
114 int, __u64, __u64);
115void nilfs_btree_init_gc(struct nilfs_bmap *);
116
117#endif /* _NILFS_BTREE_H */
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
new file mode 100644
index 000000000000..e90b60dfced9
--- /dev/null
+++ b/fs/nilfs2/cpfile.c
@@ -0,0 +1,925 @@
1/*
2 * cpfile.c - NILFS checkpoint file.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/kernel.h>
24#include <linux/fs.h>
25#include <linux/string.h>
26#include <linux/buffer_head.h>
27#include <linux/errno.h>
28#include <linux/nilfs2_fs.h>
29#include "mdt.h"
30#include "cpfile.h"
31
32
33static inline unsigned long
34nilfs_cpfile_checkpoints_per_block(const struct inode *cpfile)
35{
36 return NILFS_MDT(cpfile)->mi_entries_per_block;
37}
38
39/* block number from the beginning of the file */
40static unsigned long
41nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
42{
43 __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
44 do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
45 return (unsigned long)tcno;
46}
47
48/* offset in block */
49static unsigned long
50nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
51{
52 __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
53 return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
54}
55
56static unsigned long
57nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
58 __u64 curr,
59 __u64 max)
60{
61 return min_t(__u64,
62 nilfs_cpfile_checkpoints_per_block(cpfile) -
63 nilfs_cpfile_get_offset(cpfile, curr),
64 max - curr);
65}
66
67static inline int nilfs_cpfile_is_in_first(const struct inode *cpfile,
68 __u64 cno)
69{
70 return nilfs_cpfile_get_blkoff(cpfile, cno) == 0;
71}
72
73static unsigned int
74nilfs_cpfile_block_add_valid_checkpoints(const struct inode *cpfile,
75 struct buffer_head *bh,
76 void *kaddr,
77 unsigned int n)
78{
79 struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
80 unsigned int count;
81
82 count = le32_to_cpu(cp->cp_checkpoints_count) + n;
83 cp->cp_checkpoints_count = cpu_to_le32(count);
84 return count;
85}
86
87static unsigned int
88nilfs_cpfile_block_sub_valid_checkpoints(const struct inode *cpfile,
89 struct buffer_head *bh,
90 void *kaddr,
91 unsigned int n)
92{
93 struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
94 unsigned int count;
95
96 WARN_ON(le32_to_cpu(cp->cp_checkpoints_count) < n);
97 count = le32_to_cpu(cp->cp_checkpoints_count) - n;
98 cp->cp_checkpoints_count = cpu_to_le32(count);
99 return count;
100}
101
/* the cpfile header sits at the very start of the block mapped at @kaddr */
static inline struct nilfs_cpfile_header *
nilfs_cpfile_block_get_header(const struct inode *cpfile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	void *block = kaddr + bh_offset(bh);

	return block;
}
109
110static struct nilfs_checkpoint *
111nilfs_cpfile_block_get_checkpoint(const struct inode *cpfile, __u64 cno,
112 struct buffer_head *bh,
113 void *kaddr)
114{
115 return kaddr + bh_offset(bh) + nilfs_cpfile_get_offset(cpfile, cno) *
116 NILFS_MDT(cpfile)->mi_entry_size;
117}
118
119static void nilfs_cpfile_block_init(struct inode *cpfile,
120 struct buffer_head *bh,
121 void *kaddr)
122{
123 struct nilfs_checkpoint *cp = kaddr + bh_offset(bh);
124 size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
125 int n = nilfs_cpfile_checkpoints_per_block(cpfile);
126
127 while (n-- > 0) {
128 nilfs_checkpoint_set_invalid(cp);
129 cp = (void *)cp + cpsz;
130 }
131}
132
133static inline int nilfs_cpfile_get_header_block(struct inode *cpfile,
134 struct buffer_head **bhp)
135{
136 return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp);
137}
138
139static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
140 __u64 cno,
141 int create,
142 struct buffer_head **bhp)
143{
144 return nilfs_mdt_get_block(cpfile,
145 nilfs_cpfile_get_blkoff(cpfile, cno),
146 create, nilfs_cpfile_block_init, bhp);
147}
148
149static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
150 __u64 cno)
151{
152 return nilfs_mdt_delete_block(cpfile,
153 nilfs_cpfile_get_blkoff(cpfile, cno));
154}
155
156/**
157 * nilfs_cpfile_get_checkpoint - get a checkpoint
158 * @cpfile: inode of checkpoint file
159 * @cno: checkpoint number
160 * @create: create flag
161 * @cpp: pointer to a checkpoint
162 * @bhp: pointer to a buffer head
163 *
164 * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint
165 * specified by @cno. A new checkpoint will be created if @cno is the current
166 * checkpoint number and @create is nonzero.
167 *
168 * Return Value: On success, 0 is returned, and the checkpoint and the
169 * buffer head of the buffer on which the checkpoint is located are stored in
170 * the place pointed by @cpp and @bhp, respectively. On error, one of the
171 * following negative error codes is returned.
172 *
173 * %-EIO - I/O error.
174 *
175 * %-ENOMEM - Insufficient amount of memory available.
176 *
177 * %-ENOENT - No such checkpoint.
178 *
179 * %-EINVAL - invalid checkpoint.
180 */
181int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
182 __u64 cno,
183 int create,
184 struct nilfs_checkpoint **cpp,
185 struct buffer_head **bhp)
186{
187 struct buffer_head *header_bh, *cp_bh;
188 struct nilfs_cpfile_header *header;
189 struct nilfs_checkpoint *cp;
190 void *kaddr;
191 int ret;
192
193 if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) ||
194 (cno < nilfs_mdt_cno(cpfile) && create)))
195 return -EINVAL;
196
197 down_write(&NILFS_MDT(cpfile)->mi_sem);
198
199 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
200 if (ret < 0)
201 goto out_sem;
202 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh);
203 if (ret < 0)
204 goto out_header;
205 kaddr = kmap(cp_bh->b_page);
206 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
207 if (nilfs_checkpoint_invalid(cp)) {
208 if (!create) {
209 kunmap(cp_bh->b_page);
210 brelse(cp_bh);
211 ret = -ENOENT;
212 goto out_header;
213 }
214 /* a newly-created checkpoint */
215 nilfs_checkpoint_clear_invalid(cp);
216 if (!nilfs_cpfile_is_in_first(cpfile, cno))
217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
218 kaddr, 1);
219 nilfs_mdt_mark_buffer_dirty(cp_bh);
220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0);
226 nilfs_mdt_mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile);
228 }
229
230 if (cpp != NULL)
231 *cpp = cp;
232 *bhp = cp_bh;
233
234 out_header:
235 brelse(header_bh);
236
237 out_sem:
238 up_write(&NILFS_MDT(cpfile)->mi_sem);
239 return ret;
240}
241
242/**
243 * nilfs_cpfile_put_checkpoint - put a checkpoint
244 * @cpfile: inode of checkpoint file
245 * @cno: checkpoint number
246 * @bh: buffer head
247 *
248 * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint
249 * specified by @cno. @bh must be the buffer head which has been returned by
250 * a previous call to nilfs_cpfile_get_checkpoint() with @cno.
251 */
252void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
253 struct buffer_head *bh)
254{
255 kunmap(bh->b_page);
256 brelse(bh);
257}
258
259/**
260 * nilfs_cpfile_delete_checkpoints - delete checkpoints
261 * @cpfile: inode of checkpoint file
262 * @start: start checkpoint number
263 * @end: end checkpoint numer
264 *
265 * Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in
266 * the period from @start to @end, excluding @end itself. The checkpoints
267 * which have been already deleted are ignored.
268 *
269 * Return Value: On success, 0 is returned. On error, one of the following
270 * negative error codes is returned.
271 *
272 * %-EIO - I/O error.
273 *
274 * %-ENOMEM - Insufficient amount of memory available.
275 *
276 * %-EINVAL - invalid checkpoints.
277 */
278int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
279 __u64 start,
280 __u64 end)
281{
282 struct buffer_head *header_bh, *cp_bh;
283 struct nilfs_cpfile_header *header;
284 struct nilfs_checkpoint *cp;
285 size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
286 __u64 cno;
287 void *kaddr;
288 unsigned long tnicps;
289 int ret, ncps, nicps, count, i;
290
291 if (unlikely(start == 0 || start > end)) {
292 printk(KERN_ERR "%s: invalid range of checkpoint numbers: "
293 "[%llu, %llu)\n", __func__,
294 (unsigned long long)start, (unsigned long long)end);
295 return -EINVAL;
296 }
297
298 /* cannot delete the latest checkpoint */
299 if (start == nilfs_mdt_cno(cpfile) - 1)
300 return -EPERM;
301
302 down_write(&NILFS_MDT(cpfile)->mi_sem);
303
304 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
305 if (ret < 0)
306 goto out_sem;
307 tnicps = 0;
308
309 for (cno = start; cno < end; cno += ncps) {
310 ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end);
311 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
312 if (ret < 0) {
313 if (ret != -ENOENT)
314 goto out_sem;
315 /* skip hole */
316 ret = 0;
317 continue;
318 }
319
320 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
321 cp = nilfs_cpfile_block_get_checkpoint(
322 cpfile, cno, cp_bh, kaddr);
323 nicps = 0;
324 for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) {
325 WARN_ON(nilfs_checkpoint_snapshot(cp));
326 if (!nilfs_checkpoint_invalid(cp)) {
327 nilfs_checkpoint_set_invalid(cp);
328 nicps++;
329 }
330 }
331 if (nicps > 0) {
332 tnicps += nicps;
333 nilfs_mdt_mark_buffer_dirty(cp_bh);
334 nilfs_mdt_mark_dirty(cpfile);
335 if (!nilfs_cpfile_is_in_first(cpfile, cno) &&
336 (count = nilfs_cpfile_block_sub_valid_checkpoints(
337 cpfile, cp_bh, kaddr, nicps)) == 0) {
338 /* make hole */
339 kunmap_atomic(kaddr, KM_USER0);
340 brelse(cp_bh);
341 ret = nilfs_cpfile_delete_checkpoint_block(
342 cpfile, cno);
343 if (ret == 0)
344 continue;
345 printk(KERN_ERR "%s: cannot delete block\n",
346 __func__);
347 goto out_sem;
348 }
349 }
350
351 kunmap_atomic(kaddr, KM_USER0);
352 brelse(cp_bh);
353 }
354
355 if (tnicps > 0) {
356 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
357 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
358 kaddr);
359 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
360 nilfs_mdt_mark_buffer_dirty(header_bh);
361 nilfs_mdt_mark_dirty(cpfile);
362 kunmap_atomic(kaddr, KM_USER0);
363 }
364 brelse(header_bh);
365
366 out_sem:
367 up_write(&NILFS_MDT(cpfile)->mi_sem);
368 return ret;
369}
370
371static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile,
372 struct nilfs_checkpoint *cp,
373 struct nilfs_cpinfo *ci)
374{
375 ci->ci_flags = le32_to_cpu(cp->cp_flags);
376 ci->ci_cno = le64_to_cpu(cp->cp_cno);
377 ci->ci_create = le64_to_cpu(cp->cp_create);
378 ci->ci_nblk_inc = le64_to_cpu(cp->cp_nblk_inc);
379 ci->ci_inodes_count = le64_to_cpu(cp->cp_inodes_count);
380 ci->ci_blocks_count = le64_to_cpu(cp->cp_blocks_count);
381 ci->ci_next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
382}
383
384static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
385 struct nilfs_cpinfo *ci, size_t nci)
386{
387 struct nilfs_checkpoint *cp;
388 struct buffer_head *bh;
389 size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
390 __u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
391 void *kaddr;
392 int n, ret;
393 int ncps, i;
394
395 if (cno == 0)
396 return -ENOENT; /* checkpoint number 0 is invalid */
397 down_read(&NILFS_MDT(cpfile)->mi_sem);
398
399 for (n = 0; cno < cur_cno && n < nci; cno += ncps) {
400 ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
401 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
402 if (ret < 0) {
403 if (ret != -ENOENT)
404 goto out;
405 continue; /* skip hole */
406 }
407
408 kaddr = kmap_atomic(bh->b_page, KM_USER0);
409 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
410 for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
411 if (!nilfs_checkpoint_invalid(cp))
412 nilfs_cpfile_checkpoint_to_cpinfo(
413 cpfile, cp, &ci[n++]);
414 }
415 kunmap_atomic(kaddr, KM_USER0);
416 brelse(bh);
417 }
418
419 ret = n;
420 if (n > 0)
421 *cnop = ci[n - 1].ci_cno + 1;
422
423 out:
424 up_read(&NILFS_MDT(cpfile)->mi_sem);
425 return ret;
426}
427
428static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
429 struct nilfs_cpinfo *ci, size_t nci)
430{
431 struct buffer_head *bh;
432 struct nilfs_cpfile_header *header;
433 struct nilfs_checkpoint *cp;
434 __u64 curr = *cnop, next;
435 unsigned long curr_blkoff, next_blkoff;
436 void *kaddr;
437 int n = 0, ret;
438
439 down_read(&NILFS_MDT(cpfile)->mi_sem);
440
441 if (curr == 0) {
442 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
443 if (ret < 0)
444 goto out;
445 kaddr = kmap_atomic(bh->b_page, KM_USER0);
446 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
447 curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
448 kunmap_atomic(kaddr, KM_USER0);
449 brelse(bh);
450 if (curr == 0) {
451 ret = 0;
452 goto out;
453 }
454 } else if (unlikely(curr == ~(__u64)0)) {
455 ret = 0;
456 goto out;
457 }
458
459 curr_blkoff = nilfs_cpfile_get_blkoff(cpfile, curr);
460 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &bh);
461 if (unlikely(ret < 0)) {
462 if (ret == -ENOENT)
463 ret = 0; /* No snapshots (started from a hole block) */
464 goto out;
465 }
466 kaddr = kmap_atomic(bh->b_page, KM_USER0);
467 while (n < nci) {
468 cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
469 curr = ~(__u64)0; /* Terminator */
470 if (unlikely(nilfs_checkpoint_invalid(cp) ||
471 !nilfs_checkpoint_snapshot(cp)))
472 break;
473 nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]);
474 next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
475 if (next == 0)
476 break; /* reach end of the snapshot list */
477
478 next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
479 if (curr_blkoff != next_blkoff) {
480 kunmap_atomic(kaddr, KM_USER0);
481 brelse(bh);
482 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
483 0, &bh);
484 if (unlikely(ret < 0)) {
485 WARN_ON(ret == -ENOENT);
486 goto out;
487 }
488 kaddr = kmap_atomic(bh->b_page, KM_USER0);
489 }
490 curr = next;
491 curr_blkoff = next_blkoff;
492 }
493 kunmap_atomic(kaddr, KM_USER0);
494 brelse(bh);
495 *cnop = curr;
496 ret = n;
497
498 out:
499 up_read(&NILFS_MDT(cpfile)->mi_sem);
500 return ret;
501}
502
503/**
504 * nilfs_cpfile_get_cpinfo -
505 * @cpfile:
506 * @cno:
507 * @ci:
508 * @nci:
509 */
510
511ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
512 struct nilfs_cpinfo *ci, size_t nci)
513{
514 switch (mode) {
515 case NILFS_CHECKPOINT:
516 return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci);
517 case NILFS_SNAPSHOT:
518 return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci);
519 default:
520 return -EINVAL;
521 }
522}
523
524/**
525 * nilfs_cpfile_delete_checkpoint -
526 * @cpfile:
527 * @cno:
528 */
529int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
530{
531 struct nilfs_cpinfo ci;
532 __u64 tcno = cno;
533 ssize_t nci;
534 int ret;
535
536 nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1);
537 if (nci < 0)
538 return nci;
539 else if (nci == 0 || ci.ci_cno != cno)
540 return -ENOENT;
541
542 /* cannot delete the latest checkpoint nor snapshots */
543 ret = nilfs_cpinfo_snapshot(&ci);
544 if (ret < 0)
545 return ret;
546 else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1)
547 return -EPERM;
548
549 return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
550}
551
552static struct nilfs_snapshot_list *
553nilfs_cpfile_block_get_snapshot_list(const struct inode *cpfile,
554 __u64 cno,
555 struct buffer_head *bh,
556 void *kaddr)
557{
558 struct nilfs_cpfile_header *header;
559 struct nilfs_checkpoint *cp;
560 struct nilfs_snapshot_list *list;
561
562 if (cno != 0) {
563 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
564 list = &cp->cp_snapshot_list;
565 } else {
566 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
567 list = &header->ch_snapshot_list;
568 }
569 return list;
570}
571
572static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
573{
574 struct buffer_head *header_bh, *curr_bh, *prev_bh, *cp_bh;
575 struct nilfs_cpfile_header *header;
576 struct nilfs_checkpoint *cp;
577 struct nilfs_snapshot_list *list;
578 __u64 curr, prev;
579 unsigned long curr_blkoff, prev_blkoff;
580 void *kaddr;
581 int ret;
582
583 if (cno == 0)
584 return -ENOENT; /* checkpoint number 0 is invalid */
585 down_write(&NILFS_MDT(cpfile)->mi_sem);
586
587 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
588 if (ret < 0)
589 goto out_sem;
590 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
591 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
592 if (nilfs_checkpoint_invalid(cp)) {
593 ret = -ENOENT;
594 kunmap_atomic(kaddr, KM_USER0);
595 goto out_cp;
596 }
597 if (nilfs_checkpoint_snapshot(cp)) {
598 ret = 0;
599 kunmap_atomic(kaddr, KM_USER0);
600 goto out_cp;
601 }
602 kunmap_atomic(kaddr, KM_USER0);
603
604 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
605 if (ret < 0)
606 goto out_cp;
607 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
608 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
609 list = &header->ch_snapshot_list;
610 curr_bh = header_bh;
611 get_bh(curr_bh);
612 curr = 0;
613 curr_blkoff = 0;
614 prev = le64_to_cpu(list->ssl_prev);
615 while (prev > cno) {
616 prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
617 curr = prev;
618 if (curr_blkoff != prev_blkoff) {
619 kunmap_atomic(kaddr, KM_USER0);
620 brelse(curr_bh);
621 ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
622 0, &curr_bh);
623 if (ret < 0)
624 goto out_header;
625 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
626 }
627 curr_blkoff = prev_blkoff;
628 cp = nilfs_cpfile_block_get_checkpoint(
629 cpfile, curr, curr_bh, kaddr);
630 list = &cp->cp_snapshot_list;
631 prev = le64_to_cpu(list->ssl_prev);
632 }
633 kunmap_atomic(kaddr, KM_USER0);
634
635 if (prev != 0) {
636 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
637 &prev_bh);
638 if (ret < 0)
639 goto out_curr;
640 } else {
641 prev_bh = header_bh;
642 get_bh(prev_bh);
643 }
644
645 kaddr = kmap_atomic(curr_bh->b_page, KM_USER0);
646 list = nilfs_cpfile_block_get_snapshot_list(
647 cpfile, curr, curr_bh, kaddr);
648 list->ssl_prev = cpu_to_le64(cno);
649 kunmap_atomic(kaddr, KM_USER0);
650
651 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
652 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
653 cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
654 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
655 nilfs_checkpoint_set_snapshot(cp);
656 kunmap_atomic(kaddr, KM_USER0);
657
658 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
659 list = nilfs_cpfile_block_get_snapshot_list(
660 cpfile, prev, prev_bh, kaddr);
661 list->ssl_next = cpu_to_le64(cno);
662 kunmap_atomic(kaddr, KM_USER0);
663
664 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
665 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
666 le64_add_cpu(&header->ch_nsnapshots, 1);
667 kunmap_atomic(kaddr, KM_USER0);
668
669 nilfs_mdt_mark_buffer_dirty(prev_bh);
670 nilfs_mdt_mark_buffer_dirty(curr_bh);
671 nilfs_mdt_mark_buffer_dirty(cp_bh);
672 nilfs_mdt_mark_buffer_dirty(header_bh);
673 nilfs_mdt_mark_dirty(cpfile);
674
675 brelse(prev_bh);
676
677 out_curr:
678 brelse(curr_bh);
679
680 out_header:
681 brelse(header_bh);
682
683 out_cp:
684 brelse(cp_bh);
685
686 out_sem:
687 up_write(&NILFS_MDT(cpfile)->mi_sem);
688 return ret;
689}
690
691static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
692{
693 struct buffer_head *header_bh, *next_bh, *prev_bh, *cp_bh;
694 struct nilfs_cpfile_header *header;
695 struct nilfs_checkpoint *cp;
696 struct nilfs_snapshot_list *list;
697 __u64 next, prev;
698 void *kaddr;
699 int ret;
700
701 if (cno == 0)
702 return -ENOENT; /* checkpoint number 0 is invalid */
703 down_write(&NILFS_MDT(cpfile)->mi_sem);
704
705 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
706 if (ret < 0)
707 goto out_sem;
708 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
709 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
710 if (nilfs_checkpoint_invalid(cp)) {
711 ret = -ENOENT;
712 kunmap_atomic(kaddr, KM_USER0);
713 goto out_cp;
714 }
715 if (!nilfs_checkpoint_snapshot(cp)) {
716 ret = 0;
717 kunmap_atomic(kaddr, KM_USER0);
718 goto out_cp;
719 }
720
721 list = &cp->cp_snapshot_list;
722 next = le64_to_cpu(list->ssl_next);
723 prev = le64_to_cpu(list->ssl_prev);
724 kunmap_atomic(kaddr, KM_USER0);
725
726 ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
727 if (ret < 0)
728 goto out_cp;
729 if (next != 0) {
730 ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0,
731 &next_bh);
732 if (ret < 0)
733 goto out_header;
734 } else {
735 next_bh = header_bh;
736 get_bh(next_bh);
737 }
738 if (prev != 0) {
739 ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
740 &prev_bh);
741 if (ret < 0)
742 goto out_next;
743 } else {
744 prev_bh = header_bh;
745 get_bh(prev_bh);
746 }
747
748 kaddr = kmap_atomic(next_bh->b_page, KM_USER0);
749 list = nilfs_cpfile_block_get_snapshot_list(
750 cpfile, next, next_bh, kaddr);
751 list->ssl_prev = cpu_to_le64(prev);
752 kunmap_atomic(kaddr, KM_USER0);
753
754 kaddr = kmap_atomic(prev_bh->b_page, KM_USER0);
755 list = nilfs_cpfile_block_get_snapshot_list(
756 cpfile, prev, prev_bh, kaddr);
757 list->ssl_next = cpu_to_le64(next);
758 kunmap_atomic(kaddr, KM_USER0);
759
760 kaddr = kmap_atomic(cp_bh->b_page, KM_USER0);
761 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
762 cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
763 cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
764 nilfs_checkpoint_clear_snapshot(cp);
765 kunmap_atomic(kaddr, KM_USER0);
766
767 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
768 header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
769 le64_add_cpu(&header->ch_nsnapshots, -1);
770 kunmap_atomic(kaddr, KM_USER0);
771
772 nilfs_mdt_mark_buffer_dirty(next_bh);
773 nilfs_mdt_mark_buffer_dirty(prev_bh);
774 nilfs_mdt_mark_buffer_dirty(cp_bh);
775 nilfs_mdt_mark_buffer_dirty(header_bh);
776 nilfs_mdt_mark_dirty(cpfile);
777
778 brelse(prev_bh);
779
780 out_next:
781 brelse(next_bh);
782
783 out_header:
784 brelse(header_bh);
785
786 out_cp:
787 brelse(cp_bh);
788
789 out_sem:
790 up_write(&NILFS_MDT(cpfile)->mi_sem);
791 return ret;
792}
793
794/**
795 * nilfs_cpfile_is_snapshot -
796 * @cpfile: inode of checkpoint file
797 * @cno: checkpoint number
798 *
799 * Description:
800 *
801 * Return Value: On success, 1 is returned if the checkpoint specified by
802 * @cno is a snapshot, or 0 if not. On error, one of the following negative
803 * error codes is returned.
804 *
805 * %-EIO - I/O error.
806 *
807 * %-ENOMEM - Insufficient amount of memory available.
808 *
809 * %-ENOENT - No such checkpoint.
810 */
811int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
812{
813 struct buffer_head *bh;
814 struct nilfs_checkpoint *cp;
815 void *kaddr;
816 int ret;
817
818 if (cno == 0)
819 return -ENOENT; /* checkpoint number 0 is invalid */
820 down_read(&NILFS_MDT(cpfile)->mi_sem);
821
822 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
823 if (ret < 0)
824 goto out;
825 kaddr = kmap_atomic(bh->b_page, KM_USER0);
826 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
827 ret = nilfs_checkpoint_snapshot(cp);
828 kunmap_atomic(kaddr, KM_USER0);
829 brelse(bh);
830
831 out:
832 up_read(&NILFS_MDT(cpfile)->mi_sem);
833 return ret;
834}
835
836/**
837 * nilfs_cpfile_change_cpmode - change checkpoint mode
838 * @cpfile: inode of checkpoint file
839 * @cno: checkpoint number
840 * @status: mode of checkpoint
841 *
842 * Description: nilfs_change_cpmode() changes the mode of the checkpoint
843 * specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT.
844 *
845 * Return Value: On success, 0 is returned. On error, one of the following
846 * negative error codes is returned.
847 *
848 * %-EIO - I/O error.
849 *
850 * %-ENOMEM - Insufficient amount of memory available.
851 *
852 * %-ENOENT - No such checkpoint.
853 */
854int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
855{
856 struct the_nilfs *nilfs;
857 int ret;
858
859 nilfs = NILFS_MDT(cpfile)->mi_nilfs;
860
861 switch (mode) {
862 case NILFS_CHECKPOINT:
863 /*
864 * Check for protecting existing snapshot mounts:
865 * bd_mount_sem is used to make this operation atomic and
866 * exclusive with a new mount job. Though it doesn't cover
867 * umount, it's enough for the purpose.
868 */
869 down(&nilfs->ns_bdev->bd_mount_sem);
870 if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
871 /* Current implementation does not have to protect
872 plain read-only mounts since they are exclusive
873 with a read/write mount and are protected from the
874 cleaner. */
875 ret = -EBUSY;
876 } else
877 ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
878 up(&nilfs->ns_bdev->bd_mount_sem);
879 return ret;
880 case NILFS_SNAPSHOT:
881 return nilfs_cpfile_set_snapshot(cpfile, cno);
882 default:
883 return -EINVAL;
884 }
885}
886
887/**
888 * nilfs_cpfile_get_stat - get checkpoint statistics
889 * @cpfile: inode of checkpoint file
890 * @stat: pointer to a structure of checkpoint statistics
891 *
892 * Description: nilfs_cpfile_get_stat() returns information about checkpoints.
893 *
894 * Return Value: On success, 0 is returned, and checkpoints information is
895 * stored in the place pointed by @stat. On error, one of the following
896 * negative error codes is returned.
897 *
898 * %-EIO - I/O error.
899 *
900 * %-ENOMEM - Insufficient amount of memory available.
901 */
902int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
903{
904 struct buffer_head *bh;
905 struct nilfs_cpfile_header *header;
906 void *kaddr;
907 int ret;
908
909 down_read(&NILFS_MDT(cpfile)->mi_sem);
910
911 ret = nilfs_cpfile_get_header_block(cpfile, &bh);
912 if (ret < 0)
913 goto out_sem;
914 kaddr = kmap_atomic(bh->b_page, KM_USER0);
915 header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
916 cpstat->cs_cno = nilfs_mdt_cno(cpfile);
917 cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
918 cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
919 kunmap_atomic(kaddr, KM_USER0);
920 brelse(bh);
921
922 out_sem:
923 up_read(&NILFS_MDT(cpfile)->mi_sem);
924 return ret;
925}
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
new file mode 100644
index 000000000000..1a8a1008c342
--- /dev/null
+++ b/fs/nilfs2/cpfile.h
@@ -0,0 +1,45 @@
1/*
2 * cpfile.h - NILFS checkpoint file.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_CPFILE_H
24#define _NILFS_CPFILE_H
25
26#include <linux/fs.h>
27#include <linux/buffer_head.h>
28#include <linux/nilfs2_fs.h>
29
30#define NILFS_CPFILE_GFP NILFS_MDT_GFP
31
32
33int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
34 struct nilfs_checkpoint **,
35 struct buffer_head **);
36void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
37int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
38int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
39int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
40int nilfs_cpfile_is_snapshot(struct inode *, __u64);
41int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
42ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int,
43 struct nilfs_cpinfo *, size_t);
44
45#endif /* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
new file mode 100644
index 000000000000..bb8a5818e7f1
--- /dev/null
+++ b/fs/nilfs2/dat.c
@@ -0,0 +1,430 @@
1/*
2 * dat.c - NILFS disk address translation.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/types.h>
24#include <linux/buffer_head.h>
25#include <linux/string.h>
26#include <linux/errno.h>
27#include "nilfs.h"
28#include "mdt.h"
29#include "alloc.h"
30#include "dat.h"
31
32
/*
 * Bounds of the checkpoint number range as stored in the de_start and
 * de_end fields of DAT entries: 1 is the smallest value and the all-ones
 * 64-bit value the largest.
 */
#define NILFS_CNO_MIN	((__u64)1)
#define NILFS_CNO_MAX	(~(__u64)0)
35
36static int nilfs_dat_prepare_entry(struct inode *dat,
37 struct nilfs_palloc_req *req, int create)
38{
39 return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
40 create, &req->pr_entry_bh);
41}
42
43static void nilfs_dat_commit_entry(struct inode *dat,
44 struct nilfs_palloc_req *req)
45{
46 nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh);
47 nilfs_mdt_mark_dirty(dat);
48 brelse(req->pr_entry_bh);
49}
50
51static void nilfs_dat_abort_entry(struct inode *dat,
52 struct nilfs_palloc_req *req)
53{
54 brelse(req->pr_entry_bh);
55}
56
/*
 * nilfs_dat_prepare_alloc - first phase of allocating a DAT entry
 * @dat: DAT file inode
 * @req: allocation request
 *
 * Prepares the persistent-object allocation and then gets the entry block
 * with creation allowed.  If the second step fails, the first one is
 * aborted.  Returns the negative error code of the failing step, otherwise
 * the result of the entry block lookup.
 */
int nilfs_dat_prepare_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err = nilfs_palloc_prepare_alloc_entry(dat, req);

	if (err >= 0) {
		err = nilfs_dat_prepare_entry(dat, req, 1);
		if (err < 0)
			nilfs_palloc_abort_alloc_entry(dat, req);
	}
	return err;
}
71
72void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
73{
74 struct nilfs_dat_entry *entry;
75 void *kaddr;
76
77 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
78 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
79 req->pr_entry_bh, kaddr);
80 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
81 entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
82 entry->de_blocknr = cpu_to_le64(0);
83 kunmap_atomic(kaddr, KM_USER0);
84
85 nilfs_palloc_commit_alloc_entry(dat, req);
86 nilfs_dat_commit_entry(dat, req);
87}
88
/*
 * nilfs_dat_abort_alloc - cancel a prepared allocation
 * @dat: DAT file inode
 * @req: allocation request prepared by nilfs_dat_prepare_alloc()
 *
 * Releases the entry block first and then aborts the allocation itself.
 */
void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
{
	/* undo nilfs_dat_prepare_entry() */
	nilfs_dat_abort_entry(dat, req);
	/* undo nilfs_palloc_prepare_alloc_entry() */
	nilfs_palloc_abort_alloc_entry(dat, req);
}
94
/*
 * nilfs_dat_prepare_free - first phase of freeing a DAT entry
 * @dat: DAT file inode
 * @req: request naming the entry in pr_entry_nr
 *
 * Prepares the release of the entry and then gets its block without
 * creating it.  If the lookup fails, the prepared release is aborted.
 * Returns 0 on success or the negative error code of the failing step.
 */
int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	int err;

	err = nilfs_palloc_prepare_free_entry(dat, req);
	if (err < 0)
		return err;

	err = nilfs_dat_prepare_entry(dat, req, 0);
	if (err >= 0)
		return 0;

	nilfs_palloc_abort_free_entry(dat, req);
	return err;
}
109
110void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
111{
112 struct nilfs_dat_entry *entry;
113 void *kaddr;
114
115 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
116 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
117 req->pr_entry_bh, kaddr);
118 entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
119 entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
120 entry->de_blocknr = cpu_to_le64(0);
121 kunmap_atomic(kaddr, KM_USER0);
122
123 nilfs_dat_commit_entry(dat, req);
124 nilfs_palloc_commit_free_entry(dat, req);
125}
126
/*
 * nilfs_dat_abort_free - cancel a prepared release
 * @dat: DAT file inode
 * @req: request prepared by nilfs_dat_prepare_free()
 *
 * Releases the entry block first and then aborts the release itself.
 */
void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req)
{
	/* undo nilfs_dat_prepare_entry() */
	nilfs_dat_abort_entry(dat, req);
	/* undo nilfs_palloc_prepare_free_entry() */
	nilfs_palloc_abort_free_entry(dat, req);
}
132
133int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
134{
135 int ret;
136
137 ret = nilfs_dat_prepare_entry(dat, req, 0);
138 WARN_ON(ret == -ENOENT);
139 return ret;
140}
141
142void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
143 sector_t blocknr)
144{
145 struct nilfs_dat_entry *entry;
146 void *kaddr;
147
148 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
149 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
150 req->pr_entry_bh, kaddr);
151 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
152 if (entry->de_blocknr != cpu_to_le64(0) ||
153 entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) {
154 printk(KERN_CRIT
155 "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n",
156 __func__, (unsigned long long)req->pr_entry_nr,
157 (unsigned long long)le64_to_cpu(entry->de_start),
158 (unsigned long long)le64_to_cpu(entry->de_end),
159 (unsigned long long)le64_to_cpu(entry->de_blocknr));
160 }
161 entry->de_blocknr = cpu_to_le64(blocknr);
162 kunmap_atomic(kaddr, KM_USER0);
163
164 nilfs_dat_commit_entry(dat, req);
165}
166
/*
 * nilfs_dat_abort_start - cancel nilfs_dat_prepare_start()
 * @dat: DAT file inode
 * @req: request whose entry block is held in pr_entry_bh
 *
 * Only the entry block reference has to be released.
 */
void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req)
{
	nilfs_dat_abort_entry(dat, req);
}
171
172int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
173{
174 struct nilfs_dat_entry *entry;
175 __u64 start;
176 sector_t blocknr;
177 void *kaddr;
178 int ret;
179
180 ret = nilfs_dat_prepare_entry(dat, req, 0);
181 if (ret < 0) {
182 WARN_ON(ret == -ENOENT);
183 return ret;
184 }
185
186 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
187 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
188 req->pr_entry_bh, kaddr);
189 start = le64_to_cpu(entry->de_start);
190 blocknr = le64_to_cpu(entry->de_blocknr);
191 kunmap_atomic(kaddr, KM_USER0);
192
193 if (blocknr == 0) {
194 ret = nilfs_palloc_prepare_free_entry(dat, req);
195 if (ret < 0) {
196 nilfs_dat_abort_entry(dat, req);
197 return ret;
198 }
199 }
200
201 return 0;
202}
203
204void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
205 int dead)
206{
207 struct nilfs_dat_entry *entry;
208 __u64 start, end;
209 sector_t blocknr;
210 void *kaddr;
211
212 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
213 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
214 req->pr_entry_bh, kaddr);
215 end = start = le64_to_cpu(entry->de_start);
216 if (!dead) {
217 end = nilfs_mdt_cno(dat);
218 WARN_ON(start > end);
219 }
220 entry->de_end = cpu_to_le64(end);
221 blocknr = le64_to_cpu(entry->de_blocknr);
222 kunmap_atomic(kaddr, KM_USER0);
223
224 if (blocknr == 0)
225 nilfs_dat_commit_free(dat, req);
226 else
227 nilfs_dat_commit_entry(dat, req);
228}
229
230void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
231{
232 struct nilfs_dat_entry *entry;
233 __u64 start;
234 sector_t blocknr;
235 void *kaddr;
236
237 kaddr = kmap_atomic(req->pr_entry_bh->b_page, KM_USER0);
238 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
239 req->pr_entry_bh, kaddr);
240 start = le64_to_cpu(entry->de_start);
241 blocknr = le64_to_cpu(entry->de_blocknr);
242 kunmap_atomic(kaddr, KM_USER0);
243
244 if (start == nilfs_mdt_cno(dat) && blocknr == 0)
245 nilfs_palloc_abort_free_entry(dat, req);
246 nilfs_dat_abort_entry(dat, req);
247}
248
249/**
250 * nilfs_dat_mark_dirty -
251 * @dat: DAT file inode
252 * @vblocknr: virtual block number
253 *
254 * Description:
255 *
256 * Return Value: On success, 0 is returned. On error, one of the following
257 * negative error codes is returned.
258 *
259 * %-EIO - I/O error.
260 *
261 * %-ENOMEM - Insufficient amount of memory available.
262 */
263int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr)
264{
265 struct nilfs_palloc_req req;
266 int ret;
267
268 req.pr_entry_nr = vblocknr;
269 ret = nilfs_dat_prepare_entry(dat, &req, 0);
270 if (ret == 0)
271 nilfs_dat_commit_entry(dat, &req);
272 return ret;
273}
274
275/**
276 * nilfs_dat_freev - free virtual block numbers
277 * @dat: DAT file inode
278 * @vblocknrs: array of virtual block numbers
279 * @nitems: number of virtual block numbers
280 *
281 * Description: nilfs_dat_freev() frees the virtual block numbers specified by
282 * @vblocknrs and @nitems.
283 *
284 * Return Value: On success, 0 is returned. On error, one of the following
285 * nagative error codes is returned.
286 *
287 * %-EIO - I/O error.
288 *
289 * %-ENOMEM - Insufficient amount of memory available.
290 *
291 * %-ENOENT - The virtual block number have not been allocated.
292 */
293int nilfs_dat_freev(struct inode *dat, __u64 *vblocknrs, size_t nitems)
294{
295 return nilfs_palloc_freev(dat, vblocknrs, nitems);
296}
297
298/**
299 * nilfs_dat_move - change a block number
300 * @dat: DAT file inode
301 * @vblocknr: virtual block number
302 * @blocknr: block number
303 *
304 * Description: nilfs_dat_move() changes the block number associated with
305 * @vblocknr to @blocknr.
306 *
307 * Return Value: On success, 0 is returned. On error, one of the following
308 * negative error codes is returned.
309 *
310 * %-EIO - I/O error.
311 *
312 * %-ENOMEM - Insufficient amount of memory available.
313 */
314int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
315{
316 struct buffer_head *entry_bh;
317 struct nilfs_dat_entry *entry;
318 void *kaddr;
319 int ret;
320
321 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
322 if (ret < 0)
323 return ret;
324 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
325 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
326 if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
327 printk(KERN_CRIT "%s: vbn = %llu, [%llu, %llu)\n", __func__,
328 (unsigned long long)vblocknr,
329 (unsigned long long)le64_to_cpu(entry->de_start),
330 (unsigned long long)le64_to_cpu(entry->de_end));
331 kunmap_atomic(kaddr, KM_USER0);
332 brelse(entry_bh);
333 return -EINVAL;
334 }
335 WARN_ON(blocknr == 0);
336 entry->de_blocknr = cpu_to_le64(blocknr);
337 kunmap_atomic(kaddr, KM_USER0);
338
339 nilfs_mdt_mark_buffer_dirty(entry_bh);
340 nilfs_mdt_mark_dirty(dat);
341
342 brelse(entry_bh);
343
344 return 0;
345}
346
347/**
348 * nilfs_dat_translate - translate a virtual block number to a block number
349 * @dat: DAT file inode
350 * @vblocknr: virtual block number
351 * @blocknrp: pointer to a block number
352 *
353 * Description: nilfs_dat_translate() maps the virtual block number @vblocknr
354 * to the corresponding block number.
355 *
356 * Return Value: On success, 0 is returned and the block number associated
357 * with @vblocknr is stored in the place pointed by @blocknrp. On error, one
358 * of the following negative error codes is returned.
359 *
360 * %-EIO - I/O error.
361 *
362 * %-ENOMEM - Insufficient amount of memory available.
363 *
364 * %-ENOENT - A block number associated with @vblocknr does not exist.
365 */
366int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
367{
368 struct buffer_head *entry_bh;
369 struct nilfs_dat_entry *entry;
370 sector_t blocknr;
371 void *kaddr;
372 int ret;
373
374 ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh);
375 if (ret < 0)
376 return ret;
377
378 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
379 entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
380 blocknr = le64_to_cpu(entry->de_blocknr);
381 if (blocknr == 0) {
382 ret = -ENOENT;
383 goto out;
384 }
385 if (blocknrp != NULL)
386 *blocknrp = blocknr;
387
388 out:
389 kunmap_atomic(kaddr, KM_USER0);
390 brelse(entry_bh);
391 return ret;
392}
393
394ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo,
395 size_t nvi)
396{
397 struct buffer_head *entry_bh;
398 struct nilfs_dat_entry *entry;
399 __u64 first, last;
400 void *kaddr;
401 unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
402 int i, j, n, ret;
403
404 for (i = 0; i < nvi; i += n) {
405 ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr,
406 0, &entry_bh);
407 if (ret < 0)
408 return ret;
409 kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
410 /* last virtual block number in this block */
411 first = vinfo[i].vi_vblocknr;
412 do_div(first, entries_per_block);
413 first *= entries_per_block;
414 last = first + entries_per_block - 1;
415 for (j = i, n = 0;
416 j < nvi && vinfo[j].vi_vblocknr >= first &&
417 vinfo[j].vi_vblocknr <= last;
418 j++, n++) {
419 entry = nilfs_palloc_block_get_entry(
420 dat, vinfo[j].vi_vblocknr, entry_bh, kaddr);
421 vinfo[j].vi_start = le64_to_cpu(entry->de_start);
422 vinfo[j].vi_end = le64_to_cpu(entry->de_end);
423 vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr);
424 }
425 kunmap_atomic(kaddr, KM_USER0);
426 brelse(entry_bh);
427 }
428
429 return nvi;
430}
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
new file mode 100644
index 000000000000..d9560654a4b7
--- /dev/null
+++ b/fs/nilfs2/dat.h
@@ -0,0 +1,52 @@
1/*
2 * dat.h - NILFS disk address translation.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_DAT_H
24#define _NILFS_DAT_H
25
26#include <linux/types.h>
27#include <linux/buffer_head.h>
28#include <linux/fs.h>
29
30#define NILFS_DAT_GFP NILFS_MDT_GFP
31
32struct nilfs_palloc_req;
33
34int nilfs_dat_translate(struct inode *, __u64, sector_t *);
35
36int nilfs_dat_prepare_alloc(struct inode *, struct nilfs_palloc_req *);
37void nilfs_dat_commit_alloc(struct inode *, struct nilfs_palloc_req *);
38void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *);
39int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *);
40void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *,
41 sector_t);
42void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *);
43int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *);
44void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int);
45void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
46
47int nilfs_dat_mark_dirty(struct inode *, __u64);
48int nilfs_dat_freev(struct inode *, __u64 *, size_t);
49int nilfs_dat_move(struct inode *, __u64, sector_t);
50ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t);
51
52#endif /* _NILFS_DAT_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
new file mode 100644
index 000000000000..54100acc1102
--- /dev/null
+++ b/fs/nilfs2/dir.c
@@ -0,0 +1,711 @@
1/*
2 * dir.c - NILFS directory entry operations
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>
21 */
22/*
23 * linux/fs/ext2/dir.c
24 *
25 * Copyright (C) 1992, 1993, 1994, 1995
26 * Remy Card (card@masi.ibp.fr)
27 * Laboratoire MASI - Institut Blaise Pascal
28 * Universite Pierre et Marie Curie (Paris VI)
29 *
30 * from
31 *
32 * linux/fs/minix/dir.c
33 *
34 * Copyright (C) 1991, 1992 Linus Torvalds
35 *
36 * ext2 directory handling functions
37 *
38 * Big-endian to little-endian byte-swapping/bitmaps by
39 * David S. Miller (davem@caip.rutgers.edu), 1995
40 *
41 * All code that works with directory layout had been switched to pagecache
42 * and moved here. AV
43 */
44
45#include <linux/pagemap.h>
46#include <linux/smp_lock.h>
47#include "nilfs.h"
48#include "page.h"
49
50/*
51 * nilfs uses block-sized chunks. Arguably, sector-sized ones would be
52 * more robust, but we have what we have
53 */
54static inline unsigned nilfs_chunk_size(struct inode *inode)
55{
56 return inode->i_sb->s_blocksize;
57}
58
/*
 * Release a directory page: drop its kmap() mapping first, then the
 * page cache reference.
 */
static inline void nilfs_put_page(struct page *page)
{
	kunmap(page);			/* undo the mapping */
	page_cache_release(page);	/* drop the reference */
}
64
65static inline unsigned long dir_pages(struct inode *inode)
66{
67 return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
68}
69
70/*
71 * Return the offset into page `page_nr' of the last valid
72 * byte in that page, plus one.
73 */
74static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
75{
76 unsigned last_byte = inode->i_size;
77
78 last_byte -= page_nr << PAGE_CACHE_SHIFT;
79 if (last_byte > PAGE_CACHE_SIZE)
80 last_byte = PAGE_CACHE_SIZE;
81 return last_byte;
82}
83
84static int nilfs_prepare_chunk_uninterruptible(struct page *page,
85 struct address_space *mapping,
86 unsigned from, unsigned to)
87{
88 loff_t pos = page_offset(page) + from;
89 return block_write_begin(NULL, mapping, pos, to - from,
90 AOP_FLAG_UNINTERRUPTIBLE, &page,
91 NULL, nilfs_get_block);
92}
93
94static int nilfs_prepare_chunk(struct page *page,
95 struct address_space *mapping,
96 unsigned from, unsigned to)
97{
98 loff_t pos = page_offset(page) + from;
99 return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
100 NULL, nilfs_get_block);
101}
102
103static int nilfs_commit_chunk(struct page *page,
104 struct address_space *mapping,
105 unsigned from, unsigned to)
106{
107 struct inode *dir = mapping->host;
108 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
109 loff_t pos = page_offset(page) + from;
110 unsigned len = to - from;
111 unsigned nr_dirty, copied;
112 int err;
113
114 nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
115 copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
116 if (pos + copied > dir->i_size) {
117 i_size_write(dir, pos + copied);
118 mark_inode_dirty(dir);
119 }
120 if (IS_DIRSYNC(dir))
121 nilfs_set_transaction_flag(NILFS_TI_SYNC);
122 err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
123 unlock_page(page);
124 return err;
125}
126
127static void nilfs_check_page(struct page *page)
128{
129 struct inode *dir = page->mapping->host;
130 struct super_block *sb = dir->i_sb;
131 unsigned chunk_size = nilfs_chunk_size(dir);
132 char *kaddr = page_address(page);
133 unsigned offs, rec_len;
134 unsigned limit = PAGE_CACHE_SIZE;
135 struct nilfs_dir_entry *p;
136 char *error;
137
138 if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
139 limit = dir->i_size & ~PAGE_CACHE_MASK;
140 if (limit & (chunk_size - 1))
141 goto Ebadsize;
142 if (!limit)
143 goto out;
144 }
145 for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) {
146 p = (struct nilfs_dir_entry *)(kaddr + offs);
147 rec_len = le16_to_cpu(p->rec_len);
148
149 if (rec_len < NILFS_DIR_REC_LEN(1))
150 goto Eshort;
151 if (rec_len & 3)
152 goto Ealign;
153 if (rec_len < NILFS_DIR_REC_LEN(p->name_len))
154 goto Enamelen;
155 if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))
156 goto Espan;
157 }
158 if (offs != limit)
159 goto Eend;
160out:
161 SetPageChecked(page);
162 return;
163
164 /* Too bad, we had an error */
165
166Ebadsize:
167 nilfs_error(sb, "nilfs_check_page",
168 "size of directory #%lu is not a multiple of chunk size",
169 dir->i_ino
170 );
171 goto fail;
172Eshort:
173 error = "rec_len is smaller than minimal";
174 goto bad_entry;
175Ealign:
176 error = "unaligned directory entry";
177 goto bad_entry;
178Enamelen:
179 error = "rec_len is too small for name_len";
180 goto bad_entry;
181Espan:
182 error = "directory entry across blocks";
183bad_entry:
184 nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - "
185 "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
186 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
187 (unsigned long) le64_to_cpu(p->inode),
188 rec_len, p->name_len);
189 goto fail;
190Eend:
191 p = (struct nilfs_dir_entry *)(kaddr + offs);
192 nilfs_error(sb, "nilfs_check_page",
193 "entry in directory #%lu spans the page boundary"
194 "offset=%lu, inode=%lu",
195 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
196 (unsigned long) le64_to_cpu(p->inode));
197fail:
198 SetPageChecked(page);
199 SetPageError(page);
200}
201
202static struct page *nilfs_get_page(struct inode *dir, unsigned long n)
203{
204 struct address_space *mapping = dir->i_mapping;
205 struct page *page = read_cache_page(mapping, n,
206 (filler_t *)mapping->a_ops->readpage, NULL);
207 if (!IS_ERR(page)) {
208 wait_on_page_locked(page);
209 kmap(page);
210 if (!PageUptodate(page))
211 goto fail;
212 if (!PageChecked(page))
213 nilfs_check_page(page);
214 if (PageError(page))
215 goto fail;
216 }
217 return page;
218
219fail:
220 nilfs_put_page(page);
221 return ERR_PTR(-EIO);
222}
223
224/*
225 * NOTE! unlike strncmp, nilfs_match returns 1 for success, 0 for failure.
226 *
227 * len <= NILFS_NAME_LEN and de != NULL are guaranteed by caller.
228 */
229static int
230nilfs_match(int len, const char * const name, struct nilfs_dir_entry *de)
231{
232 if (len != de->name_len)
233 return 0;
234 if (!de->inode)
235 return 0;
236 return !memcmp(name, de->name, len);
237}
238
239/*
240 * p is at least 6 bytes before the end of page
241 */
242static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p)
243{
244 return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len));
245}
246
247static unsigned char
248nilfs_filetype_table[NILFS_FT_MAX] = {
249 [NILFS_FT_UNKNOWN] = DT_UNKNOWN,
250 [NILFS_FT_REG_FILE] = DT_REG,
251 [NILFS_FT_DIR] = DT_DIR,
252 [NILFS_FT_CHRDEV] = DT_CHR,
253 [NILFS_FT_BLKDEV] = DT_BLK,
254 [NILFS_FT_FIFO] = DT_FIFO,
255 [NILFS_FT_SOCK] = DT_SOCK,
256 [NILFS_FT_SYMLINK] = DT_LNK,
257};
258
259#define S_SHIFT 12
260static unsigned char
261nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
262 [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
263 [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
264 [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
265 [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV,
266 [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO,
267 [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK,
268 [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK,
269};
270
271static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
272{
273 mode_t mode = inode->i_mode;
274
275 de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
276}
277
278static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
279{
280 loff_t pos = filp->f_pos;
281 struct inode *inode = filp->f_dentry->d_inode;
282 struct super_block *sb = inode->i_sb;
283 unsigned int offset = pos & ~PAGE_CACHE_MASK;
284 unsigned long n = pos >> PAGE_CACHE_SHIFT;
285 unsigned long npages = dir_pages(inode);
286/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
287 unsigned char *types = NULL;
288 int ret;
289
290 if (pos > inode->i_size - NILFS_DIR_REC_LEN(1))
291 goto success;
292
293 types = nilfs_filetype_table;
294
295 for ( ; n < npages; n++, offset = 0) {
296 char *kaddr, *limit;
297 struct nilfs_dir_entry *de;
298 struct page *page = nilfs_get_page(inode, n);
299
300 if (IS_ERR(page)) {
301 nilfs_error(sb, __func__, "bad page in #%lu",
302 inode->i_ino);
303 filp->f_pos += PAGE_CACHE_SIZE - offset;
304 ret = -EIO;
305 goto done;
306 }
307 kaddr = page_address(page);
308 de = (struct nilfs_dir_entry *)(kaddr + offset);
309 limit = kaddr + nilfs_last_byte(inode, n) -
310 NILFS_DIR_REC_LEN(1);
311 for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) {
312 if (de->rec_len == 0) {
313 nilfs_error(sb, __func__,
314 "zero-length directory entry");
315 ret = -EIO;
316 nilfs_put_page(page);
317 goto done;
318 }
319 if (de->inode) {
320 int over;
321 unsigned char d_type = DT_UNKNOWN;
322
323 if (types && de->file_type < NILFS_FT_MAX)
324 d_type = types[de->file_type];
325
326 offset = (char *)de - kaddr;
327 over = filldir(dirent, de->name, de->name_len,
328 (n<<PAGE_CACHE_SHIFT) | offset,
329 le64_to_cpu(de->inode), d_type);
330 if (over) {
331 nilfs_put_page(page);
332 goto success;
333 }
334 }
335 filp->f_pos += le16_to_cpu(de->rec_len);
336 }
337 nilfs_put_page(page);
338 }
339
340success:
341 ret = 0;
342done:
343 return ret;
344}
345
346/*
347 * nilfs_find_entry()
348 *
349 * finds an entry in the specified directory with the wanted name. It
350 * returns the page in which the entry was found, and the entry itself
351 * (as a parameter - res_dir). Page is returned mapped and unlocked.
352 * Entry is guaranteed to be valid.
353 */
354struct nilfs_dir_entry *
355nilfs_find_entry(struct inode *dir, struct dentry *dentry,
356 struct page **res_page)
357{
358 const char *name = dentry->d_name.name;
359 int namelen = dentry->d_name.len;
360 unsigned reclen = NILFS_DIR_REC_LEN(namelen);
361 unsigned long start, n;
362 unsigned long npages = dir_pages(dir);
363 struct page *page = NULL;
364 struct nilfs_inode_info *ei = NILFS_I(dir);
365 struct nilfs_dir_entry *de;
366
367 if (npages == 0)
368 goto out;
369
370 /* OFFSET_CACHE */
371 *res_page = NULL;
372
373 start = ei->i_dir_start_lookup;
374 if (start >= npages)
375 start = 0;
376 n = start;
377 do {
378 char *kaddr;
379 page = nilfs_get_page(dir, n);
380 if (!IS_ERR(page)) {
381 kaddr = page_address(page);
382 de = (struct nilfs_dir_entry *)kaddr;
383 kaddr += nilfs_last_byte(dir, n) - reclen;
384 while ((char *) de <= kaddr) {
385 if (de->rec_len == 0) {
386 nilfs_error(dir->i_sb, __func__,
387 "zero-length directory entry");
388 nilfs_put_page(page);
389 goto out;
390 }
391 if (nilfs_match(namelen, name, de))
392 goto found;
393 de = nilfs_next_entry(de);
394 }
395 nilfs_put_page(page);
396 }
397 if (++n >= npages)
398 n = 0;
399 /* next page is past the blocks we've got */
400 if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
401 nilfs_error(dir->i_sb, __func__,
402 "dir %lu size %lld exceeds block cout %llu",
403 dir->i_ino, dir->i_size,
404 (unsigned long long)dir->i_blocks);
405 goto out;
406 }
407 } while (n != start);
408out:
409 return NULL;
410
411found:
412 *res_page = page;
413 ei->i_dir_start_lookup = n;
414 return de;
415}
416
417struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p)
418{
419 struct page *page = nilfs_get_page(dir, 0);
420 struct nilfs_dir_entry *de = NULL;
421
422 if (!IS_ERR(page)) {
423 de = nilfs_next_entry(
424 (struct nilfs_dir_entry *)page_address(page));
425 *p = page;
426 }
427 return de;
428}
429
430ino_t nilfs_inode_by_name(struct inode *dir, struct dentry *dentry)
431{
432 ino_t res = 0;
433 struct nilfs_dir_entry *de;
434 struct page *page;
435
436 de = nilfs_find_entry(dir, dentry, &page);
437 if (de) {
438 res = le64_to_cpu(de->inode);
439 kunmap(page);
440 page_cache_release(page);
441 }
442 return res;
443}
444
445/* Releases the page */
446void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
447 struct page *page, struct inode *inode)
448{
449 unsigned from = (char *) de - (char *) page_address(page);
450 unsigned to = from + le16_to_cpu(de->rec_len);
451 struct address_space *mapping = page->mapping;
452 int err;
453
454 lock_page(page);
455 err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to);
456 BUG_ON(err);
457 de->inode = cpu_to_le64(inode->i_ino);
458 nilfs_set_de_type(de, inode);
459 err = nilfs_commit_chunk(page, mapping, from, to);
460 nilfs_put_page(page);
461 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
462/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
463 mark_inode_dirty(dir);
464}
465
466/*
467 * Parent is locked.
468 */
469int nilfs_add_link(struct dentry *dentry, struct inode *inode)
470{
471 struct inode *dir = dentry->d_parent->d_inode;
472 const char *name = dentry->d_name.name;
473 int namelen = dentry->d_name.len;
474 unsigned chunk_size = nilfs_chunk_size(dir);
475 unsigned reclen = NILFS_DIR_REC_LEN(namelen);
476 unsigned short rec_len, name_len;
477 struct page *page = NULL;
478 struct nilfs_dir_entry *de;
479 unsigned long npages = dir_pages(dir);
480 unsigned long n;
481 char *kaddr;
482 unsigned from, to;
483 int err;
484
485 /*
486 * We take care of directory expansion in the same loop.
487 * This code plays outside i_size, so it locks the page
488 * to protect that region.
489 */
490 for (n = 0; n <= npages; n++) {
491 char *dir_end;
492
493 page = nilfs_get_page(dir, n);
494 err = PTR_ERR(page);
495 if (IS_ERR(page))
496 goto out;
497 lock_page(page);
498 kaddr = page_address(page);
499 dir_end = kaddr + nilfs_last_byte(dir, n);
500 de = (struct nilfs_dir_entry *)kaddr;
501 kaddr += PAGE_CACHE_SIZE - reclen;
502 while ((char *)de <= kaddr) {
503 if ((char *)de == dir_end) {
504 /* We hit i_size */
505 name_len = 0;
506 rec_len = chunk_size;
507 de->rec_len = cpu_to_le16(chunk_size);
508 de->inode = 0;
509 goto got_it;
510 }
511 if (de->rec_len == 0) {
512 nilfs_error(dir->i_sb, __func__,
513 "zero-length directory entry");
514 err = -EIO;
515 goto out_unlock;
516 }
517 err = -EEXIST;
518 if (nilfs_match(namelen, name, de))
519 goto out_unlock;
520 name_len = NILFS_DIR_REC_LEN(de->name_len);
521 rec_len = le16_to_cpu(de->rec_len);
522 if (!de->inode && rec_len >= reclen)
523 goto got_it;
524 if (rec_len >= name_len + reclen)
525 goto got_it;
526 de = (struct nilfs_dir_entry *)((char *)de + rec_len);
527 }
528 unlock_page(page);
529 nilfs_put_page(page);
530 }
531 BUG();
532 return -EINVAL;
533
534got_it:
535 from = (char *)de - (char *)page_address(page);
536 to = from + rec_len;
537 err = nilfs_prepare_chunk(page, page->mapping, from, to);
538 if (err)
539 goto out_unlock;
540 if (de->inode) {
541 struct nilfs_dir_entry *de1;
542
543 de1 = (struct nilfs_dir_entry *)((char *)de + name_len);
544 de1->rec_len = cpu_to_le16(rec_len - name_len);
545 de->rec_len = cpu_to_le16(name_len);
546 de = de1;
547 }
548 de->name_len = namelen;
549 memcpy(de->name, name, namelen);
550 de->inode = cpu_to_le64(inode->i_ino);
551 nilfs_set_de_type(de, inode);
552 err = nilfs_commit_chunk(page, page->mapping, from, to);
553 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
554/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
555 mark_inode_dirty(dir);
556 /* OFFSET_CACHE */
557out_put:
558 nilfs_put_page(page);
559out:
560 return err;
561out_unlock:
562 unlock_page(page);
563 goto out_put;
564}
565
566/*
567 * nilfs_delete_entry deletes a directory entry by merging it with the
568 * previous entry. Page is up-to-date. Releases the page.
569 */
570int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
571{
572 struct address_space *mapping = page->mapping;
573 struct inode *inode = mapping->host;
574 char *kaddr = page_address(page);
575 unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1);
576 unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len);
577 struct nilfs_dir_entry *pde = NULL;
578 struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from);
579 int err;
580
581 while ((char *)de < (char *)dir) {
582 if (de->rec_len == 0) {
583 nilfs_error(inode->i_sb, __func__,
584 "zero-length directory entry");
585 err = -EIO;
586 goto out;
587 }
588 pde = de;
589 de = nilfs_next_entry(de);
590 }
591 if (pde)
592 from = (char *)pde - (char *)page_address(page);
593 lock_page(page);
594 err = nilfs_prepare_chunk(page, mapping, from, to);
595 BUG_ON(err);
596 if (pde)
597 pde->rec_len = cpu_to_le16(to - from);
598 dir->inode = 0;
599 err = nilfs_commit_chunk(page, mapping, from, to);
600 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
601/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
602 mark_inode_dirty(inode);
603out:
604 nilfs_put_page(page);
605 return err;
606}
607
608/*
609 * Set the first fragment of directory.
610 */
611int nilfs_make_empty(struct inode *inode, struct inode *parent)
612{
613 struct address_space *mapping = inode->i_mapping;
614 struct page *page = grab_cache_page(mapping, 0);
615 unsigned chunk_size = nilfs_chunk_size(inode);
616 struct nilfs_dir_entry *de;
617 int err;
618 void *kaddr;
619
620 if (!page)
621 return -ENOMEM;
622
623 err = nilfs_prepare_chunk(page, mapping, 0, chunk_size);
624 if (unlikely(err)) {
625 unlock_page(page);
626 goto fail;
627 }
628 kaddr = kmap_atomic(page, KM_USER0);
629 memset(kaddr, 0, chunk_size);
630 de = (struct nilfs_dir_entry *)kaddr;
631 de->name_len = 1;
632 de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1));
633 memcpy(de->name, ".\0\0", 4);
634 de->inode = cpu_to_le64(inode->i_ino);
635 nilfs_set_de_type(de, inode);
636
637 de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1));
638 de->name_len = 2;
639 de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1));
640 de->inode = cpu_to_le64(parent->i_ino);
641 memcpy(de->name, "..\0", 4);
642 nilfs_set_de_type(de, inode);
643 kunmap_atomic(kaddr, KM_USER0);
644 err = nilfs_commit_chunk(page, mapping, 0, chunk_size);
645fail:
646 page_cache_release(page);
647 return err;
648}
649
650/*
651 * routine to check that the specified directory is empty (for rmdir)
652 */
653int nilfs_empty_dir(struct inode *inode)
654{
655 struct page *page = NULL;
656 unsigned long i, npages = dir_pages(inode);
657
658 for (i = 0; i < npages; i++) {
659 char *kaddr;
660 struct nilfs_dir_entry *de;
661
662 page = nilfs_get_page(inode, i);
663 if (IS_ERR(page))
664 continue;
665
666 kaddr = page_address(page);
667 de = (struct nilfs_dir_entry *)kaddr;
668 kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);
669
670 while ((char *)de <= kaddr) {
671 if (de->rec_len == 0) {
672 nilfs_error(inode->i_sb, __func__,
673 "zero-length directory entry "
674 "(kaddr=%p, de=%p)\n", kaddr, de);
675 goto not_empty;
676 }
677 if (de->inode != 0) {
678 /* check for . and .. */
679 if (de->name[0] != '.')
680 goto not_empty;
681 if (de->name_len > 2)
682 goto not_empty;
683 if (de->name_len < 2) {
684 if (de->inode !=
685 cpu_to_le64(inode->i_ino))
686 goto not_empty;
687 } else if (de->name[1] != '.')
688 goto not_empty;
689 }
690 de = nilfs_next_entry(de);
691 }
692 nilfs_put_page(page);
693 }
694 return 1;
695
696not_empty:
697 nilfs_put_page(page);
698 return 0;
699}
700
701struct file_operations nilfs_dir_operations = {
702 .llseek = generic_file_llseek,
703 .read = generic_read_dir,
704 .readdir = nilfs_readdir,
705 .unlocked_ioctl = nilfs_ioctl,
706#ifdef CONFIG_COMPAT
707 .compat_ioctl = nilfs_ioctl,
708#endif /* CONFIG_COMPAT */
709 .fsync = nilfs_sync_file,
710
711};
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
new file mode 100644
index 000000000000..c6379e482781
--- /dev/null
+++ b/fs/nilfs2/direct.c
@@ -0,0 +1,436 @@
1/*
2 * direct.c - NILFS direct block pointer.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/errno.h>
24#include "nilfs.h"
25#include "page.h"
26#include "direct.h"
27#include "alloc.h"
28
29static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct)
30{
31 return (__le64 *)
32 ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1);
33}
34
35static inline __u64
36nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key)
37{
38 return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key));
39}
40
41static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct,
42 __u64 key, __u64 ptr)
43{
44 *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr);
45}
46
47static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
48 __u64 key, int level, __u64 *ptrp)
49{
50 struct nilfs_direct *direct;
51 __u64 ptr;
52
53 direct = (struct nilfs_direct *)bmap;
54 if ((key > NILFS_DIRECT_KEY_MAX) ||
55 (level != 1) || /* XXX: use macro for level 1 */
56 ((ptr = nilfs_direct_get_ptr(direct, key)) ==
57 NILFS_BMAP_INVALID_PTR))
58 return -ENOENT;
59
60 if (ptrp != NULL)
61 *ptrp = ptr;
62 return 0;
63}
64
65static __u64
66nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key)
67{
68 __u64 ptr;
69
70 ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key);
71 if (ptr != NILFS_BMAP_INVALID_PTR)
72 /* sequential access */
73 return ptr;
74 else
75 /* block group */
76 return nilfs_bmap_find_target_in_group(&direct->d_bmap);
77}
78
79static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
80 __u64 key, __u64 ptr)
81{
82 direct->d_bmap.b_last_allocated_key = key;
83 direct->d_bmap.b_last_allocated_ptr = ptr;
84}
85
86static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
87 __u64 key,
88 union nilfs_bmap_ptr_req *req,
89 struct nilfs_bmap_stats *stats)
90{
91 int ret;
92
93 if (direct->d_ops->dop_find_target != NULL)
94 req->bpr_ptr = direct->d_ops->dop_find_target(direct, key);
95 ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap,
96 req);
97 if (ret < 0)
98 return ret;
99
100 stats->bs_nblocks = 1;
101 return 0;
102}
103
104static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
105 union nilfs_bmap_ptr_req *req,
106 __u64 key, __u64 ptr)
107{
108 struct buffer_head *bh;
109
110 /* ptr must be a pointer to a buffer head. */
111 bh = (struct buffer_head *)((unsigned long)ptr);
112 set_buffer_nilfs_volatile(bh);
113
114 if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL)
115 direct->d_bmap.b_pops->bpop_commit_alloc_ptr(
116 &direct->d_bmap, req);
117 nilfs_direct_set_ptr(direct, key, req->bpr_ptr);
118
119 if (!nilfs_bmap_dirty(&direct->d_bmap))
120 nilfs_bmap_set_dirty(&direct->d_bmap);
121
122 if (direct->d_ops->dop_set_target != NULL)
123 direct->d_ops->dop_set_target(direct, key, req->bpr_ptr);
124}
125
126static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
127{
128 struct nilfs_direct *direct;
129 union nilfs_bmap_ptr_req req;
130 struct nilfs_bmap_stats stats;
131 int ret;
132
133 direct = (struct nilfs_direct *)bmap;
134 if (key > NILFS_DIRECT_KEY_MAX)
135 return -ENOENT;
136 if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR)
137 return -EEXIST;
138
139 ret = nilfs_direct_prepare_insert(direct, key, &req, &stats);
140 if (ret < 0)
141 return ret;
142 nilfs_direct_commit_insert(direct, &req, key, ptr);
143 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
144
145 return 0;
146}
147
148static int nilfs_direct_prepare_delete(struct nilfs_direct *direct,
149 union nilfs_bmap_ptr_req *req,
150 __u64 key,
151 struct nilfs_bmap_stats *stats)
152{
153 int ret;
154
155 if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
156 req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
157 ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr(
158 &direct->d_bmap, req);
159 if (ret < 0)
160 return ret;
161 }
162
163 stats->bs_nblocks = 1;
164 return 0;
165}
166
167static void nilfs_direct_commit_delete(struct nilfs_direct *direct,
168 union nilfs_bmap_ptr_req *req,
169 __u64 key)
170{
171 if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL)
172 direct->d_bmap.b_pops->bpop_commit_end_ptr(
173 &direct->d_bmap, req);
174 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
175}
176
177static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
178{
179 struct nilfs_direct *direct;
180 union nilfs_bmap_ptr_req req;
181 struct nilfs_bmap_stats stats;
182 int ret;
183
184 direct = (struct nilfs_direct *)bmap;
185 if ((key > NILFS_DIRECT_KEY_MAX) ||
186 nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR)
187 return -ENOENT;
188
189 ret = nilfs_direct_prepare_delete(direct, &req, key, &stats);
190 if (ret < 0)
191 return ret;
192 nilfs_direct_commit_delete(direct, &req, key);
193 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
194
195 return 0;
196}
197
198static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
199{
200 struct nilfs_direct *direct;
201 __u64 key, lastkey;
202
203 direct = (struct nilfs_direct *)bmap;
204 lastkey = NILFS_DIRECT_KEY_MAX + 1;
205 for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++)
206 if (nilfs_direct_get_ptr(direct, key) !=
207 NILFS_BMAP_INVALID_PTR)
208 lastkey = key;
209
210 if (lastkey == NILFS_DIRECT_KEY_MAX + 1)
211 return -ENOENT;
212
213 *keyp = lastkey;
214
215 return 0;
216}
217
218static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key)
219{
220 return key > NILFS_DIRECT_KEY_MAX;
221}
222
223static int nilfs_direct_gather_data(struct nilfs_bmap *bmap,
224 __u64 *keys, __u64 *ptrs, int nitems)
225{
226 struct nilfs_direct *direct;
227 __u64 key;
228 __u64 ptr;
229 int n;
230
231 direct = (struct nilfs_direct *)bmap;
232 if (nitems > NILFS_DIRECT_NBLOCKS)
233 nitems = NILFS_DIRECT_NBLOCKS;
234 n = 0;
235 for (key = 0; key < nitems; key++) {
236 ptr = nilfs_direct_get_ptr(direct, key);
237 if (ptr != NILFS_BMAP_INVALID_PTR) {
238 keys[n] = key;
239 ptrs[n] = ptr;
240 n++;
241 }
242 }
243 return n;
244}
245
246int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
247 __u64 key, __u64 *keys, __u64 *ptrs,
248 int n, __u64 low, __u64 high)
249{
250 struct nilfs_direct *direct;
251 __le64 *dptrs;
252 int ret, i, j;
253
254 /* no need to allocate any resource for conversion */
255
256 /* delete */
257 ret = bmap->b_ops->bop_delete(bmap, key);
258 if (ret < 0)
259 return ret;
260
261 /* free resources */
262 if (bmap->b_ops->bop_clear != NULL)
263 bmap->b_ops->bop_clear(bmap);
264
265 /* convert */
266 direct = (struct nilfs_direct *)bmap;
267 dptrs = nilfs_direct_dptrs(direct);
268 for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) {
269 if ((j < n) && (i == keys[j])) {
270 dptrs[i] = (i != key) ?
271 nilfs_bmap_ptr_to_dptr(ptrs[j]) :
272 NILFS_BMAP_INVALID_PTR;
273 j++;
274 } else
275 dptrs[i] = NILFS_BMAP_INVALID_PTR;
276 }
277
278 nilfs_direct_init(bmap, low, high);
279
280 return 0;
281}
282
283static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
284 struct buffer_head *bh)
285{
286 union nilfs_bmap_ptr_req oldreq, newreq;
287 __u64 key;
288 __u64 ptr;
289 int ret;
290
291 key = nilfs_bmap_data_get_key(&direct->d_bmap, bh);
292 ptr = nilfs_direct_get_ptr(direct, key);
293 if (!buffer_nilfs_volatile(bh)) {
294 oldreq.bpr_ptr = ptr;
295 newreq.bpr_ptr = ptr;
296 ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq,
297 &newreq);
298 if (ret < 0)
299 return ret;
300 nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq);
301 set_buffer_nilfs_volatile(bh);
302 nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr);
303 } else
304 ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr);
305
306 return ret;
307}
308
309static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
310 struct buffer_head *bh)
311{
312 struct nilfs_direct *direct;
313
314 direct = (struct nilfs_direct *)bmap;
315 return (direct->d_ops->dop_propagate != NULL) ?
316 direct->d_ops->dop_propagate(direct, bh) :
317 0;
318}
319
320static int nilfs_direct_assign_v(struct nilfs_direct *direct,
321 __u64 key, __u64 ptr,
322 struct buffer_head **bh,
323 sector_t blocknr,
324 union nilfs_binfo *binfo)
325{
326 union nilfs_bmap_ptr_req req;
327 int ret;
328
329 req.bpr_ptr = ptr;
330 ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr(
331 &direct->d_bmap, &req);
332 if (ret < 0)
333 return ret;
334 direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap,
335 &req, blocknr);
336
337 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
338 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
339
340 return 0;
341}
342
343static int nilfs_direct_assign_p(struct nilfs_direct *direct,
344 __u64 key, __u64 ptr,
345 struct buffer_head **bh,
346 sector_t blocknr,
347 union nilfs_binfo *binfo)
348{
349 nilfs_direct_set_ptr(direct, key, blocknr);
350
351 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
352 binfo->bi_dat.bi_level = 0;
353
354 return 0;
355}
356
357static int nilfs_direct_assign(struct nilfs_bmap *bmap,
358 struct buffer_head **bh,
359 sector_t blocknr,
360 union nilfs_binfo *binfo)
361{
362 struct nilfs_direct *direct;
363 __u64 key;
364 __u64 ptr;
365
366 direct = (struct nilfs_direct *)bmap;
367 key = nilfs_bmap_data_get_key(bmap, *bh);
368 if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
369 printk(KERN_CRIT "%s: invalid key: %llu\n", __func__,
370 (unsigned long long)key);
371 return -EINVAL;
372 }
373 ptr = nilfs_direct_get_ptr(direct, key);
374 if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
375 printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__,
376 (unsigned long long)ptr);
377 return -EINVAL;
378 }
379
380 return direct->d_ops->dop_assign(direct, key, ptr, bh,
381 blocknr, binfo);
382}
383
384static const struct nilfs_bmap_operations nilfs_direct_ops = {
385 .bop_lookup = nilfs_direct_lookup,
386 .bop_insert = nilfs_direct_insert,
387 .bop_delete = nilfs_direct_delete,
388 .bop_clear = NULL,
389
390 .bop_propagate = nilfs_direct_propagate,
391
392 .bop_lookup_dirty_buffers = NULL,
393
394 .bop_assign = nilfs_direct_assign,
395 .bop_mark = NULL,
396
397 .bop_last_key = nilfs_direct_last_key,
398 .bop_check_insert = nilfs_direct_check_insert,
399 .bop_check_delete = NULL,
400 .bop_gather_data = nilfs_direct_gather_data,
401};
402
403
404static const struct nilfs_direct_operations nilfs_direct_ops_v = {
405 .dop_find_target = nilfs_direct_find_target_v,
406 .dop_set_target = nilfs_direct_set_target_v,
407 .dop_propagate = nilfs_direct_propagate_v,
408 .dop_assign = nilfs_direct_assign_v,
409};
410
411static const struct nilfs_direct_operations nilfs_direct_ops_p = {
412 .dop_find_target = NULL,
413 .dop_set_target = NULL,
414 .dop_propagate = NULL,
415 .dop_assign = nilfs_direct_assign_p,
416};
417
418int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
419{
420 struct nilfs_direct *direct;
421
422 direct = (struct nilfs_direct *)bmap;
423 bmap->b_ops = &nilfs_direct_ops;
424 bmap->b_low = low;
425 bmap->b_high = high;
426 switch (bmap->b_inode->i_ino) {
427 case NILFS_DAT_INO:
428 direct->d_ops = &nilfs_direct_ops_p;
429 break;
430 default:
431 direct->d_ops = &nilfs_direct_ops_v;
432 break;
433 }
434
435 return 0;
436}
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h
new file mode 100644
index 000000000000..45d2c5cda812
--- /dev/null
+++ b/fs/nilfs2/direct.h
@@ -0,0 +1,78 @@
1/*
2 * direct.h - NILFS direct block pointer.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_DIRECT_H
24#define _NILFS_DIRECT_H
25
26#include <linux/types.h>
27#include <linux/buffer_head.h>
28#include "bmap.h"
29
30
31struct nilfs_direct;
32
33/**
34 * struct nilfs_direct_operations - direct mapping operation table
35 */
36struct nilfs_direct_operations {
37 __u64 (*dop_find_target)(const struct nilfs_direct *, __u64);
38 void (*dop_set_target)(struct nilfs_direct *, __u64, __u64);
39 int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *);
40 int (*dop_assign)(struct nilfs_direct *, __u64, __u64,
41 struct buffer_head **, sector_t,
42 union nilfs_binfo *);
43};
44
45/**
46 * struct nilfs_direct_node - direct node
47 * @dn_flags: flags
48 * @dn_pad: padding
49 */
50struct nilfs_direct_node {
51 __u8 dn_flags;
52 __u8 pad[7];
53};
54
55/**
56 * struct nilfs_direct - direct mapping
57 * @d_bmap: bmap structure
58 * @d_ops: direct mapping operation table
59 */
60struct nilfs_direct {
61 struct nilfs_bmap d_bmap;
62
63 /* direct-mapping-specific members */
64 const struct nilfs_direct_operations *d_ops;
65};
66
67
68#define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1)
69#define NILFS_DIRECT_KEY_MIN 0
70#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)
71
72
73int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64);
74int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *,
75 __u64 *, int, __u64, __u64);
76
77
78#endif /* _NILFS_DIRECT_H */
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
new file mode 100644
index 000000000000..6bd84a0d8238
--- /dev/null
+++ b/fs/nilfs2/file.c
@@ -0,0 +1,160 @@
1/*
2 * file.c - NILFS regular file handling primitives including fsync().
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Amagai Yoshiji <amagai@osrg.net>,
21 * Ryusuke Konishi <ryusuke@osrg.net>
22 */
23
24#include <linux/fs.h>
25#include <linux/mm.h>
26#include <linux/writeback.h>
27#include "nilfs.h"
28#include "segment.h"
29
30int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
31{
32 /*
33 * Called from fsync() system call
34 * This is the only entry point that can catch write and synch
35 * timing for both data blocks and intermediate blocks.
36 *
37 * This function should be implemented when the writeback function
38 * will be implemented.
39 */
40 struct inode *inode = dentry->d_inode;
41 int err;
42
43 if (!nilfs_inode_dirty(inode))
44 return 0;
45
46 if (datasync)
47 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0,
48 LLONG_MAX);
49 else
50 err = nilfs_construct_segment(inode->i_sb);
51
52 return err;
53}
54
55static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
56{
57 struct page *page = vmf->page;
58 struct inode *inode = vma->vm_file->f_dentry->d_inode;
59 struct nilfs_transaction_info ti;
60 int ret;
61
62 if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs)))
63 return VM_FAULT_SIGBUS; /* -ENOSPC */
64
65 lock_page(page);
66 if (page->mapping != inode->i_mapping ||
67 page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
68 unlock_page(page);
69 return VM_FAULT_NOPAGE; /* make the VM retry the fault */
70 }
71
72 /*
73 * check to see if the page is mapped already (no holes)
74 */
75 if (PageMappedToDisk(page)) {
76 unlock_page(page);
77 goto mapped;
78 }
79 if (page_has_buffers(page)) {
80 struct buffer_head *bh, *head;
81 int fully_mapped = 1;
82
83 bh = head = page_buffers(page);
84 do {
85 if (!buffer_mapped(bh)) {
86 fully_mapped = 0;
87 break;
88 }
89 } while (bh = bh->b_this_page, bh != head);
90
91 if (fully_mapped) {
92 SetPageMappedToDisk(page);
93 unlock_page(page);
94 goto mapped;
95 }
96 }
97 unlock_page(page);
98
99 /*
100 * fill hole blocks
101 */
102 ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
103 /* never returns -ENOMEM, but may return -ENOSPC */
104 if (unlikely(ret))
105 return VM_FAULT_SIGBUS;
106
107 ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
108 if (unlikely(ret)) {
109 nilfs_transaction_abort(inode->i_sb);
110 return ret;
111 }
112 nilfs_transaction_commit(inode->i_sb);
113
114 mapped:
115 SetPageChecked(page);
116 wait_on_page_writeback(page);
117 return 0;
118}
119
120struct vm_operations_struct nilfs_file_vm_ops = {
121 .fault = filemap_fault,
122 .page_mkwrite = nilfs_page_mkwrite,
123};
124
125static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
126{
127 file_accessed(file);
128 vma->vm_ops = &nilfs_file_vm_ops;
129 vma->vm_flags |= VM_CAN_NONLINEAR;
130 return 0;
131}
132
133/*
134 * We have mostly NULL's here: the current defaults are ok for
135 * the nilfs filesystem.
136 */
137struct file_operations nilfs_file_operations = {
138 .llseek = generic_file_llseek,
139 .read = do_sync_read,
140 .write = do_sync_write,
141 .aio_read = generic_file_aio_read,
142 .aio_write = generic_file_aio_write,
143 .unlocked_ioctl = nilfs_ioctl,
144#ifdef CONFIG_COMPAT
145 .compat_ioctl = nilfs_ioctl,
146#endif /* CONFIG_COMPAT */
147 .mmap = nilfs_file_mmap,
148 .open = generic_file_open,
149 /* .release = nilfs_release_file, */
150 .fsync = nilfs_sync_file,
151 .splice_read = generic_file_splice_read,
152};
153
154struct inode_operations nilfs_file_inode_operations = {
155 .truncate = nilfs_truncate,
156 .setattr = nilfs_setattr,
157 .permission = nilfs_permission,
158};
159
160/* end of file */
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
new file mode 100644
index 000000000000..93383c5cee90
--- /dev/null
+++ b/fs/nilfs2/gcdat.c
@@ -0,0 +1,84 @@
1/*
2 * gcdat.c - NILFS shadow DAT inode for GC
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>,
21 * and Ryusuke Konishi <ryusuke@osrg.net>.
22 *
23 */
24
25#include <linux/buffer_head.h>
26#include "nilfs.h"
27#include "page.h"
28#include "mdt.h"
29
30int nilfs_init_gcdat_inode(struct the_nilfs *nilfs)
31{
32 struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
33 struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat);
34 int err;
35
36 gcdat->i_state = 0;
37 gcdat->i_blocks = dat->i_blocks;
38 gii->i_flags = dii->i_flags;
39 gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT);
40 gii->i_cno = 0;
41 nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap);
42 err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping);
43 if (unlikely(err))
44 return err;
45
46 return nilfs_copy_dirty_pages(&gii->i_btnode_cache,
47 &dii->i_btnode_cache);
48}
49
50void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
51{
52 struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat;
53 struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat);
54 struct address_space *mapping = dat->i_mapping;
55 struct address_space *gmapping = gcdat->i_mapping;
56
57 down_write(&NILFS_MDT(dat)->mi_sem);
58 dat->i_blocks = gcdat->i_blocks;
59 dii->i_flags = gii->i_flags;
60 dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT);
61
62 nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap);
63
64 nilfs_clear_dirty_pages(mapping);
65 nilfs_copy_back_pages(mapping, gmapping);
66 /* note: mdt dirty flags should be cleared by segctor. */
67
68 nilfs_clear_dirty_pages(&dii->i_btnode_cache);
69 nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache);
70
71 up_write(&NILFS_MDT(dat)->mi_sem);
72}
73
74void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
75{
76 struct inode *gcdat = nilfs->ns_gc_dat;
77 struct nilfs_inode_info *gii = NILFS_I(gcdat);
78
79 gcdat->i_state = I_CLEAR;
80 gii->i_flags = 0;
81
82 truncate_inode_pages(gcdat->i_mapping, 0);
83 truncate_inode_pages(&gii->i_btnode_cache, 0);
84}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
new file mode 100644
index 000000000000..19d2102b6a69
--- /dev/null
+++ b/fs/nilfs2/gcinode.c
@@ -0,0 +1,288 @@
1/*
2 * gcinode.c - dummy inodes to buffer blocks for garbage collection
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>,
21 * and Ryusuke Konishi <ryusuke@osrg.net>.
22 * Revised by Ryusuke Konishi <ryusuke@osrg.net>.
23 *
24 */
25/*
26 * This file adds the cache of on-disk blocks to be moved in garbage
27 * collection. The disk blocks are held with dummy inodes (called
28 * gcinodes), and this file provides lookup function of the dummy
29 * inodes and their buffer read function.
30 *
31 * Since NILFS2 keeps up multiple checkpoints/snapshots across GC, it
32 * has to treat blocks that belong to a same file but have different
33 * checkpoint numbers. To avoid interference among generations, dummy
34 * inodes are managed separately from actual inodes, and their lookup
35 * function (nilfs_gc_iget) is designed to be specified with a
36 * checkpoint number argument as well as an inode number.
37 *
38 * Buffers and pages held by the dummy inodes will be released each
39 * time after they are copied to a new log. Dirty blocks made on the
40 * current generation and the blocks to be moved by GC never overlap
41 * because the dirty blocks make a new generation; they rather must be
42 * written individually.
43 */
44
45#include <linux/buffer_head.h>
46#include <linux/mpage.h>
47#include <linux/hash.h>
48#include <linux/swap.h>
49#include "nilfs.h"
50#include "page.h"
51#include "mdt.h"
52#include "dat.h"
53#include "ifile.h"
54
55static struct address_space_operations def_gcinode_aops = {};
56/* XXX need def_gcinode_iops/fops? */
57
58/*
59 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
60 * @inode - gc inode
61 * @blkoff - dummy offset treated as the key for the page cache
62 * @pbn - physical block number of the block
63 * @vbn - virtual block number of the block, 0 for non-virtual block
64 * @out_bh - indirect pointer to a buffer_head struct to receive the results
65 *
66 * Description: nilfs_gccache_submit_read_data() registers the data buffer
67 * specified by @pbn to the GC pagecache with the key @blkoff.
68 * This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
69 *
70 * Return Value: On success, 0 is returned. On Error, one of the following
71 * negative error code is returned.
72 *
73 * %-EIO - I/O error.
74 *
75 * %-ENOMEM - Insufficient amount of memory available.
76 *
77 * %-ENOENT - The block specified with @pbn does not exist.
78 */
79int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
80 sector_t pbn, __u64 vbn,
81 struct buffer_head **out_bh)
82{
83 struct buffer_head *bh;
84 int err;
85
86 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
87 if (unlikely(!bh))
88 return -ENOMEM;
89
90 if (buffer_uptodate(bh))
91 goto out;
92
93 if (pbn == 0) {
94 struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat;
95 /* use original dat, not gc dat. */
96 err = nilfs_dat_translate(dat_inode, vbn, &pbn);
97 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
98 brelse(bh);
99 goto failed;
100 }
101 }
102
103 lock_buffer(bh);
104 if (buffer_uptodate(bh)) {
105 unlock_buffer(bh);
106 goto out;
107 }
108
109 if (!buffer_mapped(bh)) {
110 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
111 set_buffer_mapped(bh);
112 }
113 bh->b_blocknr = pbn;
114 bh->b_end_io = end_buffer_read_sync;
115 get_bh(bh);
116 submit_bh(READ, bh);
117 if (vbn)
118 bh->b_blocknr = vbn;
119 out:
120 err = 0;
121 *out_bh = bh;
122
123 failed:
124 unlock_page(bh->b_page);
125 page_cache_release(bh->b_page);
126 return err;
127}
128
129/*
130 * nilfs_gccache_submit_read_node() - add node buffer and submit read request
131 * @inode - gc inode
132 * @pbn - physical block number for the block
133 * @vbn - virtual block number for the block
134 * @out_bh - indirect pointer to a buffer_head struct to receive the results
135 *
136 * Description: nilfs_gccache_submit_read_node() registers the node buffer
137 * specified by @vbn to the GC pagecache. @pbn can be supplied by the
138 * caller to avoid translation of the disk block address.
139 *
140 * Return Value: On success, 0 is returned. On Error, one of the following
141 * negative error code is returned.
142 *
143 * %-EIO - I/O error.
144 *
145 * %-ENOMEM - Insufficient amount of memory available.
146 */
147int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
148 __u64 vbn, struct buffer_head **out_bh)
149{
150 int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
151 vbn ? : pbn, pbn, out_bh, 0);
152 if (ret == -EEXIST) /* internal code (cache hit) */
153 ret = 0;
154 return ret;
155}
156
157int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
158{
159 wait_on_buffer(bh);
160 if (!buffer_uptodate(bh))
161 return -EIO;
162 if (buffer_dirty(bh))
163 return -EEXIST;
164
165 if (buffer_nilfs_node(bh))
166 nilfs_btnode_mark_dirty(bh);
167 else
168 nilfs_mdt_mark_buffer_dirty(bh);
169 return 0;
170}
171
172/*
173 * nilfs_init_gccache() - allocate and initialize gc_inode hash table
174 * @nilfs - the_nilfs
175 *
176 * Return Value: On success, 0.
177 * On error, a negative error code is returned.
178 */
179int nilfs_init_gccache(struct the_nilfs *nilfs)
180{
181 int loop;
182
183 BUG_ON(nilfs->ns_gc_inodes_h);
184
185 INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
186
187 nilfs->ns_gc_inodes_h =
188 kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE,
189 GFP_NOFS);
190 if (nilfs->ns_gc_inodes_h == NULL)
191 return -ENOMEM;
192
193 for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++)
194 INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]);
195 return 0;
196}
197
198/*
199 * nilfs_destroy_gccache() - free gc_inode hash table
200 * @nilfs - the nilfs
201 */
202void nilfs_destroy_gccache(struct the_nilfs *nilfs)
203{
204 if (nilfs->ns_gc_inodes_h) {
205 nilfs_remove_all_gcinode(nilfs);
206 kfree(nilfs->ns_gc_inodes_h);
207 nilfs->ns_gc_inodes_h = NULL;
208 }
209}
210
211static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino,
212 __u64 cno)
213{
214 struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS);
215 struct nilfs_inode_info *ii;
216
217 if (!inode)
218 return NULL;
219
220 inode->i_op = NULL;
221 inode->i_fop = NULL;
222 inode->i_mapping->a_ops = &def_gcinode_aops;
223
224 ii = NILFS_I(inode);
225 ii->i_cno = cno;
226 ii->i_flags = 0;
227 ii->i_state = 1 << NILFS_I_GCINODE;
228 ii->i_bh = NULL;
229 nilfs_bmap_init_gc(ii->i_bmap);
230
231 return inode;
232}
233
234static unsigned long ihash(ino_t ino, __u64 cno)
235{
236 return hash_long((unsigned long)((ino << 2) + cno),
237 NILFS_GCINODE_HASH_BITS);
238}
239
240/*
241 * nilfs_gc_iget() - find or create gc inode with specified (ino,cno)
242 */
243struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
244{
245 struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno);
246 struct hlist_node *node;
247 struct inode *inode;
248
249 hlist_for_each_entry(inode, node, head, i_hash) {
250 if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno)
251 return inode;
252 }
253
254 inode = alloc_gcinode(nilfs, ino, cno);
255 if (likely(inode)) {
256 hlist_add_head(&inode->i_hash, head);
257 list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
258 }
259 return inode;
260}
261
/*
 * nilfs_clear_gcinode() - clear and free a gc inode
 * @inode - gc inode to dispose of
 *
 * The inode is cleared and then destroyed through the mdt helpers.
 */
void nilfs_clear_gcinode(struct inode *inode)
{
	nilfs_mdt_clear(inode);
	nilfs_mdt_destroy(inode);
}
270
271/*
272 * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs
273 */
274void nilfs_remove_all_gcinode(struct the_nilfs *nilfs)
275{
276 struct hlist_head *head = nilfs->ns_gc_inodes_h;
277 struct hlist_node *node, *n;
278 struct inode *inode;
279 int loop;
280
281 for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) {
282 hlist_for_each_entry_safe(inode, node, n, head, i_hash) {
283 hlist_del_init(&inode->i_hash);
284 list_del_init(&NILFS_I(inode)->i_dirty);
285 nilfs_clear_gcinode(inode); /* might sleep */
286 }
287 }
288}
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
new file mode 100644
index 000000000000..de86401f209f
--- /dev/null
+++ b/fs/nilfs2/ifile.c
@@ -0,0 +1,150 @@
1/*
2 * ifile.c - NILFS inode file
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Amagai Yoshiji <amagai@osrg.net>.
21 * Revised by Ryusuke Konishi <ryusuke@osrg.net>.
22 *
23 */
24
25#include <linux/types.h>
26#include <linux/buffer_head.h>
27#include "nilfs.h"
28#include "mdt.h"
29#include "alloc.h"
30#include "ifile.h"
31
32/**
33 * nilfs_ifile_create_inode - create a new disk inode
34 * @ifile: ifile inode
35 * @out_ino: pointer to a variable to store inode number
36 * @out_bh: buffer_head contains newly allocated disk inode
37 *
38 * Return Value: On success, 0 is returned and the newly allocated inode
39 * number is stored in the place pointed by @ino, and buffer_head pointer
40 * that contains newly allocated disk inode structure is stored in the
41 * place pointed by @out_bh
42 * On error, one of the following negative error codes is returned.
43 *
44 * %-EIO - I/O error.
45 *
46 * %-ENOMEM - Insufficient amount of memory available.
47 *
48 * %-ENOSPC - No inode left.
49 */
50int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
51 struct buffer_head **out_bh)
52{
53 struct nilfs_palloc_req req;
54 int ret;
55
56 req.pr_entry_nr = 0; /* 0 says find free inode from beginning of
57 a group. dull code!! */
58 req.pr_entry_bh = NULL;
59
60 ret = nilfs_palloc_prepare_alloc_entry(ifile, &req);
61 if (!ret) {
62 ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 1,
63 &req.pr_entry_bh);
64 if (ret < 0)
65 nilfs_palloc_abort_alloc_entry(ifile, &req);
66 }
67 if (ret < 0) {
68 brelse(req.pr_entry_bh);
69 return ret;
70 }
71 nilfs_palloc_commit_alloc_entry(ifile, &req);
72 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
73 nilfs_mdt_mark_dirty(ifile);
74 *out_ino = (ino_t)req.pr_entry_nr;
75 *out_bh = req.pr_entry_bh;
76 return 0;
77}
78
79/**
80 * nilfs_ifile_delete_inode - delete a disk inode
81 * @ifile: ifile inode
82 * @ino: inode number
83 *
84 * Return Value: On success, 0 is returned. On error, one of the following
85 * negative error codes is returned.
86 *
87 * %-EIO - I/O error.
88 *
89 * %-ENOMEM - Insufficient amount of memory available.
90 *
91 * %-ENOENT - The inode number @ino have not been allocated.
92 */
93int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
94{
95 struct nilfs_palloc_req req = {
96 .pr_entry_nr = ino, .pr_entry_bh = NULL
97 };
98 struct nilfs_inode *raw_inode;
99 void *kaddr;
100 int ret;
101
102 ret = nilfs_palloc_prepare_free_entry(ifile, &req);
103 if (!ret) {
104 ret = nilfs_palloc_get_entry_block(ifile, req.pr_entry_nr, 0,
105 &req.pr_entry_bh);
106 if (ret < 0)
107 nilfs_palloc_abort_free_entry(ifile, &req);
108 }
109 if (ret < 0) {
110 brelse(req.pr_entry_bh);
111 return ret;
112 }
113
114 kaddr = kmap_atomic(req.pr_entry_bh->b_page, KM_USER0);
115 raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
116 req.pr_entry_bh, kaddr);
117 raw_inode->i_flags = 0;
118 kunmap_atomic(kaddr, KM_USER0);
119
120 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh);
121 brelse(req.pr_entry_bh);
122
123 nilfs_palloc_commit_free_entry(ifile, &req);
124
125 return 0;
126}
127
128int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
129 struct buffer_head **out_bh)
130{
131 struct super_block *sb = ifile->i_sb;
132 int err;
133
134 if (unlikely(!NILFS_VALID_INODE(sb, ino))) {
135 nilfs_error(sb, __func__, "bad inode number: %lu",
136 (unsigned long) ino);
137 return -EINVAL;
138 }
139
140 err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
141 if (unlikely(err)) {
142 if (err == -EINVAL)
143 nilfs_error(sb, __func__, "ifile is broken");
144 else
145 nilfs_warning(sb, __func__,
146 "unable to read inode: %lu",
147 (unsigned long) ino);
148 }
149 return err;
150}
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
new file mode 100644
index 000000000000..5d30a35679b5
--- /dev/null
+++ b/fs/nilfs2/ifile.h
@@ -0,0 +1,53 @@
1/*
2 * ifile.h - NILFS inode file
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Amagai Yoshiji <amagai@osrg.net>
21 * Revised by Ryusuke Konishi <ryusuke@osrg.net>
22 *
23 */
24
25#ifndef _NILFS_IFILE_H
26#define _NILFS_IFILE_H
27
28#include <linux/fs.h>
29#include <linux/buffer_head.h>
30#include <linux/nilfs2_fs.h>
31#include "mdt.h"
32#include "alloc.h"
33
/* The ifile uses the same allocation mask as other metadata files. */
#define NILFS_IFILE_GFP		NILFS_MDT_GFP
35
36static inline struct nilfs_inode *
37nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
38{
39 void *kaddr = kmap(ibh->b_page);
40 return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
41}
42
43static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
44 struct buffer_head *ibh)
45{
46 kunmap(ibh->b_page);
47}
48
49int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
50int nilfs_ifile_delete_inode(struct inode *, ino_t);
51int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
52
53#endif /* _NILFS_IFILE_H */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
new file mode 100644
index 000000000000..49ab4a49bb4f
--- /dev/null
+++ b/fs/nilfs2/inode.c
@@ -0,0 +1,785 @@
1/*
2 * inode.c - NILFS inode operations.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#include <linux/buffer_head.h>
25#include <linux/mpage.h>
26#include <linux/writeback.h>
27#include <linux/uio.h>
28#include "nilfs.h"
29#include "segment.h"
30#include "page.h"
31#include "mdt.h"
32#include "cpfile.h"
33#include "ifile.h"
34
35
36/**
37 * nilfs_get_block() - get a file block on the filesystem (callback function)
38 * @inode - inode struct of the target file
39 * @blkoff - file block number
40 * @bh_result - buffer head to be mapped on
41 * @create - indicate whether allocating the block or not when it has not
42 * been allocated yet.
43 *
44 * This function does not issue actual read request of the specified data
45 * block. It is done by VFS.
46 * Bulk read for direct-io is not supported yet. (should be supported)
47 */
48int nilfs_get_block(struct inode *inode, sector_t blkoff,
49 struct buffer_head *bh_result, int create)
50{
51 struct nilfs_inode_info *ii = NILFS_I(inode);
52 unsigned long blknum = 0;
53 int err = 0, ret;
54 struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode));
55
56 /* This exclusion control is a workaround; should be revised */
57 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
58 ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum);
59 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
60 if (ret == 0) { /* found */
61 map_bh(bh_result, inode->i_sb, blknum);
62 goto out;
63 }
64 /* data block was not found */
65 if (ret == -ENOENT && create) {
66 struct nilfs_transaction_info ti;
67
68 bh_result->b_blocknr = 0;
69 err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
70 if (unlikely(err))
71 goto out;
72 err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff,
73 (unsigned long)bh_result);
74 if (unlikely(err != 0)) {
75 if (err == -EEXIST) {
76 /*
77 * The get_block() function could be called
78 * from multiple callers for an inode.
79 * However, the page having this block must
80 * be locked in this case.
81 */
82 printk(KERN_WARNING
83 "nilfs_get_block: a race condition "
84 "while inserting a data block. "
85 "(inode number=%lu, file block "
86 "offset=%llu)\n",
87 inode->i_ino,
88 (unsigned long long)blkoff);
89 err = 0;
90 } else if (err == -EINVAL) {
91 nilfs_error(inode->i_sb, __func__,
92 "broken bmap (inode=%lu)\n",
93 inode->i_ino);
94 err = -EIO;
95 }
96 nilfs_transaction_abort(inode->i_sb);
97 goto out;
98 }
99 nilfs_transaction_commit(inode->i_sb); /* never fails */
100 /* Error handling should be detailed */
101 set_buffer_new(bh_result);
102 map_bh(bh_result, inode->i_sb, 0); /* dbn must be changed
103 to proper value */
104 } else if (ret == -ENOENT) {
105 /* not found is not error (e.g. hole); must return without
106 the mapped state flag. */
107 ;
108 } else {
109 err = ret;
110 }
111
112 out:
113 return err;
114}
115
116/**
117 * nilfs_readpage() - implement readpage() method of nilfs_aops {}
118 * address_space_operations.
119 * @file - file struct of the file to be read
120 * @page - the page to be read
121 */
122static int nilfs_readpage(struct file *file, struct page *page)
123{
124 return mpage_readpage(page, nilfs_get_block);
125}
126
127/**
128 * nilfs_readpages() - implement readpages() method of nilfs_aops {}
129 * address_space_operations.
130 * @file - file struct of the file to be read
131 * @mapping - address_space struct used for reading multiple pages
132 * @pages - the pages to be read
133 * @nr_pages - number of pages to be read
134 */
135static int nilfs_readpages(struct file *file, struct address_space *mapping,
136 struct list_head *pages, unsigned nr_pages)
137{
138 return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block);
139}
140
141static int nilfs_writepages(struct address_space *mapping,
142 struct writeback_control *wbc)
143{
144 struct inode *inode = mapping->host;
145 int err = 0;
146
147 if (wbc->sync_mode == WB_SYNC_ALL)
148 err = nilfs_construct_dsync_segment(inode->i_sb, inode,
149 wbc->range_start,
150 wbc->range_end);
151 return err;
152}
153
154static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
155{
156 struct inode *inode = page->mapping->host;
157 int err;
158
159 redirty_page_for_writepage(wbc, page);
160 unlock_page(page);
161
162 if (wbc->sync_mode == WB_SYNC_ALL) {
163 err = nilfs_construct_segment(inode->i_sb);
164 if (unlikely(err))
165 return err;
166 } else if (wbc->for_reclaim)
167 nilfs_flush_segment(inode->i_sb, inode->i_ino);
168
169 return 0;
170}
171
172static int nilfs_set_page_dirty(struct page *page)
173{
174 int ret = __set_page_dirty_buffers(page);
175
176 if (ret) {
177 struct inode *inode = page->mapping->host;
178 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
179 unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
180
181 nilfs_set_file_dirty(sbi, inode, nr_dirty);
182 }
183 return ret;
184}
185
186static int nilfs_write_begin(struct file *file, struct address_space *mapping,
187 loff_t pos, unsigned len, unsigned flags,
188 struct page **pagep, void **fsdata)
189
190{
191 struct inode *inode = mapping->host;
192 int err = nilfs_transaction_begin(inode->i_sb, NULL, 1);
193
194 if (unlikely(err))
195 return err;
196
197 *pagep = NULL;
198 err = block_write_begin(file, mapping, pos, len, flags, pagep,
199 fsdata, nilfs_get_block);
200 if (unlikely(err))
201 nilfs_transaction_abort(inode->i_sb);
202 return err;
203}
204
205static int nilfs_write_end(struct file *file, struct address_space *mapping,
206 loff_t pos, unsigned len, unsigned copied,
207 struct page *page, void *fsdata)
208{
209 struct inode *inode = mapping->host;
210 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
211 unsigned nr_dirty;
212 int err;
213
214 nr_dirty = nilfs_page_count_clean_buffers(page, start,
215 start + copied);
216 copied = generic_write_end(file, mapping, pos, len, copied, page,
217 fsdata);
218 nilfs_set_file_dirty(NILFS_SB(inode->i_sb), inode, nr_dirty);
219 err = nilfs_transaction_commit(inode->i_sb);
220 return err ? : copied;
221}
222
223static ssize_t
224nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
225 loff_t offset, unsigned long nr_segs)
226{
227 struct file *file = iocb->ki_filp;
228 struct inode *inode = file->f_mapping->host;
229 ssize_t size;
230
231 if (rw == WRITE)
232 return 0;
233
234 /* Needs synchronization with the cleaner */
235 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
236 offset, nr_segs, nilfs_get_block, NULL);
237 return size;
238}
239
240struct address_space_operations nilfs_aops = {
241 .writepage = nilfs_writepage,
242 .readpage = nilfs_readpage,
243 /* .sync_page = nilfs_sync_page, */
244 .writepages = nilfs_writepages,
245 .set_page_dirty = nilfs_set_page_dirty,
246 .readpages = nilfs_readpages,
247 .write_begin = nilfs_write_begin,
248 .write_end = nilfs_write_end,
249 /* .releasepage = nilfs_releasepage, */
250 .invalidatepage = block_invalidatepage,
251 .direct_IO = nilfs_direct_IO,
252};
253
254struct inode *nilfs_new_inode(struct inode *dir, int mode)
255{
256 struct super_block *sb = dir->i_sb;
257 struct nilfs_sb_info *sbi = NILFS_SB(sb);
258 struct inode *inode;
259 struct nilfs_inode_info *ii;
260 int err = -ENOMEM;
261 ino_t ino;
262
263 inode = new_inode(sb);
264 if (unlikely(!inode))
265 goto failed;
266
267 mapping_set_gfp_mask(inode->i_mapping,
268 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
269
270 ii = NILFS_I(inode);
271 ii->i_state = 1 << NILFS_I_NEW;
272
273 err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh);
274 if (unlikely(err))
275 goto failed_ifile_create_inode;
276 /* reference count of i_bh inherits from nilfs_mdt_read_block() */
277
278 atomic_inc(&sbi->s_inodes_count);
279
280 inode->i_uid = current_fsuid();
281 if (dir->i_mode & S_ISGID) {
282 inode->i_gid = dir->i_gid;
283 if (S_ISDIR(mode))
284 mode |= S_ISGID;
285 } else
286 inode->i_gid = current_fsgid();
287
288 inode->i_mode = mode;
289 inode->i_ino = ino;
290 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
291
292 if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
293 err = nilfs_bmap_read(ii->i_bmap, NULL);
294 if (err < 0)
295 goto failed_bmap;
296
297 set_bit(NILFS_I_BMAP, &ii->i_state);
298 /* No lock is needed; iget() ensures it. */
299 }
300
301 ii->i_flags = NILFS_I(dir)->i_flags;
302 if (S_ISLNK(mode))
303 ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL);
304 if (!S_ISDIR(mode))
305 ii->i_flags &= ~NILFS_DIRSYNC_FL;
306
307 /* ii->i_file_acl = 0; */
308 /* ii->i_dir_acl = 0; */
309 ii->i_dir_start_lookup = 0;
310#ifdef CONFIG_NILFS_FS_POSIX_ACL
311 ii->i_acl = NULL;
312 ii->i_default_acl = NULL;
313#endif
314 ii->i_cno = 0;
315 nilfs_set_inode_flags(inode);
316 spin_lock(&sbi->s_next_gen_lock);
317 inode->i_generation = sbi->s_next_generation++;
318 spin_unlock(&sbi->s_next_gen_lock);
319 insert_inode_hash(inode);
320
321 err = nilfs_init_acl(inode, dir);
322 if (unlikely(err))
323 goto failed_acl; /* never occur. When supporting
324 nilfs_init_acl(), proper cancellation of
325 above jobs should be considered */
326
327 mark_inode_dirty(inode);
328 return inode;
329
330 failed_acl:
331 failed_bmap:
332 inode->i_nlink = 0;
333 iput(inode); /* raw_inode will be deleted through
334 generic_delete_inode() */
335 goto failed;
336
337 failed_ifile_create_inode:
338 make_bad_inode(inode);
339 iput(inode); /* if i_nlink == 1, generic_forget_inode() will be
340 called */
341 failed:
342 return ERR_PTR(err);
343}
344
345void nilfs_free_inode(struct inode *inode)
346{
347 struct super_block *sb = inode->i_sb;
348 struct nilfs_sb_info *sbi = NILFS_SB(sb);
349
350 clear_inode(inode);
351 /* XXX: check error code? Is there any thing I can do? */
352 (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino);
353 atomic_dec(&sbi->s_inodes_count);
354}
355
356void nilfs_set_inode_flags(struct inode *inode)
357{
358 unsigned int flags = NILFS_I(inode)->i_flags;
359
360 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
361 S_DIRSYNC);
362 if (flags & NILFS_SYNC_FL)
363 inode->i_flags |= S_SYNC;
364 if (flags & NILFS_APPEND_FL)
365 inode->i_flags |= S_APPEND;
366 if (flags & NILFS_IMMUTABLE_FL)
367 inode->i_flags |= S_IMMUTABLE;
368#ifndef NILFS_ATIME_DISABLE
369 if (flags & NILFS_NOATIME_FL)
370#endif
371 inode->i_flags |= S_NOATIME;
372 if (flags & NILFS_DIRSYNC_FL)
373 inode->i_flags |= S_DIRSYNC;
374 mapping_set_gfp_mask(inode->i_mapping,
375 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
376}
377
378int nilfs_read_inode_common(struct inode *inode,
379 struct nilfs_inode *raw_inode)
380{
381 struct nilfs_inode_info *ii = NILFS_I(inode);
382 int err;
383
384 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
385 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
386 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
387 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
388 inode->i_size = le64_to_cpu(raw_inode->i_size);
389 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
390 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
391 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
392 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
393 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
394 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
395 if (inode->i_nlink == 0 && inode->i_mode == 0)
396 return -EINVAL; /* this inode is deleted */
397
398 inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
399 ii->i_flags = le32_to_cpu(raw_inode->i_flags);
400#if 0
401 ii->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
402 ii->i_dir_acl = S_ISREG(inode->i_mode) ?
403 0 : le32_to_cpu(raw_inode->i_dir_acl);
404#endif
405 ii->i_cno = 0;
406 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
407
408 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
409 S_ISLNK(inode->i_mode)) {
410 err = nilfs_bmap_read(ii->i_bmap, raw_inode);
411 if (err < 0)
412 return err;
413 set_bit(NILFS_I_BMAP, &ii->i_state);
414 /* No lock is needed; iget() ensures it. */
415 }
416 return 0;
417}
418
419static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
420 struct inode *inode)
421{
422 struct nilfs_sb_info *sbi = NILFS_SB(sb);
423 struct inode *dat = nilfs_dat_inode(sbi->s_nilfs);
424 struct buffer_head *bh;
425 struct nilfs_inode *raw_inode;
426 int err;
427
428 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
429 err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh);
430 if (unlikely(err))
431 goto bad_inode;
432
433 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
434
435#ifdef CONFIG_NILFS_FS_POSIX_ACL
436 ii->i_acl = NILFS_ACL_NOT_CACHED;
437 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
438#endif
439 if (nilfs_read_inode_common(inode, raw_inode))
440 goto failed_unmap;
441
442 if (S_ISREG(inode->i_mode)) {
443 inode->i_op = &nilfs_file_inode_operations;
444 inode->i_fop = &nilfs_file_operations;
445 inode->i_mapping->a_ops = &nilfs_aops;
446 } else if (S_ISDIR(inode->i_mode)) {
447 inode->i_op = &nilfs_dir_inode_operations;
448 inode->i_fop = &nilfs_dir_operations;
449 inode->i_mapping->a_ops = &nilfs_aops;
450 } else if (S_ISLNK(inode->i_mode)) {
451 inode->i_op = &nilfs_symlink_inode_operations;
452 inode->i_mapping->a_ops = &nilfs_aops;
453 } else {
454 inode->i_op = &nilfs_special_inode_operations;
455 init_special_inode(
456 inode, inode->i_mode,
457 new_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
458 }
459 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh);
460 brelse(bh);
461 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
462 nilfs_set_inode_flags(inode);
463 return 0;
464
465 failed_unmap:
466 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh);
467 brelse(bh);
468
469 bad_inode:
470 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
471 return err;
472}
473
474struct inode *nilfs_iget(struct super_block *sb, unsigned long ino)
475{
476 struct inode *inode;
477 int err;
478
479 inode = iget_locked(sb, ino);
480 if (unlikely(!inode))
481 return ERR_PTR(-ENOMEM);
482 if (!(inode->i_state & I_NEW))
483 return inode;
484
485 err = __nilfs_read_inode(sb, ino, inode);
486 if (unlikely(err)) {
487 iget_failed(inode);
488 return ERR_PTR(err);
489 }
490 unlock_new_inode(inode);
491 return inode;
492}
493
494void nilfs_write_inode_common(struct inode *inode,
495 struct nilfs_inode *raw_inode, int has_bmap)
496{
497 struct nilfs_inode_info *ii = NILFS_I(inode);
498
499 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
500 raw_inode->i_uid = cpu_to_le32(inode->i_uid);
501 raw_inode->i_gid = cpu_to_le32(inode->i_gid);
502 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
503 raw_inode->i_size = cpu_to_le64(inode->i_size);
504 raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
505 raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
506 raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
507 raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
508 raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
509
510 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
511 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
512
513 if (has_bmap)
514 nilfs_bmap_write(ii->i_bmap, raw_inode);
515 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
516 raw_inode->i_device_code =
517 cpu_to_le64(new_encode_dev(inode->i_rdev));
518 /* When extending inode, nilfs->ns_inode_size should be checked
519 for substitutions of appended fields */
520}
521
522void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
523{
524 ino_t ino = inode->i_ino;
525 struct nilfs_inode_info *ii = NILFS_I(inode);
526 struct super_block *sb = inode->i_sb;
527 struct nilfs_sb_info *sbi = NILFS_SB(sb);
528 struct nilfs_inode *raw_inode;
529
530 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh);
531
532 /* The buffer is guarded with lock_buffer() by the caller */
533 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
534 memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size);
535 set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
536
537 nilfs_write_inode_common(inode, raw_inode, 0);
538 /* XXX: call with has_bmap = 0 is a workaround to avoid
539 deadlock of bmap. This delays update of i_bmap to just
540 before writing */
541 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh);
542}
543
544#define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */
545
546static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
547 unsigned long from)
548{
549 unsigned long b;
550 int ret;
551
552 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
553 return;
554 repeat:
555 ret = nilfs_bmap_last_key(ii->i_bmap, &b);
556 if (ret == -ENOENT)
557 return;
558 else if (ret < 0)
559 goto failed;
560
561 if (b < from)
562 return;
563
564 b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
565 ret = nilfs_bmap_truncate(ii->i_bmap, b);
566 nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
567 if (!ret || (ret == -ENOMEM &&
568 nilfs_bmap_truncate(ii->i_bmap, b) == 0))
569 goto repeat;
570
571 failed:
572 if (ret == -EINVAL)
573 nilfs_error(ii->vfs_inode.i_sb, __func__,
574 "bmap is broken (ino=%lu)", ii->vfs_inode.i_ino);
575 else
576 nilfs_warning(ii->vfs_inode.i_sb, __func__,
577 "failed to truncate bmap (ino=%lu, err=%d)",
578 ii->vfs_inode.i_ino, ret);
579}
580
581void nilfs_truncate(struct inode *inode)
582{
583 unsigned long blkoff;
584 unsigned int blocksize;
585 struct nilfs_transaction_info ti;
586 struct super_block *sb = inode->i_sb;
587 struct nilfs_inode_info *ii = NILFS_I(inode);
588
589 if (!test_bit(NILFS_I_BMAP, &ii->i_state))
590 return;
591 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
592 return;
593
594 blocksize = sb->s_blocksize;
595 blkoff = (inode->i_size + blocksize - 1) >> sb->s_blocksize_bits;
596 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
597
598 block_truncate_page(inode->i_mapping, inode->i_size, nilfs_get_block);
599
600 nilfs_truncate_bmap(ii, blkoff);
601
602 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
603 if (IS_SYNC(inode))
604 nilfs_set_transaction_flag(NILFS_TI_SYNC);
605
606 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
607 nilfs_transaction_commit(sb);
608 /* May construct a logical segment and may fail in sync mode.
609 But truncate has no return value. */
610}
611
612void nilfs_delete_inode(struct inode *inode)
613{
614 struct nilfs_transaction_info ti;
615 struct super_block *sb = inode->i_sb;
616 struct nilfs_inode_info *ii = NILFS_I(inode);
617
618 if (unlikely(is_bad_inode(inode))) {
619 if (inode->i_data.nrpages)
620 truncate_inode_pages(&inode->i_data, 0);
621 clear_inode(inode);
622 return;
623 }
624 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
625
626 if (inode->i_data.nrpages)
627 truncate_inode_pages(&inode->i_data, 0);
628
629 nilfs_truncate_bmap(ii, 0);
630 nilfs_free_inode(inode);
631 /* nilfs_free_inode() marks inode buffer dirty */
632 if (IS_SYNC(inode))
633 nilfs_set_transaction_flag(NILFS_TI_SYNC);
634 nilfs_transaction_commit(sb);
635 /* May construct a logical segment and may fail in sync mode.
636 But delete_inode has no return value. */
637}
638
639int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
640{
641 struct nilfs_transaction_info ti;
642 struct inode *inode = dentry->d_inode;
643 struct super_block *sb = inode->i_sb;
644 int err;
645
646 err = inode_change_ok(inode, iattr);
647 if (err)
648 return err;
649
650 err = nilfs_transaction_begin(sb, &ti, 0);
651 if (unlikely(err))
652 return err;
653 err = inode_setattr(inode, iattr);
654 if (!err && (iattr->ia_valid & ATTR_MODE))
655 err = nilfs_acl_chmod(inode);
656 if (likely(!err))
657 err = nilfs_transaction_commit(sb);
658 else
659 nilfs_transaction_abort(sb);
660
661 return err;
662}
663
664int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
665 struct buffer_head **pbh)
666{
667 struct nilfs_inode_info *ii = NILFS_I(inode);
668 int err;
669
670 spin_lock(&sbi->s_inode_lock);
671 /* Caller of this function MUST lock s_inode_lock */
672 if (ii->i_bh == NULL) {
673 spin_unlock(&sbi->s_inode_lock);
674 err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino,
675 pbh);
676 if (unlikely(err))
677 return err;
678 spin_lock(&sbi->s_inode_lock);
679 if (ii->i_bh == NULL)
680 ii->i_bh = *pbh;
681 else {
682 brelse(*pbh);
683 *pbh = ii->i_bh;
684 }
685 } else
686 *pbh = ii->i_bh;
687
688 get_bh(*pbh);
689 spin_unlock(&sbi->s_inode_lock);
690 return 0;
691}
692
693int nilfs_inode_dirty(struct inode *inode)
694{
695 struct nilfs_inode_info *ii = NILFS_I(inode);
696 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
697 int ret = 0;
698
699 if (!list_empty(&ii->i_dirty)) {
700 spin_lock(&sbi->s_inode_lock);
701 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
702 test_bit(NILFS_I_BUSY, &ii->i_state);
703 spin_unlock(&sbi->s_inode_lock);
704 }
705 return ret;
706}
707
708int nilfs_set_file_dirty(struct nilfs_sb_info *sbi, struct inode *inode,
709 unsigned nr_dirty)
710{
711 struct nilfs_inode_info *ii = NILFS_I(inode);
712
713 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks);
714
715 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
716 return 0;
717
718 spin_lock(&sbi->s_inode_lock);
719 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
720 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
721 /* Because this routine may race with nilfs_dispose_list(),
722 we have to check NILFS_I_QUEUED here, too. */
723 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
724 /* This will happen when somebody is freeing
725 this inode. */
726 nilfs_warning(sbi->s_super, __func__,
727 "cannot get inode (ino=%lu)\n",
728 inode->i_ino);
729 spin_unlock(&sbi->s_inode_lock);
730 return -EINVAL; /* NILFS_I_DIRTY may remain for
731 freeing inode */
732 }
733 list_del(&ii->i_dirty);
734 list_add_tail(&ii->i_dirty, &sbi->s_dirty_files);
735 set_bit(NILFS_I_QUEUED, &ii->i_state);
736 }
737 spin_unlock(&sbi->s_inode_lock);
738 return 0;
739}
740
741int nilfs_mark_inode_dirty(struct inode *inode)
742{
743 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
744 struct buffer_head *ibh;
745 int err;
746
747 err = nilfs_load_inode_block(sbi, inode, &ibh);
748 if (unlikely(err)) {
749 nilfs_warning(inode->i_sb, __func__,
750 "failed to reget inode block.\n");
751 return err;
752 }
753 lock_buffer(ibh);
754 nilfs_update_inode(inode, ibh);
755 unlock_buffer(ibh);
756 nilfs_mdt_mark_buffer_dirty(ibh);
757 nilfs_mdt_mark_dirty(sbi->s_ifile);
758 brelse(ibh);
759 return 0;
760}
761
762/**
763 * nilfs_dirty_inode - reflect changes on given inode to an inode block.
764 * @inode: inode of the file to be registered.
765 *
766 * nilfs_dirty_inode() loads a inode block containing the specified
767 * @inode and copies data from a nilfs_inode to a corresponding inode
768 * entry in the inode block. This operation is excluded from the segment
769 * construction. This function can be called both as a single operation
770 * and as a part of indivisible file operations.
771 */
772void nilfs_dirty_inode(struct inode *inode)
773{
774 struct nilfs_transaction_info ti;
775
776 if (is_bad_inode(inode)) {
777 nilfs_warning(inode->i_sb, __func__,
778 "tried to mark bad_inode dirty. ignored.\n");
779 dump_stack();
780 return;
781 }
782 nilfs_transaction_begin(inode->i_sb, &ti, 0);
783 nilfs_mark_inode_dirty(inode);
784 nilfs_transaction_commit(inode->i_sb); /* never fails */
785}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
new file mode 100644
index 000000000000..108d281ebca5
--- /dev/null
+++ b/fs/nilfs2/ioctl.c
@@ -0,0 +1,654 @@
1/*
2 * ioctl.c - NILFS ioctl operations.
3 *
4 * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/fs.h>
24#include <linux/wait.h>
25#include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */
26#include <linux/capability.h> /* capable() */
27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */
28#include <linux/nilfs2_fs.h>
29#include "nilfs.h"
30#include "segment.h"
31#include "bmap.h"
32#include "cpfile.h"
33#include "sufile.h"
34#include "dat.h"
35
36
37static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
38 struct nilfs_argv *argv, int dir,
39 ssize_t (*dofunc)(struct the_nilfs *,
40 __u64 *, int,
41 void *, size_t, size_t))
42{
43 void *buf;
44 void __user *base = (void __user *)(unsigned long)argv->v_base;
45 size_t maxmembs, total, n;
46 ssize_t nr;
47 int ret, i;
48 __u64 pos, ppos;
49
50 if (argv->v_nmembs == 0)
51 return 0;
52
53 if (argv->v_size > PAGE_SIZE)
54 return -EINVAL;
55
56 buf = (void *)__get_free_pages(GFP_NOFS, 0);
57 if (unlikely(!buf))
58 return -ENOMEM;
59 maxmembs = PAGE_SIZE / argv->v_size;
60
61 ret = 0;
62 total = 0;
63 pos = argv->v_index;
64 for (i = 0; i < argv->v_nmembs; i += n) {
65 n = (argv->v_nmembs - i < maxmembs) ?
66 argv->v_nmembs - i : maxmembs;
67 if ((dir & _IOC_WRITE) &&
68 copy_from_user(buf, base + argv->v_size * i,
69 argv->v_size * n)) {
70 ret = -EFAULT;
71 break;
72 }
73 ppos = pos;
74 nr = dofunc(nilfs, &pos, argv->v_flags, buf, argv->v_size,
75 n);
76 if (nr < 0) {
77 ret = nr;
78 break;
79 }
80 if ((dir & _IOC_READ) &&
81 copy_to_user(base + argv->v_size * i, buf,
82 argv->v_size * nr)) {
83 ret = -EFAULT;
84 break;
85 }
86 total += nr;
87 if ((size_t)nr < n)
88 break;
89 if (pos == ppos)
90 pos += n;
91 }
92 argv->v_nmembs = total;
93
94 free_pages((unsigned long)buf, 0);
95 return ret;
96}
97
98static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
99 unsigned int cmd, void __user *argp)
100{
101 struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile;
102 struct nilfs_transaction_info ti;
103 struct nilfs_cpmode cpmode;
104 int ret;
105
106 if (!capable(CAP_SYS_ADMIN))
107 return -EPERM;
108 if (copy_from_user(&cpmode, argp, sizeof(cpmode)))
109 return -EFAULT;
110
111 nilfs_transaction_begin(inode->i_sb, &ti, 0);
112 ret = nilfs_cpfile_change_cpmode(
113 cpfile, cpmode.cm_cno, cpmode.cm_mode);
114 if (unlikely(ret < 0)) {
115 nilfs_transaction_abort(inode->i_sb);
116 return ret;
117 }
118 nilfs_transaction_commit(inode->i_sb); /* never fails */
119 return ret;
120}
121
122static int
123nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
124 unsigned int cmd, void __user *argp)
125{
126 struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile;
127 struct nilfs_transaction_info ti;
128 __u64 cno;
129 int ret;
130
131 if (!capable(CAP_SYS_ADMIN))
132 return -EPERM;
133 if (copy_from_user(&cno, argp, sizeof(cno)))
134 return -EFAULT;
135
136 nilfs_transaction_begin(inode->i_sb, &ti, 0);
137 ret = nilfs_cpfile_delete_checkpoint(cpfile, cno);
138 if (unlikely(ret < 0)) {
139 nilfs_transaction_abort(inode->i_sb);
140 return ret;
141 }
142 nilfs_transaction_commit(inode->i_sb); /* never fails */
143 return ret;
144}
145
146static ssize_t
147nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
148 void *buf, size_t size, size_t nmembs)
149{
150 return nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf,
151 nmembs);
152}
153
154static int nilfs_ioctl_get_cpinfo(struct inode *inode, struct file *filp,
155 unsigned int cmd, void __user *argp)
156{
157 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
158 struct nilfs_argv argv;
159 int ret;
160
161 if (copy_from_user(&argv, argp, sizeof(argv)))
162 return -EFAULT;
163
164 down_read(&nilfs->ns_segctor_sem);
165 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd),
166 nilfs_ioctl_do_get_cpinfo);
167 up_read(&nilfs->ns_segctor_sem);
168 if (ret < 0)
169 return ret;
170
171 if (copy_to_user(argp, &argv, sizeof(argv)))
172 ret = -EFAULT;
173 return ret;
174}
175
176static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
177 unsigned int cmd, void __user *argp)
178{
179 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
180 struct nilfs_cpstat cpstat;
181 int ret;
182
183 down_read(&nilfs->ns_segctor_sem);
184 ret = nilfs_cpfile_get_stat(nilfs->ns_cpfile, &cpstat);
185 up_read(&nilfs->ns_segctor_sem);
186 if (ret < 0)
187 return ret;
188
189 if (copy_to_user(argp, &cpstat, sizeof(cpstat)))
190 ret = -EFAULT;
191 return ret;
192}
193
194static ssize_t
195nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
196 void *buf, size_t size, size_t nmembs)
197{
198 return nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs);
199}
200
201static int nilfs_ioctl_get_suinfo(struct inode *inode, struct file *filp,
202 unsigned int cmd, void __user *argp)
203{
204 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
205 struct nilfs_argv argv;
206 int ret;
207
208 if (copy_from_user(&argv, argp, sizeof(argv)))
209 return -EFAULT;
210
211 down_read(&nilfs->ns_segctor_sem);
212 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd),
213 nilfs_ioctl_do_get_suinfo);
214 up_read(&nilfs->ns_segctor_sem);
215 if (ret < 0)
216 return ret;
217
218 if (copy_to_user(argp, &argv, sizeof(argv)))
219 ret = -EFAULT;
220 return ret;
221}
222
223static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
224 unsigned int cmd, void __user *argp)
225{
226 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
227 struct nilfs_sustat sustat;
228 int ret;
229
230 down_read(&nilfs->ns_segctor_sem);
231 ret = nilfs_sufile_get_stat(nilfs->ns_sufile, &sustat);
232 up_read(&nilfs->ns_segctor_sem);
233 if (ret < 0)
234 return ret;
235
236 if (copy_to_user(argp, &sustat, sizeof(sustat)))
237 ret = -EFAULT;
238 return ret;
239}
240
241static ssize_t
242nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
243 void *buf, size_t size, size_t nmembs)
244{
245 return nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs);
246}
247
248static int nilfs_ioctl_get_vinfo(struct inode *inode, struct file *filp,
249 unsigned int cmd, void __user *argp)
250{
251 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
252 struct nilfs_argv argv;
253 int ret;
254
255 if (copy_from_user(&argv, argp, sizeof(argv)))
256 return -EFAULT;
257
258 down_read(&nilfs->ns_segctor_sem);
259 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd),
260 nilfs_ioctl_do_get_vinfo);
261 up_read(&nilfs->ns_segctor_sem);
262 if (ret < 0)
263 return ret;
264
265 if (copy_to_user(argp, &argv, sizeof(argv)))
266 ret = -EFAULT;
267 return ret;
268}
269
270static ssize_t
271nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
272 void *buf, size_t size, size_t nmembs)
273{
274 struct inode *dat = nilfs_dat_inode(nilfs);
275 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
276 struct nilfs_bdesc *bdescs = buf;
277 int ret, i;
278
279 for (i = 0; i < nmembs; i++) {
280 ret = nilfs_bmap_lookup_at_level(bmap,
281 bdescs[i].bd_offset,
282 bdescs[i].bd_level + 1,
283 &bdescs[i].bd_blocknr);
284 if (ret < 0) {
285 if (ret != -ENOENT)
286 return ret;
287 bdescs[i].bd_blocknr = 0;
288 }
289 }
290 return nmembs;
291}
292
293static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
294 unsigned int cmd, void __user *argp)
295{
296 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
297 struct nilfs_argv argv;
298 int ret;
299
300 if (copy_from_user(&argv, argp, sizeof(argv)))
301 return -EFAULT;
302
303 down_read(&nilfs->ns_segctor_sem);
304 ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd),
305 nilfs_ioctl_do_get_bdescs);
306 up_read(&nilfs->ns_segctor_sem);
307 if (ret < 0)
308 return ret;
309
310 if (copy_to_user(argp, &argv, sizeof(argv)))
311 ret = -EFAULT;
312 return ret;
313}
314
315static int nilfs_ioctl_move_inode_block(struct inode *inode,
316 struct nilfs_vdesc *vdesc,
317 struct list_head *buffers)
318{
319 struct buffer_head *bh;
320 int ret;
321
322 if (vdesc->vd_flags == 0)
323 ret = nilfs_gccache_submit_read_data(
324 inode, vdesc->vd_offset, vdesc->vd_blocknr,
325 vdesc->vd_vblocknr, &bh);
326 else
327 ret = nilfs_gccache_submit_read_node(
328 inode, vdesc->vd_blocknr, vdesc->vd_vblocknr, &bh);
329
330 if (unlikely(ret < 0)) {
331 if (ret == -ENOENT)
332 printk(KERN_CRIT
333 "%s: invalid virtual block address (%s): "
334 "ino=%llu, cno=%llu, offset=%llu, "
335 "blocknr=%llu, vblocknr=%llu\n",
336 __func__, vdesc->vd_flags ? "node" : "data",
337 (unsigned long long)vdesc->vd_ino,
338 (unsigned long long)vdesc->vd_cno,
339 (unsigned long long)vdesc->vd_offset,
340 (unsigned long long)vdesc->vd_blocknr,
341 (unsigned long long)vdesc->vd_vblocknr);
342 return ret;
343 }
344 bh->b_private = vdesc;
345 list_add_tail(&bh->b_assoc_buffers, buffers);
346 return 0;
347}
348
349static ssize_t
350nilfs_ioctl_do_move_blocks(struct the_nilfs *nilfs, __u64 *posp, int flags,
351 void *buf, size_t size, size_t nmembs)
352{
353 struct inode *inode;
354 struct nilfs_vdesc *vdesc;
355 struct buffer_head *bh, *n;
356 LIST_HEAD(buffers);
357 ino_t ino;
358 __u64 cno;
359 int i, ret;
360
361 for (i = 0, vdesc = buf; i < nmembs; ) {
362 ino = vdesc->vd_ino;
363 cno = vdesc->vd_cno;
364 inode = nilfs_gc_iget(nilfs, ino, cno);
365 if (unlikely(inode == NULL)) {
366 ret = -ENOMEM;
367 goto failed;
368 }
369 do {
370 ret = nilfs_ioctl_move_inode_block(inode, vdesc,
371 &buffers);
372 if (unlikely(ret < 0))
373 goto failed;
374 vdesc++;
375 } while (++i < nmembs &&
376 vdesc->vd_ino == ino && vdesc->vd_cno == cno);
377 }
378
379 list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) {
380 ret = nilfs_gccache_wait_and_mark_dirty(bh);
381 if (unlikely(ret < 0)) {
382 if (ret == -EEXIST) {
383 vdesc = bh->b_private;
384 printk(KERN_CRIT
385 "%s: conflicting %s buffer: "
386 "ino=%llu, cno=%llu, offset=%llu, "
387 "blocknr=%llu, vblocknr=%llu\n",
388 __func__,
389 vdesc->vd_flags ? "node" : "data",
390 (unsigned long long)vdesc->vd_ino,
391 (unsigned long long)vdesc->vd_cno,
392 (unsigned long long)vdesc->vd_offset,
393 (unsigned long long)vdesc->vd_blocknr,
394 (unsigned long long)vdesc->vd_vblocknr);
395 }
396 goto failed;
397 }
398 list_del_init(&bh->b_assoc_buffers);
399 bh->b_private = NULL;
400 brelse(bh);
401 }
402 return nmembs;
403
404 failed:
405 list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) {
406 list_del_init(&bh->b_assoc_buffers);
407 bh->b_private = NULL;
408 brelse(bh);
409 }
410 return ret;
411}
412
413static inline int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs,
414 struct nilfs_argv *argv,
415 int dir)
416{
417 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
418 nilfs_ioctl_do_move_blocks);
419}
420
421static ssize_t
422nilfs_ioctl_do_delete_checkpoints(struct the_nilfs *nilfs, __u64 *posp,
423 int flags, void *buf, size_t size,
424 size_t nmembs)
425{
426 struct inode *cpfile = nilfs->ns_cpfile;
427 struct nilfs_period *periods = buf;
428 int ret, i;
429
430 for (i = 0; i < nmembs; i++) {
431 ret = nilfs_cpfile_delete_checkpoints(
432 cpfile, periods[i].p_start, periods[i].p_end);
433 if (ret < 0)
434 return ret;
435 }
436 return nmembs;
437}
438
439static inline int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
440 struct nilfs_argv *argv,
441 int dir)
442{
443 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
444 nilfs_ioctl_do_delete_checkpoints);
445}
446
447static ssize_t
448nilfs_ioctl_do_free_vblocknrs(struct the_nilfs *nilfs, __u64 *posp, int flags,
449 void *buf, size_t size, size_t nmembs)
450{
451 int ret = nilfs_dat_freev(nilfs_dat_inode(nilfs), buf, nmembs);
452
453 return (ret < 0) ? ret : nmembs;
454}
455
456static inline int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
457 struct nilfs_argv *argv,
458 int dir)
459{
460 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
461 nilfs_ioctl_do_free_vblocknrs);
462}
463
464static ssize_t
465nilfs_ioctl_do_mark_blocks_dirty(struct the_nilfs *nilfs, __u64 *posp,
466 int flags, void *buf, size_t size,
467 size_t nmembs)
468{
469 struct inode *dat = nilfs_dat_inode(nilfs);
470 struct nilfs_bmap *bmap = NILFS_I(dat)->i_bmap;
471 struct nilfs_bdesc *bdescs = buf;
472 int ret, i;
473
474 for (i = 0; i < nmembs; i++) {
475 /* XXX: use macro or inline func to check liveness */
476 ret = nilfs_bmap_lookup_at_level(bmap,
477 bdescs[i].bd_offset,
478 bdescs[i].bd_level + 1,
479 &bdescs[i].bd_blocknr);
480 if (ret < 0) {
481 if (ret != -ENOENT)
482 return ret;
483 bdescs[i].bd_blocknr = 0;
484 }
485 if (bdescs[i].bd_blocknr != bdescs[i].bd_oblocknr)
486 /* skip dead block */
487 continue;
488 if (bdescs[i].bd_level == 0) {
489 ret = nilfs_mdt_mark_block_dirty(dat,
490 bdescs[i].bd_offset);
491 if (ret < 0) {
492 WARN_ON(ret == -ENOENT);
493 return ret;
494 }
495 } else {
496 ret = nilfs_bmap_mark(bmap, bdescs[i].bd_offset,
497 bdescs[i].bd_level);
498 if (ret < 0) {
499 WARN_ON(ret == -ENOENT);
500 return ret;
501 }
502 }
503 }
504 return nmembs;
505}
506
507static inline int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
508 struct nilfs_argv *argv,
509 int dir)
510{
511 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
512 nilfs_ioctl_do_mark_blocks_dirty);
513}
514
515static ssize_t
516nilfs_ioctl_do_free_segments(struct the_nilfs *nilfs, __u64 *posp, int flags,
517 void *buf, size_t size, size_t nmembs)
518{
519 struct nilfs_sb_info *sbi = nilfs_get_writer(nilfs);
520 int ret;
521
522 if (unlikely(!sbi))
523 return -EROFS;
524 ret = nilfs_segctor_add_segments_to_be_freed(
525 NILFS_SC(sbi), buf, nmembs);
526 nilfs_put_writer(nilfs);
527
528 return (ret < 0) ? ret : nmembs;
529}
530
531static inline int nilfs_ioctl_free_segments(struct the_nilfs *nilfs,
532 struct nilfs_argv *argv,
533 int dir)
534{
535 return nilfs_ioctl_wrap_copy(nilfs, argv, dir,
536 nilfs_ioctl_do_free_segments);
537}
538
539int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
540 void __user *argp)
541{
542 struct nilfs_argv argv[5];
543 const char *msg;
544 int dir, ret;
545
546 if (copy_from_user(argv, argp, sizeof(argv)))
547 return -EFAULT;
548
549 dir = _IOC_WRITE;
550 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], dir);
551 if (ret < 0) {
552 msg = "cannot read source blocks";
553 goto failed;
554 }
555 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], dir);
556 if (ret < 0) {
557 /*
558 * can safely abort because checkpoints can be removed
559 * independently.
560 */
561 msg = "cannot delete checkpoints";
562 goto failed;
563 }
564 ret = nilfs_ioctl_free_vblocknrs(nilfs, &argv[2], dir);
565 if (ret < 0) {
566 /*
567 * can safely abort because DAT file is updated atomically
568 * using a copy-on-write technique.
569 */
570 msg = "cannot delete virtual blocks from DAT file";
571 goto failed;
572 }
573 ret = nilfs_ioctl_mark_blocks_dirty(nilfs, &argv[3], dir);
574 if (ret < 0) {
575 /*
576 * can safely abort because the operation is nondestructive.
577 */
578 msg = "cannot mark copying blocks dirty";
579 goto failed;
580 }
581 ret = nilfs_ioctl_free_segments(nilfs, &argv[4], dir);
582 if (ret < 0) {
583 /*
584 * can safely abort because this operation is atomic.
585 */
586 msg = "cannot set segments to be freed";
587 goto failed;
588 }
589 return 0;
590
591 failed:
592 nilfs_remove_all_gcinode(nilfs);
593 printk(KERN_ERR "NILFS: GC failed during preparation: %s: err=%d\n",
594 msg, ret);
595 return ret;
596}
597
598static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
599 unsigned int cmd, void __user *argp)
600{
601 if (!capable(CAP_SYS_ADMIN))
602 return -EPERM;
603 return nilfs_clean_segments(inode->i_sb, argp);
604}
605
606static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
607 unsigned int cmd, void __user *argp)
608{
609 __u64 cno;
610 int ret;
611
612 ret = nilfs_construct_segment(inode->i_sb);
613 if (ret < 0)
614 return ret;
615
616 if (argp != NULL) {
617 cno = NILFS_SB(inode->i_sb)->s_nilfs->ns_cno - 1;
618 if (copy_to_user(argp, &cno, sizeof(cno)))
619 return -EFAULT;
620 }
621 return 0;
622}
623
624long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
625{
626 struct inode *inode = filp->f_dentry->d_inode;
627 void __user *argp = (void * __user *)arg;
628
629 switch (cmd) {
630 case NILFS_IOCTL_CHANGE_CPMODE:
631 return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp);
632 case NILFS_IOCTL_DELETE_CHECKPOINT:
633 return nilfs_ioctl_delete_checkpoint(inode, filp, cmd, argp);
634 case NILFS_IOCTL_GET_CPINFO:
635 return nilfs_ioctl_get_cpinfo(inode, filp, cmd, argp);
636 case NILFS_IOCTL_GET_CPSTAT:
637 return nilfs_ioctl_get_cpstat(inode, filp, cmd, argp);
638 case NILFS_IOCTL_GET_SUINFO:
639 return nilfs_ioctl_get_suinfo(inode, filp, cmd, argp);
640 case NILFS_IOCTL_GET_SUSTAT:
641 return nilfs_ioctl_get_sustat(inode, filp, cmd, argp);
642 case NILFS_IOCTL_GET_VINFO:
643 /* XXX: rename to ??? */
644 return nilfs_ioctl_get_vinfo(inode, filp, cmd, argp);
645 case NILFS_IOCTL_GET_BDESCS:
646 return nilfs_ioctl_get_bdescs(inode, filp, cmd, argp);
647 case NILFS_IOCTL_CLEAN_SEGMENTS:
648 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
649 case NILFS_IOCTL_SYNC:
650 return nilfs_ioctl_sync(inode, filp, cmd, argp);
651 default:
652 return -ENOTTY;
653 }
654}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
new file mode 100644
index 000000000000..47dd815433fd
--- /dev/null
+++ b/fs/nilfs2/mdt.c
@@ -0,0 +1,563 @@
1/*
2 * mdt.c - meta data file for NILFS
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 */
22
23#include <linux/buffer_head.h>
24#include <linux/mpage.h>
25#include <linux/mm.h>
26#include <linux/writeback.h>
27#include <linux/backing-dev.h>
28#include <linux/swap.h>
29#include "nilfs.h"
30#include "segment.h"
31#include "page.h"
32#include "mdt.h"
33
34
35#define NILFS_MDT_MAX_RA_BLOCKS (16 - 1)
36
37#define INIT_UNUSED_INODE_FIELDS
38
39static int
40nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
41 struct buffer_head *bh,
42 void (*init_block)(struct inode *,
43 struct buffer_head *, void *))
44{
45 struct nilfs_inode_info *ii = NILFS_I(inode);
46 void *kaddr;
47 int ret;
48
49 /* Caller exclude read accesses using page lock */
50
51 /* set_buffer_new(bh); */
52 bh->b_blocknr = 0;
53
54 ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh);
55 if (unlikely(ret))
56 return ret;
57
58 set_buffer_mapped(bh);
59
60 kaddr = kmap_atomic(bh->b_page, KM_USER0);
61 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
62 if (init_block)
63 init_block(inode, bh, kaddr);
64 flush_dcache_page(bh->b_page);
65 kunmap_atomic(kaddr, KM_USER0);
66
67 set_buffer_uptodate(bh);
68 nilfs_mark_buffer_dirty(bh);
69 nilfs_mdt_mark_dirty(inode);
70 return 0;
71}
72
73static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
74 struct buffer_head **out_bh,
75 void (*init_block)(struct inode *,
76 struct buffer_head *,
77 void *))
78{
79 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
80 struct nilfs_sb_info *writer = NULL;
81 struct super_block *sb = inode->i_sb;
82 struct nilfs_transaction_info ti;
83 struct buffer_head *bh;
84 int err;
85
86 if (!sb) {
87 writer = nilfs_get_writer(nilfs);
88 if (!writer) {
89 err = -EROFS;
90 goto out;
91 }
92 sb = writer->s_super;
93 }
94
95 nilfs_transaction_begin(sb, &ti, 0);
96
97 err = -ENOMEM;
98 bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0);
99 if (unlikely(!bh))
100 goto failed_unlock;
101
102 err = -EEXIST;
103 if (buffer_uptodate(bh) || buffer_mapped(bh))
104 goto failed_bh;
105#if 0
106 /* The uptodate flag is not protected by the page lock, but
107 the mapped flag is. Thus, we don't have to wait the buffer. */
108 wait_on_buffer(bh);
109 if (buffer_uptodate(bh))
110 goto failed_bh;
111#endif
112
113 bh->b_bdev = nilfs->ns_bdev;
114 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
115 if (likely(!err)) {
116 get_bh(bh);
117 *out_bh = bh;
118 }
119
120 failed_bh:
121 unlock_page(bh->b_page);
122 page_cache_release(bh->b_page);
123 brelse(bh);
124
125 failed_unlock:
126 if (likely(!err))
127 err = nilfs_transaction_commit(sb);
128 else
129 nilfs_transaction_abort(sb);
130 if (writer)
131 nilfs_put_writer(nilfs);
132 out:
133 return err;
134}
135
136static int
137nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
138 int mode, struct buffer_head **out_bh)
139{
140 struct buffer_head *bh;
141 unsigned long blknum = 0;
142 int ret = -ENOMEM;
143
144 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
145 if (unlikely(!bh))
146 goto failed;
147
148 ret = -EEXIST; /* internal code */
149 if (buffer_uptodate(bh))
150 goto out;
151
152 if (mode == READA) {
153 if (!trylock_buffer(bh)) {
154 ret = -EBUSY;
155 goto failed_bh;
156 }
157 } else /* mode == READ */
158 lock_buffer(bh);
159
160 if (buffer_uptodate(bh)) {
161 unlock_buffer(bh);
162 goto out;
163 }
164 if (!buffer_mapped(bh)) { /* unused buffer */
165 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff,
166 &blknum);
167 if (unlikely(ret)) {
168 unlock_buffer(bh);
169 goto failed_bh;
170 }
171 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
172 bh->b_blocknr = blknum;
173 set_buffer_mapped(bh);
174 }
175
176 bh->b_end_io = end_buffer_read_sync;
177 get_bh(bh);
178 submit_bh(mode, bh);
179 ret = 0;
180 out:
181 get_bh(bh);
182 *out_bh = bh;
183
184 failed_bh:
185 unlock_page(bh->b_page);
186 page_cache_release(bh->b_page);
187 brelse(bh);
188 failed:
189 return ret;
190}
191
192static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
193 struct buffer_head **out_bh)
194{
195 struct buffer_head *first_bh, *bh;
196 unsigned long blkoff;
197 int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
198 int err;
199
200 err = nilfs_mdt_submit_block(inode, block, READ, &first_bh);
201 if (err == -EEXIST) /* internal code */
202 goto out;
203
204 if (unlikely(err))
205 goto failed;
206
207 blkoff = block + 1;
208 for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
209 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
210 if (likely(!err || err == -EEXIST))
211 brelse(bh);
212 else if (err != -EBUSY)
213 break; /* abort readahead if bmap lookup failed */
214
215 if (!buffer_locked(first_bh))
216 goto out_no_wait;
217 }
218
219 wait_on_buffer(first_bh);
220
221 out_no_wait:
222 err = -EIO;
223 if (!buffer_uptodate(first_bh))
224 goto failed_bh;
225 out:
226 *out_bh = first_bh;
227 return 0;
228
229 failed_bh:
230 brelse(first_bh);
231 failed:
232 return err;
233}
234
235/**
236 * nilfs_mdt_get_block - read or create a buffer on meta data file.
237 * @inode: inode of the meta data file
238 * @blkoff: block offset
239 * @create: create flag
240 * @init_block: initializer used for newly allocated block
241 * @out_bh: output of a pointer to the buffer_head
242 *
243 * nilfs_mdt_get_block() looks up the specified buffer and tries to create
244 * a new buffer if @create is not zero. On success, the returned buffer is
245 * assured to be either existing or formatted using a buffer lock on success.
246 * @out_bh is substituted only when zero is returned.
247 *
248 * Return Value: On success, it returns 0. On error, the following negative
249 * error code is returned.
250 *
251 * %-ENOMEM - Insufficient memory available.
252 *
253 * %-EIO - I/O error
254 *
255 * %-ENOENT - the specified block does not exist (hole block)
256 *
257 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
258 *
259 * %-EROFS - Read only filesystem (for create mode)
260 */
261int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
262 void (*init_block)(struct inode *,
263 struct buffer_head *, void *),
264 struct buffer_head **out_bh)
265{
266 int ret;
267
268 /* Should be rewritten with merging nilfs_mdt_read_block() */
269 retry:
270 ret = nilfs_mdt_read_block(inode, blkoff, out_bh);
271 if (!create || ret != -ENOENT)
272 return ret;
273
274 ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block);
275 if (unlikely(ret == -EEXIST)) {
276 /* create = 0; */ /* limit read-create loop retries */
277 goto retry;
278 }
279 return ret;
280}
281
282/**
283 * nilfs_mdt_delete_block - make a hole on the meta data file.
284 * @inode: inode of the meta data file
285 * @block: block offset
286 *
287 * Return Value: On success, zero is returned.
288 * On error, one of the following negative error code is returned.
289 *
290 * %-ENOMEM - Insufficient memory available.
291 *
292 * %-EIO - I/O error
293 *
294 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
295 */
296int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
297{
298 struct nilfs_inode_info *ii = NILFS_I(inode);
299 int err;
300
301 err = nilfs_bmap_delete(ii->i_bmap, block);
302 if (likely(!err)) {
303 nilfs_mdt_mark_dirty(inode);
304 nilfs_mdt_forget_block(inode, block);
305 }
306 return err;
307}
308
309/**
310 * nilfs_mdt_forget_block - discard dirty state and try to remove the page
311 * @inode: inode of the meta data file
312 * @block: block offset
313 *
314 * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and
315 * tries to release the page including the buffer from a page cache.
316 *
317 * Return Value: On success, 0 is returned. On error, one of the following
318 * negative error code is returned.
319 *
320 * %-EBUSY - page has an active buffer.
321 *
322 * %-ENOENT - page cache has no page addressed by the offset.
323 */
324int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
325{
326 pgoff_t index = (pgoff_t)block >>
327 (PAGE_CACHE_SHIFT - inode->i_blkbits);
328 struct page *page;
329 unsigned long first_block;
330 int ret = 0;
331 int still_dirty;
332
333 page = find_lock_page(inode->i_mapping, index);
334 if (!page)
335 return -ENOENT;
336
337 wait_on_page_writeback(page);
338
339 first_block = (unsigned long)index <<
340 (PAGE_CACHE_SHIFT - inode->i_blkbits);
341 if (page_has_buffers(page)) {
342 struct buffer_head *bh;
343
344 bh = nilfs_page_get_nth_block(page, block - first_block);
345 nilfs_forget_buffer(bh);
346 }
347 still_dirty = PageDirty(page);
348 unlock_page(page);
349 page_cache_release(page);
350
351 if (still_dirty ||
352 invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
353 ret = -EBUSY;
354 return ret;
355}
356
/**
 * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty.
 * @inode: inode of the meta data file
 * @block: block offset
 *
 * Reads the block, marks its buffer and the meta data file dirty, and
 * drops the buffer reference again.
 *
 * Return Value: On success, it returns 0. On error, the following negative
 * error code is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-EIO - I/O error
 *
 * %-ENOENT - the specified block does not exist (hole block)
 *
 * %-EINVAL - bmap is broken. (the caller should call nilfs_error())
 */
int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
{
	struct buffer_head *bh;
	int err = nilfs_mdt_read_block(inode, block, &bh);

	if (likely(!err)) {
		nilfs_mark_buffer_dirty(bh);
		nilfs_mdt_mark_dirty(inode);
		brelse(bh);
		return 0;
	}
	return err;
}
386
387int nilfs_mdt_fetch_dirty(struct inode *inode)
388{
389 struct nilfs_inode_info *ii = NILFS_I(inode);
390
391 if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) {
392 set_bit(NILFS_I_DIRTY, &ii->i_state);
393 return 1;
394 }
395 return test_bit(NILFS_I_DIRTY, &ii->i_state);
396}
397
398static int
399nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
400{
401 struct inode *inode = container_of(page->mapping,
402 struct inode, i_data);
403 struct super_block *sb = inode->i_sb;
404 struct nilfs_sb_info *writer = NULL;
405 int err = 0;
406
407 redirty_page_for_writepage(wbc, page);
408 unlock_page(page);
409
410 if (page->mapping->assoc_mapping)
411 return 0; /* Do not request flush for shadow page cache */
412 if (!sb) {
413 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs);
414 if (!writer)
415 return -EROFS;
416 sb = writer->s_super;
417 }
418
419 if (wbc->sync_mode == WB_SYNC_ALL)
420 err = nilfs_construct_segment(sb);
421 else if (wbc->for_reclaim)
422 nilfs_flush_segment(sb, inode->i_ino);
423
424 if (writer)
425 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
426 return err;
427}
428
429
430static struct address_space_operations def_mdt_aops = {
431 .writepage = nilfs_mdt_write_page,
432};
433
434static struct inode_operations def_mdt_iops;
435static struct file_operations def_mdt_fops;
436
437/*
438 * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile,
439 * ifile, or gcinodes. This allows the B-tree code and segment constructor
440 * to treat them like regular files, and this helps to simplify the
441 * implementation.
442 * On the other hand, some of the pseudo inodes have an irregular point:
443 * They don't have valid inode->i_sb pointer because their lifetimes are
444 * longer than those of the super block structs; they may continue for
445 * several consecutive mounts/umounts. This would need discussions.
446 */
447struct inode *
448nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
449 ino_t ino, gfp_t gfp_mask)
450{
451 struct inode *inode = nilfs_alloc_inode(sb);
452
453 if (!inode)
454 return NULL;
455 else {
456 struct address_space * const mapping = &inode->i_data;
457 struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS);
458
459 if (!mi) {
460 nilfs_destroy_inode(inode);
461 return NULL;
462 }
463 mi->mi_nilfs = nilfs;
464 init_rwsem(&mi->mi_sem);
465
466 inode->i_sb = sb; /* sb may be NULL for some meta data files */
467 inode->i_blkbits = nilfs->ns_blocksize_bits;
468 inode->i_flags = 0;
469 atomic_set(&inode->i_count, 1);
470 inode->i_nlink = 1;
471 inode->i_ino = ino;
472 inode->i_mode = S_IFREG;
473 inode->i_private = mi;
474
475#ifdef INIT_UNUSED_INODE_FIELDS
476 atomic_set(&inode->i_writecount, 0);
477 inode->i_size = 0;
478 inode->i_blocks = 0;
479 inode->i_bytes = 0;
480 inode->i_generation = 0;
481#ifdef CONFIG_QUOTA
482 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
483#endif
484 inode->i_pipe = NULL;
485 inode->i_bdev = NULL;
486 inode->i_cdev = NULL;
487 inode->i_rdev = 0;
488#ifdef CONFIG_SECURITY
489 inode->i_security = NULL;
490#endif
491 inode->dirtied_when = 0;
492
493 INIT_LIST_HEAD(&inode->i_list);
494 INIT_LIST_HEAD(&inode->i_sb_list);
495 inode->i_state = 0;
496#endif
497
498 spin_lock_init(&inode->i_lock);
499 mutex_init(&inode->i_mutex);
500 init_rwsem(&inode->i_alloc_sem);
501
502 mapping->host = NULL; /* instead of inode */
503 mapping->flags = 0;
504 mapping_set_gfp_mask(mapping, gfp_mask);
505 mapping->assoc_mapping = NULL;
506 mapping->backing_dev_info = nilfs->ns_bdi;
507
508 inode->i_mapping = mapping;
509 }
510
511 return inode;
512}
513
514struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
515 ino_t ino, gfp_t gfp_mask)
516{
517 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask);
518
519 if (!inode)
520 return NULL;
521
522 inode->i_op = &def_mdt_iops;
523 inode->i_fop = &def_mdt_fops;
524 inode->i_mapping->a_ops = &def_mdt_aops;
525 return inode;
526}
527
528void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
529 unsigned header_size)
530{
531 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
532
533 mi->mi_entry_size = entry_size;
534 mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size;
535 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
536}
537
538void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow)
539{
540 shadow->i_mapping->assoc_mapping = orig->i_mapping;
541 NILFS_I(shadow)->i_btnode_cache.assoc_mapping =
542 &NILFS_I(orig)->i_btnode_cache;
543}
544
545void nilfs_mdt_clear(struct inode *inode)
546{
547 struct nilfs_inode_info *ii = NILFS_I(inode);
548
549 invalidate_mapping_pages(inode->i_mapping, 0, -1);
550 truncate_inode_pages(inode->i_mapping, 0);
551
552 nilfs_bmap_clear(ii->i_bmap);
553 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
554}
555
556void nilfs_mdt_destroy(struct inode *inode)
557{
558 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
559
560 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
561 kfree(mdi);
562 nilfs_destroy_inode(inode);
563}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
new file mode 100644
index 000000000000..df683e0bca6a
--- /dev/null
+++ b/fs/nilfs2/mdt.h
@@ -0,0 +1,125 @@
1/*
2 * mdt.h - NILFS meta data file prototype and definitions
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 */
22
23#ifndef _NILFS_MDT_H
24#define _NILFS_MDT_H
25
26#include <linux/buffer_head.h>
27#include <linux/blockgroup_lock.h>
28#include "nilfs.h"
29#include "page.h"
30
31/**
32 * struct nilfs_mdt_info - on-memory private data of meta data files
33 * @mi_nilfs: back pointer to the_nilfs struct
34 * @mi_sem: reader/writer semaphore for meta data operations
35 * @mi_bgl: per-blockgroup locking
36 * @mi_entry_size: size of an entry
37 * @mi_first_entry_offset: offset to the first entry
38 * @mi_entries_per_block: number of entries in a block
39 * @mi_blocks_per_group: number of blocks in a group
40 * @mi_blocks_per_desc_block: number of blocks per descriptor block
41 */
42struct nilfs_mdt_info {
43 struct the_nilfs *mi_nilfs;
44 struct rw_semaphore mi_sem;
45 struct blockgroup_lock *mi_bgl;
46 unsigned mi_entry_size;
47 unsigned mi_first_entry_offset;
48 unsigned long mi_entries_per_block;
49 unsigned long mi_blocks_per_group;
50 unsigned long mi_blocks_per_desc_block;
51};
52
53static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
54{
55 return inode->i_private;
56}
57
58static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
59{
60 struct super_block *sb = inode->i_sb;
61
62 return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs;
63}
64
65/* Default GFP flags using highmem */
66#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
67
68int nilfs_mdt_get_block(struct inode *, unsigned long, int,
69 void (*init_block)(struct inode *,
70 struct buffer_head *, void *),
71 struct buffer_head **);
72int nilfs_mdt_delete_block(struct inode *, unsigned long);
73int nilfs_mdt_forget_block(struct inode *, unsigned long);
74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
75int nilfs_mdt_fetch_dirty(struct inode *);
76
77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t,
78 gfp_t);
79struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
80 ino_t, gfp_t);
81void nilfs_mdt_destroy(struct inode *);
82void nilfs_mdt_clear(struct inode *);
83void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
84void nilfs_mdt_set_shadow(struct inode *, struct inode *);
85
86
87#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
88
89static inline void nilfs_mdt_mark_dirty(struct inode *inode)
90{
91 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
92 set_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
93}
94
95static inline void nilfs_mdt_clear_dirty(struct inode *inode)
96{
97 clear_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state);
98}
99
100static inline __u64 nilfs_mdt_cno(struct inode *inode)
101{
102 return NILFS_MDT(inode)->mi_nilfs->ns_cno;
103}
104
105#define nilfs_mdt_bgl_lock(inode, bg) \
106 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
107
108
109static inline int
110nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh,
111 unsigned n)
112{
113 return nilfs_read_inode_common(
114 inode, (struct nilfs_inode *)(bh->b_data + n));
115}
116
117static inline void
118nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh,
119 unsigned n)
120{
121 nilfs_write_inode_common(
122 inode, (struct nilfs_inode *)(bh->b_data + n), 1);
123}
124
125#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
new file mode 100644
index 000000000000..df70dadb336f
--- /dev/null
+++ b/fs/nilfs2/namei.c
@@ -0,0 +1,474 @@
1/*
2 * namei.c - NILFS pathname lookup operations.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Modified for NILFS by Amagai Yoshiji <amagai@osrg.net>,
21 * Ryusuke Konishi <ryusuke@osrg.net>
22 */
23/*
24 * linux/fs/ext2/namei.c
25 *
26 * Copyright (C) 1992, 1993, 1994, 1995
27 * Remy Card (card@masi.ibp.fr)
28 * Laboratoire MASI - Institut Blaise Pascal
29 * Universite Pierre et Marie Curie (Paris VI)
30 *
31 * from
32 *
33 * linux/fs/minix/namei.c
34 *
35 * Copyright (C) 1991, 1992 Linus Torvalds
36 *
37 * Big-endian to little-endian byte-swapping/bitmaps by
38 * David S. Miller (davem@caip.rutgers.edu), 1995
39 */
40
41#include <linux/pagemap.h>
42#include "nilfs.h"
43
44
/*
 * Link @inode under @dentry.  On success the dentry is instantiated
 * with the inode and 0 is returned.  On failure the link count of the
 * inode is decremented, the inode reference is dropped, and the error
 * from nilfs_add_link() is returned.
 */
static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
{
	int ret = nilfs_add_link(dentry, inode);

	if (ret) {
		inode_dec_link_count(inode);
		iput(inode);
		return ret;
	}
	d_instantiate(dentry, inode);
	return 0;
}
56
57/*
58 * Methods themselves.
59 */
60
61static struct dentry *
62nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
63{
64 struct inode *inode;
65 ino_t ino;
66
67 if (dentry->d_name.len > NILFS_NAME_LEN)
68 return ERR_PTR(-ENAMETOOLONG);
69
70 ino = nilfs_inode_by_name(dir, dentry);
71 inode = NULL;
72 if (ino) {
73 inode = nilfs_iget(dir->i_sb, ino);
74 if (IS_ERR(inode))
75 return ERR_CAST(inode);
76 }
77 return d_splice_alias(inode, dentry);
78}
79
80struct dentry *nilfs_get_parent(struct dentry *child)
81{
82 unsigned long ino;
83 struct inode *inode;
84 struct dentry dotdot;
85
86 dotdot.d_name.name = "..";
87 dotdot.d_name.len = 2;
88
89 ino = nilfs_inode_by_name(child->d_inode, &dotdot);
90 if (!ino)
91 return ERR_PTR(-ENOENT);
92
93 inode = nilfs_iget(child->d_inode->i_sb, ino);
94 if (IS_ERR(inode))
95 return ERR_CAST(inode);
96 return d_obtain_alias(inode);
97}
98
99/*
100 * By the time this is called, we already have created
101 * the directory cache entry for the new file, but it
102 * is so far negative - it has no inode.
103 *
104 * If the create succeeds, we fill in the inode information
105 * with d_instantiate().
106 */
107static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
108 struct nameidata *nd)
109{
110 struct inode *inode;
111 struct nilfs_transaction_info ti;
112 int err;
113
114 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
115 if (err)
116 return err;
117 inode = nilfs_new_inode(dir, mode);
118 err = PTR_ERR(inode);
119 if (!IS_ERR(inode)) {
120 inode->i_op = &nilfs_file_inode_operations;
121 inode->i_fop = &nilfs_file_operations;
122 inode->i_mapping->a_ops = &nilfs_aops;
123 mark_inode_dirty(inode);
124 err = nilfs_add_nondir(dentry, inode);
125 }
126 if (!err)
127 err = nilfs_transaction_commit(dir->i_sb);
128 else
129 nilfs_transaction_abort(dir->i_sb);
130
131 return err;
132}
133
134static int
135nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
136{
137 struct inode *inode;
138 struct nilfs_transaction_info ti;
139 int err;
140
141 if (!new_valid_dev(rdev))
142 return -EINVAL;
143
144 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
145 if (err)
146 return err;
147 inode = nilfs_new_inode(dir, mode);
148 err = PTR_ERR(inode);
149 if (!IS_ERR(inode)) {
150 init_special_inode(inode, inode->i_mode, rdev);
151 mark_inode_dirty(inode);
152 err = nilfs_add_nondir(dentry, inode);
153 }
154 if (!err)
155 err = nilfs_transaction_commit(dir->i_sb);
156 else
157 nilfs_transaction_abort(dir->i_sb);
158
159 return err;
160}
161
162static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
163 const char *symname)
164{
165 struct nilfs_transaction_info ti;
166 struct super_block *sb = dir->i_sb;
167 unsigned l = strlen(symname)+1;
168 struct inode *inode;
169 int err;
170
171 if (l > sb->s_blocksize)
172 return -ENAMETOOLONG;
173
174 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
175 if (err)
176 return err;
177
178 inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO);
179 err = PTR_ERR(inode);
180 if (IS_ERR(inode))
181 goto out;
182
183 /* slow symlink */
184 inode->i_op = &nilfs_symlink_inode_operations;
185 inode->i_mapping->a_ops = &nilfs_aops;
186 err = page_symlink(inode, symname, l);
187 if (err)
188 goto out_fail;
189
190 /* mark_inode_dirty(inode); */
191 /* nilfs_new_inode() and page_symlink() do this */
192
193 err = nilfs_add_nondir(dentry, inode);
194out:
195 if (!err)
196 err = nilfs_transaction_commit(dir->i_sb);
197 else
198 nilfs_transaction_abort(dir->i_sb);
199
200 return err;
201
202out_fail:
203 inode_dec_link_count(inode);
204 iput(inode);
205 goto out;
206}
207
208static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
209 struct dentry *dentry)
210{
211 struct inode *inode = old_dentry->d_inode;
212 struct nilfs_transaction_info ti;
213 int err;
214
215 if (inode->i_nlink >= NILFS_LINK_MAX)
216 return -EMLINK;
217
218 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
219 if (err)
220 return err;
221
222 inode->i_ctime = CURRENT_TIME;
223 inode_inc_link_count(inode);
224 atomic_inc(&inode->i_count);
225
226 err = nilfs_add_nondir(dentry, inode);
227 if (!err)
228 err = nilfs_transaction_commit(dir->i_sb);
229 else
230 nilfs_transaction_abort(dir->i_sb);
231
232 return err;
233}
234
235static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
236{
237 struct inode *inode;
238 struct nilfs_transaction_info ti;
239 int err;
240
241 if (dir->i_nlink >= NILFS_LINK_MAX)
242 return -EMLINK;
243
244 err = nilfs_transaction_begin(dir->i_sb, &ti, 1);
245 if (err)
246 return err;
247
248 inode_inc_link_count(dir);
249
250 inode = nilfs_new_inode(dir, S_IFDIR | mode);
251 err = PTR_ERR(inode);
252 if (IS_ERR(inode))
253 goto out_dir;
254
255 inode->i_op = &nilfs_dir_inode_operations;
256 inode->i_fop = &nilfs_dir_operations;
257 inode->i_mapping->a_ops = &nilfs_aops;
258
259 inode_inc_link_count(inode);
260
261 err = nilfs_make_empty(inode, dir);
262 if (err)
263 goto out_fail;
264
265 err = nilfs_add_link(dentry, inode);
266 if (err)
267 goto out_fail;
268
269 d_instantiate(dentry, inode);
270out:
271 if (!err)
272 err = nilfs_transaction_commit(dir->i_sb);
273 else
274 nilfs_transaction_abort(dir->i_sb);
275
276 return err;
277
278out_fail:
279 inode_dec_link_count(inode);
280 inode_dec_link_count(inode);
281 iput(inode);
282out_dir:
283 inode_dec_link_count(dir);
284 goto out;
285}
286
287static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
288{
289 struct inode *inode;
290 struct nilfs_dir_entry *de;
291 struct page *page;
292 struct nilfs_transaction_info ti;
293 int err;
294
295 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
296 if (err)
297 return err;
298
299 err = -ENOENT;
300 de = nilfs_find_entry(dir, dentry, &page);
301 if (!de)
302 goto out;
303
304 inode = dentry->d_inode;
305 err = -EIO;
306 if (le64_to_cpu(de->inode) != inode->i_ino)
307 goto out;
308
309 if (!inode->i_nlink) {
310 nilfs_warning(inode->i_sb, __func__,
311 "deleting nonexistent file (%lu), %d\n",
312 inode->i_ino, inode->i_nlink);
313 inode->i_nlink = 1;
314 }
315 err = nilfs_delete_entry(de, page);
316 if (err)
317 goto out;
318
319 inode->i_ctime = dir->i_ctime;
320 inode_dec_link_count(inode);
321 err = 0;
322out:
323 if (!err)
324 err = nilfs_transaction_commit(dir->i_sb);
325 else
326 nilfs_transaction_abort(dir->i_sb);
327
328 return err;
329}
330
331static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
332{
333 struct inode *inode = dentry->d_inode;
334 struct nilfs_transaction_info ti;
335 int err;
336
337 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
338 if (err)
339 return err;
340
341 err = -ENOTEMPTY;
342 if (nilfs_empty_dir(inode)) {
343 err = nilfs_unlink(dir, dentry);
344 if (!err) {
345 inode->i_size = 0;
346 inode_dec_link_count(inode);
347 inode_dec_link_count(dir);
348 }
349 }
350 if (!err)
351 err = nilfs_transaction_commit(dir->i_sb);
352 else
353 nilfs_transaction_abort(dir->i_sb);
354
355 return err;
356}
357
358static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
359 struct inode *new_dir, struct dentry *new_dentry)
360{
361 struct inode *old_inode = old_dentry->d_inode;
362 struct inode *new_inode = new_dentry->d_inode;
363 struct page *dir_page = NULL;
364 struct nilfs_dir_entry *dir_de = NULL;
365 struct page *old_page;
366 struct nilfs_dir_entry *old_de;
367 struct nilfs_transaction_info ti;
368 int err;
369
370 err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1);
371 if (unlikely(err))
372 return err;
373
374 err = -ENOENT;
375 old_de = nilfs_find_entry(old_dir, old_dentry, &old_page);
376 if (!old_de)
377 goto out;
378
379 if (S_ISDIR(old_inode->i_mode)) {
380 err = -EIO;
381 dir_de = nilfs_dotdot(old_inode, &dir_page);
382 if (!dir_de)
383 goto out_old;
384 }
385
386 if (new_inode) {
387 struct page *new_page;
388 struct nilfs_dir_entry *new_de;
389
390 err = -ENOTEMPTY;
391 if (dir_de && !nilfs_empty_dir(new_inode))
392 goto out_dir;
393
394 err = -ENOENT;
395 new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
396 if (!new_de)
397 goto out_dir;
398 inode_inc_link_count(old_inode);
399 nilfs_set_link(new_dir, new_de, new_page, old_inode);
400 new_inode->i_ctime = CURRENT_TIME;
401 if (dir_de)
402 drop_nlink(new_inode);
403 inode_dec_link_count(new_inode);
404 } else {
405 if (dir_de) {
406 err = -EMLINK;
407 if (new_dir->i_nlink >= NILFS_LINK_MAX)
408 goto out_dir;
409 }
410 inode_inc_link_count(old_inode);
411 err = nilfs_add_link(new_dentry, old_inode);
412 if (err) {
413 inode_dec_link_count(old_inode);
414 goto out_dir;
415 }
416 if (dir_de)
417 inode_inc_link_count(new_dir);
418 }
419
420 /*
421 * Like most other Unix systems, set the ctime for inodes on a
422 * rename.
423 * inode_dec_link_count() will mark the inode dirty.
424 */
425 old_inode->i_ctime = CURRENT_TIME;
426
427 nilfs_delete_entry(old_de, old_page);
428 inode_dec_link_count(old_inode);
429
430 if (dir_de) {
431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
432 inode_dec_link_count(old_dir);
433 }
434
435 err = nilfs_transaction_commit(old_dir->i_sb);
436 return err;
437
438out_dir:
439 if (dir_de) {
440 kunmap(dir_page);
441 page_cache_release(dir_page);
442 }
443out_old:
444 kunmap(old_page);
445 page_cache_release(old_page);
446out:
447 nilfs_transaction_abort(old_dir->i_sb);
448 return err;
449}
450
451struct inode_operations nilfs_dir_inode_operations = {
452 .create = nilfs_create,
453 .lookup = nilfs_lookup,
454 .link = nilfs_link,
455 .unlink = nilfs_unlink,
456 .symlink = nilfs_symlink,
457 .mkdir = nilfs_mkdir,
458 .rmdir = nilfs_rmdir,
459 .mknod = nilfs_mknod,
460 .rename = nilfs_rename,
461 .setattr = nilfs_setattr,
462 .permission = nilfs_permission,
463};
464
465struct inode_operations nilfs_special_inode_operations = {
466 .setattr = nilfs_setattr,
467 .permission = nilfs_permission,
468};
469
470struct inode_operations nilfs_symlink_inode_operations = {
471 .readlink = generic_readlink,
472 .follow_link = page_follow_link_light,
473 .put_link = page_put_link,
474};
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
new file mode 100644
index 000000000000..7558c977db02
--- /dev/null
+++ b/fs/nilfs2/nilfs.h
@@ -0,0 +1,318 @@
1/*
2 * nilfs.h - NILFS local header file.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>
21 * Ryusuke Konishi <ryusuke@osrg.net>
22 */
23
24#ifndef _NILFS_H
25#define _NILFS_H
26
27#include <linux/kernel.h>
28#include <linux/buffer_head.h>
29#include <linux/spinlock.h>
30#include <linux/blkdev.h>
31#include <linux/nilfs2_fs.h>
32#include "the_nilfs.h"
33#include "sb.h"
34#include "bmap.h"
35#include "bmap_union.h"
36
37/*
38 * NILFS filesystem version
39 */
40#define NILFS_VERSION "2.0.5"
41
42/*
43 * nilfs inode data in memory
44 */
/*
 * In-memory NILFS inode.  The VFS inode is embedded as the last member
 * (vfs_inode); NILFS_I() recovers this structure from a VFS inode
 * pointer with container_of().
 */
45struct nilfs_inode_info {
46 __u32 i_flags;
47 unsigned long i_state; /* Dynamic state flags (NILFS_I_* bit numbers) */
48 struct nilfs_bmap *i_bmap;
49 union nilfs_bmap_union i_bmap_union; /* NILFS_BMAP_I() maps a bmap pointer
 back to this structure through
 this member */
50 __u64 i_xattr; /* sector_t ??? */
51 __u32 i_dir_start_lookup;
52 __u64 i_cno; /* check point number for GC inode */
53 struct address_space i_btnode_cache; /* NILFS_BTNC_I() maps this mapping
 back to the owning inode */
54 struct list_head i_dirty; /* List for connecting dirty files */
55
56#ifdef CONFIG_NILFS_XATTR
57 /*
58 * Extended attributes can be read independently of the main file
59 * data. Taking i_sem even when reading would cause contention
60 * between readers of EAs and writers of regular file data, so
61 * instead we synchronize on xattr_sem when reading or changing
62 * EAs.
63 */
64 struct rw_semaphore xattr_sem;
65#endif
66#ifdef CONFIG_NILFS_POSIX_ACL
67 struct posix_acl *i_acl;
68 struct posix_acl *i_default_acl;
69#endif
70 struct buffer_head *i_bh; /* i_bh contains a new or dirty
71 disk inode */
72 struct inode vfs_inode; /* embedded VFS inode, see NILFS_I() */
73};
74
75static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode)
76{
77 return container_of(inode, struct nilfs_inode_info, vfs_inode);
78}
79
80static inline struct nilfs_inode_info *
81NILFS_BMAP_I(const struct nilfs_bmap *bmap)
82{
83 return container_of((union nilfs_bmap_union *)bmap,
84 struct nilfs_inode_info,
85 i_bmap_union);
86}
87
88static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
89{
90 struct nilfs_inode_info *ii =
91 container_of(btnc, struct nilfs_inode_info, i_btnode_cache);
92 return &ii->vfs_inode;
93}
94
95static inline struct inode *NILFS_AS_I(struct address_space *mapping)
96{
97 return (mapping->host) ? :
98 container_of(mapping, struct inode, i_data);
99}
100
101/*
102 * Dynamic state flags of NILFS on-memory inode (i_state)
103 */
104enum {
105 NILFS_I_NEW = 0, /* Inode is newly created */
106 NILFS_I_DIRTY, /* The file is dirty */
107 NILFS_I_QUEUED, /* inode is in dirty_files list */
108 NILFS_I_BUSY, /* inode is grabbed by a segment
109 constructor */
110 NILFS_I_COLLECTED, /* All dirty blocks are collected */
111 NILFS_I_UPDATED, /* The file has been written back */
112 NILFS_I_INODE_DIRTY, /* write_inode is requested */
113 NILFS_I_BMAP, /* has bmap and btnode_cache */
114 NILFS_I_GCINODE, /* inode for GC, on memory only */
115 NILFS_I_GCDAT, /* shadow DAT, on memory only */
116};
117
118/*
119 * Macros to check inode numbers
120 */
/*
 * Bit masks over the reserved inode numbers.  Every bit is built from
 * an unsigned constant so that no shift is performed on a signed int
 * and the mask tests compare values of the same signedness.
 */
#define NILFS_MDT_INO_BITS   \
	((unsigned int)(1U << NILFS_DAT_INO | 1U << NILFS_CPFILE_INO |	\
			1U << NILFS_SUFILE_INO | 1U << NILFS_IFILE_INO | \
			1U << NILFS_ATIME_INO | 1U << NILFS_SKETCH_INO))

#define NILFS_SYS_INO_BITS   \
	((unsigned int)(1U << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)

#define NILFS_FIRST_INO(sb)  (NILFS_SB(sb)->s_nilfs->ns_first_ino)

/* True if @ino is below the first regular inode number and is a metadata file */
#define NILFS_MDT_INODE(sb, ino) \
	((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1U << (ino))))
/* True if @ino is a regular inode number or one of the system inodes */
#define NILFS_VALID_INODE(sb, ino) \
	((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1U << (ino))))
135
136/**
137 * struct nilfs_transaction_info: context information for synchronization
138 * @ti_magic: Magic number
139 * @ti_save: Backup of journal_info field of task_struct
140 * @ti_flags: Flags
141 * @ti_count: Nest level
142 * @ti_garbage: List of inode to be put when releasing semaphore
143 */
144struct nilfs_transaction_info {
145 u32 ti_magic;
146 void *ti_save;
147 /* This should never be used.  If it is,
 one of the other filesystems has a bug. */
149 unsigned short ti_flags;
150 unsigned short ti_count;
151 struct list_head ti_garbage;
152};
153
154/* ti_magic */
155#define NILFS_TI_MAGIC 0xd9e392fb
156
157/* ti_flags */
158#define NILFS_TI_DYNAMIC_ALLOC 0x0001 /* Allocated from slab */
159#define NILFS_TI_SYNC 0x0002 /* Force to construct segment at the
160 end of transaction. */
161#define NILFS_TI_GC 0x0004 /* GC context */
162#define NILFS_TI_COMMIT 0x0008 /* Change happened or not */
163#define NILFS_TI_WRITER 0x0010 /* Constructor context */
164
165
166int nilfs_transaction_begin(struct super_block *,
167 struct nilfs_transaction_info *, int);
168int nilfs_transaction_commit(struct super_block *);
169void nilfs_transaction_abort(struct super_block *);
170
171static inline void nilfs_set_transaction_flag(unsigned int flag)
172{
173 struct nilfs_transaction_info *ti = current->journal_info;
174
175 ti->ti_flags |= flag;
176}
177
178static inline int nilfs_test_transaction_flag(unsigned int flag)
179{
180 struct nilfs_transaction_info *ti = current->journal_info;
181
182 if (ti == NULL || ti->ti_magic != NILFS_TI_MAGIC)
183 return 0;
184 return !!(ti->ti_flags & flag);
185}
186
187static inline int nilfs_doing_gc(void)
188{
189 return nilfs_test_transaction_flag(NILFS_TI_GC);
190}
191
192static inline int nilfs_doing_construction(void)
193{
194 return nilfs_test_transaction_flag(NILFS_TI_WRITER);
195}
196
197static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs)
198{
199 return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat;
200}
201
202/*
203 * function prototype
204 */
205#ifdef CONFIG_NILFS_POSIX_ACL
206#error "NILFS: not yet supported POSIX ACL"
207extern int nilfs_permission(struct inode *, int, struct nameidata *);
208extern int nilfs_acl_chmod(struct inode *);
209extern int nilfs_init_acl(struct inode *, struct inode *);
210#else
211#define nilfs_permission NULL
212
/* Stub used when POSIX ACL support is not configured: always returns 0. */
static inline int nilfs_acl_chmod(struct inode *inode)
{
	(void)inode;
	return 0;
}
217
218static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
219{
220 inode->i_mode &= ~current_umask();
221 return 0;
222}
223#endif
224
225#define NILFS_ATIME_DISABLE
226
227/* dir.c */
228extern int nilfs_add_link(struct dentry *, struct inode *);
229extern ino_t nilfs_inode_by_name(struct inode *, struct dentry *);
230extern int nilfs_make_empty(struct inode *, struct inode *);
231extern struct nilfs_dir_entry *
232nilfs_find_entry(struct inode *, struct dentry *, struct page **);
233extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *);
234extern int nilfs_empty_dir(struct inode *);
235extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **);
236extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
237 struct page *, struct inode *);
238
239/* file.c */
240extern int nilfs_sync_file(struct file *, struct dentry *, int);
241
242/* ioctl.c */
243long nilfs_ioctl(struct file *, unsigned int, unsigned long);
244int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, void __user *);
245
246/* inode.c */
247extern struct inode *nilfs_new_inode(struct inode *, int);
248extern void nilfs_free_inode(struct inode *);
249extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
250extern void nilfs_set_inode_flags(struct inode *);
251extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *);
252extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
253extern struct inode *nilfs_iget(struct super_block *, unsigned long);
254extern void nilfs_update_inode(struct inode *, struct buffer_head *);
255extern void nilfs_truncate(struct inode *);
256extern void nilfs_delete_inode(struct inode *);
257extern int nilfs_setattr(struct dentry *, struct iattr *);
258extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
259 struct buffer_head **);
260extern int nilfs_inode_dirty(struct inode *);
261extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *,
262 unsigned);
263extern int nilfs_mark_inode_dirty(struct inode *);
264extern void nilfs_dirty_inode(struct inode *);
265
266/* namei.c */
267extern struct dentry *nilfs_get_parent(struct dentry *);
268
269/* super.c */
270extern struct inode *nilfs_alloc_inode(struct super_block *);
271extern void nilfs_destroy_inode(struct inode *);
272extern void nilfs_error(struct super_block *, const char *, const char *, ...)
273 __attribute__ ((format (printf, 3, 4)));
274extern void nilfs_warning(struct super_block *, const char *, const char *, ...)
275 __attribute__ ((format (printf, 3, 4)));
276extern struct nilfs_super_block *
277nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
278extern int nilfs_store_magic_and_option(struct super_block *,
279 struct nilfs_super_block *, char *);
280extern int nilfs_commit_super(struct nilfs_sb_info *, int);
281extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64);
282extern void nilfs_detach_checkpoint(struct nilfs_sb_info *);
283
284/* gcinode.c */
285int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64,
286 struct buffer_head **);
287int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64,
288 struct buffer_head **);
289int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *);
290int nilfs_init_gccache(struct the_nilfs *);
291void nilfs_destroy_gccache(struct the_nilfs *);
292void nilfs_clear_gcinode(struct inode *);
293struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64);
294void nilfs_remove_all_gcinode(struct the_nilfs *);
295
296/* gcdat.c */
297int nilfs_init_gcdat_inode(struct the_nilfs *);
298void nilfs_commit_gcdat_inode(struct the_nilfs *);
299void nilfs_clear_gcdat_inode(struct the_nilfs *);
300
301/*
302 * Inodes and files operations
303 */
304extern struct file_operations nilfs_dir_operations;
305extern struct inode_operations nilfs_file_inode_operations;
306extern struct file_operations nilfs_file_operations;
307extern struct address_space_operations nilfs_aops;
308extern struct inode_operations nilfs_dir_inode_operations;
309extern struct inode_operations nilfs_special_inode_operations;
310extern struct inode_operations nilfs_symlink_inode_operations;
311
312/*
313 * filesystem type
314 */
315extern struct file_system_type nilfs_fs_type;
316
317
318#endif /* _NILFS_H */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
new file mode 100644
index 000000000000..1bfbba9c0e9a
--- /dev/null
+++ b/fs/nilfs2/page.c
@@ -0,0 +1,540 @@
1/*
2 * page.c - buffer/page management specific to NILFS
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
21 * Seiji Kihara <kihara@osrg.net>.
22 */
23
24#include <linux/pagemap.h>
25#include <linux/writeback.h>
26#include <linux/swap.h>
27#include <linux/bitops.h>
28#include <linux/page-flags.h>
29#include <linux/list.h>
30#include <linux/highmem.h>
31#include <linux/pagevec.h>
32#include "nilfs.h"
33#include "page.h"
34#include "mdt.h"
35
36
37#define NILFS_BUFFER_INHERENT_BITS \
38 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
39 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated))
40
41static struct buffer_head *
42__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
43 int blkbits, unsigned long b_state)
44
45{
46 unsigned long first_block;
47 struct buffer_head *bh;
48
49 if (!page_has_buffers(page))
50 create_empty_buffers(page, 1 << blkbits, b_state);
51
52 first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
53 bh = nilfs_page_get_nth_block(page, block - first_block);
54
55 touch_buffer(bh);
56 wait_on_buffer(bh);
57 return bh;
58}
59
60/*
61 * Since the page cache of B-tree node pages or data page cache of pseudo
62 * inodes does not have a valid mapping->host pointer, calling
63 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
64 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
65 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
66 */
67void nilfs_mark_buffer_dirty(struct buffer_head *bh)
68{
69 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
70 __set_page_dirty_nobuffers(bh->b_page);
71}
72
73struct buffer_head *nilfs_grab_buffer(struct inode *inode,
74 struct address_space *mapping,
75 unsigned long blkoff,
76 unsigned long b_state)
77{
78 int blkbits = inode->i_blkbits;
79 pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
80 struct page *page, *opage;
81 struct buffer_head *bh, *obh;
82
83 page = grab_cache_page(mapping, index);
84 if (unlikely(!page))
85 return NULL;
86
87 bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
88 if (unlikely(!bh)) {
89 unlock_page(page);
90 page_cache_release(page);
91 return NULL;
92 }
93 if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
94 /*
95 * Shadow page cache uses assoc_mapping to point its original
96 * page cache. The following code tries the original cache
97 * if the given cache is a shadow and it didn't hit.
98 */
99 opage = find_lock_page(mapping->assoc_mapping, index);
100 if (!opage)
101 return bh;
102
103 obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
104 b_state);
105 if (buffer_uptodate(obh)) {
106 nilfs_copy_buffer(bh, obh);
107 if (buffer_dirty(obh)) {
108 nilfs_mark_buffer_dirty(bh);
109 if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
110 nilfs_mdt_mark_dirty(inode);
111 }
112 }
113 brelse(obh);
114 unlock_page(opage);
115 page_cache_release(opage);
116 }
117 return bh;
118}
119
120/**
121 * nilfs_forget_buffer - discard dirty state
122 * @inode: owner inode of the buffer
123 * @bh: buffer head of the buffer to be discarded
124 */
125void nilfs_forget_buffer(struct buffer_head *bh)
126{
127 struct page *page = bh->b_page;
128
129 lock_buffer(bh);
130 clear_buffer_nilfs_volatile(bh);
131 if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page))
132 __nilfs_clear_page_dirty(page);
133
134 clear_buffer_uptodate(bh);
135 clear_buffer_mapped(bh);
136 bh->b_blocknr = -1;
137 ClearPageUptodate(page);
138 ClearPageMappedToDisk(page);
139 unlock_buffer(bh);
140 brelse(bh);
141}
142
143/**
144 * nilfs_copy_buffer -- copy buffer data and flags
145 * @dbh: destination buffer
146 * @sbh: source buffer
147 */
148void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
149{
150 void *kaddr0, *kaddr1;
151 unsigned long bits;
152 struct page *spage = sbh->b_page, *dpage = dbh->b_page;
153 struct buffer_head *bh;
154
155 kaddr0 = kmap_atomic(spage, KM_USER0);
156 kaddr1 = kmap_atomic(dpage, KM_USER1);
157 memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
158 kunmap_atomic(kaddr1, KM_USER1);
159 kunmap_atomic(kaddr0, KM_USER0);
160
161 dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
162 dbh->b_blocknr = sbh->b_blocknr;
163 dbh->b_bdev = sbh->b_bdev;
164
165 bh = dbh;
166 bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
167 while ((bh = bh->b_this_page) != dbh) {
168 lock_buffer(bh);
169 bits &= bh->b_state;
170 unlock_buffer(bh);
171 }
172 if (bits & (1UL << BH_Uptodate))
173 SetPageUptodate(dpage);
174 else
175 ClearPageUptodate(dpage);
176 if (bits & (1UL << BH_Mapped))
177 SetPageMappedToDisk(dpage);
178 else
179 ClearPageMappedToDisk(dpage);
180}
181
182/**
183 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
184 * @page: page to be checked
185 *
186 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
187 * Otherwise, it returns non-zero value.
188 */
189int nilfs_page_buffers_clean(struct page *page)
190{
191 struct buffer_head *bh, *head;
192
193 bh = head = page_buffers(page);
194 do {
195 if (buffer_dirty(bh))
196 return 0;
197 bh = bh->b_this_page;
198 } while (bh != head);
199 return 1;
200}
201
202void nilfs_page_bug(struct page *page)
203{
204 struct address_space *m;
205 unsigned long ino = 0;
206
207 if (unlikely(!page)) {
208 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
209 return;
210 }
211
212 m = page->mapping;
213 if (m) {
214 struct inode *inode = NILFS_AS_I(m);
215 if (inode != NULL)
216 ino = inode->i_ino;
217 }
218 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
219 "mapping=%p ino=%lu\n",
220 page, atomic_read(&page->_count),
221 (unsigned long long)page->index, page->flags, m, ino);
222
223 if (page_has_buffers(page)) {
224 struct buffer_head *bh, *head;
225 int i = 0;
226
227 bh = head = page_buffers(page);
228 do {
229 printk(KERN_CRIT
230 " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
231 i++, bh, atomic_read(&bh->b_count),
232 (unsigned long long)bh->b_blocknr, bh->b_state);
233 bh = bh->b_this_page;
234 } while (bh != head);
235 }
236}
237
238/**
239 * nilfs_alloc_private_page - allocate a private page with buffer heads
240 *
241 * Return Value: On success, a pointer to the allocated page is returned.
242 * On error, NULL is returned.
243 */
244struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
245 unsigned long state)
246{
247 struct buffer_head *bh, *head, *tail;
248 struct page *page;
249
250 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
251 if (unlikely(!page))
252 return NULL;
253
254 lock_page(page);
255 head = alloc_page_buffers(page, size, 0);
256 if (unlikely(!head)) {
257 unlock_page(page);
258 __free_page(page);
259 return NULL;
260 }
261
262 bh = head;
263 do {
264 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
265 tail = bh;
266 bh->b_bdev = bdev;
267 bh = bh->b_this_page;
268 } while (bh);
269
270 tail->b_this_page = head;
271 attach_page_buffers(page, head);
272
273 return page;
274}
275
276void nilfs_free_private_page(struct page *page)
277{
278 BUG_ON(!PageLocked(page));
279 BUG_ON(page->mapping);
280
281 if (page_has_buffers(page) && !try_to_free_buffers(page))
282 NILFS_PAGE_BUG(page, "failed to free page");
283
284 unlock_page(page);
285 __free_page(page);
286}
287
288/**
289 * nilfs_copy_page -- copy the page with buffers
290 * @dst: destination page
291 * @src: source page
292 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
293 *
294 * This fuction is for both data pages and btnode pages. The dirty flag
295 * should be treated by caller. The page must not be under i/o.
296 * Both src and dst page must be locked
297 */
298static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
299{
300 struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
301 unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
302
303 BUG_ON(PageWriteback(dst));
304
305 sbh = sbufs = page_buffers(src);
306 if (!page_has_buffers(dst))
307 create_empty_buffers(dst, sbh->b_size, 0);
308
309 if (copy_dirty)
310 mask |= (1UL << BH_Dirty);
311
312 dbh = dbufs = page_buffers(dst);
313 do {
314 lock_buffer(sbh);
315 lock_buffer(dbh);
316 dbh->b_state = sbh->b_state & mask;
317 dbh->b_blocknr = sbh->b_blocknr;
318 dbh->b_bdev = sbh->b_bdev;
319 sbh = sbh->b_this_page;
320 dbh = dbh->b_this_page;
321 } while (dbh != dbufs);
322
323 copy_highpage(dst, src);
324
325 if (PageUptodate(src) && !PageUptodate(dst))
326 SetPageUptodate(dst);
327 else if (!PageUptodate(src) && PageUptodate(dst))
328 ClearPageUptodate(dst);
329 if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
330 SetPageMappedToDisk(dst);
331 else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
332 ClearPageMappedToDisk(dst);
333
334 do {
335 unlock_buffer(sbh);
336 unlock_buffer(dbh);
337 sbh = sbh->b_this_page;
338 dbh = dbh->b_this_page;
339 } while (dbh != dbufs);
340}
341
342int nilfs_copy_dirty_pages(struct address_space *dmap,
343 struct address_space *smap)
344{
345 struct pagevec pvec;
346 unsigned int i;
347 pgoff_t index = 0;
348 int err = 0;
349
350 pagevec_init(&pvec, 0);
351repeat:
352 if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
353 PAGEVEC_SIZE))
354 return 0;
355
356 for (i = 0; i < pagevec_count(&pvec); i++) {
357 struct page *page = pvec.pages[i], *dpage;
358
359 lock_page(page);
360 if (unlikely(!PageDirty(page)))
361 NILFS_PAGE_BUG(page, "inconsistent dirty state");
362
363 dpage = grab_cache_page(dmap, page->index);
364 if (unlikely(!dpage)) {
365 /* No empty page is added to the page cache */
366 err = -ENOMEM;
367 unlock_page(page);
368 break;
369 }
370 if (unlikely(!page_has_buffers(page)))
371 NILFS_PAGE_BUG(page,
372 "found empty page in dat page cache");
373
374 nilfs_copy_page(dpage, page, 1);
375 __set_page_dirty_nobuffers(dpage);
376
377 unlock_page(dpage);
378 page_cache_release(dpage);
379 unlock_page(page);
380 }
381 pagevec_release(&pvec);
382 cond_resched();
383
384 if (likely(!err))
385 goto repeat;
386 return err;
387}
388
389/**
390 * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache
391 * @dmap: destination page cache
392 * @smap: source page cache
393 *
394 * No pages must no be added to the cache during this process.
395 * This must be ensured by the caller.
396 */
397void nilfs_copy_back_pages(struct address_space *dmap,
398 struct address_space *smap)
399{
400 struct pagevec pvec;
401 unsigned int i, n;
402 pgoff_t index = 0;
403 int err;
404
405 pagevec_init(&pvec, 0);
406repeat:
407 n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
408 if (!n)
409 return;
410 index = pvec.pages[n - 1]->index + 1;
411
412 for (i = 0; i < pagevec_count(&pvec); i++) {
413 struct page *page = pvec.pages[i], *dpage;
414 pgoff_t offset = page->index;
415
416 lock_page(page);
417 dpage = find_lock_page(dmap, offset);
418 if (dpage) {
419 /* override existing page on the destination cache */
420 WARN_ON(PageDirty(dpage));
421 nilfs_copy_page(dpage, page, 0);
422 unlock_page(dpage);
423 page_cache_release(dpage);
424 } else {
425 struct page *page2;
426
427 /* move the page to the destination cache */
428 spin_lock_irq(&smap->tree_lock);
429 page2 = radix_tree_delete(&smap->page_tree, offset);
430 WARN_ON(page2 != page);
431
432 smap->nrpages--;
433 spin_unlock_irq(&smap->tree_lock);
434
435 spin_lock_irq(&dmap->tree_lock);
436 err = radix_tree_insert(&dmap->page_tree, offset, page);
437 if (unlikely(err < 0)) {
438 WARN_ON(err == -EEXIST);
439 page->mapping = NULL;
440 page_cache_release(page); /* for cache */
441 } else {
442 page->mapping = dmap;
443 dmap->nrpages++;
444 if (PageDirty(page))
445 radix_tree_tag_set(&dmap->page_tree,
446 offset,
447 PAGECACHE_TAG_DIRTY);
448 }
449 spin_unlock_irq(&dmap->tree_lock);
450 }
451 unlock_page(page);
452 }
453 pagevec_release(&pvec);
454 cond_resched();
455
456 goto repeat;
457}
458
459void nilfs_clear_dirty_pages(struct address_space *mapping)
460{
461 struct pagevec pvec;
462 unsigned int i;
463 pgoff_t index = 0;
464
465 pagevec_init(&pvec, 0);
466
467 while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
468 PAGEVEC_SIZE)) {
469 for (i = 0; i < pagevec_count(&pvec); i++) {
470 struct page *page = pvec.pages[i];
471 struct buffer_head *bh, *head;
472
473 lock_page(page);
474 ClearPageUptodate(page);
475 ClearPageMappedToDisk(page);
476 bh = head = page_buffers(page);
477 do {
478 lock_buffer(bh);
479 clear_buffer_dirty(bh);
480 clear_buffer_nilfs_volatile(bh);
481 clear_buffer_uptodate(bh);
482 clear_buffer_mapped(bh);
483 unlock_buffer(bh);
484 bh = bh->b_this_page;
485 } while (bh != head);
486
487 __nilfs_clear_page_dirty(page);
488 unlock_page(page);
489 }
490 pagevec_release(&pvec);
491 cond_resched();
492 }
493}
494
495unsigned nilfs_page_count_clean_buffers(struct page *page,
496 unsigned from, unsigned to)
497{
498 unsigned block_start, block_end;
499 struct buffer_head *bh, *head;
500 unsigned nc = 0;
501
502 for (bh = head = page_buffers(page), block_start = 0;
503 bh != head || !block_start;
504 block_start = block_end, bh = bh->b_this_page) {
505 block_end = block_start + bh->b_size;
506 if (block_end > from && block_start < to && !buffer_dirty(bh))
507 nc++;
508 }
509 return nc;
510}
511
512/*
513 * NILFS2 needs clear_page_dirty() in the following two cases:
514 *
515 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
516 * page dirty flags when it copies back pages from the shadow cache
517 * (gcdat->{i_mapping,i_btnode_cache}) to its original cache
518 * (dat->{i_mapping,i_btnode_cache}).
519 *
520 * 2) Some B-tree operations like insertion or deletion may dispose buffers
521 * in dirty state, and this needs to cancel the dirty state of their pages.
522 */
523int __nilfs_clear_page_dirty(struct page *page)
524{
525 struct address_space *mapping = page->mapping;
526
527 if (mapping) {
528 spin_lock_irq(&mapping->tree_lock);
529 if (test_bit(PG_dirty, &page->flags)) {
530 radix_tree_tag_clear(&mapping->page_tree,
531 page_index(page),
532 PAGECACHE_TAG_DIRTY);
533 spin_unlock_irq(&mapping->tree_lock);
534 return clear_page_dirty_for_io(page);
535 }
536 spin_unlock_irq(&mapping->tree_lock);
537 return 0;
538 }
539 return TestClearPageDirty(page);
540}
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
new file mode 100644
index 000000000000..8abca4d1c1f8
--- /dev/null
+++ b/fs/nilfs2/page.h
@@ -0,0 +1,76 @@
1/*
2 * page.h - buffer/page management specific to NILFS
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
21 * Seiji Kihara <kihara@osrg.net>.
22 */
23
24#ifndef _NILFS_PAGE_H
25#define _NILFS_PAGE_H
26
27#include <linux/buffer_head.h>
28#include "nilfs.h"
29
/*
 * Extended buffer state bits
 *
 * These bit numbers start at BH_PrivateStart and are stored in the b_state
 * word of a buffer_head.
 */
enum {
	BH_NILFS_Allocated = BH_PrivateStart,	/* set on buffers of private
						   pages (see
						   nilfs_alloc_private_page) */
	BH_NILFS_Node,		/* nilfs node buffer */
	BH_NILFS_Volatile,	/* cleared together with the dirty bit in
				   nilfs_clear_dirty_pages() */
};

/*
 * Accessor helpers for the bits above, generated by BUFFER_FNS() from
 * <linux/buffer_head.h> (e.g. clear_buffer_nilfs_volatile()).
 */
BUFFER_FNS(NILFS_Allocated, nilfs_allocated)	/* nilfs private buffers */
BUFFER_FNS(NILFS_Node, nilfs_node)		/* nilfs node buffers */
BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
42
43
/* Dirty-state helpers for buffers and pages */
void nilfs_mark_buffer_dirty(struct buffer_head *bh);
int __nilfs_clear_page_dirty(struct page *);

/* Buffer-level helpers */
struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
				      unsigned long, unsigned long);
void nilfs_forget_buffer(struct buffer_head *);
void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
int nilfs_page_buffers_clean(struct page *);
void nilfs_page_bug(struct page *);

/* Private pages: returned locked by the allocator, must be locked on free */
struct page *nilfs_alloc_private_page(struct block_device *, int,
				      unsigned long);
void nilfs_free_private_page(struct page *);

/* Page cache level operations; the copy helpers take (dmap, smap) */
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);

61
/*
 * NILFS_PAGE_BUG - dump the state of @page via nilfs_page_bug(), then BUG().
 * NOTE(review): the message arguments (m, a...) are accepted but not used by
 * this expansion, so the text passed by callers is never printed.
 */
#define NILFS_PAGE_BUG(page, m, a...) \
	do { nilfs_page_bug(page); BUG(); } while (0)
64
65static inline struct buffer_head *
66nilfs_page_get_nth_block(struct page *page, unsigned int count)
67{
68 struct buffer_head *bh = page_buffers(page);
69
70 while (count-- > 0)
71 bh = bh->b_this_page;
72 get_bh(bh);
73 return bh;
74}
75
76#endif /* _NILFS_PAGE_H */
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
new file mode 100644
index 000000000000..6ade0963fc1d
--- /dev/null
+++ b/fs/nilfs2/recovery.c
@@ -0,0 +1,929 @@
1/*
2 * recovery.c - NILFS recovery logic
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 */
22
23#include <linux/buffer_head.h>
24#include <linux/blkdev.h>
25#include <linux/swap.h>
26#include <linux/crc32.h>
27#include "nilfs.h"
28#include "segment.h"
29#include "sufile.h"
30#include "page.h"
31#include "seglist.h"
32#include "segbuf.h"
33
/*
 * Segment check result
 *
 * Non-negative result codes used by the segment checking helpers in this
 * file; nilfs_warn_segment_error() converts them into a log message and a
 * negative errno.
 */
enum {
	NILFS_SEG_VALID,			/* zero: check passed */
	NILFS_SEG_NO_SUPER_ROOT,		/* no super root in the last
						   segment */
	NILFS_SEG_FAIL_IO,			/* I/O error; reported as
						   -EIO */
	NILFS_SEG_FAIL_MAGIC,			/* segment magic number
						   invalid */
	NILFS_SEG_FAIL_SEQ,			/* sequence number mismatch */
	NILFS_SEG_FAIL_CHECKSUM_SEGSUM,		/* checksum error in segment
						   summary */
	NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT,	/* checksum error in super
						   root */
	NILFS_SEG_FAIL_CHECKSUM_FULL,		/* checksum error in segment
						   payload */
	NILFS_SEG_FAIL_CONSISTENCY,		/* inconsistent segment */
};
48
/*
 * work structure for recovery
 *
 * One entry is allocated per data block found in a segment summary by
 * collect_blocks_from_segsum() and consumed by recover_dsync_blocks().
 */
struct nilfs_recovery_block {
	ino_t ino;		/* Inode number of the file that this block
				   belongs to */
	sector_t blocknr;	/* block number (disk block the data is read
				   from during recovery) */
	__u64 vblocknr;		/* virtual block number */
	unsigned long blkoff;	/* File offset of the data block (per block) */
	struct list_head list;	/* link in the list of blocks to recover */
};
58
59
60static int nilfs_warn_segment_error(int err)
61{
62 switch (err) {
63 case NILFS_SEG_FAIL_IO:
64 printk(KERN_WARNING
65 "NILFS warning: I/O error on loading last segment\n");
66 return -EIO;
67 case NILFS_SEG_FAIL_MAGIC:
68 printk(KERN_WARNING
69 "NILFS warning: Segment magic number invalid\n");
70 break;
71 case NILFS_SEG_FAIL_SEQ:
72 printk(KERN_WARNING
73 "NILFS warning: Sequence number mismatch\n");
74 break;
75 case NILFS_SEG_FAIL_CHECKSUM_SEGSUM:
76 printk(KERN_WARNING
77 "NILFS warning: Checksum error in segment summary\n");
78 break;
79 case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
80 printk(KERN_WARNING
81 "NILFS warning: Checksum error in super root\n");
82 break;
83 case NILFS_SEG_FAIL_CHECKSUM_FULL:
84 printk(KERN_WARNING
85 "NILFS warning: Checksum error in segment payload\n");
86 break;
87 case NILFS_SEG_FAIL_CONSISTENCY:
88 printk(KERN_WARNING
89 "NILFS warning: Inconsistent segment\n");
90 break;
91 case NILFS_SEG_NO_SUPER_ROOT:
92 printk(KERN_WARNING
93 "NILFS warning: No super root in the last segment\n");
94 break;
95 }
96 return -EINVAL;
97}
98
99static void store_segsum_info(struct nilfs_segsum_info *ssi,
100 struct nilfs_segment_summary *sum,
101 unsigned int blocksize)
102{
103 ssi->flags = le16_to_cpu(sum->ss_flags);
104 ssi->seg_seq = le64_to_cpu(sum->ss_seq);
105 ssi->ctime = le64_to_cpu(sum->ss_create);
106 ssi->next = le64_to_cpu(sum->ss_next);
107 ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
108 ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
109 ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
110
111 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
112 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
113}
114
115/**
116 * calc_crc_cont - check CRC of blocks continuously
117 * @sbi: nilfs_sb_info
118 * @bhs: buffer head of start block
119 * @sum: place to store result
120 * @offset: offset bytes in the first block
121 * @check_bytes: number of bytes to be checked
122 * @start: DBN of start block
123 * @nblock: number of blocks to be checked
124 */
125static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
126 u32 *sum, unsigned long offset, u64 check_bytes,
127 sector_t start, unsigned long nblock)
128{
129 unsigned long blocksize = sbi->s_super->s_blocksize;
130 unsigned long size;
131 u32 crc;
132
133 BUG_ON(offset >= blocksize);
134 check_bytes -= offset;
135 size = min_t(u64, check_bytes, blocksize - offset);
136 crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
137 (unsigned char *)bhs->b_data + offset, size);
138 if (--nblock > 0) {
139 do {
140 struct buffer_head *bh
141 = sb_bread(sbi->s_super, ++start);
142 if (!bh)
143 return -EIO;
144 check_bytes -= size;
145 size = min_t(u64, check_bytes, blocksize);
146 crc = crc32_le(crc, bh->b_data, size);
147 brelse(bh);
148 } while (--nblock > 0);
149 }
150 *sum = crc;
151 return 0;
152}
153
154/**
155 * nilfs_read_super_root_block - read super root block
156 * @sb: super_block
157 * @sr_block: disk block number of the super root block
158 * @pbh: address of a buffer_head pointer to return super root buffer
159 * @check: CRC check flag
160 */
161int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
162 struct buffer_head **pbh, int check)
163{
164 struct buffer_head *bh_sr;
165 struct nilfs_super_root *sr;
166 u32 crc;
167 int ret;
168
169 *pbh = NULL;
170 bh_sr = sb_bread(sb, sr_block);
171 if (unlikely(!bh_sr)) {
172 ret = NILFS_SEG_FAIL_IO;
173 goto failed;
174 }
175
176 sr = (struct nilfs_super_root *)bh_sr->b_data;
177 if (check) {
178 unsigned bytes = le16_to_cpu(sr->sr_bytes);
179
180 if (bytes == 0 || bytes > sb->s_blocksize) {
181 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
182 goto failed_bh;
183 }
184 if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
185 sizeof(sr->sr_sum), bytes, sr_block, 1)) {
186 ret = NILFS_SEG_FAIL_IO;
187 goto failed_bh;
188 }
189 if (crc != le32_to_cpu(sr->sr_sum)) {
190 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
191 goto failed_bh;
192 }
193 }
194 *pbh = bh_sr;
195 return 0;
196
197 failed_bh:
198 brelse(bh_sr);
199
200 failed:
201 return nilfs_warn_segment_error(ret);
202}
203
204/**
205 * load_segment_summary - read segment summary of the specified partial segment
206 * @sbi: nilfs_sb_info
207 * @pseg_start: start disk block number of partial segment
208 * @seg_seq: sequence number requested
209 * @ssi: pointer to nilfs_segsum_info struct to store information
210 * @full_check: full check flag
211 * (0: only checks segment summary CRC, 1: data CRC)
212 */
213static int
214load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
215 u64 seg_seq, struct nilfs_segsum_info *ssi,
216 int full_check)
217{
218 struct buffer_head *bh_sum;
219 struct nilfs_segment_summary *sum;
220 unsigned long offset, nblock;
221 u64 check_bytes;
222 u32 crc, crc_sum;
223 int ret = NILFS_SEG_FAIL_IO;
224
225 bh_sum = sb_bread(sbi->s_super, pseg_start);
226 if (!bh_sum)
227 goto out;
228
229 sum = (struct nilfs_segment_summary *)bh_sum->b_data;
230
231 /* Check consistency of segment summary */
232 if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
233 ret = NILFS_SEG_FAIL_MAGIC;
234 goto failed;
235 }
236 store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
237 if (seg_seq != ssi->seg_seq) {
238 ret = NILFS_SEG_FAIL_SEQ;
239 goto failed;
240 }
241 if (full_check) {
242 offset = sizeof(sum->ss_datasum);
243 check_bytes =
244 ((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits);
245 nblock = ssi->nblocks;
246 crc_sum = le32_to_cpu(sum->ss_datasum);
247 ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
248 } else { /* only checks segment summary */
249 offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum);
250 check_bytes = ssi->sumbytes;
251 nblock = ssi->nsumblk;
252 crc_sum = le32_to_cpu(sum->ss_sumsum);
253 ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM;
254 }
255
256 if (unlikely(nblock == 0 ||
257 nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
258 /* This limits the number of blocks read in the CRC check */
259 ret = NILFS_SEG_FAIL_CONSISTENCY;
260 goto failed;
261 }
262 if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes,
263 pseg_start, nblock)) {
264 ret = NILFS_SEG_FAIL_IO;
265 goto failed;
266 }
267 if (crc == crc_sum)
268 ret = 0;
269 failed:
270 brelse(bh_sum);
271 out:
272 return ret;
273}
274
275static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
276 unsigned int *offset, unsigned int bytes)
277{
278 void *ptr;
279 sector_t blocknr;
280
281 BUG_ON((*pbh)->b_size < *offset);
282 if (bytes > (*pbh)->b_size - *offset) {
283 blocknr = (*pbh)->b_blocknr;
284 brelse(*pbh);
285 *pbh = sb_bread(sb, blocknr + 1);
286 if (unlikely(!*pbh))
287 return NULL;
288 *offset = 0;
289 }
290 ptr = (*pbh)->b_data + *offset;
291 *offset += bytes;
292 return ptr;
293}
294
295static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
296 unsigned int *offset, unsigned int bytes,
297 unsigned long count)
298{
299 unsigned int rest_item_in_current_block
300 = ((*pbh)->b_size - *offset) / bytes;
301
302 if (count <= rest_item_in_current_block) {
303 *offset += bytes * count;
304 } else {
305 sector_t blocknr = (*pbh)->b_blocknr;
306 unsigned int nitem_per_block = (*pbh)->b_size / bytes;
307 unsigned int bcnt;
308
309 count -= rest_item_in_current_block;
310 bcnt = DIV_ROUND_UP(count, nitem_per_block);
311 *offset = bytes * (count - (bcnt - 1) * nitem_per_block);
312
313 brelse(*pbh);
314 *pbh = sb_bread(sb, blocknr + bcnt);
315 }
316}
317
318static int
319collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
320 struct nilfs_segsum_info *ssi,
321 struct list_head *head)
322{
323 struct buffer_head *bh;
324 unsigned int offset;
325 unsigned long nfinfo = ssi->nfinfo;
326 sector_t blocknr = sum_blocknr + ssi->nsumblk;
327 ino_t ino;
328 int err = -EIO;
329
330 if (!nfinfo)
331 return 0;
332
333 bh = sb_bread(sbi->s_super, sum_blocknr);
334 if (unlikely(!bh))
335 goto out;
336
337 offset = le16_to_cpu(
338 ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
339 for (;;) {
340 unsigned long nblocks, ndatablk, nnodeblk;
341 struct nilfs_finfo *finfo;
342
343 finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
344 if (unlikely(!finfo))
345 goto out;
346
347 ino = le64_to_cpu(finfo->fi_ino);
348 nblocks = le32_to_cpu(finfo->fi_nblocks);
349 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
350 nnodeblk = nblocks - ndatablk;
351
352 while (ndatablk-- > 0) {
353 struct nilfs_recovery_block *rb;
354 struct nilfs_binfo_v *binfo;
355
356 binfo = segsum_get(sbi->s_super, &bh, &offset,
357 sizeof(*binfo));
358 if (unlikely(!binfo))
359 goto out;
360
361 rb = kmalloc(sizeof(*rb), GFP_NOFS);
362 if (unlikely(!rb)) {
363 err = -ENOMEM;
364 goto out;
365 }
366 rb->ino = ino;
367 rb->blocknr = blocknr++;
368 rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
369 rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
370 /* INIT_LIST_HEAD(&rb->list); */
371 list_add_tail(&rb->list, head);
372 }
373 if (--nfinfo == 0)
374 break;
375 blocknr += nnodeblk; /* always 0 for the data sync segments */
376 segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
377 nnodeblk);
378 if (unlikely(!bh))
379 goto out;
380 }
381 err = 0;
382 out:
383 brelse(bh); /* brelse(NULL) is just ignored */
384 return err;
385}
386
387static void dispose_recovery_list(struct list_head *head)
388{
389 while (!list_empty(head)) {
390 struct nilfs_recovery_block *rb
391 = list_entry(head->next,
392 struct nilfs_recovery_block, list);
393 list_del(&rb->list);
394 kfree(rb);
395 }
396}
397
398void nilfs_dispose_segment_list(struct list_head *head)
399{
400 while (!list_empty(head)) {
401 struct nilfs_segment_entry *ent
402 = list_entry(head->next,
403 struct nilfs_segment_entry, list);
404 list_del(&ent->list);
405 nilfs_free_segment_entry(ent);
406 }
407}
408
409static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
410 struct nilfs_recovery_info *ri)
411{
412 struct list_head *head = &ri->ri_used_segments;
413 struct nilfs_segment_entry *ent, *n;
414 struct inode *sufile = nilfs->ns_sufile;
415 __u64 segnum[4];
416 time_t mtime;
417 int err;
418 int i;
419
420 segnum[0] = nilfs->ns_segnum;
421 segnum[1] = nilfs->ns_nextnum;
422 segnum[2] = ri->ri_segnum;
423 segnum[3] = ri->ri_nextnum;
424
425 /*
426 * Releasing the next segment of the latest super root.
427 * The next segment is invalidated by this recovery.
428 */
429 err = nilfs_sufile_free(sufile, segnum[1]);
430 if (unlikely(err))
431 goto failed;
432
433 err = -ENOMEM;
434 for (i = 1; i < 4; i++) {
435 ent = nilfs_alloc_segment_entry(segnum[i]);
436 if (unlikely(!ent))
437 goto failed;
438 list_add_tail(&ent->list, head);
439 }
440
441 /*
442 * Collecting segments written after the latest super root.
443 * These are marked dirty to avoid being reallocated in the next write.
444 */
445 mtime = get_seconds();
446 list_for_each_entry_safe(ent, n, head, list) {
447 if (ent->segnum == segnum[0]) {
448 list_del(&ent->list);
449 nilfs_free_segment_entry(ent);
450 continue;
451 }
452 err = nilfs_open_segment_entry(ent, sufile);
453 if (unlikely(err))
454 goto failed;
455 if (!nilfs_segment_usage_dirty(ent->raw_su)) {
456 /* make the segment garbage */
457 ent->raw_su->su_nblocks = cpu_to_le32(0);
458 ent->raw_su->su_lastmod = cpu_to_le32(mtime);
459 nilfs_segment_usage_set_dirty(ent->raw_su);
460 }
461 list_del(&ent->list);
462 nilfs_close_segment_entry(ent, sufile);
463 nilfs_free_segment_entry(ent);
464 }
465
466 /* Allocate new segments for recovery */
467 err = nilfs_sufile_alloc(sufile, &segnum[0]);
468 if (unlikely(err))
469 goto failed;
470
471 nilfs->ns_pseg_offset = 0;
472 nilfs->ns_seg_seq = ri->ri_seq + 2;
473 nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
474 return 0;
475
476 failed:
477 /* No need to recover sufile because it will be destroyed on error */
478 return err;
479}
480
481static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
482 struct nilfs_recovery_block *rb,
483 struct page *page)
484{
485 struct buffer_head *bh_org;
486 void *kaddr;
487
488 bh_org = sb_bread(sbi->s_super, rb->blocknr);
489 if (unlikely(!bh_org))
490 return -EIO;
491
492 kaddr = kmap_atomic(page, KM_USER0);
493 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
494 kunmap_atomic(kaddr, KM_USER0);
495 brelse(bh_org);
496 return 0;
497}
498
499static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
500 struct list_head *head,
501 unsigned long *nr_salvaged_blocks)
502{
503 struct inode *inode;
504 struct nilfs_recovery_block *rb, *n;
505 unsigned blocksize = sbi->s_super->s_blocksize;
506 struct page *page;
507 loff_t pos;
508 int err = 0, err2 = 0;
509
510 list_for_each_entry_safe(rb, n, head, list) {
511 inode = nilfs_iget(sbi->s_super, rb->ino);
512 if (IS_ERR(inode)) {
513 err = PTR_ERR(inode);
514 inode = NULL;
515 goto failed_inode;
516 }
517
518 pos = rb->blkoff << inode->i_blkbits;
519 page = NULL;
520 err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
521 0, &page, NULL, nilfs_get_block);
522 if (unlikely(err))
523 goto failed_inode;
524
525 err = nilfs_recovery_copy_block(sbi, rb, page);
526 if (unlikely(err))
527 goto failed_page;
528
529 err = nilfs_set_file_dirty(sbi, inode, 1);
530 if (unlikely(err))
531 goto failed_page;
532
533 block_write_end(NULL, inode->i_mapping, pos, blocksize,
534 blocksize, page, NULL);
535
536 unlock_page(page);
537 page_cache_release(page);
538
539 (*nr_salvaged_blocks)++;
540 goto next;
541
542 failed_page:
543 unlock_page(page);
544 page_cache_release(page);
545
546 failed_inode:
547 printk(KERN_WARNING
548 "NILFS warning: error recovering data block "
549 "(err=%d, ino=%lu, block-offset=%llu)\n",
550 err, rb->ino, (unsigned long long)rb->blkoff);
551 if (!err2)
552 err2 = err;
553 next:
554 iput(inode); /* iput(NULL) is just ignored */
555 list_del_init(&rb->list);
556 kfree(rb);
557 }
558 return err2;
559}
560
561/**
562 * nilfs_do_roll_forward - salvage logical segments newer than the latest
563 * checkpoint
564 * @sbi: nilfs_sb_info
565 * @nilfs: the_nilfs
566 * @ri: pointer to a nilfs_recovery_info
567 */
568static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
569 struct nilfs_sb_info *sbi,
570 struct nilfs_recovery_info *ri)
571{
572 struct nilfs_segsum_info ssi;
573 sector_t pseg_start;
574 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
575 unsigned long nsalvaged_blocks = 0;
576 u64 seg_seq;
577 __u64 segnum, nextnum = 0;
578 int empty_seg = 0;
579 int err = 0, ret;
580 LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */
581 enum {
582 RF_INIT_ST,
583 RF_DSYNC_ST, /* scanning data-sync segments */
584 };
585 int state = RF_INIT_ST;
586
587 nilfs_attach_writer(nilfs, sbi);
588 pseg_start = ri->ri_lsegs_start;
589 seg_seq = ri->ri_lsegs_start_seq;
590 segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
591 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
592
593 while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
594
595 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
596 if (ret) {
597 if (ret == NILFS_SEG_FAIL_IO) {
598 err = -EIO;
599 goto failed;
600 }
601 goto strayed;
602 }
603 if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
604 goto confused;
605
606 /* Found a valid partial segment; do recovery actions */
607 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
608 empty_seg = 0;
609 nilfs->ns_ctime = ssi.ctime;
610 if (!(ssi.flags & NILFS_SS_GC))
611 nilfs->ns_nongc_ctime = ssi.ctime;
612
613 switch (state) {
614 case RF_INIT_ST:
615 if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
616 goto try_next_pseg;
617 state = RF_DSYNC_ST;
618 /* Fall through */
619 case RF_DSYNC_ST:
620 if (!NILFS_SEG_DSYNC(&ssi))
621 goto confused;
622
623 err = collect_blocks_from_segsum(
624 sbi, pseg_start, &ssi, &dsync_blocks);
625 if (unlikely(err))
626 goto failed;
627 if (NILFS_SEG_LOGEND(&ssi)) {
628 err = recover_dsync_blocks(
629 sbi, &dsync_blocks, &nsalvaged_blocks);
630 if (unlikely(err))
631 goto failed;
632 state = RF_INIT_ST;
633 }
634 break; /* Fall through to try_next_pseg */
635 }
636
637 try_next_pseg:
638 if (pseg_start == ri->ri_lsegs_end)
639 break;
640 pseg_start += ssi.nblocks;
641 if (pseg_start < seg_end)
642 continue;
643 goto feed_segment;
644
645 strayed:
646 if (pseg_start == ri->ri_lsegs_end)
647 break;
648
649 feed_segment:
650 /* Looking to the next full segment */
651 if (empty_seg++)
652 break;
653 seg_seq++;
654 segnum = nextnum;
655 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
656 pseg_start = seg_start;
657 }
658
659 if (nsalvaged_blocks) {
660 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
661 sbi->s_super->s_id, nsalvaged_blocks);
662 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
663 }
664 out:
665 dispose_recovery_list(&dsync_blocks);
666 nilfs_detach_writer(sbi->s_nilfs, sbi);
667 return err;
668
669 confused:
670 err = -EINVAL;
671 failed:
672 printk(KERN_ERR
673 "NILFS (device %s): Error roll-forwarding "
674 "(err=%d, pseg block=%llu). ",
675 sbi->s_super->s_id, err, (unsigned long long)pseg_start);
676 goto out;
677}
678
679static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
680 struct nilfs_sb_info *sbi,
681 struct nilfs_recovery_info *ri)
682{
683 struct buffer_head *bh;
684 int err;
685
686 if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
687 nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
688 return;
689
690 bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
691 BUG_ON(!bh);
692 memset(bh->b_data, 0, bh->b_size);
693 set_buffer_dirty(bh);
694 err = sync_dirty_buffer(bh);
695 if (unlikely(err))
696 printk(KERN_WARNING
697 "NILFS warning: buffer sync write failed during "
698 "post-cleaning of recovery.\n");
699 brelse(bh);
700}
701
702/**
703 * nilfs_recover_logical_segments - salvage logical segments written after
704 * the latest super root
705 * @nilfs: the_nilfs
706 * @sbi: nilfs_sb_info
707 * @ri: pointer to a nilfs_recovery_info struct to store search results.
708 *
709 * Return Value: On success, 0 is returned. On error, one of the following
710 * negative error code is returned.
711 *
712 * %-EINVAL - Inconsistent filesystem state.
713 *
714 * %-EIO - I/O error
715 *
716 * %-ENOSPC - No space left on device (only in a panic state).
717 *
718 * %-ERESTARTSYS - Interrupted.
719 *
720 * %-ENOMEM - Insufficient memory available.
721 */
722int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
723 struct nilfs_sb_info *sbi,
724 struct nilfs_recovery_info *ri)
725{
726 int err;
727
728 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
729 return 0;
730
731 err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
732 if (unlikely(err)) {
733 printk(KERN_ERR
734 "NILFS: error loading the latest checkpoint.\n");
735 return err;
736 }
737
738 err = nilfs_do_roll_forward(nilfs, sbi, ri);
739 if (unlikely(err))
740 goto failed;
741
742 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
743 err = nilfs_prepare_segment_for_recovery(nilfs, ri);
744 if (unlikely(err)) {
745 printk(KERN_ERR "NILFS: Error preparing segments for "
746 "recovery.\n");
747 goto failed;
748 }
749
750 err = nilfs_attach_segment_constructor(sbi);
751 if (unlikely(err))
752 goto failed;
753
754 set_nilfs_discontinued(nilfs);
755 err = nilfs_construct_segment(sbi->s_super);
756 nilfs_detach_segment_constructor(sbi);
757
758 if (unlikely(err)) {
759 printk(KERN_ERR "NILFS: Oops! recovery failed. "
760 "(err=%d)\n", err);
761 goto failed;
762 }
763
764 nilfs_finish_roll_forward(nilfs, sbi, ri);
765 }
766
767 nilfs_detach_checkpoint(sbi);
768 return 0;
769
770 failed:
771 nilfs_detach_checkpoint(sbi);
772 nilfs_mdt_clear(nilfs->ns_cpfile);
773 nilfs_mdt_clear(nilfs->ns_sufile);
774 nilfs_mdt_clear(nilfs->ns_dat);
775 return err;
776}
777
778/**
779 * nilfs_search_super_root - search the latest valid super root
780 * @nilfs: the_nilfs
781 * @sbi: nilfs_sb_info
782 * @ri: pointer to a nilfs_recovery_info struct to store search results.
783 *
784 * nilfs_search_super_root() looks for the latest super-root from a partial
785 * segment pointed by the superblock. It sets up struct the_nilfs through
786 * this search. It fills nilfs_recovery_info (ri) required for recovery.
787 *
788 * Return Value: On success, 0 is returned. On error, one of the following
789 * negative error code is returned.
790 *
791 * %-EINVAL - No valid segment found
792 *
793 * %-EIO - I/O error
794 */
795int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
796 struct nilfs_recovery_info *ri)
797{
798 struct nilfs_segsum_info ssi;
799 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
800 sector_t seg_start, seg_end; /* range of full segment (block number) */
801 u64 seg_seq;
802 __u64 segnum, nextnum = 0;
803 __u64 cno;
804 struct nilfs_segment_entry *ent;
805 LIST_HEAD(segments);
806 int empty_seg = 0, scan_newer = 0;
807 int ret;
808
809 pseg_start = nilfs->ns_last_pseg;
810 seg_seq = nilfs->ns_last_seq;
811 cno = nilfs->ns_last_cno;
812 segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
813
814 /* Calculate range of segment */
815 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
816
817 for (;;) {
818 /* Load segment summary */
819 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
820 if (ret) {
821 if (ret == NILFS_SEG_FAIL_IO)
822 goto failed;
823 goto strayed;
824 }
825 pseg_end = pseg_start + ssi.nblocks - 1;
826 if (unlikely(pseg_end > seg_end)) {
827 ret = NILFS_SEG_FAIL_CONSISTENCY;
828 goto strayed;
829 }
830
831 /* A valid partial segment */
832 ri->ri_pseg_start = pseg_start;
833 ri->ri_seq = seg_seq;
834 ri->ri_segnum = segnum;
835 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
836 ri->ri_nextnum = nextnum;
837 empty_seg = 0;
838
839 if (!NILFS_SEG_HAS_SR(&ssi)) {
840 if (!scan_newer) {
841 /* This will never happen because a superblock
842 (last_segment) always points to a pseg
843 having a super root. */
844 ret = NILFS_SEG_FAIL_CONSISTENCY;
845 goto failed;
846 }
847 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
848 ri->ri_lsegs_start = pseg_start;
849 ri->ri_lsegs_start_seq = seg_seq;
850 }
851 if (NILFS_SEG_LOGEND(&ssi))
852 ri->ri_lsegs_end = pseg_start;
853 goto try_next_pseg;
854 }
855
856 /* A valid super root was found. */
857 ri->ri_cno = cno++;
858 ri->ri_super_root = pseg_end;
859 ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
860
861 nilfs_dispose_segment_list(&segments);
862 nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
863 + ssi.nblocks - seg_start;
864 nilfs->ns_seg_seq = seg_seq;
865 nilfs->ns_segnum = segnum;
866 nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */
867 nilfs->ns_ctime = ssi.ctime;
868 nilfs->ns_nextnum = nextnum;
869
870 if (scan_newer)
871 ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
872 else {
873 if (nilfs->ns_mount_state & NILFS_VALID_FS)
874 goto super_root_found;
875 scan_newer = 1;
876 }
877
878 /* reset region for roll-forward */
879 pseg_start += ssi.nblocks;
880 if (pseg_start < seg_end)
881 continue;
882 goto feed_segment;
883
884 try_next_pseg:
885 /* Standing on a course, or met an inconsistent state */
886 pseg_start += ssi.nblocks;
887 if (pseg_start < seg_end)
888 continue;
889 goto feed_segment;
890
891 strayed:
892 /* Off the trail */
893 if (!scan_newer)
894 /*
895 * This can happen if a checkpoint was written without
896 * barriers, or as a result of an I/O failure.
897 */
898 goto failed;
899
900 feed_segment:
901 /* Looking to the next full segment */
902 if (empty_seg++)
903 goto super_root_found; /* found a valid super root */
904
905 ent = nilfs_alloc_segment_entry(segnum);
906 if (unlikely(!ent)) {
907 ret = -ENOMEM;
908 goto failed;
909 }
910 list_add_tail(&ent->list, &segments);
911
912 seg_seq++;
913 segnum = nextnum;
914 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
915 pseg_start = seg_start;
916 }
917
918 super_root_found:
919 /* Updating pointers relating to the latest checkpoint */
920 list_splice(&segments, ri->ri_used_segments.prev);
921 nilfs->ns_last_pseg = sr_pseg_start;
922 nilfs->ns_last_seq = nilfs->ns_seg_seq;
923 nilfs->ns_last_cno = ri->ri_cno;
924 return 0;
925
926 failed:
927 nilfs_dispose_segment_list(&segments);
928 return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
929}
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
new file mode 100644
index 000000000000..adccd4fc654e
--- /dev/null
+++ b/fs/nilfs2/sb.h
@@ -0,0 +1,102 @@
1/*
2 * sb.h - NILFS on-memory super block structure.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#ifndef _NILFS_SB
25#define _NILFS_SB
26
27#include <linux/types.h>
28#include <linux/fs.h>
29
30/*
31 * Mount options
32 */
/*
 * Plain pair of mount settings: an option word and a checkpoint number.
 * NOTE(review): mount_opt presumably uses the same NILFS_MOUNT_* bit layout
 * as nilfs_sb_info.s_mount_opt -- confirm against the users of this struct.
 */
33struct nilfs_mount_options {
34 unsigned long mount_opt; /* mount option word */
35 __u64 snapshot_cno; /* checkpoint number */
36};
37
38struct the_nilfs;
39struct nilfs_sc_info;
40
41/*
42 * NILFS super-block data in memory
43 */
/*
 * Per-super_block NILFS state.  NILFS_SB() returns a pointer to this
 * structure from sb->s_fs_info.
 */
44struct nilfs_sb_info {
45 /* Snapshot status */
46 __u64 s_snapshot_cno; /* Checkpoint number */
47 atomic_t s_inodes_count;
48 atomic_t s_blocks_count; /* Reserved (might be deleted) */
49
50 /* Mount options */
51 unsigned long s_mount_opt; /* bit word handled by the nilfs_*_opt() macros */
52 uid_t s_resuid;
53 gid_t s_resgid;
54
55 unsigned long s_interval; /* construction interval */
56 unsigned long s_watermark; /* threshold of data amount
57 for the segment construction */
58
59 /* Fundamental members */
60 struct super_block *s_super; /* reverse pointer to super_block */
61 struct the_nilfs *s_nilfs;
62 struct list_head s_list; /* list head for nilfs->ns_supers */
63
64 /* Segment constructor */
65 struct list_head s_dirty_files; /* dirty files list */
66 struct nilfs_sc_info *s_sc_info; /* segment constructor info */
/* s_sc_info is the value returned by NILFS_SC() */
67 spinlock_t s_inode_lock; /* Lock for the nilfs inode.
68 It covers s_dirty_files list */
69
70 /* Metadata files */
71 struct inode *s_ifile; /* index file inode */
72
73 /* Inode allocator */
74 spinlock_t s_next_gen_lock;
75 u32 s_next_generation;
76};
77
/* Return the NILFS private data stored in sb->s_fs_info. */
78static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb)
79{
80 return sb->s_fs_info;
81}
82
/* Return the segment constructor info pointer kept in sbi->s_sc_info. */
83static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi)
84{
85 return sbi->s_sc_info;
86}
87
88/*
89 * Bit operations for the mount option
90 */
/* Clear the NILFS_MOUNT_<opt> bits in (sbi)->s_mount_opt. */
91#define nilfs_clear_opt(sbi, opt) \
92 do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
/* Set the NILFS_MOUNT_<opt> bits in (sbi)->s_mount_opt. */
93#define nilfs_set_opt(sbi, opt) \
94 do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0)
/*
 * Expression form: yields the masked bits (non-zero when any
 * NILFS_MOUNT_<opt> bit is set), not a normalized 0/1 value.
 */
95#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt)
/*
 * Clear every bit of NILFS_MOUNT_<mask>, then OR in NILFS_MOUNT_<opt>.
 * The <opt> bits are set even if they lie outside <mask>.
 */
96#define nilfs_write_opt(sbi, mask, opt) \
97 do { (sbi)->s_mount_opt = \
98 (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \
99 NILFS_MOUNT_##opt); \
100 } while (0)
101
102#endif /* _NILFS_SB */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
new file mode 100644
index 000000000000..1e68821b4a9b
--- /dev/null
+++ b/fs/nilfs2/segbuf.c
@@ -0,0 +1,439 @@
1/*
2 * segbuf.c - NILFS segment buffer
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#include <linux/buffer_head.h>
25#include <linux/writeback.h>
26#include <linux/crc32.h>
27#include "page.h"
28#include "segbuf.h"
29#include "seglist.h"
30
31
32static struct kmem_cache *nilfs_segbuf_cachep;
33
34static void nilfs_segbuf_init_once(void *obj)
35{
36 memset(obj, 0, sizeof(struct nilfs_segment_buffer));
37}
38
39int __init nilfs_init_segbuf_cache(void)
40{
41 nilfs_segbuf_cachep =
42 kmem_cache_create("nilfs2_segbuf_cache",
43 sizeof(struct nilfs_segment_buffer),
44 0, SLAB_RECLAIM_ACCOUNT,
45 nilfs_segbuf_init_once);
46
47 return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0;
48}
49
/* Destroy the slab cache created by nilfs_init_segbuf_cache(). */
50void nilfs_destroy_segbuf_cache(void)
51{
52 kmem_cache_destroy(nilfs_segbuf_cachep);
53}
54
55struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
56{
57 struct nilfs_segment_buffer *segbuf;
58
59 segbuf = kmem_cache_alloc(nilfs_segbuf_cachep, GFP_NOFS);
60 if (unlikely(!segbuf))
61 return NULL;
62
63 segbuf->sb_super = sb;
64 INIT_LIST_HEAD(&segbuf->sb_list);
65 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
66 INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
67 return segbuf;
68}
69
/*
 * Return @segbuf to the segment buffer slab cache.  Buffers still linked on
 * its lists are not released here (see nilfs_segbuf_clear()).
 */
70void nilfs_segbuf_free(struct nilfs_segment_buffer *segbuf)
71{
72 kmem_cache_free(nilfs_segbuf_cachep, segbuf);
73}
74
75void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum,
76 unsigned long offset, struct the_nilfs *nilfs)
77{
78 segbuf->sb_segnum = segnum;
79 nilfs_get_segment_range(nilfs, segnum, &segbuf->sb_fseg_start,
80 &segbuf->sb_fseg_end);
81
82 segbuf->sb_pseg_start = segbuf->sb_fseg_start + offset;
83 segbuf->sb_rest_blocks =
84 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
85}
86
87void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf,
88 __u64 nextnum, struct the_nilfs *nilfs)
89{
90 segbuf->sb_nextnum = nextnum;
91 segbuf->sb_sum.next = nilfs_get_segment_start_blocknr(nilfs, nextnum);
92}
93
94int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
95{
96 struct buffer_head *bh;
97
98 bh = sb_getblk(segbuf->sb_super,
99 segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk);
100 if (unlikely(!bh))
101 return -ENOMEM;
102
103 nilfs_segbuf_add_segsum_buffer(segbuf, bh);
104 return 0;
105}
106
107int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
108 struct buffer_head **bhp)
109{
110 struct buffer_head *bh;
111
112 bh = sb_getblk(segbuf->sb_super,
113 segbuf->sb_pseg_start + segbuf->sb_sum.nblocks);
114 if (unlikely(!bh))
115 return -ENOMEM;
116
117 nilfs_segbuf_add_payload_buffer(segbuf, bh);
118 *bhp = bh;
119 return 0;
120}
121
122int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
123 time_t ctime)
124{
125 int err;
126
127 segbuf->sb_sum.nblocks = segbuf->sb_sum.nsumblk = 0;
128 err = nilfs_segbuf_extend_segsum(segbuf);
129 if (unlikely(err))
130 return err;
131
132 segbuf->sb_sum.flags = flags;
133 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
134 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
135 segbuf->sb_sum.ctime = ctime;
136
137 segbuf->sb_io_error = 0;
138 return 0;
139}
140
141/*
142 * Setup segument summary
143 */
144void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf)
145{
146 struct nilfs_segment_summary *raw_sum;
147 struct buffer_head *bh_sum;
148
149 bh_sum = list_entry(segbuf->sb_segsum_buffers.next,
150 struct buffer_head, b_assoc_buffers);
151 raw_sum = (struct nilfs_segment_summary *)bh_sum->b_data;
152
153 raw_sum->ss_magic = cpu_to_le32(NILFS_SEGSUM_MAGIC);
154 raw_sum->ss_bytes = cpu_to_le16(sizeof(*raw_sum));
155 raw_sum->ss_flags = cpu_to_le16(segbuf->sb_sum.flags);
156 raw_sum->ss_seq = cpu_to_le64(segbuf->sb_sum.seg_seq);
157 raw_sum->ss_create = cpu_to_le64(segbuf->sb_sum.ctime);
158 raw_sum->ss_next = cpu_to_le64(segbuf->sb_sum.next);
159 raw_sum->ss_nblocks = cpu_to_le32(segbuf->sb_sum.nblocks);
160 raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo);
161 raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes);
162 raw_sum->ss_pad = 0;
163}
164
165/*
166 * CRC calculation routines
167 */
168void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf,
169 u32 seed)
170{
171 struct buffer_head *bh;
172 struct nilfs_segment_summary *raw_sum;
173 unsigned long size, bytes = segbuf->sb_sum.sumbytes;
174 u32 crc;
175
176 bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head,
177 b_assoc_buffers);
178
179 raw_sum = (struct nilfs_segment_summary *)bh->b_data;
180 size = min_t(unsigned long, bytes, bh->b_size);
181 crc = crc32_le(seed,
182 (unsigned char *)raw_sum +
183 sizeof(raw_sum->ss_datasum) + sizeof(raw_sum->ss_sumsum),
184 size - (sizeof(raw_sum->ss_datasum) +
185 sizeof(raw_sum->ss_sumsum)));
186
187 list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers,
188 b_assoc_buffers) {
189 bytes -= size;
190 size = min_t(unsigned long, bytes, bh->b_size);
191 crc = crc32_le(crc, bh->b_data, size);
192 }
193 raw_sum->ss_sumsum = cpu_to_le32(crc);
194}
195
196void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
197 u32 seed)
198{
199 struct buffer_head *bh;
200 struct nilfs_segment_summary *raw_sum;
201 void *kaddr;
202 u32 crc;
203
204 bh = list_entry(segbuf->sb_segsum_buffers.next, struct buffer_head,
205 b_assoc_buffers);
206 raw_sum = (struct nilfs_segment_summary *)bh->b_data;
207 crc = crc32_le(seed,
208 (unsigned char *)raw_sum + sizeof(raw_sum->ss_datasum),
209 bh->b_size - sizeof(raw_sum->ss_datasum));
210
211 list_for_each_entry_continue(bh, &segbuf->sb_segsum_buffers,
212 b_assoc_buffers) {
213 crc = crc32_le(crc, bh->b_data, bh->b_size);
214 }
215 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
216 kaddr = kmap_atomic(bh->b_page, KM_USER0);
217 crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
218 kunmap_atomic(kaddr, KM_USER0);
219 }
220 raw_sum->ss_datasum = cpu_to_le32(crc);
221}
222
223void nilfs_release_buffers(struct list_head *list)
224{
225 struct buffer_head *bh, *n;
226
227 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
228 list_del_init(&bh->b_assoc_buffers);
229 if (buffer_nilfs_allocated(bh)) {
230 struct page *clone_page = bh->b_page;
231
232 /* remove clone page */
233 brelse(bh);
234 page_cache_release(clone_page); /* for each bh */
235 if (page_count(clone_page) <= 2) {
236 lock_page(clone_page);
237 nilfs_free_private_page(clone_page);
238 }
239 continue;
240 }
241 brelse(bh);
242 }
243}
244
245/*
246 * BIO operations
247 */
248static void nilfs_end_bio_write(struct bio *bio, int err)
249{
250 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
251 struct nilfs_write_info *wi = bio->bi_private;
252
253 if (err == -EOPNOTSUPP) {
254 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
255 bio_put(bio);
256 /* to be detected by submit_seg_bio() */
257 }
258
259 if (!uptodate)
260 atomic_inc(&wi->err);
261
262 bio_put(bio);
263 complete(&wi->bio_event);
264}
265
266static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
267{
268 struct bio *bio = wi->bio;
269 int err;
270
271 if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) {
272 wait_for_completion(&wi->bio_event);
273 wi->nbio--;
274 if (unlikely(atomic_read(&wi->err))) {
275 bio_put(bio);
276 err = -EIO;
277 goto failed;
278 }
279 }
280
281 bio->bi_end_io = nilfs_end_bio_write;
282 bio->bi_private = wi;
283 bio_get(bio);
284 submit_bio(mode, bio);
285 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
286 bio_put(bio);
287 err = -EOPNOTSUPP;
288 goto failed;
289 }
290 wi->nbio++;
291 bio_put(bio);
292
293 wi->bio = NULL;
294 wi->rest_blocks -= wi->end - wi->start;
295 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
296 wi->start = wi->end;
297 return 0;
298
299 failed:
300 wi->bio = NULL;
301 return err;
302}
303
304/**
305 * nilfs_alloc_seg_bio - allocate a bio for writing segment.
306 * @sb: super block
307 * @start: beginning disk block number of this BIO.
308 * @nr_vecs: request size of page vector.
309 *
310 * alloc_seg_bio() allocates a new BIO structure and initialize it.
311 *
312 * Return Value: On success, pointer to the struct bio is returned.
313 * On error, NULL is returned.
314 */
315static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
316 int nr_vecs)
317{
318 struct bio *bio;
319
320 bio = bio_alloc(GFP_NOWAIT, nr_vecs);
321 if (bio == NULL) {
322 while (!bio && (nr_vecs >>= 1))
323 bio = bio_alloc(GFP_NOWAIT, nr_vecs);
324 }
325 if (likely(bio)) {
326 bio->bi_bdev = sb->s_bdev;
327 bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9);
328 }
329 return bio;
330}
331
332void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
333 struct nilfs_write_info *wi)
334{
335 wi->bio = NULL;
336 wi->rest_blocks = segbuf->sb_sum.nblocks;
337 wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev);
338 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
339 wi->start = wi->end = 0;
340 wi->nbio = 0;
341 wi->blocknr = segbuf->sb_pseg_start;
342
343 atomic_set(&wi->err, 0);
344 init_completion(&wi->bio_event);
345}
346
347static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh,
348 int mode)
349{
350 int len, err;
351
352 BUG_ON(wi->nr_vecs <= 0);
353 repeat:
354 if (!wi->bio) {
355 wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end,
356 wi->nr_vecs);
357 if (unlikely(!wi->bio))
358 return -ENOMEM;
359 }
360
361 len = bio_add_page(wi->bio, bh->b_page, bh->b_size, bh_offset(bh));
362 if (len == bh->b_size) {
363 wi->end++;
364 return 0;
365 }
366 /* bio is FULL */
367 err = nilfs_submit_seg_bio(wi, mode);
368 /* never submit current bh */
369 if (likely(!err))
370 goto repeat;
371 return err;
372}
373
374int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
375 struct nilfs_write_info *wi)
376{
377 struct buffer_head *bh;
378 int res, rw = WRITE;
379
380 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
381 res = nilfs_submit_bh(wi, bh, rw);
382 if (unlikely(res))
383 goto failed_bio;
384 }
385
386 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
387 res = nilfs_submit_bh(wi, bh, rw);
388 if (unlikely(res))
389 goto failed_bio;
390 }
391
392 if (wi->bio) {
393 /*
394 * Last BIO is always sent through the following
395 * submission.
396 */
397 rw |= (1 << BIO_RW_SYNCIO);
398 res = nilfs_submit_seg_bio(wi, rw);
399 if (unlikely(res))
400 goto failed_bio;
401 }
402
403 res = 0;
404 out:
405 return res;
406
407 failed_bio:
408 atomic_inc(&wi->err);
409 goto out;
410}
411
412/**
413 * nilfs_segbuf_wait - wait for completion of requested BIOs
414 * @wi: nilfs_write_info
415 *
416 * Return Value: On Success, 0 is returned. On Error, one of the following
417 * negative error code is returned.
418 *
419 * %-EIO - I/O error
420 */
421int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf,
422 struct nilfs_write_info *wi)
423{
424 int err = 0;
425
426 if (!wi->nbio)
427 return 0;
428
429 do {
430 wait_for_completion(&wi->bio_event);
431 } while (--wi->nbio > 0);
432
433 if (unlikely(atomic_read(&wi->err) > 0)) {
434 printk(KERN_ERR "NILFS: IO error writing segment\n");
435 err = -EIO;
436 segbuf->sb_io_error = 1;
437 }
438 return err;
439}
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
new file mode 100644
index 000000000000..0c3076f4e592
--- /dev/null
+++ b/fs/nilfs2/segbuf.h
@@ -0,0 +1,201 @@
1/*
2 * segbuf.h - NILFS Segment buffer prototypes and definitions
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23#ifndef _NILFS_SEGBUF_H
24#define _NILFS_SEGBUF_H
25
26#include <linux/fs.h>
27#include <linux/buffer_head.h>
28#include <linux/bio.h>
29#include <linux/completion.h>
30#include <linux/backing-dev.h>
31
32/**
33 * struct nilfs_segsum_info - On-memory segment summary
34 * @flags: Flags
35 * @nfinfo: Number of file information structures
36 * @nblocks: Number of blocks included in the partial segment
 *           (summary blocks are counted here as well as in @nsumblk)
37 * @nsumblk: Number of summary blocks
38 * @sumbytes: Byte count of segment summary
39 * @nfileblk: Total number of file blocks
40 * @seg_seq: Segment sequence number
41 * @ctime: Creation time
42 * @next: Block number of the next full segment
 *
 * The NILFS_SEG_* macros test @flags and compare @nblocks with @nsumblk.
43 */
44struct nilfs_segsum_info {
45 unsigned int flags;
46 unsigned long nfinfo;
47 unsigned long nblocks;
48 unsigned long nsumblk;
49 unsigned long sumbytes;
50 unsigned long nfileblk;
51 u64 seg_seq;
52 time_t ctime;
53 sector_t next;
54};
55
56/* macro for the flags */
/*
 * Each takes a pointer to a struct nilfs_segsum_info.  The single-flag
 * tests yield the masked bit (non-zero when set), not a normalized 0/1.
 */
57#define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR)
58#define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN)
59#define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND)
60#define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT)
/* True only when both NILFS_SS_LOGBGN and NILFS_SS_LOGEND are set */
61#define NILFS_SEG_SIMPLEX(sum) \
62 (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \
63 (NILFS_SS_LOGBGN | NILFS_SS_LOGEND))
64
/* True when every block counted so far is a summary block (no payload) */
65#define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk)
66
67/**
68 * struct nilfs_segment_buffer - Segment buffer
69 * @sb_super: back pointer to a superblock struct
70 * @sb_list: List head to chain this structure
71 * @sb_sum: On-memory segment summary
72 * @sb_segnum: Index number of the full segment
73 * @sb_nextnum: Index number of the next full segment
74 * @sb_fseg_start: Start block number of the full segment
75 * @sb_fseg_end: End block number of the full segment
76 * @sb_pseg_start: Disk block number of partial segment
77 * @sb_rest_blocks: Number of residual blocks in the current segment
 *                  (counted from @sb_pseg_start through @sb_fseg_end)
78 * @sb_segsum_buffers: List of buffers for segment summaries
79 * @sb_payload_buffers: List of buffers for segment payload
80 * @sb_io_error: I/O error status
 *
 * Both buffer lists chain buffer heads through b_assoc_buffers.
81 */
82struct nilfs_segment_buffer {
83 struct super_block *sb_super;
84 struct list_head sb_list;
85
86 /* Segment information */
87 struct nilfs_segsum_info sb_sum;
88 __u64 sb_segnum;
89 __u64 sb_nextnum;
90 sector_t sb_fseg_start, sb_fseg_end;
91 sector_t sb_pseg_start;
92 unsigned sb_rest_blocks;
93
94 /* Buffers */
95 struct list_head sb_segsum_buffers;
96 struct list_head sb_payload_buffers; /* including super root */
97
98 /* io status */
99 int sb_io_error;
100};
101
/*
 * Navigation helpers for segment buffers chained through sb_list.
 * @head arguments are pointers to struct list_head; the FIRST/LAST/NEXT/PREV
 * forms do not check for an empty list or for wrapping onto the list head.
 */
#define NILFS_LIST_SEGBUF(head)  \
	list_entry((head), struct nilfs_segment_buffer, sb_list)
#define NILFS_NEXT_SEGBUF(segbuf)  NILFS_LIST_SEGBUF((segbuf)->sb_list.next)
#define NILFS_PREV_SEGBUF(segbuf)  NILFS_LIST_SEGBUF((segbuf)->sb_list.prev)
#define NILFS_LAST_SEGBUF(head)    NILFS_LIST_SEGBUF((head)->prev)
#define NILFS_FIRST_SEGBUF(head)   NILFS_LIST_SEGBUF((head)->next)
#define NILFS_SEGBUF_IS_LAST(segbuf, head)  ((segbuf)->sb_list.next == (head))

/* Visit the segment buffers of list @h from the first one up to, but not
   including, @t */
#define nilfs_for_each_segbuf_before(s, t, h) \
	for ((s) = NILFS_FIRST_SEGBUF(h); (s) != (t); \
	     (s) = NILFS_NEXT_SEGBUF(s))

/*
 * Navigation helpers for buffer heads chained through b_assoc_buffers
 * (the summary and payload lists of a segment buffer).
 */
#define NILFS_SEGBUF_FIRST_BH(head)  \
	(list_entry((head)->next, struct buffer_head, b_assoc_buffers))
#define NILFS_SEGBUF_NEXT_BH(bh)  \
	(list_entry((bh)->b_assoc_buffers.next, struct buffer_head, \
		    b_assoc_buffers))
/* @head is parenthesised so that any expression may be passed safely */
#define NILFS_SEGBUF_BH_IS_LAST(bh, head)  ((bh)->b_assoc_buffers.next == (head))
120
121
122int __init nilfs_init_segbuf_cache(void);
123void nilfs_destroy_segbuf_cache(void);
124struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
125void nilfs_segbuf_free(struct nilfs_segment_buffer *);
126void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
127 struct the_nilfs *);
128void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
129 struct the_nilfs *);
130int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t);
131int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
132int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
133 struct buffer_head **);
134void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *);
135void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32);
136void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32);
137
138static inline void
139nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
140 struct buffer_head *bh)
141{
142 list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_segsum_buffers);
143 segbuf->sb_sum.nblocks++;
144 segbuf->sb_sum.nsumblk++;
145}
146
147static inline void
148nilfs_segbuf_add_payload_buffer(struct nilfs_segment_buffer *segbuf,
149 struct buffer_head *bh)
150{
151 list_add_tail(&bh->b_assoc_buffers, &segbuf->sb_payload_buffers);
152 segbuf->sb_sum.nblocks++;
153}
154
155static inline void
156nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf,
157 struct buffer_head *bh)
158{
159 get_bh(bh);
160 nilfs_segbuf_add_payload_buffer(segbuf, bh);
161 segbuf->sb_sum.nfileblk++;
162}
163
164void nilfs_release_buffers(struct list_head *);
165
/*
 * Release every buffer on the summary list and then on the payload list of
 * @segbuf.  The counters in sb_sum are left untouched.
 */
166static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
167{
168 nilfs_release_buffers(&segbuf->sb_segsum_buffers);
169 nilfs_release_buffers(&segbuf->sb_payload_buffers);
170}
171
/*
 * State of one segment write.  nilfs_segbuf_prepare_write() initialises
 * everything except the fields in the "must be set explicitly" group.
 */
172struct nilfs_write_info {
173 struct bio *bio; /* bio under construction, NULL when none is pending */
174 int start, end; /* The region to be submitted */
/* start and end are block counts relative to blocknr */
175 int rest_blocks; /* blocks not yet covered by a submitted bio */
176 int max_pages; /* value of bio_get_nr_vecs() for the device */
177 int nr_vecs; /* vector count requested for the next bio */
178 sector_t blocknr; /* first block of the partial segment */
179
180 int nbio; /* submitted bios whose completion has not been waited for */
181 atomic_t err; /* incremented on each failure */
182 struct completion bio_event;
183 /* completion event of segment write */
184
185 /*
186 * The following fields must be set explicitly
187 */
188 struct super_block *sb;
189 struct backing_dev_info *bdi; /* backing dev info */
190 struct buffer_head *bh_sr; /* NOTE(review): not referenced in segbuf.c; presumably the super root buffer -- confirm */
191};
192
193
194void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *,
195 struct nilfs_write_info *);
196int nilfs_segbuf_write(struct nilfs_segment_buffer *,
197 struct nilfs_write_info *);
198int nilfs_segbuf_wait(struct nilfs_segment_buffer *,
199 struct nilfs_write_info *);
200
201#endif /* _NILFS_SEGBUF_H */
diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h
new file mode 100644
index 000000000000..d39df9144e99
--- /dev/null
+++ b/fs/nilfs2/seglist.h
@@ -0,0 +1,85 @@
1/*
2 * seglist.h - expediential structure and routines to handle list of segments
3 * (would be removed in a future release)
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * Written by Ryusuke Konishi <ryusuke@osrg.net>
22 *
23 */
24#ifndef _NILFS_SEGLIST_H
25#define _NILFS_SEGLIST_H
26
27#include <linux/fs.h>
28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h>
30#include "sufile.h"
31
/*
 * One element of a list of segments, optionally holding an opened segment
 * usage entry (see nilfs_open_segment_entry()/nilfs_close_segment_entry()).
 */
32struct nilfs_segment_entry {
33 __u64 segnum; /* segment number this entry refers to */
34
35#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisionally.
36 It must be cancelled if
37 construction aborted */
38
39 unsigned flags; /* NILFS_SLH_* bits, 0 after allocation */
40 struct list_head list; /* link in a list of segment entries */
41 struct buffer_head *bh_su; /* NULL unless the entry is open */
42 struct nilfs_segment_usage *raw_su; /* NULL unless the entry is open */
43};
44
45
46void nilfs_dispose_segment_list(struct list_head *);
47
48static inline struct nilfs_segment_entry *
49nilfs_alloc_segment_entry(__u64 segnum)
50{
51 struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
52
53 if (likely(ent)) {
54 ent->segnum = segnum;
55 ent->flags = 0;
56 ent->bh_su = NULL;
57 ent->raw_su = NULL;
58 INIT_LIST_HEAD(&ent->list);
59 }
60 return ent;
61}
62
/*
 * Look up the segment usage of ent->segnum in @sufile and store the
 * results in ent->raw_su and ent->bh_su.  Returns whatever
 * nilfs_sufile_get_segment_usage() returns.
 */
63static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent,
64 struct inode *sufile)
65{
66 return nilfs_sufile_get_segment_usage(sufile, ent->segnum,
67 &ent->raw_su, &ent->bh_su);
68}
69
70static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent,
71 struct inode *sufile)
72{
73 if (!ent->bh_su)
74 return;
75 nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su);
76 ent->bh_su = NULL;
77 ent->raw_su = NULL;
78}
79
/*
 * Free @ent with kfree().  The entry is neither unlinked from its list nor
 * closed here.
 */
80static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent)
81{
82 kfree(ent);
83}
84
85#endif /* _NILFS_SEGLIST_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
new file mode 100644
index 000000000000..fb70ec3be20e
--- /dev/null
+++ b/fs/nilfs2/segment.c
@@ -0,0 +1,2977 @@
1/*
2 * segment.c - NILFS segment constructor.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#include <linux/pagemap.h>
25#include <linux/buffer_head.h>
26#include <linux/writeback.h>
27#include <linux/bio.h>
28#include <linux/completion.h>
29#include <linux/blkdev.h>
30#include <linux/backing-dev.h>
31#include <linux/freezer.h>
32#include <linux/kthread.h>
33#include <linux/crc32.h>
34#include <linux/pagevec.h>
35#include "nilfs.h"
36#include "btnode.h"
37#include "page.h"
38#include "segment.h"
39#include "sufile.h"
40#include "cpfile.h"
41#include "ifile.h"
42#include "seglist.h"
43#include "segbuf.h"
44
45
46/*
47 * Segment constructor
48 */
49#define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */
50
51#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments
52 appended in collection retry loop */
53
54/* Construction mode */
enum {
	/* Make a logical segment having a super root */
	SC_LSEG_SR = 1,

	/*
	 * Flush data blocks of a given file and make a logical segment
	 * without a super root
	 */
	SC_LSEG_DSYNC,

	/*
	 * Flush data files, leads to segment writes without creating a
	 * checkpoint
	 */
	SC_FLUSH_FILE,

	/* Flush DAT file.  This also creates segments without a checkpoint */
	SC_FLUSH_DAT,
};
64
65/* Stage numbers of dirty block collection */
enum {
	NILFS_ST_INIT = 0,
	/* Collecting dirty blocks for GC */
	NILFS_ST_GC,
	NILFS_ST_FILE,
	NILFS_ST_IFILE,
	NILFS_ST_CPFILE,
	NILFS_ST_SUFILE,
	NILFS_ST_DAT,
	/* Super root */
	NILFS_ST_SR,
	/* Data sync blocks */
	NILFS_ST_DSYNC,
	NILFS_ST_DONE,
};
78
79/* State flags of collection */
80#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
81#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
82#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED)
83
84/* Operations depending on the construction mode and file type */
/*
 * struct nilfs_sc_operations - per-mode / per-file-type callbacks
 * @collect_data: collect a dirty data buffer of an inode
 * @collect_node: collect a dirty node buffer of an inode
 * @collect_bmap: collect a dirty bmap buffer of an inode
 * @write_data_binfo: write the binfo of a data block to the segment summary
 * @write_node_binfo: write the binfo of a node block to the segment summary
 *
 * The collect callbacks return 0 or a negative error code.
 */
struct nilfs_sc_operations {
	int (*collect_data)(struct nilfs_sc_info *sci,
			    struct buffer_head *bh,
			    struct inode *inode);
	int (*collect_node)(struct nilfs_sc_info *sci,
			    struct buffer_head *bh,
			    struct inode *inode);
	int (*collect_bmap)(struct nilfs_sc_info *sci,
			    struct buffer_head *bh,
			    struct inode *inode);
	void (*write_data_binfo)(struct nilfs_sc_info *sci,
				 struct nilfs_segsum_pointer *ssp,
				 union nilfs_binfo *binfo);
	void (*write_node_binfo)(struct nilfs_sc_info *sci,
				 struct nilfs_segsum_pointer *ssp,
				 union nilfs_binfo *binfo);
};
99
100/*
101 * Other definitions
102 */
103static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
104static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
105static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
106static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
107 int);
108
/*
 * Wraparound-safe comparisons of 32-bit counters.
 *
 * Both arguments must have type __u32 (enforced by typecheck()).  The
 * difference is computed in unsigned arithmetic, where wraparound is well
 * defined, and only the result is reinterpreted as a signed value.
 * Subtracting after casting each operand to __s32 could overflow a signed
 * integer.
 */
#define nilfs_cnt32_gt(a, b)						\
	(typecheck(__u32, a) && typecheck(__u32, b) &&			\
	 ((__s32)((b) - (a)) < 0))
#define nilfs_cnt32_ge(a, b)						\
	(typecheck(__u32, a) && typecheck(__u32, b) &&			\
	 ((__s32)((a) - (b)) >= 0))
#define nilfs_cnt32_lt(a, b)	nilfs_cnt32_gt(b, a)
#define nilfs_cnt32_le(a, b)	nilfs_cnt32_ge(b, a)
117
118/*
119 * Transaction
120 */
121static struct kmem_cache *nilfs_transaction_cachep;
122
123/**
124 * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info
125 *
126 * nilfs_init_transaction_cache() creates a slab cache for the struct
127 * nilfs_transaction_info.
128 *
129 * Return Value: On success, it returns 0. On error, one of the following
130 * negative error code is returned.
131 *
132 * %-ENOMEM - Insufficient memory available.
133 */
134int nilfs_init_transaction_cache(void)
135{
136 nilfs_transaction_cachep =
137 kmem_cache_create("nilfs2_transaction_cache",
138 sizeof(struct nilfs_transaction_info),
139 0, SLAB_RECLAIM_ACCOUNT, NULL);
140 return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0;
141}
142
143/**
144 * nilfs_detroy_transaction_cache - destroy the cache for transaction info
145 *
146 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
147 * nilfs_transaction_info.
148 */
149void nilfs_destroy_transaction_cache(void)
150{
151 kmem_cache_destroy(nilfs_transaction_cachep);
152}
153
154static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
155{
156 struct nilfs_transaction_info *cur_ti = current->journal_info;
157 void *save = NULL;
158
159 if (cur_ti) {
160 if (cur_ti->ti_magic == NILFS_TI_MAGIC)
161 return ++cur_ti->ti_count;
162 else {
163 /*
164 * If journal_info field is occupied by other FS,
165 * it is saved and will be restored on
166 * nilfs_transaction_commit().
167 */
168 printk(KERN_WARNING
169 "NILFS warning: journal info from a different "
170 "FS\n");
171 save = current->journal_info;
172 }
173 }
174 if (!ti) {
175 ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
176 if (!ti)
177 return -ENOMEM;
178 ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
179 } else {
180 ti->ti_flags = 0;
181 }
182 ti->ti_count = 0;
183 ti->ti_save = save;
184 ti->ti_magic = NILFS_TI_MAGIC;
185 current->journal_info = ti;
186 return 0;
187}
188
189/**
190 * nilfs_transaction_begin - start indivisible file operations.
191 * @sb: super block
192 * @ti: nilfs_transaction_info
193 * @vacancy_check: flags for vacancy rate checks
194 *
195 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
196 * the segment semaphore, to make a segment construction and write tasks
197 * exclusive. The function is used with nilfs_transaction_commit() in pairs.
198 * The region enclosed by these two functions can be nested. To avoid a
199 * deadlock, the semaphore is only acquired or released in the outermost call.
200 *
201 * This function allocates a nilfs_transaction_info struct to keep context
202 * information on it. It is initialized and hooked onto the current task in
203 * the outermost call. If a pre-allocated struct is given to @ti, it is used
204 * instead; othewise a new struct is assigned from a slab.
205 *
206 * When @vacancy_check flag is set, this function will check the amount of
207 * free space, and will wait for the GC to reclaim disk space if low capacity.
208 *
209 * Return Value: On success, 0 is returned. On error, one of the following
210 * negative error code is returned.
211 *
212 * %-ENOMEM - Insufficient memory available.
213 *
214 * %-ENOSPC - No space left on device
215 */
216int nilfs_transaction_begin(struct super_block *sb,
217 struct nilfs_transaction_info *ti,
218 int vacancy_check)
219{
220 struct nilfs_sb_info *sbi;
221 struct the_nilfs *nilfs;
222 int ret = nilfs_prepare_segment_lock(ti);
223
224 if (unlikely(ret < 0))
225 return ret;
226 if (ret > 0)
227 return 0;
228
229 sbi = NILFS_SB(sb);
230 nilfs = sbi->s_nilfs;
231 down_read(&nilfs->ns_segctor_sem);
232 if (vacancy_check && nilfs_near_disk_full(nilfs)) {
233 up_read(&nilfs->ns_segctor_sem);
234 ret = -ENOSPC;
235 goto failed;
236 }
237 return 0;
238
239 failed:
240 ti = current->journal_info;
241 current->journal_info = ti->ti_save;
242 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
243 kmem_cache_free(nilfs_transaction_cachep, ti);
244 return ret;
245}
246
247/**
248 * nilfs_transaction_commit - commit indivisible file operations.
249 * @sb: super block
250 *
251 * nilfs_transaction_commit() releases the read semaphore which is
252 * acquired by nilfs_transaction_begin(). This is only performed
253 * in outermost call of this function. If a commit flag is set,
254 * nilfs_transaction_commit() sets a timer to start the segment
255 * constructor. If a sync flag is set, it starts construction
256 * directly.
257 */
258int nilfs_transaction_commit(struct super_block *sb)
259{
260 struct nilfs_transaction_info *ti = current->journal_info;
261 struct nilfs_sb_info *sbi;
262 struct nilfs_sc_info *sci;
263 int err = 0;
264
265 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
266 ti->ti_flags |= NILFS_TI_COMMIT;
267 if (ti->ti_count > 0) {
268 ti->ti_count--;
269 return 0;
270 }
271 sbi = NILFS_SB(sb);
272 sci = NILFS_SC(sbi);
273 if (sci != NULL) {
274 if (ti->ti_flags & NILFS_TI_COMMIT)
275 nilfs_segctor_start_timer(sci);
276 if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) >
277 sci->sc_watermark)
278 nilfs_segctor_do_flush(sci, 0);
279 }
280 up_read(&sbi->s_nilfs->ns_segctor_sem);
281 current->journal_info = ti->ti_save;
282
283 if (ti->ti_flags & NILFS_TI_SYNC)
284 err = nilfs_construct_segment(sb);
285 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
286 kmem_cache_free(nilfs_transaction_cachep, ti);
287 return err;
288}
289
290void nilfs_transaction_abort(struct super_block *sb)
291{
292 struct nilfs_transaction_info *ti = current->journal_info;
293
294 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
295 if (ti->ti_count > 0) {
296 ti->ti_count--;
297 return;
298 }
299 up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem);
300
301 current->journal_info = ti->ti_save;
302 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
303 kmem_cache_free(nilfs_transaction_cachep, ti);
304}
305
306void nilfs_relax_pressure_in_lock(struct super_block *sb)
307{
308 struct nilfs_sb_info *sbi = NILFS_SB(sb);
309 struct nilfs_sc_info *sci = NILFS_SC(sbi);
310 struct the_nilfs *nilfs = sbi->s_nilfs;
311
312 if (!sci || !sci->sc_flush_request)
313 return;
314
315 set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
316 up_read(&nilfs->ns_segctor_sem);
317
318 down_write(&nilfs->ns_segctor_sem);
319 if (sci->sc_flush_request &&
320 test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
321 struct nilfs_transaction_info *ti = current->journal_info;
322
323 ti->ti_flags |= NILFS_TI_WRITER;
324 nilfs_segctor_do_immediate_flush(sci);
325 ti->ti_flags &= ~NILFS_TI_WRITER;
326 }
327 downgrade_write(&nilfs->ns_segctor_sem);
328}
329
330static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
331 struct nilfs_transaction_info *ti,
332 int gcflag)
333{
334 struct nilfs_transaction_info *cur_ti = current->journal_info;
335
336 WARN_ON(cur_ti);
337 ti->ti_flags = NILFS_TI_WRITER;
338 ti->ti_count = 0;
339 ti->ti_save = cur_ti;
340 ti->ti_magic = NILFS_TI_MAGIC;
341 INIT_LIST_HEAD(&ti->ti_garbage);
342 current->journal_info = ti;
343
344 for (;;) {
345 down_write(&sbi->s_nilfs->ns_segctor_sem);
346 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags))
347 break;
348
349 nilfs_segctor_do_immediate_flush(NILFS_SC(sbi));
350
351 up_write(&sbi->s_nilfs->ns_segctor_sem);
352 yield();
353 }
354 if (gcflag)
355 ti->ti_flags |= NILFS_TI_GC;
356}
357
358static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi)
359{
360 struct nilfs_transaction_info *ti = current->journal_info;
361
362 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
363 BUG_ON(ti->ti_count > 0);
364
365 up_write(&sbi->s_nilfs->ns_segctor_sem);
366 current->journal_info = ti->ti_save;
367 if (!list_empty(&ti->ti_garbage))
368 nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
369}
370
371static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
372 struct nilfs_segsum_pointer *ssp,
373 unsigned bytes)
374{
375 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
376 unsigned blocksize = sci->sc_super->s_blocksize;
377 void *p;
378
379 if (unlikely(ssp->offset + bytes > blocksize)) {
380 ssp->offset = 0;
381 BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
382 &segbuf->sb_segsum_buffers));
383 ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
384 }
385 p = ssp->bh->b_data + ssp->offset;
386 ssp->offset += bytes;
387 return p;
388}
389
390/**
391 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
392 * @sci: nilfs_sc_info
393 */
394static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
395{
396 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
397 struct buffer_head *sumbh;
398 unsigned sumbytes;
399 unsigned flags = 0;
400 int err;
401
402 if (nilfs_doing_gc())
403 flags = NILFS_SS_GC;
404 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime);
405 if (unlikely(err))
406 return err;
407
408 sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
409 sumbytes = segbuf->sb_sum.sumbytes;
410 sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes;
411 sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes;
412 sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
413 return 0;
414}
415
416static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
417{
418 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
419 if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
420 return -E2BIG; /* The current segment is filled up
421 (internal code) */
422 sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
423 return nilfs_segctor_reset_segment_buffer(sci);
424}
425
426static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
427{
428 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
429 int err;
430
431 if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
432 err = nilfs_segctor_feed_segment(sci);
433 if (err)
434 return err;
435 segbuf = sci->sc_curseg;
436 }
437 err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root);
438 if (likely(!err))
439 segbuf->sb_sum.flags |= NILFS_SS_SR;
440 return err;
441}
442
443/*
444 * Functions for making segment summary and payloads
445 */
446static int nilfs_segctor_segsum_block_required(
447 struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
448 unsigned binfo_size)
449{
450 unsigned blocksize = sci->sc_super->s_blocksize;
451 /* Size of finfo and binfo is enough small against blocksize */
452
453 return ssp->offset + binfo_size +
454 (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
455 blocksize;
456}
457
458static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
459 struct inode *inode)
460{
461 sci->sc_curseg->sb_sum.nfinfo++;
462 sci->sc_binfo_ptr = sci->sc_finfo_ptr;
463 nilfs_segctor_map_segsum_entry(
464 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
465
466 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
467 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
468 /* skip finfo */
469}
470
471static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
472 struct inode *inode)
473{
474 struct nilfs_finfo *finfo;
475 struct nilfs_inode_info *ii;
476 struct nilfs_segment_buffer *segbuf;
477
478 if (sci->sc_blk_cnt == 0)
479 return;
480
481 ii = NILFS_I(inode);
482 finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
483 sizeof(*finfo));
484 finfo->fi_ino = cpu_to_le64(inode->i_ino);
485 finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
486 finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
487 finfo->fi_cno = cpu_to_le64(ii->i_cno);
488
489 segbuf = sci->sc_curseg;
490 segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
491 sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
492 sci->sc_finfo_ptr = sci->sc_binfo_ptr;
493 sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
494}
495
496static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
497 struct buffer_head *bh,
498 struct inode *inode,
499 unsigned binfo_size)
500{
501 struct nilfs_segment_buffer *segbuf;
502 int required, err = 0;
503
504 retry:
505 segbuf = sci->sc_curseg;
506 required = nilfs_segctor_segsum_block_required(
507 sci, &sci->sc_binfo_ptr, binfo_size);
508 if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
509 nilfs_segctor_end_finfo(sci, inode);
510 err = nilfs_segctor_feed_segment(sci);
511 if (err)
512 return err;
513 goto retry;
514 }
515 if (unlikely(required)) {
516 err = nilfs_segbuf_extend_segsum(segbuf);
517 if (unlikely(err))
518 goto failed;
519 }
520 if (sci->sc_blk_cnt == 0)
521 nilfs_segctor_begin_finfo(sci, inode);
522
523 nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
524 /* Substitution to vblocknr is delayed until update_blocknr() */
525 nilfs_segbuf_add_file_buffer(segbuf, bh);
526 sci->sc_blk_cnt++;
527 failed:
528 return err;
529}
530
531static int nilfs_handle_bmap_error(int err, const char *fname,
532 struct inode *inode, struct super_block *sb)
533{
534 if (err == -EINVAL) {
535 nilfs_error(sb, fname, "broken bmap (inode=%lu)\n",
536 inode->i_ino);
537 err = -EIO;
538 }
539 return err;
540}
541
542/*
543 * Callback functions that enumerate, mark, and collect dirty blocks
544 */
545static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
546 struct buffer_head *bh, struct inode *inode)
547{
548 int err;
549
550 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
551 if (unlikely(err < 0))
552 return nilfs_handle_bmap_error(err, __func__, inode,
553 sci->sc_super);
554
555 err = nilfs_segctor_add_file_block(sci, bh, inode,
556 sizeof(struct nilfs_binfo_v));
557 if (!err)
558 sci->sc_datablk_cnt++;
559 return err;
560}
561
562static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
563 struct buffer_head *bh,
564 struct inode *inode)
565{
566 int err;
567
568 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
569 if (unlikely(err < 0))
570 return nilfs_handle_bmap_error(err, __func__, inode,
571 sci->sc_super);
572 return 0;
573}
574
575static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
576 struct buffer_head *bh,
577 struct inode *inode)
578{
579 WARN_ON(!buffer_dirty(bh));
580 return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
581}
582
583static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
584 struct nilfs_segsum_pointer *ssp,
585 union nilfs_binfo *binfo)
586{
587 struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
588 sci, ssp, sizeof(*binfo_v));
589 *binfo_v = binfo->bi_v;
590}
591
592static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
593 struct nilfs_segsum_pointer *ssp,
594 union nilfs_binfo *binfo)
595{
596 __le64 *vblocknr = nilfs_segctor_map_segsum_entry(
597 sci, ssp, sizeof(*vblocknr));
598 *vblocknr = binfo->bi_v.bi_vblocknr;
599}
600
601struct nilfs_sc_operations nilfs_sc_file_ops = {
602 .collect_data = nilfs_collect_file_data,
603 .collect_node = nilfs_collect_file_node,
604 .collect_bmap = nilfs_collect_file_bmap,
605 .write_data_binfo = nilfs_write_file_data_binfo,
606 .write_node_binfo = nilfs_write_file_node_binfo,
607};
608
609static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
610 struct buffer_head *bh, struct inode *inode)
611{
612 int err;
613
614 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
615 if (unlikely(err < 0))
616 return nilfs_handle_bmap_error(err, __func__, inode,
617 sci->sc_super);
618
619 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
620 if (!err)
621 sci->sc_datablk_cnt++;
622 return err;
623}
624
625static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
626 struct buffer_head *bh, struct inode *inode)
627{
628 WARN_ON(!buffer_dirty(bh));
629 return nilfs_segctor_add_file_block(sci, bh, inode,
630 sizeof(struct nilfs_binfo_dat));
631}
632
633static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
634 struct nilfs_segsum_pointer *ssp,
635 union nilfs_binfo *binfo)
636{
637 __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
638 sizeof(*blkoff));
639 *blkoff = binfo->bi_dat.bi_blkoff;
640}
641
642static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
643 struct nilfs_segsum_pointer *ssp,
644 union nilfs_binfo *binfo)
645{
646 struct nilfs_binfo_dat *binfo_dat =
647 nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
648 *binfo_dat = binfo->bi_dat;
649}
650
651struct nilfs_sc_operations nilfs_sc_dat_ops = {
652 .collect_data = nilfs_collect_dat_data,
653 .collect_node = nilfs_collect_file_node,
654 .collect_bmap = nilfs_collect_dat_bmap,
655 .write_data_binfo = nilfs_write_dat_data_binfo,
656 .write_node_binfo = nilfs_write_dat_node_binfo,
657};
658
659struct nilfs_sc_operations nilfs_sc_dsync_ops = {
660 .collect_data = nilfs_collect_file_data,
661 .collect_node = NULL,
662 .collect_bmap = NULL,
663 .write_data_binfo = nilfs_write_file_data_binfo,
664 .write_node_binfo = NULL,
665};
666
667static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
668 struct list_head *listp,
669 size_t nlimit,
670 loff_t start, loff_t end)
671{
672 struct address_space *mapping = inode->i_mapping;
673 struct pagevec pvec;
674 pgoff_t index = 0, last = ULONG_MAX;
675 size_t ndirties = 0;
676 int i;
677
678 if (unlikely(start != 0 || end != LLONG_MAX)) {
679 /*
680 * A valid range is given for sync-ing data pages. The
681 * range is rounded to per-page; extra dirty buffers
682 * may be included if blocksize < pagesize.
683 */
684 index = start >> PAGE_SHIFT;
685 last = end >> PAGE_SHIFT;
686 }
687 pagevec_init(&pvec, 0);
688 repeat:
689 if (unlikely(index > last) ||
690 !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
691 min_t(pgoff_t, last - index,
692 PAGEVEC_SIZE - 1) + 1))
693 return ndirties;
694
695 for (i = 0; i < pagevec_count(&pvec); i++) {
696 struct buffer_head *bh, *head;
697 struct page *page = pvec.pages[i];
698
699 if (unlikely(page->index > last))
700 break;
701
702 if (mapping->host) {
703 lock_page(page);
704 if (!page_has_buffers(page))
705 create_empty_buffers(page,
706 1 << inode->i_blkbits, 0);
707 unlock_page(page);
708 }
709
710 bh = head = page_buffers(page);
711 do {
712 if (!buffer_dirty(bh))
713 continue;
714 get_bh(bh);
715 list_add_tail(&bh->b_assoc_buffers, listp);
716 ndirties++;
717 if (unlikely(ndirties >= nlimit)) {
718 pagevec_release(&pvec);
719 cond_resched();
720 return ndirties;
721 }
722 } while (bh = bh->b_this_page, bh != head);
723 }
724 pagevec_release(&pvec);
725 cond_resched();
726 goto repeat;
727}
728
729static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
730 struct list_head *listp)
731{
732 struct nilfs_inode_info *ii = NILFS_I(inode);
733 struct address_space *mapping = &ii->i_btnode_cache;
734 struct pagevec pvec;
735 struct buffer_head *bh, *head;
736 unsigned int i;
737 pgoff_t index = 0;
738
739 pagevec_init(&pvec, 0);
740
741 while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
742 PAGEVEC_SIZE)) {
743 for (i = 0; i < pagevec_count(&pvec); i++) {
744 bh = head = page_buffers(pvec.pages[i]);
745 do {
746 if (buffer_dirty(bh)) {
747 get_bh(bh);
748 list_add_tail(&bh->b_assoc_buffers,
749 listp);
750 }
751 bh = bh->b_this_page;
752 } while (bh != head);
753 }
754 pagevec_release(&pvec);
755 cond_resched();
756 }
757}
758
759static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
760 struct list_head *head, int force)
761{
762 struct nilfs_inode_info *ii, *n;
763 struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
764 unsigned nv = 0;
765
766 while (!list_empty(head)) {
767 spin_lock(&sbi->s_inode_lock);
768 list_for_each_entry_safe(ii, n, head, i_dirty) {
769 list_del_init(&ii->i_dirty);
770 if (force) {
771 if (unlikely(ii->i_bh)) {
772 brelse(ii->i_bh);
773 ii->i_bh = NULL;
774 }
775 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
776 set_bit(NILFS_I_QUEUED, &ii->i_state);
777 list_add_tail(&ii->i_dirty,
778 &sbi->s_dirty_files);
779 continue;
780 }
781 ivec[nv++] = ii;
782 if (nv == SC_N_INODEVEC)
783 break;
784 }
785 spin_unlock(&sbi->s_inode_lock);
786
787 for (pii = ivec; nv > 0; pii++, nv--)
788 iput(&(*pii)->vfs_inode);
789 }
790}
791
792static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi)
793{
794 struct the_nilfs *nilfs = sbi->s_nilfs;
795 int ret = 0;
796
797 if (nilfs_mdt_fetch_dirty(sbi->s_ifile))
798 ret++;
799 if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
800 ret++;
801 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
802 ret++;
803 if (ret || nilfs_doing_gc())
804 if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs)))
805 ret++;
806 return ret;
807}
808
809static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
810{
811 return list_empty(&sci->sc_dirty_files) &&
812 !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
813 list_empty(&sci->sc_cleaning_segments) &&
814 (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
815}
816
817static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
818{
819 struct nilfs_sb_info *sbi = sci->sc_sbi;
820 int ret = 0;
821
822 if (nilfs_test_metadata_dirty(sbi))
823 set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
824
825 spin_lock(&sbi->s_inode_lock);
826 if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci))
827 ret++;
828
829 spin_unlock(&sbi->s_inode_lock);
830 return ret;
831}
832
833static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
834{
835 struct nilfs_sb_info *sbi = sci->sc_sbi;
836 struct the_nilfs *nilfs = sbi->s_nilfs;
837
838 nilfs_mdt_clear_dirty(sbi->s_ifile);
839 nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
840 nilfs_mdt_clear_dirty(nilfs->ns_sufile);
841 nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs));
842}
843
844static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
845{
846 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
847 struct buffer_head *bh_cp;
848 struct nilfs_checkpoint *raw_cp;
849 int err;
850
851 /* XXX: this interface will be changed */
852 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
853 &raw_cp, &bh_cp);
854 if (likely(!err)) {
855 /* The following code is duplicated with cpfile. But, it is
856 needed to collect the checkpoint even if it was not newly
857 created */
858 nilfs_mdt_mark_buffer_dirty(bh_cp);
859 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
860 nilfs_cpfile_put_checkpoint(
861 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
862 } else
863 WARN_ON(err == -EINVAL || err == -ENOENT);
864
865 return err;
866}
867
868static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
869{
870 struct nilfs_sb_info *sbi = sci->sc_sbi;
871 struct the_nilfs *nilfs = sbi->s_nilfs;
872 struct buffer_head *bh_cp;
873 struct nilfs_checkpoint *raw_cp;
874 int err;
875
876 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
877 &raw_cp, &bh_cp);
878 if (unlikely(err)) {
879 WARN_ON(err == -EINVAL || err == -ENOENT);
880 goto failed_ibh;
881 }
882 raw_cp->cp_snapshot_list.ssl_next = 0;
883 raw_cp->cp_snapshot_list.ssl_prev = 0;
884 raw_cp->cp_inodes_count =
885 cpu_to_le64(atomic_read(&sbi->s_inodes_count));
886 raw_cp->cp_blocks_count =
887 cpu_to_le64(atomic_read(&sbi->s_blocks_count));
888 raw_cp->cp_nblk_inc =
889 cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
890 raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
891 raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
892
893 if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
894 nilfs_checkpoint_clear_minor(raw_cp);
895 else
896 nilfs_checkpoint_set_minor(raw_cp);
897
898 nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1);
899 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
900 return 0;
901
902 failed_ibh:
903 return err;
904}
905
906static void nilfs_fill_in_file_bmap(struct inode *ifile,
907 struct nilfs_inode_info *ii)
908
909{
910 struct buffer_head *ibh;
911 struct nilfs_inode *raw_inode;
912
913 if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
914 ibh = ii->i_bh;
915 BUG_ON(!ibh);
916 raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
917 ibh);
918 nilfs_bmap_write(ii->i_bmap, raw_inode);
919 nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
920 }
921}
922
923static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci,
924 struct inode *ifile)
925{
926 struct nilfs_inode_info *ii;
927
928 list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
929 nilfs_fill_in_file_bmap(ifile, ii);
930 set_bit(NILFS_I_COLLECTED, &ii->i_state);
931 }
932}
933
934/*
935 * CRC calculation routines
936 */
937static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed)
938{
939 struct nilfs_super_root *raw_sr =
940 (struct nilfs_super_root *)bh_sr->b_data;
941 u32 crc;
942
943 crc = crc32_le(seed,
944 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
945 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
946 raw_sr->sr_sum = cpu_to_le32(crc);
947}
948
949static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci,
950 u32 seed)
951{
952 struct nilfs_segment_buffer *segbuf;
953
954 if (sci->sc_super_root)
955 nilfs_fill_in_super_root_crc(sci->sc_super_root, seed);
956
957 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
958 nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
959 nilfs_segbuf_fill_in_data_crc(segbuf, seed);
960 }
961}
962
963static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
964 struct the_nilfs *nilfs)
965{
966 struct buffer_head *bh_sr = sci->sc_super_root;
967 struct nilfs_super_root *raw_sr =
968 (struct nilfs_super_root *)bh_sr->b_data;
969 unsigned isz = nilfs->ns_inode_size;
970
971 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
972 raw_sr->sr_nongc_ctime
973 = cpu_to_le64(nilfs_doing_gc() ?
974 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
975 raw_sr->sr_flags = 0;
976
977 nilfs_mdt_write_inode_direct(
978 nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz));
979 nilfs_mdt_write_inode_direct(
980 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz));
981 nilfs_mdt_write_inode_direct(
982 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz));
983}
984
985static void nilfs_redirty_inodes(struct list_head *head)
986{
987 struct nilfs_inode_info *ii;
988
989 list_for_each_entry(ii, head, i_dirty) {
990 if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
991 clear_bit(NILFS_I_COLLECTED, &ii->i_state);
992 }
993}
994
995static void nilfs_drop_collected_inodes(struct list_head *head)
996{
997 struct nilfs_inode_info *ii;
998
999 list_for_each_entry(ii, head, i_dirty) {
1000 if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
1001 continue;
1002
1003 clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
1004 set_bit(NILFS_I_UPDATED, &ii->i_state);
1005 }
1006}
1007
1008static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci,
1009 struct inode *sufile)
1010
1011{
1012 struct list_head *head = &sci->sc_cleaning_segments;
1013 struct nilfs_segment_entry *ent;
1014 int err;
1015
1016 list_for_each_entry(ent, head, list) {
1017 if (!(ent->flags & NILFS_SLH_FREED))
1018 break;
1019 err = nilfs_sufile_cancel_free(sufile, ent->segnum);
1020 WARN_ON(err); /* do not happen */
1021 ent->flags &= ~NILFS_SLH_FREED;
1022 }
1023}
1024
1025static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci,
1026 struct inode *sufile)
1027{
1028 struct list_head *head = &sci->sc_cleaning_segments;
1029 struct nilfs_segment_entry *ent;
1030 int err;
1031
1032 list_for_each_entry(ent, head, list) {
1033 err = nilfs_sufile_free(sufile, ent->segnum);
1034 if (unlikely(err))
1035 return err;
1036 ent->flags |= NILFS_SLH_FREED;
1037 }
1038 return 0;
1039}
1040
1041static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci)
1042{
1043 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
1044}
1045
1046static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
1047 struct inode *inode,
1048 struct list_head *listp,
1049 int (*collect)(struct nilfs_sc_info *,
1050 struct buffer_head *,
1051 struct inode *))
1052{
1053 struct buffer_head *bh, *n;
1054 int err = 0;
1055
1056 if (collect) {
1057 list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
1058 list_del_init(&bh->b_assoc_buffers);
1059 err = collect(sci, bh, inode);
1060 brelse(bh);
1061 if (unlikely(err))
1062 goto dispose_buffers;
1063 }
1064 return 0;
1065 }
1066
1067 dispose_buffers:
1068 while (!list_empty(listp)) {
1069 bh = list_entry(listp->next, struct buffer_head,
1070 b_assoc_buffers);
1071 list_del_init(&bh->b_assoc_buffers);
1072 brelse(bh);
1073 }
1074 return err;
1075}
1076
1077static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
1078{
1079 /* Remaining number of blocks within segment buffer */
1080 return sci->sc_segbuf_nblocks -
1081 (sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
1082}
1083
1084static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
1085 struct inode *inode,
1086 struct nilfs_sc_operations *sc_ops)
1087{
1088 LIST_HEAD(data_buffers);
1089 LIST_HEAD(node_buffers);
1090 int err;
1091
1092 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1093 size_t n, rest = nilfs_segctor_buffer_rest(sci);
1094
1095 n = nilfs_lookup_dirty_data_buffers(
1096 inode, &data_buffers, rest + 1, 0, LLONG_MAX);
1097 if (n > rest) {
1098 err = nilfs_segctor_apply_buffers(
1099 sci, inode, &data_buffers,
1100 sc_ops->collect_data);
1101 BUG_ON(!err); /* always receive -E2BIG or true error */
1102 goto break_or_fail;
1103 }
1104 }
1105 nilfs_lookup_dirty_node_buffers(inode, &node_buffers);
1106
1107 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1108 err = nilfs_segctor_apply_buffers(
1109 sci, inode, &data_buffers, sc_ops->collect_data);
1110 if (unlikely(err)) {
1111 /* dispose node list */
1112 nilfs_segctor_apply_buffers(
1113 sci, inode, &node_buffers, NULL);
1114 goto break_or_fail;
1115 }
1116 sci->sc_stage.flags |= NILFS_CF_NODE;
1117 }
1118 /* Collect node */
1119 err = nilfs_segctor_apply_buffers(
1120 sci, inode, &node_buffers, sc_ops->collect_node);
1121 if (unlikely(err))
1122 goto break_or_fail;
1123
1124 nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
1125 err = nilfs_segctor_apply_buffers(
1126 sci, inode, &node_buffers, sc_ops->collect_bmap);
1127 if (unlikely(err))
1128 goto break_or_fail;
1129
1130 nilfs_segctor_end_finfo(sci, inode);
1131 sci->sc_stage.flags &= ~NILFS_CF_NODE;
1132
1133 break_or_fail:
1134 return err;
1135}
1136
1137static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
1138 struct inode *inode)
1139{
1140 LIST_HEAD(data_buffers);
1141 size_t n, rest = nilfs_segctor_buffer_rest(sci);
1142 int err;
1143
1144 n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
1145 sci->sc_dsync_start,
1146 sci->sc_dsync_end);
1147
1148 err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
1149 nilfs_collect_file_data);
1150 if (!err) {
1151 nilfs_segctor_end_finfo(sci, inode);
1152 BUG_ON(n > rest);
1153 /* always receive -E2BIG or true error if n > rest */
1154 }
1155 return err;
1156}
1157
/*
 * nilfs_segctor_collect_blocks - run the block collection stages
 * @sci: segment constructor context; sci->sc_stage.scnt selects the stage at
 *       which this call starts
 * @mode: construction mode; SC_LSEG_SR, SC_LSEG_DSYNC, SC_FLUSH_FILE and
 *        SC_FLUSH_DAT are tested here
 *
 * The stages are written as switch cases that fall through from one to the
 * next, advancing sci->sc_stage.scnt as each one completes.  When a helper
 * fails, the function leaves immediately with sc_stage.scnt still naming the
 * stage that was in progress.
 *
 * Returns 0 on success, otherwise the nonzero value returned by the helper
 * that failed.
 */
1158static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1159{
1160 struct nilfs_sb_info *sbi = sci->sc_sbi;
1161 struct the_nilfs *nilfs = sbi->s_nilfs;
1162 struct list_head *head;
1163 struct nilfs_inode_info *ii;
1164 int err = 0;
1165
1166 switch (sci->sc_stage.scnt) {
1167 case NILFS_ST_INIT:
1168 /* Pre-processes */
1169 sci->sc_stage.flags = 0;
1170
/* Only when NILFS_SC_UNCLOSED is not set: reset the block count, mark the
   log beginning, and allow the data-sync shortcut */
1171 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
1172 sci->sc_nblk_inc = 0;
1173 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
1174 if (mode == SC_LSEG_DSYNC) {
1175 sci->sc_stage.scnt = NILFS_ST_DSYNC;
1176 goto dsync_mode;
1177 }
1178 }
1179
1180 sci->sc_stage.dirty_file_ptr = NULL;
1181 sci->sc_stage.gc_inode_ptr = NULL;
/* SC_FLUSH_DAT skips every stage before the DAT file */
1182 if (mode == SC_FLUSH_DAT) {
1183 sci->sc_stage.scnt = NILFS_ST_DAT;
1184 goto dat_stage;
1185 }
1186 sci->sc_stage.scnt++; /* Fall through */
1187 case NILFS_ST_GC:
1188 if (nilfs_doing_gc()) {
1189 head = &sci->sc_gc_inodes;
1190 ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
1191 head, i_dirty);
1192 list_for_each_entry_continue(ii, head, i_dirty) {
1193 err = nilfs_segctor_scan_file(
1194 sci, &ii->vfs_inode,
1195 &nilfs_sc_file_ops);
1196 if (unlikely(err)) {
/* Record the entry preceding the failed one; the _continue iterator starts
   after the recorded entry, so a later pass over this stage begins at the
   inode that failed */
1197 sci->sc_stage.gc_inode_ptr = list_entry(
1198 ii->i_dirty.prev,
1199 struct nilfs_inode_info,
1200 i_dirty);
1201 goto break_or_fail;
1202 }
1203 set_bit(NILFS_I_COLLECTED, &ii->i_state);
1204 }
1205 sci->sc_stage.gc_inode_ptr = NULL;
1206 }
1207 sci->sc_stage.scnt++; /* Fall through */
1208 case NILFS_ST_FILE:
1209 head = &sci->sc_dirty_files;
1210 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
1211 i_dirty);
1212 list_for_each_entry_continue(ii, head, i_dirty) {
1213 clear_bit(NILFS_I_DIRTY, &ii->i_state);
1214
1215 err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
1216 &nilfs_sc_file_ops);
1217 if (unlikely(err)) {
/* Same resume bookkeeping as in the GC stage above */
1218 sci->sc_stage.dirty_file_ptr =
1219 list_entry(ii->i_dirty.prev,
1220 struct nilfs_inode_info,
1221 i_dirty);
1222 goto break_or_fail;
1223 }
1224 /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
1225 /* XXX: required ? */
1226 }
1227 sci->sc_stage.dirty_file_ptr = NULL;
/* SC_FLUSH_FILE ends once the regular files have been collected */
1228 if (mode == SC_FLUSH_FILE) {
1229 sci->sc_stage.scnt = NILFS_ST_DONE;
1230 return 0;
1231 }
1232 sci->sc_stage.scnt++;
1233 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
1234 /* Fall through */
1235 case NILFS_ST_IFILE:
1236 err = nilfs_segctor_scan_file(sci, sbi->s_ifile,
1237 &nilfs_sc_file_ops);
1238 if (unlikely(err))
1239 break;
/* The stage counter is advanced before the checkpoint is created, so a
   failure of nilfs_segctor_create_checkpoint() leaves scnt past IFILE */
1240 sci->sc_stage.scnt++;
1241 /* Creating a checkpoint */
1242 err = nilfs_segctor_create_checkpoint(sci);
1243 if (unlikely(err))
1244 break;
1245 /* Fall through */
1246 case NILFS_ST_CPFILE:
1247 err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
1248 &nilfs_sc_file_ops);
1249 if (unlikely(err))
1250 break;
1251 sci->sc_stage.scnt++; /* Fall through */
1252 case NILFS_ST_SUFILE:
1253 err = nilfs_segctor_prepare_free_segments(sci,
1254 nilfs->ns_sufile);
1255 if (unlikely(err))
1256 break;
1257 err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
1258 &nilfs_sc_file_ops);
1259 if (unlikely(err))
1260 break;
1261 sci->sc_stage.scnt++; /* Fall through */
1262 case NILFS_ST_DAT:
1263 dat_stage:
/* The DAT file is scanned with nilfs_sc_dat_ops instead of the file ops */
1264 err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs),
1265 &nilfs_sc_dat_ops);
1266 if (unlikely(err))
1267 break;
1268 if (mode == SC_FLUSH_DAT) {
1269 sci->sc_stage.scnt = NILFS_ST_DONE;
1270 return 0;
1271 }
1272 sci->sc_stage.scnt++; /* Fall through */
1273 case NILFS_ST_SR:
1274 if (mode == SC_LSEG_SR) {
1275 /* Appending a super root */
1276 err = nilfs_segctor_add_super_root(sci);
1277 if (unlikely(err))
1278 break;
1279 }
1280 /* End of a logical segment */
1281 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1282 sci->sc_stage.scnt = NILFS_ST_DONE;
1283 return 0;
1284 case NILFS_ST_DSYNC:
1285 dsync_mode:
1286 sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
1287 ii = sci->sc_dsync_inode;
/* Without NILFS_I_BUSY nothing is collected; err is still 0 here and the
   stage is left at NILFS_ST_DSYNC */
1288 if (!test_bit(NILFS_I_BUSY, &ii->i_state))
1289 break;
1290
1291 err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
1292 if (unlikely(err))
1293 break;
1294 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1295 sci->sc_stage.scnt = NILFS_ST_DONE;
1296 return 0;
1297 case NILFS_ST_DONE:
1298 return 0;
1299 default:
1300 BUG();
1301 }
1302
1303 break_or_fail:
1304 return err;
1305}
1306
1307static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum)
1308{
1309 struct buffer_head *bh_su;
1310 struct nilfs_segment_usage *raw_su;
1311 int err;
1312
1313 err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su);
1314 if (unlikely(err))
1315 return err;
1316 nilfs_mdt_mark_buffer_dirty(bh_su);
1317 nilfs_mdt_mark_dirty(sufile);
1318 nilfs_sufile_put_segment_usage(sufile, segnum, bh_su);
1319 return 0;
1320}
1321
1322static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
1323 struct the_nilfs *nilfs)
1324{
1325 struct nilfs_segment_buffer *segbuf, *n;
1326 __u64 nextnum;
1327 int err;
1328
1329 if (list_empty(&sci->sc_segbufs)) {
1330 segbuf = nilfs_segbuf_new(sci->sc_super);
1331 if (unlikely(!segbuf))
1332 return -ENOMEM;
1333 list_add(&segbuf->sb_list, &sci->sc_segbufs);
1334 } else
1335 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1336
1337 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset,
1338 nilfs);
1339
1340 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1341 nilfs_shift_to_next_segment(nilfs);
1342 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
1343 }
1344 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1345
1346 err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum);
1347 if (unlikely(err))
1348 return err;
1349
1350 if (nilfs->ns_segnum == nilfs->ns_nextnum) {
1351 /* Start from the head of a new full segment */
1352 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
1353 if (unlikely(err))
1354 return err;
1355 } else
1356 nextnum = nilfs->ns_nextnum;
1357
1358 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1359 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
1360
1361 /* truncating segment buffers */
1362 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
1363 sb_list) {
1364 list_del_init(&segbuf->sb_list);
1365 nilfs_segbuf_free(segbuf);
1366 }
1367 return 0;
1368}
1369
1370static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1371 struct the_nilfs *nilfs, int nadd)
1372{
1373 struct nilfs_segment_buffer *segbuf, *prev, *n;
1374 struct inode *sufile = nilfs->ns_sufile;
1375 __u64 nextnextnum;
1376 LIST_HEAD(list);
1377 int err, ret, i;
1378
1379 prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
1380 /*
1381 * Since the segment specified with nextnum might be allocated during
1382 * the previous construction, the buffer including its segusage may
1383 * not be dirty. The following call ensures that the buffer is dirty
1384 * and will pin the buffer on memory until the sufile is written.
1385 */
1386 err = nilfs_touch_segusage(sufile, prev->sb_nextnum);
1387 if (unlikely(err))
1388 return err;
1389
1390 for (i = 0; i < nadd; i++) {
1391 /* extend segment info */
1392 err = -ENOMEM;
1393 segbuf = nilfs_segbuf_new(sci->sc_super);
1394 if (unlikely(!segbuf))
1395 goto failed;
1396
1397 /* map this buffer to region of segment on-disk */
1398 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1399 sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;
1400
1401 /* allocate the next next full segment */
1402 err = nilfs_sufile_alloc(sufile, &nextnextnum);
1403 if (unlikely(err))
1404 goto failed_segbuf;
1405
1406 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
1407 nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);
1408
1409 list_add_tail(&segbuf->sb_list, &list);
1410 prev = segbuf;
1411 }
1412 list_splice(&list, sci->sc_segbufs.prev);
1413 return 0;
1414
1415 failed_segbuf:
1416 nilfs_segbuf_free(segbuf);
1417 failed:
1418 list_for_each_entry_safe(segbuf, n, &list, sb_list) {
1419 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1420 WARN_ON(ret); /* never fails */
1421 list_del_init(&segbuf->sb_list);
1422 nilfs_segbuf_free(segbuf);
1423 }
1424 return err;
1425}
1426
1427static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
1428 struct the_nilfs *nilfs)
1429{
1430 struct nilfs_segment_buffer *segbuf;
1431 int ret, done = 0;
1432
1433 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1434 if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
1435 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
1436 WARN_ON(ret); /* never fails */
1437 }
1438 if (segbuf->sb_io_error) {
1439 /* Case 1: The first segment failed */
1440 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
1441 /* Case 1a: Partial segment appended into an existing
1442 segment */
1443 nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
1444 segbuf->sb_fseg_end);
1445 else /* Case 1b: New full segment */
1446 set_nilfs_discontinued(nilfs);
1447 done++;
1448 }
1449
1450 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1451 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
1452 WARN_ON(ret); /* never fails */
1453 if (!done && segbuf->sb_io_error) {
1454 if (segbuf->sb_segnum != nilfs->ns_nextnum)
1455 /* Case 2: extended segment (!= next) failed */
1456 nilfs_sufile_set_error(nilfs->ns_sufile,
1457 segbuf->sb_segnum);
1458 done++;
1459 }
1460 }
1461}
1462
1463static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci)
1464{
1465 struct nilfs_segment_buffer *segbuf;
1466
1467 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list)
1468 nilfs_segbuf_clear(segbuf);
1469 sci->sc_super_root = NULL;
1470}
1471
1472static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci)
1473{
1474 struct nilfs_segment_buffer *segbuf;
1475
1476 while (!list_empty(&sci->sc_segbufs)) {
1477 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1478 list_del_init(&segbuf->sb_list);
1479 nilfs_segbuf_free(segbuf);
1480 }
1481 /* sci->sc_curseg = NULL; */
1482}
1483
1484static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
1485 struct the_nilfs *nilfs, int err)
1486{
1487 if (unlikely(err)) {
1488 nilfs_segctor_free_incomplete_segments(sci, nilfs);
1489 nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
1490 }
1491 nilfs_segctor_clear_segment_buffers(sci);
1492}
1493
1494static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
1495 struct inode *sufile)
1496{
1497 struct nilfs_segment_buffer *segbuf;
1498 struct buffer_head *bh_su;
1499 struct nilfs_segment_usage *raw_su;
1500 unsigned long live_blocks;
1501 int ret;
1502
1503 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1504 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1505 &raw_su, &bh_su);
1506 WARN_ON(ret); /* always succeed because bh_su is dirty */
1507 live_blocks = segbuf->sb_sum.nblocks +
1508 (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
1509 raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime);
1510 raw_su->su_nblocks = cpu_to_le32(live_blocks);
1511 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
1512 bh_su);
1513 }
1514}
1515
1516static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci,
1517 struct inode *sufile)
1518{
1519 struct nilfs_segment_buffer *segbuf;
1520 struct buffer_head *bh_su;
1521 struct nilfs_segment_usage *raw_su;
1522 int ret;
1523
1524 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1525 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1526 &raw_su, &bh_su);
1527 WARN_ON(ret); /* always succeed because bh_su is dirty */
1528 raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start -
1529 segbuf->sb_fseg_start);
1530 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);
1531
1532 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1533 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1534 &raw_su, &bh_su);
1535 WARN_ON(ret); /* always succeed */
1536 raw_su->su_nblocks = 0;
1537 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
1538 bh_su);
1539 }
1540}
1541
1542static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
1543 struct nilfs_segment_buffer *last,
1544 struct inode *sufile)
1545{
1546 struct nilfs_segment_buffer *segbuf = last, *n;
1547 int ret;
1548
1549 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
1550 sb_list) {
1551 list_del_init(&segbuf->sb_list);
1552 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
1553 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1554 WARN_ON(ret);
1555 nilfs_segbuf_free(segbuf);
1556 }
1557}
1558
1559
1560static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1561 struct the_nilfs *nilfs, int mode)
1562{
1563 struct nilfs_cstage prev_stage = sci->sc_stage;
1564 int err, nadd = 1;
1565
1566 /* Collection retry loop */
1567 for (;;) {
1568 sci->sc_super_root = NULL;
1569 sci->sc_nblk_this_inc = 0;
1570 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1571
1572 err = nilfs_segctor_reset_segment_buffer(sci);
1573 if (unlikely(err))
1574 goto failed;
1575
1576 err = nilfs_segctor_collect_blocks(sci, mode);
1577 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
1578 if (!err)
1579 break;
1580
1581 if (unlikely(err != -E2BIG))
1582 goto failed;
1583
1584 /* The current segment is filled up */
1585 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
1586 break;
1587
1588 nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
1589 nilfs_segctor_clear_segment_buffers(sci);
1590
1591 err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
1592 if (unlikely(err))
1593 return err;
1594
1595 nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
1596 sci->sc_stage = prev_stage;
1597 }
1598 nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
1599 return 0;
1600
1601 failed:
1602 return err;
1603}
1604
1605static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
1606 struct buffer_head *new_bh)
1607{
1608 BUG_ON(!list_empty(&new_bh->b_assoc_buffers));
1609
1610 list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
1611 /* The caller must release old_bh */
1612}
1613
1614static int
1615nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1616 struct nilfs_segment_buffer *segbuf,
1617 int mode)
1618{
1619 struct inode *inode = NULL;
1620 sector_t blocknr;
1621 unsigned long nfinfo = segbuf->sb_sum.nfinfo;
1622 unsigned long nblocks = 0, ndatablk = 0;
1623 struct nilfs_sc_operations *sc_op = NULL;
1624 struct nilfs_segsum_pointer ssp;
1625 struct nilfs_finfo *finfo = NULL;
1626 union nilfs_binfo binfo;
1627 struct buffer_head *bh, *bh_org;
1628 ino_t ino = 0;
1629 int err = 0;
1630
1631 if (!nfinfo)
1632 goto out;
1633
1634 blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
1635 ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
1636 ssp.offset = sizeof(struct nilfs_segment_summary);
1637
1638 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1639 if (bh == sci->sc_super_root)
1640 break;
1641 if (!finfo) {
1642 finfo = nilfs_segctor_map_segsum_entry(
1643 sci, &ssp, sizeof(*finfo));
1644 ino = le64_to_cpu(finfo->fi_ino);
1645 nblocks = le32_to_cpu(finfo->fi_nblocks);
1646 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1647
1648 if (buffer_nilfs_node(bh))
1649 inode = NILFS_BTNC_I(bh->b_page->mapping);
1650 else
1651 inode = NILFS_AS_I(bh->b_page->mapping);
1652
1653 if (mode == SC_LSEG_DSYNC)
1654 sc_op = &nilfs_sc_dsync_ops;
1655 else if (ino == NILFS_DAT_INO)
1656 sc_op = &nilfs_sc_dat_ops;
1657 else /* file blocks */
1658 sc_op = &nilfs_sc_file_ops;
1659 }
1660 bh_org = bh;
1661 get_bh(bh_org);
1662 err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
1663 &binfo);
1664 if (bh != bh_org)
1665 nilfs_list_replace_buffer(bh_org, bh);
1666 brelse(bh_org);
1667 if (unlikely(err))
1668 goto failed_bmap;
1669
1670 if (ndatablk > 0)
1671 sc_op->write_data_binfo(sci, &ssp, &binfo);
1672 else
1673 sc_op->write_node_binfo(sci, &ssp, &binfo);
1674
1675 blocknr++;
1676 if (--nblocks == 0) {
1677 finfo = NULL;
1678 if (--nfinfo == 0)
1679 break;
1680 } else if (ndatablk > 0)
1681 ndatablk--;
1682 }
1683 out:
1684 return 0;
1685
1686 failed_bmap:
1687 err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
1688 return err;
1689}
1690
1691static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1692{
1693 struct nilfs_segment_buffer *segbuf;
1694 int err;
1695
1696 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1697 err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
1698 if (unlikely(err))
1699 return err;
1700 nilfs_segbuf_fill_in_segsum(segbuf);
1701 }
1702 return 0;
1703}
1704
1705static int
1706nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1707{
1708 struct page *clone_page;
1709 struct buffer_head *bh, *head, *bh2;
1710 void *kaddr;
1711
1712 bh = head = page_buffers(page);
1713
1714 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1715 if (unlikely(!clone_page))
1716 return -ENOMEM;
1717
1718 bh2 = page_buffers(clone_page);
1719 kaddr = kmap_atomic(page, KM_USER0);
1720 do {
1721 if (list_empty(&bh->b_assoc_buffers))
1722 continue;
1723 get_bh(bh2);
1724 page_cache_get(clone_page); /* for each bh */
1725 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1726 bh2->b_blocknr = bh->b_blocknr;
1727 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1728 list_add_tail(&bh->b_assoc_buffers, out);
1729 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1730 kunmap_atomic(kaddr, KM_USER0);
1731
1732 if (!TestSetPageWriteback(clone_page))
1733 inc_zone_page_state(clone_page, NR_WRITEBACK);
1734 unlock_page(clone_page);
1735
1736 return 0;
1737}
1738
1739static int nilfs_test_page_to_be_frozen(struct page *page)
1740{
1741 struct address_space *mapping = page->mapping;
1742
1743 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1744 return 0;
1745
1746 if (page_mapped(page)) {
1747 ClearPageChecked(page);
1748 return 1;
1749 }
1750 return PageChecked(page);
1751}
1752
/*
 * nilfs_begin_page_io - put a file page under writeback
 * @page: page to start I/O on; may be NULL
 * @out: list receiving original buffers if the page has to be frozen
 *
 * Nothing is done for a NULL page or one already under writeback; split
 * b-tree node pages can reach this function more than once, and the
 * writeback test filters out the repeated calls.  Otherwise the dirty state
 * is cleared for I/O and writeback is set under the page lock, and the
 * page's buffers are replaced by copies if
 * nilfs_test_page_to_be_frozen() says so.
 *
 * Returns 0 on success or the error from nilfs_copy_replace_page_buffers().
 */
static int nilfs_begin_page_io(struct page *page, struct list_head *out)
{
	if (!page || PageWriteback(page))
		return 0;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);

	if (!nilfs_test_page_to_be_frozen(page))
		return 0;

	return nilfs_copy_replace_page_buffers(page, out);
}
1772
1773static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1774 struct page **failed_page)
1775{
1776 struct nilfs_segment_buffer *segbuf;
1777 struct page *bd_page = NULL, *fs_page = NULL;
1778 struct list_head *list = &sci->sc_copied_buffers;
1779 int err;
1780
1781 *failed_page = NULL;
1782 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1783 struct buffer_head *bh;
1784
1785 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1786 b_assoc_buffers) {
1787 if (bh->b_page != bd_page) {
1788 if (bd_page) {
1789 lock_page(bd_page);
1790 clear_page_dirty_for_io(bd_page);
1791 set_page_writeback(bd_page);
1792 unlock_page(bd_page);
1793 }
1794 bd_page = bh->b_page;
1795 }
1796 }
1797
1798 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1799 b_assoc_buffers) {
1800 if (bh == sci->sc_super_root) {
1801 if (bh->b_page != bd_page) {
1802 lock_page(bd_page);
1803 clear_page_dirty_for_io(bd_page);
1804 set_page_writeback(bd_page);
1805 unlock_page(bd_page);
1806 bd_page = bh->b_page;
1807 }
1808 break;
1809 }
1810 if (bh->b_page != fs_page) {
1811 err = nilfs_begin_page_io(fs_page, list);
1812 if (unlikely(err)) {
1813 *failed_page = fs_page;
1814 goto out;
1815 }
1816 fs_page = bh->b_page;
1817 }
1818 }
1819 }
1820 if (bd_page) {
1821 lock_page(bd_page);
1822 clear_page_dirty_for_io(bd_page);
1823 set_page_writeback(bd_page);
1824 unlock_page(bd_page);
1825 }
1826 err = nilfs_begin_page_io(fs_page, list);
1827 if (unlikely(err))
1828 *failed_page = fs_page;
1829 out:
1830 return err;
1831}
1832
1833static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1834 struct backing_dev_info *bdi)
1835{
1836 struct nilfs_segment_buffer *segbuf;
1837 struct nilfs_write_info wi;
1838 int err, res;
1839
1840 wi.sb = sci->sc_super;
1841 wi.bh_sr = sci->sc_super_root;
1842 wi.bdi = bdi;
1843
1844 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1845 nilfs_segbuf_prepare_write(segbuf, &wi);
1846 err = nilfs_segbuf_write(segbuf, &wi);
1847
1848 res = nilfs_segbuf_wait(segbuf, &wi);
1849 err = unlikely(err) ? : res;
1850 if (unlikely(err))
1851 return err;
1852 }
1853 return 0;
1854}
1855
1856static int nilfs_page_has_uncleared_buffer(struct page *page)
1857{
1858 struct buffer_head *head, *bh;
1859
1860 head = bh = page_buffers(page);
1861 do {
1862 if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers))
1863 return 1;
1864 bh = bh->b_this_page;
1865 } while (bh != head);
1866 return 0;
1867}
1868
1869static void __nilfs_end_page_io(struct page *page, int err)
1870{
1871 if (!err) {
1872 if (!nilfs_page_buffers_clean(page))
1873 __set_page_dirty_nobuffers(page);
1874 ClearPageError(page);
1875 } else {
1876 __set_page_dirty_nobuffers(page);
1877 SetPageError(page);
1878 }
1879
1880 if (buffer_nilfs_allocated(page_buffers(page))) {
1881 if (TestClearPageWriteback(page))
1882 dec_zone_page_state(page, NR_WRITEBACK);
1883 } else
1884 end_page_writeback(page);
1885}
1886
/*
 * nilfs_end_page_io - finish writeback of a file page, if it is complete
 * @page: page whose I/O has ended; may be NULL
 * @err: nonzero if the write failed
 *
 * B-tree node pages may be split within a segment, so this function can be
 * called several times for the same page.  Such a page is only finished once
 * none of its buffers is still dirty and linked, i.e. after cleanup has been
 * done for all of its buffers.
 */
static void nilfs_end_page_io(struct page *page, int err)
{
	int pending;

	if (!page)
		return;

	pending = buffer_nilfs_node(page_buffers(page)) &&
		nilfs_page_has_uncleared_buffer(page);
	if (!pending)
		__nilfs_end_page_io(page, err);
}
1902
1903static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1904{
1905 struct buffer_head *bh, *head;
1906 struct page *page;
1907
1908 while (!list_empty(list)) {
1909 bh = list_entry(list->next, struct buffer_head,
1910 b_assoc_buffers);
1911 page = bh->b_page;
1912 page_cache_get(page);
1913 head = bh = page_buffers(page);
1914 do {
1915 if (!list_empty(&bh->b_assoc_buffers)) {
1916 list_del_init(&bh->b_assoc_buffers);
1917 if (!err) {
1918 set_buffer_uptodate(bh);
1919 clear_buffer_dirty(bh);
1920 clear_buffer_nilfs_volatile(bh);
1921 }
1922 brelse(bh); /* for b_assoc_buffers */
1923 }
1924 } while ((bh = bh->b_this_page) != head);
1925
1926 __nilfs_end_page_io(page, err);
1927 page_cache_release(page);
1928 }
1929}
1930
1931static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1932 struct page *failed_page, int err)
1933{
1934 struct nilfs_segment_buffer *segbuf;
1935 struct page *bd_page = NULL, *fs_page = NULL;
1936
1937 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1938 struct buffer_head *bh;
1939
1940 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1941 b_assoc_buffers) {
1942 if (bh->b_page != bd_page) {
1943 if (bd_page)
1944 end_page_writeback(bd_page);
1945 bd_page = bh->b_page;
1946 }
1947 }
1948
1949 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1950 b_assoc_buffers) {
1951 if (bh == sci->sc_super_root) {
1952 if (bh->b_page != bd_page) {
1953 end_page_writeback(bd_page);
1954 bd_page = bh->b_page;
1955 }
1956 break;
1957 }
1958 if (bh->b_page != fs_page) {
1959 nilfs_end_page_io(fs_page, err);
1960 if (unlikely(fs_page == failed_page))
1961 goto done;
1962 fs_page = bh->b_page;
1963 }
1964 }
1965 }
1966 if (bd_page)
1967 end_page_writeback(bd_page);
1968
1969 nilfs_end_page_io(fs_page, err);
1970 done:
1971 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1972}
1973
1974static void nilfs_set_next_segment(struct the_nilfs *nilfs,
1975 struct nilfs_segment_buffer *segbuf)
1976{
1977 nilfs->ns_segnum = segbuf->sb_segnum;
1978 nilfs->ns_nextnum = segbuf->sb_nextnum;
1979 nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
1980 + segbuf->sb_sum.nblocks;
1981 nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
1982 nilfs->ns_ctime = segbuf->sb_sum.ctime;
1983}
1984
1985static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1986{
1987 struct nilfs_segment_buffer *segbuf;
1988 struct page *bd_page = NULL, *fs_page = NULL;
1989 struct nilfs_sb_info *sbi = sci->sc_sbi;
1990 struct the_nilfs *nilfs = sbi->s_nilfs;
1991 int update_sr = (sci->sc_super_root != NULL);
1992
1993 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1994 struct buffer_head *bh;
1995
1996 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1997 b_assoc_buffers) {
1998 set_buffer_uptodate(bh);
1999 clear_buffer_dirty(bh);
2000 if (bh->b_page != bd_page) {
2001 if (bd_page)
2002 end_page_writeback(bd_page);
2003 bd_page = bh->b_page;
2004 }
2005 }
2006 /*
2007 * We assume that the buffers which belong to the same page
2008 * continue over the buffer list.
2009 * Under this assumption, the last BHs of pages is
2010 * identifiable by the discontinuity of bh->b_page
2011 * (page != fs_page).
2012 *
2013 * For B-tree node blocks, however, this assumption is not
2014 * guaranteed. The cleanup code of B-tree node pages needs
2015 * special care.
2016 */
2017 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
2018 b_assoc_buffers) {
2019 set_buffer_uptodate(bh);
2020 clear_buffer_dirty(bh);
2021 clear_buffer_nilfs_volatile(bh);
2022 if (bh == sci->sc_super_root) {
2023 if (bh->b_page != bd_page) {
2024 end_page_writeback(bd_page);
2025 bd_page = bh->b_page;
2026 }
2027 break;
2028 }
2029 if (bh->b_page != fs_page) {
2030 nilfs_end_page_io(fs_page, 0);
2031 fs_page = bh->b_page;
2032 }
2033 }
2034
2035 if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) {
2036 if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) {
2037 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
2038 sci->sc_lseg_stime = jiffies;
2039 }
2040 if (NILFS_SEG_LOGEND(&segbuf->sb_sum))
2041 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
2042 }
2043 }
2044 /*
2045 * Since pages may continue over multiple segment buffers,
2046 * end of the last page must be checked outside of the loop.
2047 */
2048 if (bd_page)
2049 end_page_writeback(bd_page);
2050
2051 nilfs_end_page_io(fs_page, 0);
2052
2053 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
2054
2055 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
2056
2057 if (nilfs_doing_gc()) {
2058 nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
2059 if (update_sr)
2060 nilfs_commit_gcdat_inode(nilfs);
2061 } else
2062 nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
2063
2064 sci->sc_nblk_inc += sci->sc_nblk_this_inc;
2065
2066 segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
2067 nilfs_set_next_segment(nilfs, segbuf);
2068
2069 if (update_sr) {
2070 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
2071 segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
2072 sbi->s_super->s_dirt = 1;
2073
2074 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
2075 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2076 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2077 } else
2078 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2079}
2080
2081static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
2082 struct nilfs_sb_info *sbi)
2083{
2084 struct nilfs_inode_info *ii, *n;
2085 __u64 cno = sbi->s_nilfs->ns_cno;
2086
2087 spin_lock(&sbi->s_inode_lock);
2088 retry:
2089 list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) {
2090 if (!ii->i_bh) {
2091 struct buffer_head *ibh;
2092 int err;
2093
2094 spin_unlock(&sbi->s_inode_lock);
2095 err = nilfs_ifile_get_inode_block(
2096 sbi->s_ifile, ii->vfs_inode.i_ino, &ibh);
2097 if (unlikely(err)) {
2098 nilfs_warning(sbi->s_super, __func__,
2099 "failed to get inode block.\n");
2100 return err;
2101 }
2102 nilfs_mdt_mark_buffer_dirty(ibh);
2103 nilfs_mdt_mark_dirty(sbi->s_ifile);
2104 spin_lock(&sbi->s_inode_lock);
2105 if (likely(!ii->i_bh))
2106 ii->i_bh = ibh;
2107 else
2108 brelse(ibh);
2109 goto retry;
2110 }
2111 ii->i_cno = cno;
2112
2113 clear_bit(NILFS_I_QUEUED, &ii->i_state);
2114 set_bit(NILFS_I_BUSY, &ii->i_state);
2115 list_del(&ii->i_dirty);
2116 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
2117 }
2118 spin_unlock(&sbi->s_inode_lock);
2119
2120 NILFS_I(sbi->s_ifile)->i_cno = cno;
2121
2122 return 0;
2123}
2124
2125static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
2126 struct nilfs_sb_info *sbi)
2127{
2128 struct nilfs_transaction_info *ti = current->journal_info;
2129 struct nilfs_inode_info *ii, *n;
2130 __u64 cno = sbi->s_nilfs->ns_cno;
2131
2132 spin_lock(&sbi->s_inode_lock);
2133 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
2134 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
2135 test_bit(NILFS_I_DIRTY, &ii->i_state)) {
2136 /* The current checkpoint number (=nilfs->ns_cno) is
2137 changed between check-in and check-out only if the
2138 super root is written out. So, we can update i_cno
2139 for the inodes that remain in the dirty list. */
2140 ii->i_cno = cno;
2141 continue;
2142 }
2143 clear_bit(NILFS_I_BUSY, &ii->i_state);
2144 brelse(ii->i_bh);
2145 ii->i_bh = NULL;
2146 list_del(&ii->i_dirty);
2147 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2148 }
2149 spin_unlock(&sbi->s_inode_lock);
2150}
2151
2152/*
2153 * Main procedure of segment constructor
 *
 * @sci: segment constructor state
 * @mode: construction mode, passed on to the collect and assign steps
 *
 * Checks in the dirty files, then repeats the sequence
 * begin -> collect -> assign -> fill in -> write -> end until the collection
 * stage reaches NILFS_ST_DONE.  Segment buffers are destroyed and files are
 * checked out on every exit path, successful or not.
 *
 * Returns 0 on success or the first negative error code reported by one of
 * the steps.
2154 */
2155static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2156{
2157 struct nilfs_sb_info *sbi = sci->sc_sbi;
2158 struct the_nilfs *nilfs = sbi->s_nilfs;
2159 struct page *failed_page;
2160 int err, has_sr = 0;
2161
2162 sci->sc_stage.scnt = NILFS_ST_INIT;
2163
2164 err = nilfs_segctor_check_in_files(sci, sbi);
2165 if (unlikely(err))
2166 goto out;
2167
2168 if (nilfs_test_metadata_dirty(sbi))
2169 set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2170
2171 if (nilfs_segctor_clean(sci))
2172 goto out;
2173
2174 do {
2175 sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;
2176
2177 err = nilfs_segctor_begin_construction(sci, nilfs);
2178 if (unlikely(err))
2179 goto out;
2180
2181 /* Update time stamp */
2182 sci->sc_seg_ctime = get_seconds();
2183
2184 err = nilfs_segctor_collect(sci, nilfs, mode);
2185 if (unlikely(err))
2186 goto failed;
2187
/* Remember whether this pass produced a super root buffer. */
2188 has_sr = (sci->sc_super_root != NULL);
2189
2190 /* Avoid empty segment */
2191 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2192 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
2193 nilfs_segctor_end_construction(sci, nilfs, 1);
2194 goto out;
2195 }
2196
2197 err = nilfs_segctor_assign(sci, mode);
2198 if (unlikely(err))
2199 goto failed;
2200
2201 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2202 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile);
2203
2204 if (has_sr) {
2205 err = nilfs_segctor_fill_in_checkpoint(sci);
2206 if (unlikely(err))
2207 goto failed_to_make_up;
2208
2209 nilfs_segctor_fill_in_super_root(sci, nilfs);
2210 }
2211 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2212
2213 /* Write partial segments */
2214 err = nilfs_segctor_prepare_write(sci, &failed_page);
2215 if (unlikely(err))
2216 goto failed_to_write;
2217
2218 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed);
2219
2220 err = nilfs_segctor_write(sci, nilfs->ns_bdi);
2221 if (unlikely(err))
2222 goto failed_to_write;
2223
2224 nilfs_segctor_complete_write(sci);
2225
2226 /* Commit segments */
2227 if (has_sr) {
2228 nilfs_segctor_commit_free_segments(sci);
2229 nilfs_segctor_clear_metadata_dirty(sci);
2230 }
2231
2232 nilfs_segctor_end_construction(sci, nilfs, 0);
2233
2234 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2235
/* Common exit: also reached from the error labels below via "goto out". */
2236 out:
2237 nilfs_segctor_destroy_segment_buffers(sci);
2238 nilfs_segctor_check_out_files(sci, sbi);
2239 return err;
2240
/*
 * Error unwinding.  The labels fall through into each other, so a later
 * failure undoes everything an earlier failure would.
 */
2241 failed_to_write:
2242 nilfs_segctor_abort_write(sci, failed_page, err);
2243 nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile);
2244
2245 failed_to_make_up:
2246 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2247 nilfs_redirty_inodes(&sci->sc_dirty_files);
2248
2249 failed:
2250 if (nilfs_doing_gc())
2251 nilfs_redirty_inodes(&sci->sc_gc_inodes);
2252 nilfs_segctor_end_construction(sci, nilfs, err);
2253 goto out;
2254}
2255
2256/**
2257 * nilfs_secgtor_start_timer - set timer of background write
2258 * @sci: nilfs_sc_info
2259 *
2260 * If the timer has already been set, it ignores the new request.
2261 * This function MUST be called within a section locking the segment
2262 * semaphore.
2263 */
2264static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2265{
2266 spin_lock(&sci->sc_state_lock);
2267 if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2268 sci->sc_timer->expires = jiffies + sci->sc_interval;
2269 add_timer(sci->sc_timer);
2270 sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2271 }
2272 spin_unlock(&sci->sc_state_lock);
2273}
2274
2275static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2276{
2277 spin_lock(&sci->sc_state_lock);
2278 if (!(sci->sc_flush_request & (1 << bn))) {
2279 unsigned long prev_req = sci->sc_flush_request;
2280
2281 sci->sc_flush_request |= (1 << bn);
2282 if (!prev_req)
2283 wake_up(&sci->sc_wait_daemon);
2284 }
2285 spin_unlock(&sci->sc_state_lock);
2286}
2287
2288/**
2289 * nilfs_flush_segment - trigger a segment construction for resource control
2290 * @sb: super block
2291 * @ino: inode number of the file to be flushed out.
2292 */
2293void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2294{
2295 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2296 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2297
2298 if (!sci || nilfs_doing_construction())
2299 return;
2300 nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2301 /* assign bit 0 to data files */
2302}
2303
2304int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
2305 __u64 *segnum, size_t nsegs)
2306{
2307 struct nilfs_segment_entry *ent;
2308 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
2309 struct inode *sufile = nilfs->ns_sufile;
2310 LIST_HEAD(list);
2311 __u64 *pnum;
2312 size_t i;
2313 int err;
2314
2315 for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) {
2316 ent = nilfs_alloc_segment_entry(*pnum);
2317 if (unlikely(!ent)) {
2318 err = -ENOMEM;
2319 goto failed;
2320 }
2321 list_add_tail(&ent->list, &list);
2322
2323 err = nilfs_open_segment_entry(ent, sufile);
2324 if (unlikely(err))
2325 goto failed;
2326
2327 if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su)))
2328 printk(KERN_WARNING "NILFS: unused segment is "
2329 "requested to be cleaned (segnum=%llu)\n",
2330 (unsigned long long)ent->segnum);
2331 nilfs_close_segment_entry(ent, sufile);
2332 }
2333 list_splice(&list, sci->sc_cleaning_segments.prev);
2334 return 0;
2335
2336 failed:
2337 nilfs_dispose_segment_list(&list);
2338 return err;
2339}
2340
2341void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
2342{
2343 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
2344}
2345
/*
 * Per-caller record used by nilfs_segctor_sync() to wait for a construction
 * and by nilfs_segctor_wakeup() to report its completion.
 */
2346struct nilfs_segctor_wait_request {
2347 wait_queue_t wq; /* entry queued on sci->sc_wait_request */
2348 __u32 seq; /* value of sc_seq_request assigned to this request */
2349 int err; /* result stored by nilfs_segctor_wakeup() */
2350 atomic_t done; /* set to 1 once the request has been served */
2351};
2352
2353static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2354{
2355 struct nilfs_segctor_wait_request wait_req;
2356 int err = 0;
2357
2358 spin_lock(&sci->sc_state_lock);
2359 init_wait(&wait_req.wq);
2360 wait_req.err = 0;
2361 atomic_set(&wait_req.done, 0);
2362 wait_req.seq = ++sci->sc_seq_request;
2363 spin_unlock(&sci->sc_state_lock);
2364
2365 init_waitqueue_entry(&wait_req.wq, current);
2366 add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2367 set_current_state(TASK_INTERRUPTIBLE);
2368 wake_up(&sci->sc_wait_daemon);
2369
2370 for (;;) {
2371 if (atomic_read(&wait_req.done)) {
2372 err = wait_req.err;
2373 break;
2374 }
2375 if (!signal_pending(current)) {
2376 schedule();
2377 continue;
2378 }
2379 err = -ERESTARTSYS;
2380 break;
2381 }
2382 finish_wait(&sci->sc_wait_request, &wait_req.wq);
2383 return err;
2384}
2385
2386static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2387{
2388 struct nilfs_segctor_wait_request *wrq, *n;
2389 unsigned long flags;
2390
2391 spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2392 list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
2393 wq.task_list) {
2394 if (!atomic_read(&wrq->done) &&
2395 nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2396 wrq->err = err;
2397 atomic_set(&wrq->done, 1);
2398 }
2399 if (atomic_read(&wrq->done)) {
2400 wrq->wq.func(&wrq->wq,
2401 TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2402 0, NULL);
2403 }
2404 }
2405 spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2406}
2407
2408/**
2409 * nilfs_construct_segment - construct a logical segment
2410 * @sb: super block
2411 *
2412 * Return Value: On success, 0 is retured. On errors, one of the following
2413 * negative error code is returned.
2414 *
2415 * %-EROFS - Read only filesystem.
2416 *
2417 * %-EIO - I/O error
2418 *
2419 * %-ENOSPC - No space left on device (only in a panic state).
2420 *
2421 * %-ERESTARTSYS - Interrupted.
2422 *
2423 * %-ENOMEM - Insufficient memory available.
2424 */
2425int nilfs_construct_segment(struct super_block *sb)
2426{
2427 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2428 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2429 struct nilfs_transaction_info *ti;
2430 int err;
2431
2432 if (!sci)
2433 return -EROFS;
2434
2435 /* A call inside transactions causes a deadlock. */
2436 BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2437
2438 err = nilfs_segctor_sync(sci);
2439 return err;
2440}
2441
2442/**
2443 * nilfs_construct_dsync_segment - construct a data-only logical segment
2444 * @sb: super block
2445 * @inode: inode whose data blocks should be written out
2446 * @start: start byte offset
2447 * @end: end byte offset (inclusive)
2448 *
2449 * Return Value: On success, 0 is retured. On errors, one of the following
2450 * negative error code is returned.
2451 *
2452 * %-EROFS - Read only filesystem.
2453 *
2454 * %-EIO - I/O error
2455 *
2456 * %-ENOSPC - No space left on device (only in a panic state).
2457 *
2458 * %-ERESTARTSYS - Interrupted.
2459 *
2460 * %-ENOMEM - Insufficient memory available.
2461 */
2462int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2463 loff_t start, loff_t end)
2464{
2465 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2466 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2467 struct nilfs_inode_info *ii;
2468 struct nilfs_transaction_info ti;
2469 int err = 0;
2470
2471 if (!sci)
2472 return -EROFS;
2473
2474 nilfs_transaction_lock(sbi, &ti, 0);
2475
2476 ii = NILFS_I(inode);
2477 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2478 nilfs_test_opt(sbi, STRICT_ORDER) ||
2479 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2480 nilfs_discontinued(sbi->s_nilfs)) {
2481 nilfs_transaction_unlock(sbi);
2482 err = nilfs_segctor_sync(sci);
2483 return err;
2484 }
2485
2486 spin_lock(&sbi->s_inode_lock);
2487 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2488 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2489 spin_unlock(&sbi->s_inode_lock);
2490 nilfs_transaction_unlock(sbi);
2491 return 0;
2492 }
2493 spin_unlock(&sbi->s_inode_lock);
2494 sci->sc_dsync_inode = ii;
2495 sci->sc_dsync_start = start;
2496 sci->sc_dsync_end = end;
2497
2498 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2499
2500 nilfs_transaction_unlock(sbi);
2501 return err;
2502}
2503
/*
 * Parameters and results of one construction attempt, passed through
 * nilfs_segctor_accept(), nilfs_segctor_construct() and
 * nilfs_segctor_notify().
 */
2504struct nilfs_segctor_req {
2505 int mode; /* construction mode; may be forced to SC_LSEG_SR */
2506 __u32 seq_accepted; /* sc_seq_request sampled at accept time */
2507 int sc_err; /* construction failure */
2508 int sb_err; /* super block writeback failure */
2509};
2510
/* Bits of sci->sc_flush_request that are tested and cleared by name. */
2511#define FLUSH_FILE_BIT (0x1) /* data file only */
2512#define FLUSH_DAT_BIT (1 << NILFS_DAT_INO) /* DAT only */
2513
2514static void nilfs_segctor_accept(struct nilfs_sc_info *sci,
2515 struct nilfs_segctor_req *req)
2516{
2517 req->sc_err = req->sb_err = 0;
2518 spin_lock(&sci->sc_state_lock);
2519 req->seq_accepted = sci->sc_seq_request;
2520 spin_unlock(&sci->sc_state_lock);
2521
2522 if (sci->sc_timer)
2523 del_timer_sync(sci->sc_timer);
2524}
2525
2526static void nilfs_segctor_notify(struct nilfs_sc_info *sci,
2527 struct nilfs_segctor_req *req)
2528{
2529 /* Clear requests (even when the construction failed) */
2530 spin_lock(&sci->sc_state_lock);
2531
2532 sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2533
2534 if (req->mode == SC_LSEG_SR) {
2535 sci->sc_seq_done = req->seq_accepted;
2536 nilfs_segctor_wakeup(sci, req->sc_err ? : req->sb_err);
2537 sci->sc_flush_request = 0;
2538 } else if (req->mode == SC_FLUSH_FILE)
2539 sci->sc_flush_request &= ~FLUSH_FILE_BIT;
2540 else if (req->mode == SC_FLUSH_DAT)
2541 sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2542
2543 spin_unlock(&sci->sc_state_lock);
2544}
2545
2546static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
2547 struct nilfs_segctor_req *req)
2548{
2549 struct nilfs_sb_info *sbi = sci->sc_sbi;
2550 struct the_nilfs *nilfs = sbi->s_nilfs;
2551 int err = 0;
2552
2553 if (nilfs_discontinued(nilfs))
2554 req->mode = SC_LSEG_SR;
2555 if (!nilfs_segctor_confirm(sci)) {
2556 err = nilfs_segctor_do_construct(sci, req->mode);
2557 req->sc_err = err;
2558 }
2559 if (likely(!err)) {
2560 if (req->mode != SC_FLUSH_DAT)
2561 atomic_set(&nilfs->ns_ndirtyblks, 0);
2562 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2563 nilfs_discontinued(nilfs)) {
2564 down_write(&nilfs->ns_sem);
2565 req->sb_err = nilfs_commit_super(sbi, 0);
2566 up_write(&nilfs->ns_sem);
2567 }
2568 }
2569 return err;
2570}
2571
/*
 * nilfs_construction_timeout - timer callback of the constructor daemon
 * @data: task_struct pointer of the task to wake, cast to unsigned long
 */
static void nilfs_construction_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2577
2578static void
2579nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2580{
2581 struct nilfs_inode_info *ii, *n;
2582
2583 list_for_each_entry_safe(ii, n, head, i_dirty) {
2584 if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2585 continue;
2586 hlist_del_init(&ii->vfs_inode.i_hash);
2587 list_del_init(&ii->i_dirty);
2588 nilfs_clear_gcinode(&ii->vfs_inode);
2589 }
2590}
2591
2592int nilfs_clean_segments(struct super_block *sb, void __user *argp)
2593{
2594 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2595 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2596 struct the_nilfs *nilfs = sbi->s_nilfs;
2597 struct nilfs_transaction_info ti;
2598 struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
2599 int err;
2600
2601 if (unlikely(!sci))
2602 return -EROFS;
2603
2604 nilfs_transaction_lock(sbi, &ti, 1);
2605
2606 err = nilfs_init_gcdat_inode(nilfs);
2607 if (unlikely(err))
2608 goto out_unlock;
2609 err = nilfs_ioctl_prepare_clean_segments(nilfs, argp);
2610 if (unlikely(err))
2611 goto out_unlock;
2612
2613 list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);
2614
2615 for (;;) {
2616 nilfs_segctor_accept(sci, &req);
2617 err = nilfs_segctor_construct(sci, &req);
2618 nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
2619 nilfs_segctor_notify(sci, &req);
2620
2621 if (likely(!err))
2622 break;
2623
2624 nilfs_warning(sb, __func__,
2625 "segment construction failed. (err=%d)", err);
2626 set_current_state(TASK_INTERRUPTIBLE);
2627 schedule_timeout(sci->sc_interval);
2628 }
2629
2630 out_unlock:
2631 nilfs_clear_gcdat_inode(nilfs);
2632 nilfs_transaction_unlock(sbi);
2633 return err;
2634}
2635
2636static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2637{
2638 struct nilfs_sb_info *sbi = sci->sc_sbi;
2639 struct nilfs_transaction_info ti;
2640 struct nilfs_segctor_req req = { .mode = mode };
2641
2642 nilfs_transaction_lock(sbi, &ti, 0);
2643
2644 nilfs_segctor_accept(sci, &req);
2645 nilfs_segctor_construct(sci, &req);
2646 nilfs_segctor_notify(sci, &req);
2647
2648 /*
2649 * Unclosed segment should be retried. We do this using sc_timer.
2650 * Timeout of sc_timer will invoke complete construction which leads
2651 * to close the current logical segment.
2652 */
2653 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2654 nilfs_segctor_start_timer(sci);
2655
2656 nilfs_transaction_unlock(sbi);
2657}
2658
2659static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2660{
2661 int mode = 0;
2662 int err;
2663
2664 spin_lock(&sci->sc_state_lock);
2665 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
2666 SC_FLUSH_DAT : SC_FLUSH_FILE;
2667 spin_unlock(&sci->sc_state_lock);
2668
2669 if (mode) {
2670 err = nilfs_segctor_do_construct(sci, mode);
2671
2672 spin_lock(&sci->sc_state_lock);
2673 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
2674 ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
2675 spin_unlock(&sci->sc_state_lock);
2676 }
2677 clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
2678}
2679
2680static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2681{
2682 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2683 time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
2684 if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
2685 return SC_FLUSH_FILE;
2686 else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
2687 return SC_FLUSH_DAT;
2688 }
2689 return SC_LSEG_SR;
2690}
2691
2692/**
2693 * nilfs_segctor_thread - main loop of the segment constructor thread.
2694 * @arg: pointer to a struct nilfs_sc_info.
2695 *
2696 * nilfs_segctor_thread() initializes a timer and serves as a daemon
2697 * to execute segment constructions.
 *
 * The timer lives on this thread's stack and is published through
 * sci->sc_timer; it is deleted and the pointer is reset before the function
 * returns.  sc_state_lock is held throughout the loop except while a
 * construction runs, while the thread is in the refrigerator and while it
 * sleeps in schedule().
 *
 * Always returns 0, once NILFS_SEGCTOR_QUIT has been observed in sc_state.
2698 */
2699static int nilfs_segctor_thread(void *arg)
2700{
2701 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2702 struct timer_list timer;
2703 int timeout = 0;
2704
2705 init_timer(&timer);
2706 timer.data = (unsigned long)current;
2707 timer.function = nilfs_construction_timeout;
2708 sci->sc_timer = &timer;
2709
2710 /* start sync. */
2711 sci->sc_task = current;
2712 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
2713 printk(KERN_INFO
2714 "segctord starting. Construction interval = %lu seconds, "
2715 "CP frequency < %lu seconds\n",
2716 sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
2717
2718 spin_lock(&sci->sc_state_lock);
2719 loop:
/* Serve requests back to back until none is pending. */
2720 for (;;) {
2721 int mode;
2722
2723 if (sci->sc_state & NILFS_SEGCTOR_QUIT)
2724 goto end_thread;
2725
/* A timer expiry or an outstanding sync request forces SC_LSEG_SR. */
2726 if (timeout || sci->sc_seq_request != sci->sc_seq_done)
2727 mode = SC_LSEG_SR;
2728 else if (!sci->sc_flush_request)
2729 break;
2730 else
2731 mode = nilfs_segctor_flush_mode(sci);
2732
2733 spin_unlock(&sci->sc_state_lock);
2734 nilfs_segctor_thread_construct(sci, mode);
2735 spin_lock(&sci->sc_state_lock);
2736 timeout = 0;
2737 }
2738
2739
2740 if (freezing(current)) {
2741 spin_unlock(&sci->sc_state_lock);
2742 refrigerator();
2743 spin_lock(&sci->sc_state_lock);
2744 } else {
2745 DEFINE_WAIT(wait);
2746 int should_sleep = 1;
2747
2748 prepare_to_wait(&sci->sc_wait_daemon, &wait,
2749 TASK_INTERRUPTIBLE);
2750
/* Re-check for work after queueing on sc_wait_daemon, before sleeping. */
2751 if (sci->sc_seq_request != sci->sc_seq_done)
2752 should_sleep = 0;
2753 else if (sci->sc_flush_request)
2754 should_sleep = 0;
2755 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2756 should_sleep = time_before(jiffies,
2757 sci->sc_timer->expires);
2758
2759 if (should_sleep) {
2760 spin_unlock(&sci->sc_state_lock);
2761 schedule();
2762 spin_lock(&sci->sc_state_lock);
2763 }
2764 finish_wait(&sci->sc_wait_daemon, &wait);
/* The next pass does an SR construction if the armed timer has expired. */
2765 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2766 time_after_eq(jiffies, sci->sc_timer->expires));
2767 }
2768 goto loop;
2769
2770 end_thread:
2771 spin_unlock(&sci->sc_state_lock);
2772 del_timer_sync(sci->sc_timer);
2773 sci->sc_timer = NULL;
2774
2775 /* end sync. */
2776 sci->sc_task = NULL;
2777 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
2778 return 0;
2779}
2780
2781static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
2782{
2783 struct task_struct *t;
2784
2785 t = kthread_run(nilfs_segctor_thread, sci, "segctord");
2786 if (IS_ERR(t)) {
2787 int err = PTR_ERR(t);
2788
2789 printk(KERN_ERR "NILFS: error %d creating segctord thread\n",
2790 err);
2791 return err;
2792 }
2793 wait_event(sci->sc_wait_task, sci->sc_task != NULL);
2794 return 0;
2795}
2796
2797static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2798{
2799 sci->sc_state |= NILFS_SEGCTOR_QUIT;
2800
2801 while (sci->sc_task) {
2802 wake_up(&sci->sc_wait_daemon);
2803 spin_unlock(&sci->sc_state_lock);
2804 wait_event(sci->sc_wait_task, sci->sc_task == NULL);
2805 spin_lock(&sci->sc_state_lock);
2806 }
2807}
2808
2809static int nilfs_segctor_init(struct nilfs_sc_info *sci)
2810{
2811 sci->sc_seq_done = sci->sc_seq_request;
2812
2813 return nilfs_segctor_start_thread(sci);
2814}
2815
2816/*
2817 * Setup & clean-up functions
2818 */
2819static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
2820{
2821 struct nilfs_sc_info *sci;
2822
2823 sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2824 if (!sci)
2825 return NULL;
2826
2827 sci->sc_sbi = sbi;
2828 sci->sc_super = sbi->s_super;
2829
2830 init_waitqueue_head(&sci->sc_wait_request);
2831 init_waitqueue_head(&sci->sc_wait_daemon);
2832 init_waitqueue_head(&sci->sc_wait_task);
2833 spin_lock_init(&sci->sc_state_lock);
2834 INIT_LIST_HEAD(&sci->sc_dirty_files);
2835 INIT_LIST_HEAD(&sci->sc_segbufs);
2836 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2837 INIT_LIST_HEAD(&sci->sc_cleaning_segments);
2838 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2839
2840 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2841 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2842 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2843
2844 if (sbi->s_interval)
2845 sci->sc_interval = sbi->s_interval;
2846 if (sbi->s_watermark)
2847 sci->sc_watermark = sbi->s_watermark;
2848 return sci;
2849}
2850
2851static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2852{
2853 int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
2854
2855 /* The segctord thread was stopped and its timer was removed.
2856 But some tasks remain. */
2857 do {
2858 struct nilfs_sb_info *sbi = sci->sc_sbi;
2859 struct nilfs_transaction_info ti;
2860 struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
2861
2862 nilfs_transaction_lock(sbi, &ti, 0);
2863 nilfs_segctor_accept(sci, &req);
2864 ret = nilfs_segctor_construct(sci, &req);
2865 nilfs_segctor_notify(sci, &req);
2866 nilfs_transaction_unlock(sbi);
2867
2868 } while (ret && retrycount-- > 0);
2869}
2870
2871/**
2872 * nilfs_segctor_destroy - destroy the segment constructor.
2873 * @sci: nilfs_sc_info
2874 *
2875 * nilfs_segctor_destroy() kills the segctord thread and frees
2876 * the nilfs_sc_info struct.
2877 * Caller must hold the segment semaphore.
2878 */
2879static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2880{
2881 struct nilfs_sb_info *sbi = sci->sc_sbi;
2882 int flag;
2883
2884 up_write(&sbi->s_nilfs->ns_segctor_sem);
2885
2886 spin_lock(&sci->sc_state_lock);
2887 nilfs_segctor_kill_thread(sci);
2888 flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
2889 || sci->sc_seq_request != sci->sc_seq_done);
2890 spin_unlock(&sci->sc_state_lock);
2891
2892 if (flag || nilfs_segctor_confirm(sci))
2893 nilfs_segctor_write_out(sci);
2894
2895 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2896
2897 if (!list_empty(&sci->sc_dirty_files)) {
2898 nilfs_warning(sbi->s_super, __func__,
2899 "dirty file(s) after the final construction\n");
2900 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1);
2901 }
2902
2903 if (!list_empty(&sci->sc_cleaning_segments))
2904 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
2905
2906 WARN_ON(!list_empty(&sci->sc_segbufs));
2907
2908 down_write(&sbi->s_nilfs->ns_segctor_sem);
2909
2910 kfree(sci);
2911}
2912
2913/**
2914 * nilfs_attach_segment_constructor - attach a segment constructor
2915 * @sbi: nilfs_sb_info
2916 *
2917 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info,
2918 * initilizes it, and starts the segment constructor.
2919 *
2920 * Return Value: On success, 0 is returned. On error, one of the following
2921 * negative error code is returned.
2922 *
2923 * %-ENOMEM - Insufficient memory available.
2924 */
2925int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi)
2926{
2927 struct the_nilfs *nilfs = sbi->s_nilfs;
2928 int err;
2929
2930 /* Each field of nilfs_segctor is cleared through the initialization
2931 of super-block info */
2932 sbi->s_sc_info = nilfs_segctor_new(sbi);
2933 if (!sbi->s_sc_info)
2934 return -ENOMEM;
2935
2936 nilfs_attach_writer(nilfs, sbi);
2937 err = nilfs_segctor_init(NILFS_SC(sbi));
2938 if (err) {
2939 nilfs_detach_writer(nilfs, sbi);
2940 kfree(sbi->s_sc_info);
2941 sbi->s_sc_info = NULL;
2942 }
2943 return err;
2944}
2945
2946/**
2947 * nilfs_detach_segment_constructor - destroy the segment constructor
2948 * @sbi: nilfs_sb_info
2949 *
2950 * nilfs_detach_segment_constructor() kills the segment constructor daemon,
2951 * frees the struct nilfs_sc_info, and destroy the dirty file list.
2952 */
2953void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi)
2954{
2955 struct the_nilfs *nilfs = sbi->s_nilfs;
2956 LIST_HEAD(garbage_list);
2957
2958 down_write(&nilfs->ns_segctor_sem);
2959 if (NILFS_SC(sbi)) {
2960 nilfs_segctor_destroy(NILFS_SC(sbi));
2961 sbi->s_sc_info = NULL;
2962 }
2963
2964 /* Force to free the list of dirty files */
2965 spin_lock(&sbi->s_inode_lock);
2966 if (!list_empty(&sbi->s_dirty_files)) {
2967 list_splice_init(&sbi->s_dirty_files, &garbage_list);
2968 nilfs_warning(sbi->s_super, __func__,
2969 "Non empty dirty list after the last "
2970 "segment construction\n");
2971 }
2972 spin_unlock(&sbi->s_inode_lock);
2973 up_write(&nilfs->ns_segctor_sem);
2974
2975 nilfs_dispose_list(sbi, &garbage_list, 1);
2976 nilfs_detach_writer(nilfs, sbi);
2977}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
new file mode 100644
index 000000000000..a98fc1ed0bbb
--- /dev/null
+++ b/fs/nilfs2/segment.h
@@ -0,0 +1,243 @@
1/*
2 * segment.h - NILFS Segment constructor prototypes and definitions
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23#ifndef _NILFS_SEGMENT_H
24#define _NILFS_SEGMENT_H
25
26#include <linux/types.h>
27#include <linux/fs.h>
28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h>
30#include "sb.h"
31
32/**
33 * struct nilfs_recovery_info - Recovery information
34 * @ri_need_recovery: Recovery status
35 * @ri_super_root: Block number of the last super root
36 * @ri_cno: Number of the last checkpoint
37 * @ri_lsegs_start: Region for roll-forwarding (start block number)
38 * @ri_lsegs_end: Region for roll-forwarding (end block number)
39 * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
40 * @ri_used_segments: List of segments to be marked active
41 * @ri_pseg_start: Block number of the last partial segment
42 * @ri_seq: Sequence number on the last partial segment
43 * @ri_segnum: Segment number on the last partial segment
44 * @ri_nextnum: Next segment number on the last partial segment
45 */
46struct nilfs_recovery_info {
47 int ri_need_recovery;
48 sector_t ri_super_root;
49 __u64 ri_cno;
50
51 sector_t ri_lsegs_start;
52 sector_t ri_lsegs_end;
53 u64 ri_lsegs_start_seq;
54 struct list_head ri_used_segments;
55 sector_t ri_pseg_start;
56 u64 ri_seq;
57 __u64 ri_segnum;
58 __u64 ri_nextnum;
59};
60
61/* Values of ri_need_recovery */
62#define NILFS_RECOVERY_SR_UPDATED 1 /* The super root was updated */
63#define NILFS_RECOVERY_ROLLFORWARD_DONE 2 /* Rollforward was carried out */
64
65/**
66 * struct nilfs_cstage - Context of collection stage
67 * @scnt: Stage count
68 * @flags: State flags
69 * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file
70 * @gc_inode_ptr: Pointer on the list of gc-inodes
71 */
72struct nilfs_cstage {
73 int scnt;
74 unsigned flags;
75 struct nilfs_inode_info *dirty_file_ptr;
76 struct nilfs_inode_info *gc_inode_ptr;
77};
78
79struct nilfs_segment_buffer;
80
/*
 * struct nilfs_segsum_pointer - a position given as a buffer head plus a
 * byte offset; struct nilfs_sc_info uses it for its finfo/binfo pointers
 * into the segment summary.
 */
81struct nilfs_segsum_pointer {
82 struct buffer_head *bh;
83 unsigned offset; /* offset in bytes */
84};
85
86/**
87 * struct nilfs_sc_info - Segment constructor information
88 * @sc_super: Back pointer to super_block struct
89 * @sc_sbi: Back pointer to nilfs_sb_info struct
90 * @sc_nblk_inc: Block count of current generation
91 * @sc_dirty_files: List of files to be written
92 * @sc_gc_inodes: List of GC inodes having blocks to be written
93 * @sc_cleaning_segments: List of segments to be freed through construction
94 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
95 * @sc_dsync_inode: inode whose data pages are written for a sync operation
96 * @sc_dsync_start: start byte offset of data pages
97 * @sc_dsync_end: end byte offset of data pages (inclusive)
98 * @sc_segbufs: List of segment buffers
99 * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
100 * @sc_curseg: Current segment buffer
101 * @sc_super_root: Pointer to the super root buffer
102 * @sc_stage: Collection stage
103 * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
104 * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
105 * @sc_blk_cnt: Block count of a file
106 * @sc_datablk_cnt: Data block count of a file
107 * @sc_nblk_this_inc: Number of blocks included in the current logical segment
108 * @sc_seg_ctime: Creation time
109 * @sc_flags: Internal flags
110 * @sc_state_lock: spinlock for sc_state and so on
111 * @sc_state: Segctord state flags
112 * @sc_flush_request: bitmap of flush requests (bit 0: data files, bit N: metadata file with inode number N)
113 * @sc_wait_request: Client request queue
114 * @sc_wait_daemon: Daemon wait queue
115 * @sc_wait_task: Start/end wait queue to control segctord task
116 * @sc_seq_request: Request counter
117 * @sc_seq_done: Completion counter
118 * @sc_sync: Request of explicit sync operation
119 * @sc_interval: Timeout value of background construction (in jiffies)
120 * @sc_mjcp_freq: Frequency of creating checkpoints (in jiffies)
121 * @sc_lseg_stime: Start time of the latest logical segment
122 * @sc_watermark: Watermark for the number of dirty buffers
123 * @sc_timer: Timer for segctord (NULL while segctord is not running)
124 * @sc_task: current thread of segctord
125 */
126struct nilfs_sc_info {
127 struct super_block *sc_super;
128 struct nilfs_sb_info *sc_sbi;
129
130 unsigned long sc_nblk_inc;
131
132 struct list_head sc_dirty_files;
133 struct list_head sc_gc_inodes;
134 struct list_head sc_cleaning_segments;
135 struct list_head sc_copied_buffers;
136
137 struct nilfs_inode_info *sc_dsync_inode;
138 loff_t sc_dsync_start;
139 loff_t sc_dsync_end;
140
141 /* Segment buffers */
142 struct list_head sc_segbufs;
143 unsigned long sc_segbuf_nblocks;
144 struct nilfs_segment_buffer *sc_curseg;
145 struct buffer_head *sc_super_root;
146
147 struct nilfs_cstage sc_stage;
148
149 struct nilfs_segsum_pointer sc_finfo_ptr;
150 struct nilfs_segsum_pointer sc_binfo_ptr;
151 unsigned long sc_blk_cnt;
152 unsigned long sc_datablk_cnt;
153 unsigned long sc_nblk_this_inc;
154 time_t sc_seg_ctime;
155
156 unsigned long sc_flags;
157
158 spinlock_t sc_state_lock;
159 unsigned long sc_state;
160 unsigned long sc_flush_request;
161
162 wait_queue_head_t sc_wait_request;
163 wait_queue_head_t sc_wait_daemon;
164 wait_queue_head_t sc_wait_task;
165
166 __u32 sc_seq_request;
167 __u32 sc_seq_done;
168
169 int sc_sync;
170 unsigned long sc_interval;
171 unsigned long sc_mjcp_freq;
172 unsigned long sc_lseg_stime; /* in 1/HZ seconds */
173 unsigned long sc_watermark;
174
175 struct timer_list *sc_timer;
176 struct task_struct *sc_task;
177};
178
179/* sc_flags: bit numbers used with set_bit()/test_bit() on sc_flags */
180enum {
181 NILFS_SC_DIRTY, /* One or more dirty meta-data blocks exist */
182 NILFS_SC_UNCLOSED, /* Logical segment is not closed */
183 NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */
184 NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a
185 checkpoint */
186 NILFS_SC_HAVE_DELTA, /* Next checkpoint will have update of files
187 other than DAT, cpfile, sufile, or files
188 moved by GC */
189};
190
191/* sc_state: bit masks, modified under sc_state_lock */
192#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */
193#define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */
194
195/*
196 * Constant parameters
197 */
198#define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when
199 destroying segctord */
200
201/*
202 * Default values of timeout, in seconds.
 * They are multiplied by HZ when stored in struct nilfs_sc_info.
203 */
204#define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks.
205 It triggers construction of a
206 logical segment with a super root */
207#define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root
208 creation */
209
210/*
211 * The default threshold amount of data, in block counts.
212 */
213#define NILFS_SC_DEFAULT_WATERMARK 3600
214
215
216/* segment.c */
217extern int nilfs_init_transaction_cache(void);
218extern void nilfs_destroy_transaction_cache(void);
219extern void nilfs_relax_pressure_in_lock(struct super_block *);
220
221extern int nilfs_construct_segment(struct super_block *);
222extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
223 loff_t, loff_t);
224extern void nilfs_flush_segment(struct super_block *, ino_t);
225extern int nilfs_clean_segments(struct super_block *, void __user *);
226
227extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *,
228 __u64 *, size_t);
229extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *);
230
231extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *);
232extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
233
234/* recovery.c */
235extern int nilfs_read_super_root_block(struct super_block *, sector_t,
236 struct buffer_head **, int);
237extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *,
238 struct nilfs_recovery_info *);
239extern int nilfs_recover_logical_segments(struct the_nilfs *,
240 struct nilfs_sb_info *,
241 struct nilfs_recovery_info *);
242
243#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
new file mode 100644
index 000000000000..c774cf397e2f
--- /dev/null
+++ b/fs/nilfs2/sufile.c
@@ -0,0 +1,640 @@
1/*
2 * sufile.c - NILFS segment usage file.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/kernel.h>
24#include <linux/fs.h>
25#include <linux/string.h>
26#include <linux/buffer_head.h>
27#include <linux/errno.h>
28#include <linux/nilfs2_fs.h>
29#include "mdt.h"
30#include "sufile.h"
31
32
33static inline unsigned long
34nilfs_sufile_segment_usages_per_block(const struct inode *sufile)
35{
36 return NILFS_MDT(sufile)->mi_entries_per_block;
37}
38
39static unsigned long
40nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum)
41{
42 __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset;
43 do_div(t, nilfs_sufile_segment_usages_per_block(sufile));
44 return (unsigned long)t;
45}
46
47static unsigned long
48nilfs_sufile_get_offset(const struct inode *sufile, __u64 segnum)
49{
50 __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset;
51 return do_div(t, nilfs_sufile_segment_usages_per_block(sufile));
52}
53
54static unsigned long
55nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr,
56 __u64 max)
57{
58 return min_t(unsigned long,
59 nilfs_sufile_segment_usages_per_block(sufile) -
60 nilfs_sufile_get_offset(sufile, curr),
61 max - curr + 1);
62}
63
/*
 * Address of the sufile header inside the page mapped at @kaddr, i.e. the
 * start of the data of @bh within that page.
 */
static inline struct nilfs_sufile_header *
nilfs_sufile_block_get_header(const struct inode *sufile,
			      struct buffer_head *bh,
			      void *kaddr)
{
	void *block_start = kaddr + bh_offset(bh);

	return block_start;
}
71
72static struct nilfs_segment_usage *
73nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum,
74 struct buffer_head *bh, void *kaddr)
75{
76 return kaddr + bh_offset(bh) +
77 nilfs_sufile_get_offset(sufile, segnum) *
78 NILFS_MDT(sufile)->mi_entry_size;
79}
80
81static inline int nilfs_sufile_get_header_block(struct inode *sufile,
82 struct buffer_head **bhp)
83{
84 return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp);
85}
86
87static inline int
88nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
89 int create, struct buffer_head **bhp)
90{
91 return nilfs_mdt_get_block(sufile,
92 nilfs_sufile_get_blkoff(sufile, segnum),
93 create, NULL, bhp);
94}
95
96/**
97 * nilfs_sufile_alloc - allocate a segment
98 * @sufile: inode of segment usage file
99 * @segnump: pointer to segment number
100 *
101 * Description: nilfs_sufile_alloc() allocates a clean segment.
102 *
103 * Return Value: On success, 0 is returned and the segment number of the
104 * allocated segment is stored in the place pointed by @segnump. On error, one
105 * of the following negative error codes is returned.
106 *
107 * %-EIO - I/O error.
108 *
109 * %-ENOMEM - Insufficient amount of memory available.
110 *
111 * %-ENOSPC - No clean segment left.
112 */
113int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
114{
115 struct buffer_head *header_bh, *su_bh;
116 struct the_nilfs *nilfs;
117 struct nilfs_sufile_header *header;
118 struct nilfs_segment_usage *su;
119 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
120 __u64 segnum, maxsegnum, last_alloc;
121 void *kaddr;
122 unsigned long nsegments, ncleansegs, nsus;
123 int ret, i, j;
124
125 down_write(&NILFS_MDT(sufile)->mi_sem);
126
127 nilfs = NILFS_MDT(sufile)->mi_nilfs;
128
129 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
130 if (ret < 0)
131 goto out_sem;
132 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
133 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
134 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
135 last_alloc = le64_to_cpu(header->sh_last_alloc);
136 kunmap_atomic(kaddr, KM_USER0);
137
138 nsegments = nilfs_sufile_get_nsegments(sufile);
139 segnum = last_alloc + 1;
140 maxsegnum = nsegments - 1;
141 for (i = 0; i < nsegments; i += nsus) {
142 if (segnum >= nsegments) {
143 /* wrap around */
144 segnum = 0;
145 maxsegnum = last_alloc;
146 }
147 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
148 &su_bh);
149 if (ret < 0)
150 goto out_header;
151 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
152 su = nilfs_sufile_block_get_segment_usage(
153 sufile, segnum, su_bh, kaddr);
154
155 nsus = nilfs_sufile_segment_usages_in_block(
156 sufile, segnum, maxsegnum);
157 for (j = 0; j < nsus; j++, su = (void *)su + susz, segnum++) {
158 if (!nilfs_segment_usage_clean(su))
159 continue;
160 /* found a clean segment */
161 nilfs_segment_usage_set_dirty(su);
162 kunmap_atomic(kaddr, KM_USER0);
163
164 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
165 header = nilfs_sufile_block_get_header(
166 sufile, header_bh, kaddr);
167 le64_add_cpu(&header->sh_ncleansegs, -1);
168 le64_add_cpu(&header->sh_ndirtysegs, 1);
169 header->sh_last_alloc = cpu_to_le64(segnum);
170 kunmap_atomic(kaddr, KM_USER0);
171
172 nilfs_mdt_mark_buffer_dirty(header_bh);
173 nilfs_mdt_mark_buffer_dirty(su_bh);
174 nilfs_mdt_mark_dirty(sufile);
175 brelse(su_bh);
176 *segnump = segnum;
177 goto out_header;
178 }
179
180 kunmap_atomic(kaddr, KM_USER0);
181 brelse(su_bh);
182 }
183
184 /* no segments left */
185 ret = -ENOSPC;
186
187 out_header:
188 brelse(header_bh);
189
190 out_sem:
191 up_write(&NILFS_MDT(sufile)->mi_sem);
192 return ret;
193}
194
195/**
196 * nilfs_sufile_cancel_free -
197 * @sufile: inode of segment usage file
198 * @segnum: segment number
199 *
200 * Description:
201 *
202 * Return Value: On success, 0 is returned. On error, one of the following
203 * negative error codes is returned.
204 *
205 * %-EIO - I/O error.
206 *
207 * %-ENOMEM - Insufficient amount of memory available.
208 */
209int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
210{
211 struct buffer_head *header_bh, *su_bh;
212 struct the_nilfs *nilfs;
213 struct nilfs_sufile_header *header;
214 struct nilfs_segment_usage *su;
215 void *kaddr;
216 int ret;
217
218 down_write(&NILFS_MDT(sufile)->mi_sem);
219
220 nilfs = NILFS_MDT(sufile)->mi_nilfs;
221
222 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
223 if (ret < 0)
224 goto out_sem;
225
226 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
227 if (ret < 0)
228 goto out_header;
229
230 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
231 su = nilfs_sufile_block_get_segment_usage(
232 sufile, segnum, su_bh, kaddr);
233 if (unlikely(!nilfs_segment_usage_clean(su))) {
234 printk(KERN_WARNING "%s: segment %llu must be clean\n",
235 __func__, (unsigned long long)segnum);
236 kunmap_atomic(kaddr, KM_USER0);
237 goto out_su_bh;
238 }
239 nilfs_segment_usage_set_dirty(su);
240 kunmap_atomic(kaddr, KM_USER0);
241
242 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
243 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
244 le64_add_cpu(&header->sh_ncleansegs, -1);
245 le64_add_cpu(&header->sh_ndirtysegs, 1);
246 kunmap_atomic(kaddr, KM_USER0);
247
248 nilfs_mdt_mark_buffer_dirty(header_bh);
249 nilfs_mdt_mark_buffer_dirty(su_bh);
250 nilfs_mdt_mark_dirty(sufile);
251
252 out_su_bh:
253 brelse(su_bh);
254 out_header:
255 brelse(header_bh);
256 out_sem:
257 up_write(&NILFS_MDT(sufile)->mi_sem);
258 return ret;
259}
260
261/**
262 * nilfs_sufile_freev - free segments
263 * @sufile: inode of segment usage file
264 * @segnum: array of segment numbers
265 * @nsegs: number of segments
266 *
267 * Description: nilfs_sufile_freev() frees segments specified by @segnum and
268 * @nsegs, which must have been returned by a previous call to
269 * nilfs_sufile_alloc().
270 *
271 * Return Value: On success, 0 is returned. On error, one of the following
272 * negative error codes is returned.
273 *
274 * %-EIO - I/O error.
275 *
276 * %-ENOMEM - Insufficient amount of memory available.
277 */
278#define NILFS_SUFILE_FREEV_PREALLOC 16
279int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs)
280{
281 struct buffer_head *header_bh, **su_bh,
282 *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC];
283 struct the_nilfs *nilfs;
284 struct nilfs_sufile_header *header;
285 struct nilfs_segment_usage *su;
286 void *kaddr;
287 int ret, i;
288
289 down_write(&NILFS_MDT(sufile)->mi_sem);
290
291 nilfs = NILFS_MDT(sufile)->mi_nilfs;
292
293 /* prepare resources */
294 if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC)
295 su_bh = su_bh_prealloc;
296 else {
297 su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS);
298 if (su_bh == NULL) {
299 ret = -ENOMEM;
300 goto out_sem;
301 }
302 }
303
304 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
305 if (ret < 0)
306 goto out_su_bh;
307 for (i = 0; i < nsegs; i++) {
308 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i],
309 0, &su_bh[i]);
310 if (ret < 0)
311 goto out_bh;
312 }
313
314 /* free segments */
315 for (i = 0; i < nsegs; i++) {
316 kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0);
317 su = nilfs_sufile_block_get_segment_usage(
318 sufile, segnum[i], su_bh[i], kaddr);
319 WARN_ON(nilfs_segment_usage_error(su));
320 nilfs_segment_usage_set_clean(su);
321 kunmap_atomic(kaddr, KM_USER0);
322 nilfs_mdt_mark_buffer_dirty(su_bh[i]);
323 }
324 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
325 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
326 le64_add_cpu(&header->sh_ncleansegs, nsegs);
327 le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs);
328 kunmap_atomic(kaddr, KM_USER0);
329 nilfs_mdt_mark_buffer_dirty(header_bh);
330 nilfs_mdt_mark_dirty(sufile);
331
332 out_bh:
333 for (i--; i >= 0; i--)
334 brelse(su_bh[i]);
335 brelse(header_bh);
336
337 out_su_bh:
338 if (su_bh != su_bh_prealloc)
339 kfree(su_bh);
340
341 out_sem:
342 up_write(&NILFS_MDT(sufile)->mi_sem);
343 return ret;
344}
345
346/**
347 * nilfs_sufile_free -
348 * @sufile:
349 * @segnum:
350 */
351int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
352{
353 return nilfs_sufile_freev(sufile, &segnum, 1);
354}
355
356/**
357 * nilfs_sufile_get_segment_usage - get a segment usage
358 * @sufile: inode of segment usage file
359 * @segnum: segment number
360 * @sup: pointer to segment usage
361 * @bhp: pointer to buffer head
362 *
363 * Description: nilfs_sufile_get_segment_usage() acquires the segment usage
364 * specified by @segnum.
365 *
366 * Return Value: On success, 0 is returned, and the segment usage and the
367 * buffer head of the buffer on which the segment usage is located are stored
368 * in the place pointed by @sup and @bhp, respectively. On error, one of the
369 * following negative error codes is returned.
370 *
371 * %-EIO - I/O error.
372 *
373 * %-ENOMEM - Insufficient amount of memory available.
374 *
375 * %-EINVAL - Invalid segment usage number.
376 */
377int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum,
378 struct nilfs_segment_usage **sup,
379 struct buffer_head **bhp)
380{
381 struct buffer_head *bh;
382 struct nilfs_segment_usage *su;
383 void *kaddr;
384 int ret;
385
386 /* segnum is 0 origin */
387 if (segnum >= nilfs_sufile_get_nsegments(sufile))
388 return -EINVAL;
389 down_write(&NILFS_MDT(sufile)->mi_sem);
390 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh);
391 if (ret < 0)
392 goto out_sem;
393 kaddr = kmap(bh->b_page);
394 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
395 if (nilfs_segment_usage_error(su)) {
396 kunmap(bh->b_page);
397 brelse(bh);
398 ret = -EINVAL;
399 goto out_sem;
400 }
401
402 if (sup != NULL)
403 *sup = su;
404 *bhp = bh;
405
406 out_sem:
407 up_write(&NILFS_MDT(sufile)->mi_sem);
408 return ret;
409}
410
411/**
412 * nilfs_sufile_put_segment_usage - put a segment usage
413 * @sufile: inode of segment usage file
414 * @segnum: segment number
415 * @bh: buffer head
416 *
417 * Description: nilfs_sufile_put_segment_usage() releases the segment usage
418 * specified by @segnum. @bh must be the buffer head which have been returned
419 * by a previous call to nilfs_sufile_get_segment_usage() with @segnum.
420 */
421void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum,
422 struct buffer_head *bh)
423{
424 kunmap(bh->b_page);
425 brelse(bh);
426}
427
428/**
429 * nilfs_sufile_get_stat - get segment usage statistics
430 * @sufile: inode of segment usage file
431 * @stat: pointer to a structure of segment usage statistics
432 *
433 * Description: nilfs_sufile_get_stat() returns information about segment
434 * usage.
435 *
436 * Return Value: On success, 0 is returned, and segment usage information is
437 * stored in the place pointed by @stat. On error, one of the following
438 * negative error codes is returned.
439 *
440 * %-EIO - I/O error.
441 *
442 * %-ENOMEM - Insufficient amount of memory available.
443 */
444int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
445{
446 struct buffer_head *header_bh;
447 struct nilfs_sufile_header *header;
448 struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs;
449 void *kaddr;
450 int ret;
451
452 down_read(&NILFS_MDT(sufile)->mi_sem);
453
454 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
455 if (ret < 0)
456 goto out_sem;
457
458 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
459 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
460 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
461 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
462 sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
463 sustat->ss_ctime = nilfs->ns_ctime;
464 sustat->ss_nongc_ctime = nilfs->ns_nongc_ctime;
465 spin_lock(&nilfs->ns_last_segment_lock);
466 sustat->ss_prot_seq = nilfs->ns_prot_seq;
467 spin_unlock(&nilfs->ns_last_segment_lock);
468 kunmap_atomic(kaddr, KM_USER0);
469 brelse(header_bh);
470
471 out_sem:
472 up_read(&NILFS_MDT(sufile)->mi_sem);
473 return ret;
474}
475
476/**
477 * nilfs_sufile_get_ncleansegs - get the number of clean segments
478 * @sufile: inode of segment usage file
479 * @nsegsp: pointer to the number of clean segments
480 *
481 * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean
482 * segments.
483 *
484 * Return Value: On success, 0 is returned and the number of clean segments is
485 * stored in the place pointed by @nsegsp. On error, one of the following
486 * negative error codes is returned.
487 *
488 * %-EIO - I/O error.
489 *
490 * %-ENOMEM - Insufficient amount of memory available.
491 */
492int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
493{
494 struct nilfs_sustat sustat;
495 int ret;
496
497 ret = nilfs_sufile_get_stat(sufile, &sustat);
498 if (ret == 0)
499 *nsegsp = sustat.ss_ncleansegs;
500 return ret;
501}
502
503/**
504 * nilfs_sufile_set_error - mark a segment as erroneous
505 * @sufile: inode of segment usage file
506 * @segnum: segment number
507 *
508 * Description: nilfs_sufile_set_error() marks the segment specified by
509 * @segnum as erroneous. The error segment will never be used again.
510 *
511 * Return Value: On success, 0 is returned. On error, one of the following
512 * negative error codes is returned.
513 *
514 * %-EIO - I/O error.
515 *
516 * %-ENOMEM - Insufficient amount of memory available.
517 *
518 * %-EINVAL - Invalid segment usage number.
519 */
520int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
521{
522 struct buffer_head *header_bh, *su_bh;
523 struct nilfs_segment_usage *su;
524 struct nilfs_sufile_header *header;
525 void *kaddr;
526 int ret;
527
528 if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
529 printk(KERN_WARNING "%s: invalid segment number: %llu\n",
530 __func__, (unsigned long long)segnum);
531 return -EINVAL;
532 }
533 down_write(&NILFS_MDT(sufile)->mi_sem);
534
535 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
536 if (ret < 0)
537 goto out_sem;
538 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
539 if (ret < 0)
540 goto out_header;
541
542 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
543 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
544 if (nilfs_segment_usage_error(su)) {
545 kunmap_atomic(kaddr, KM_USER0);
546 brelse(su_bh);
547 goto out_header;
548 }
549
550 nilfs_segment_usage_set_error(su);
551 kunmap_atomic(kaddr, KM_USER0);
552 brelse(su_bh);
553
554 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
555 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
556 le64_add_cpu(&header->sh_ndirtysegs, -1);
557 kunmap_atomic(kaddr, KM_USER0);
558 nilfs_mdt_mark_buffer_dirty(header_bh);
559 nilfs_mdt_mark_buffer_dirty(su_bh);
560 nilfs_mdt_mark_dirty(sufile);
561 brelse(su_bh);
562
563 out_header:
564 brelse(header_bh);
565
566 out_sem:
567 up_write(&NILFS_MDT(sufile)->mi_sem);
568 return ret;
569}
570
571/**
572 * nilfs_sufile_get_suinfo -
573 * @sufile: inode of segment usage file
574 * @segnum: segment number to start looking
575 * @si: array of suinfo
576 * @nsi: size of suinfo array
577 *
578 * Description:
579 *
580 * Return Value: On success, 0 is returned and .... On error, one of the
581 * following negative error codes is returned.
582 *
583 * %-EIO - I/O error.
584 *
585 * %-ENOMEM - Insufficient amount of memory available.
586 */
587ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
588 struct nilfs_suinfo *si, size_t nsi)
589{
590 struct buffer_head *su_bh;
591 struct nilfs_segment_usage *su;
592 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
593 struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs;
594 void *kaddr;
595 unsigned long nsegs, segusages_per_block;
596 ssize_t n;
597 int ret, i, j;
598
599 down_read(&NILFS_MDT(sufile)->mi_sem);
600
601 segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
602 nsegs = min_t(unsigned long,
603 nilfs_sufile_get_nsegments(sufile) - segnum,
604 nsi);
605 for (i = 0; i < nsegs; i += n, segnum += n) {
606 n = min_t(unsigned long,
607 segusages_per_block -
608 nilfs_sufile_get_offset(sufile, segnum),
609 nsegs - i);
610 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
611 &su_bh);
612 if (ret < 0) {
613 if (ret != -ENOENT)
614 goto out;
615 /* hole */
616 memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n);
617 continue;
618 }
619
620 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
621 su = nilfs_sufile_block_get_segment_usage(
622 sufile, segnum, su_bh, kaddr);
623 for (j = 0; j < n; j++, su = (void *)su + susz) {
624 si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod);
625 si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks);
626 si[i + j].sui_flags = le32_to_cpu(su->su_flags) &
627 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
628 if (nilfs_segment_is_active(nilfs, segnum + i + j))
629 si[i + j].sui_flags |=
630 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
631 }
632 kunmap_atomic(kaddr, KM_USER0);
633 brelse(su_bh);
634 }
635 ret = nsegs;
636
637 out:
638 up_read(&NILFS_MDT(sufile)->mi_sem);
639 return ret;
640}
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
new file mode 100644
index 000000000000..d595f33a768d
--- /dev/null
+++ b/fs/nilfs2/sufile.h
@@ -0,0 +1,54 @@
1/*
2 * sufile.h - NILFS segment usage file.
3 *
4 * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#ifndef _NILFS_SUFILE_H
24#define _NILFS_SUFILE_H
25
26#include <linux/fs.h>
27#include <linux/buffer_head.h>
28#include <linux/nilfs2_fs.h>
29#include "mdt.h"
30
31#define NILFS_SUFILE_GFP NILFS_MDT_GFP
32
33static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
34{
35 return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments;
36}
37
38int nilfs_sufile_alloc(struct inode *, __u64 *);
39int nilfs_sufile_cancel_free(struct inode *, __u64);
40int nilfs_sufile_freev(struct inode *, __u64 *, size_t);
41int nilfs_sufile_free(struct inode *, __u64);
42int nilfs_sufile_get_segment_usage(struct inode *, __u64,
43 struct nilfs_segment_usage **,
44 struct buffer_head **);
45void nilfs_sufile_put_segment_usage(struct inode *, __u64,
46 struct buffer_head *);
47int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
48int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
49int nilfs_sufile_set_error(struct inode *, __u64);
50ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *,
51 size_t);
52
53
54#endif /* _NILFS_SUFILE_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
new file mode 100644
index 000000000000..e117e1ea9bff
--- /dev/null
+++ b/fs/nilfs2/super.c
@@ -0,0 +1,1323 @@
1/*
2 * super.c - NILFS module and super block management.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 */
22/*
23 * linux/fs/ext2/super.c
24 *
25 * Copyright (C) 1992, 1993, 1994, 1995
26 * Remy Card (card@masi.ibp.fr)
27 * Laboratoire MASI - Institut Blaise Pascal
28 * Universite Pierre et Marie Curie (Paris VI)
29 *
30 * from
31 *
32 * linux/fs/minix/inode.c
33 *
34 * Copyright (C) 1991, 1992 Linus Torvalds
35 *
36 * Big-endian to little-endian byte-swapping/bitmaps by
37 * David S. Miller (davem@caip.rutgers.edu), 1995
38 */
39
40#include <linux/module.h>
41#include <linux/string.h>
42#include <linux/slab.h>
43#include <linux/init.h>
44#include <linux/blkdev.h>
45#include <linux/parser.h>
46#include <linux/random.h>
47#include <linux/crc32.h>
48#include <linux/smp_lock.h>
49#include <linux/vfs.h>
50#include <linux/writeback.h>
51#include <linux/kobject.h>
52#include <linux/exportfs.h>
53#include "nilfs.h"
54#include "mdt.h"
55#include "alloc.h"
56#include "page.h"
57#include "cpfile.h"
58#include "ifile.h"
59#include "dat.h"
60#include "segment.h"
61#include "segbuf.h"
62
63MODULE_AUTHOR("NTT Corp.");
64MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
65 "(NILFS)");
66MODULE_VERSION(NILFS_VERSION);
67MODULE_LICENSE("GPL");
68
69static int nilfs_remount(struct super_block *sb, int *flags, char *data);
70static int test_exclusive_mount(struct file_system_type *fs_type,
71 struct block_device *bdev, int flags);
72
73/**
74 * nilfs_error() - report failure condition on a filesystem
75 *
76 * nilfs_error() sets an ERROR_FS flag on the superblock as well as
77 * reporting an error message. It should be called when NILFS detects
78 * incoherences or defects of meta data on disk. As for sustainable
79 * errors such as a single-shot I/O error, nilfs_warning() or the printk()
80 * function should be used instead.
81 *
82 * The segment constructor must not call this function because it can
83 * kill itself.
84 */
85void nilfs_error(struct super_block *sb, const char *function,
86 const char *fmt, ...)
87{
88 struct nilfs_sb_info *sbi = NILFS_SB(sb);
89 va_list args;
90
91 va_start(args, fmt);
92 printk(KERN_CRIT "NILFS error (device %s): %s: ", sb->s_id, function);
93 vprintk(fmt, args);
94 printk("\n");
95 va_end(args);
96
97 if (!(sb->s_flags & MS_RDONLY)) {
98 struct the_nilfs *nilfs = sbi->s_nilfs;
99
100 if (!nilfs_test_opt(sbi, ERRORS_CONT))
101 nilfs_detach_segment_constructor(sbi);
102
103 down_write(&nilfs->ns_sem);
104 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
105 nilfs->ns_mount_state |= NILFS_ERROR_FS;
106 nilfs->ns_sbp[0]->s_state |=
107 cpu_to_le16(NILFS_ERROR_FS);
108 nilfs_commit_super(sbi, 1);
109 }
110 up_write(&nilfs->ns_sem);
111
112 if (nilfs_test_opt(sbi, ERRORS_RO)) {
113 printk(KERN_CRIT "Remounting filesystem read-only\n");
114 sb->s_flags |= MS_RDONLY;
115 }
116 }
117
118 if (nilfs_test_opt(sbi, ERRORS_PANIC))
119 panic("NILFS (device %s): panic forced after error\n",
120 sb->s_id);
121}
122
123void nilfs_warning(struct super_block *sb, const char *function,
124 const char *fmt, ...)
125{
126 va_list args;
127
128 va_start(args, fmt);
129 printk(KERN_WARNING "NILFS warning (device %s): %s: ",
130 sb->s_id, function);
131 vprintk(fmt, args);
132 printk("\n");
133 va_end(args);
134}
135
136static struct kmem_cache *nilfs_inode_cachep;
137
138struct inode *nilfs_alloc_inode(struct super_block *sb)
139{
140 struct nilfs_inode_info *ii;
141
142 ii = kmem_cache_alloc(nilfs_inode_cachep, GFP_NOFS);
143 if (!ii)
144 return NULL;
145 ii->i_bh = NULL;
146 ii->i_state = 0;
147 ii->vfs_inode.i_version = 1;
148 nilfs_btnode_cache_init(&ii->i_btnode_cache);
149 return &ii->vfs_inode;
150}
151
152void nilfs_destroy_inode(struct inode *inode)
153{
154 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
155}
156
157static void init_once(void *obj)
158{
159 struct nilfs_inode_info *ii = obj;
160
161 INIT_LIST_HEAD(&ii->i_dirty);
162#ifdef CONFIG_NILFS_XATTR
163 init_rwsem(&ii->xattr_sem);
164#endif
165 nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
166 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
167 inode_init_once(&ii->vfs_inode);
168}
169
170static int nilfs_init_inode_cache(void)
171{
172 nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
173 sizeof(struct nilfs_inode_info),
174 0, SLAB_RECLAIM_ACCOUNT,
175 init_once);
176
177 return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0;
178}
179
180static inline void nilfs_destroy_inode_cache(void)
181{
182 kmem_cache_destroy(nilfs_inode_cachep);
183}
184
185static void nilfs_clear_inode(struct inode *inode)
186{
187 struct nilfs_inode_info *ii = NILFS_I(inode);
188
189#ifdef CONFIG_NILFS_POSIX_ACL
190 if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
191 posix_acl_release(ii->i_acl);
192 ii->i_acl = NILFS_ACL_NOT_CACHED;
193 }
194 if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
195 posix_acl_release(ii->i_default_acl);
196 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
197 }
198#endif
199 /*
200 * Free resources allocated in nilfs_read_inode(), here.
201 */
202 BUG_ON(!list_empty(&ii->i_dirty));
203 brelse(ii->i_bh);
204 ii->i_bh = NULL;
205
206 if (test_bit(NILFS_I_BMAP, &ii->i_state))
207 nilfs_bmap_clear(ii->i_bmap);
208
209 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
210}
211
212static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
213{
214 struct the_nilfs *nilfs = sbi->s_nilfs;
215 int err;
216 int barrier_done = 0;
217
218 if (nilfs_test_opt(sbi, BARRIER)) {
219 set_buffer_ordered(nilfs->ns_sbh[0]);
220 barrier_done = 1;
221 }
222 retry:
223 set_buffer_dirty(nilfs->ns_sbh[0]);
224 err = sync_dirty_buffer(nilfs->ns_sbh[0]);
225 if (err == -EOPNOTSUPP && barrier_done) {
226 nilfs_warning(sbi->s_super, __func__,
227 "barrier-based sync failed. "
228 "disabling barriers\n");
229 nilfs_clear_opt(sbi, BARRIER);
230 barrier_done = 0;
231 clear_buffer_ordered(nilfs->ns_sbh[0]);
232 goto retry;
233 }
234 if (unlikely(err)) {
235 printk(KERN_ERR
236 "NILFS: unable to write superblock (err=%d)\n", err);
237 if (err == -EIO && nilfs->ns_sbh[1]) {
238 nilfs_fall_back_super_block(nilfs);
239 goto retry;
240 }
241 } else {
242 struct nilfs_super_block *sbp = nilfs->ns_sbp[0];
243
244 /*
245 * The latest segment becomes trailable from the position
246 * written in superblock.
247 */
248 clear_nilfs_discontinued(nilfs);
249
250 /* update GC protection for recent segments */
251 if (nilfs->ns_sbh[1]) {
252 sbp = NULL;
253 if (dupsb) {
254 set_buffer_dirty(nilfs->ns_sbh[1]);
255 if (!sync_dirty_buffer(nilfs->ns_sbh[1]))
256 sbp = nilfs->ns_sbp[1];
257 }
258 }
259 if (sbp) {
260 spin_lock(&nilfs->ns_last_segment_lock);
261 nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq);
262 spin_unlock(&nilfs->ns_last_segment_lock);
263 }
264 }
265
266 return err;
267}
268
269int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
270{
271 struct the_nilfs *nilfs = sbi->s_nilfs;
272 struct nilfs_super_block **sbp = nilfs->ns_sbp;
273 sector_t nfreeblocks;
274 time_t t;
275 int err;
276
277 /* nilfs->sem must be locked by the caller. */
278 if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) {
279 if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC)
280 nilfs_swap_super_block(nilfs);
281 else {
282 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
283 sbi->s_super->s_id);
284 return -EIO;
285 }
286 }
287 err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
288 if (unlikely(err)) {
289 printk(KERN_ERR "NILFS: failed to count free blocks\n");
290 return err;
291 }
292 spin_lock(&nilfs->ns_last_segment_lock);
293 sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq);
294 sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg);
295 sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno);
296 spin_unlock(&nilfs->ns_last_segment_lock);
297
298 t = get_seconds();
299 nilfs->ns_sbwtime[0] = t;
300 sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks);
301 sbp[0]->s_wtime = cpu_to_le64(t);
302 sbp[0]->s_sum = 0;
303 sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed,
304 (unsigned char *)sbp[0],
305 nilfs->ns_sbsize));
306 if (dupsb && sbp[1]) {
307 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
308 nilfs->ns_sbwtime[1] = t;
309 }
310 sbi->s_super->s_dirt = 0;
311 return nilfs_sync_super(sbi, dupsb);
312}
313
314static void nilfs_put_super(struct super_block *sb)
315{
316 struct nilfs_sb_info *sbi = NILFS_SB(sb);
317 struct the_nilfs *nilfs = sbi->s_nilfs;
318
319 nilfs_detach_segment_constructor(sbi);
320
321 if (!(sb->s_flags & MS_RDONLY)) {
322 down_write(&nilfs->ns_sem);
323 nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
324 nilfs_commit_super(sbi, 1);
325 up_write(&nilfs->ns_sem);
326 }
327
328 nilfs_detach_checkpoint(sbi);
329 put_nilfs(sbi->s_nilfs);
330 sbi->s_super = NULL;
331 sb->s_fs_info = NULL;
332 kfree(sbi);
333}
334
335/**
336 * nilfs_write_super - write super block(s) of NILFS
337 * @sb: super_block
338 *
339 * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and
340 * clears s_dirt. This function is called in the section protected by
341 * lock_super().
342 *
343 * The s_dirt flag is managed by each filesystem and we protect it by ns_sem
344 * of the struct the_nilfs. Lock order must be as follows:
345 *
346 * 1. lock_super()
347 * 2. down_write(&nilfs->ns_sem)
348 *
349 * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer
350 * of the super block (nilfs->ns_sbp[]).
351 *
352 * In most cases, VFS functions call lock_super() before calling these
353 * methods. So we must be careful not to bring on deadlocks when using
354 * lock_super(); see generic_shutdown_super(), write_super(), and so on.
355 *
356 * Note that order of lock_kernel() and lock_super() depends on contexts
357 * of VFS. We should also note that lock_kernel() can be used in its
358 * protective section and only the outermost one has an effect.
359 */
360static void nilfs_write_super(struct super_block *sb)
361{
362 struct nilfs_sb_info *sbi = NILFS_SB(sb);
363 struct the_nilfs *nilfs = sbi->s_nilfs;
364
365 down_write(&nilfs->ns_sem);
366 if (!(sb->s_flags & MS_RDONLY)) {
367 struct nilfs_super_block **sbp = nilfs->ns_sbp;
368 u64 t = get_seconds();
369 int dupsb;
370
371 if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] &&
372 t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) {
373 up_write(&nilfs->ns_sem);
374 return;
375 }
376 dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
377 nilfs_commit_super(sbi, dupsb);
378 }
379 sb->s_dirt = 0;
380 up_write(&nilfs->ns_sem);
381}
382
/**
 * nilfs_sync_fs - sync_fs super operation
 * @sb: super_block
 * @wait: non-zero for a synchronous request
 *
 * A segment is constructed only for waiting requests; non-waiting
 * requests succeed without doing anything.
 *
 * Return Value: 0 when @wait is zero, otherwise the result of
 * nilfs_construct_segment().
 */
static int nilfs_sync_fs(struct super_block *sb, int wait)
{
	if (!wait)
		return 0;

	return nilfs_construct_segment(sb);
}
392
393int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
394{
395 struct the_nilfs *nilfs = sbi->s_nilfs;
396 struct nilfs_checkpoint *raw_cp;
397 struct buffer_head *bh_cp;
398 int err;
399
400 down_write(&nilfs->ns_sem);
401 list_add(&sbi->s_list, &nilfs->ns_supers);
402 up_write(&nilfs->ns_sem);
403
404 sbi->s_ifile = nilfs_mdt_new(
405 nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
406 if (!sbi->s_ifile)
407 return -ENOMEM;
408
409 err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size);
410 if (unlikely(err))
411 goto failed;
412
413 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
414 &bh_cp);
415 if (unlikely(err)) {
416 if (err == -ENOENT || err == -EINVAL) {
417 printk(KERN_ERR
418 "NILFS: Invalid checkpoint "
419 "(checkpoint number=%llu)\n",
420 (unsigned long long)cno);
421 err = -EINVAL;
422 }
423 goto failed;
424 }
425 err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode);
426 if (unlikely(err))
427 goto failed_bh;
428 atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count));
429 atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count));
430
431 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
432 return 0;
433
434 failed_bh:
435 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
436 failed:
437 nilfs_mdt_destroy(sbi->s_ifile);
438 sbi->s_ifile = NULL;
439
440 down_write(&nilfs->ns_sem);
441 list_del_init(&sbi->s_list);
442 up_write(&nilfs->ns_sem);
443
444 return err;
445}
446
447void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
448{
449 struct the_nilfs *nilfs = sbi->s_nilfs;
450
451 nilfs_mdt_clear(sbi->s_ifile);
452 nilfs_mdt_destroy(sbi->s_ifile);
453 sbi->s_ifile = NULL;
454 down_write(&nilfs->ns_sem);
455 list_del_init(&sbi->s_list);
456 up_write(&nilfs->ns_sem);
457}
458
459static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
460{
461 struct the_nilfs *nilfs = sbi->s_nilfs;
462 int err = 0;
463
464 down_write(&nilfs->ns_sem);
465 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
466 nilfs->ns_mount_state |= NILFS_VALID_FS;
467 err = nilfs_commit_super(sbi, 1);
468 if (likely(!err))
469 printk(KERN_INFO "NILFS: recovery complete.\n");
470 }
471 up_write(&nilfs->ns_sem);
472 return err;
473}
474
475static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
476{
477 struct super_block *sb = dentry->d_sb;
478 struct nilfs_sb_info *sbi = NILFS_SB(sb);
479 unsigned long long blocks;
480 unsigned long overhead;
481 unsigned long nrsvblocks;
482 sector_t nfreeblocks;
483 struct the_nilfs *nilfs = sbi->s_nilfs;
484 int err;
485
486 /*
487 * Compute all of the segment blocks
488 *
489 * The blocks before first segment and after last segment
490 * are excluded.
491 */
492 blocks = nilfs->ns_blocks_per_segment * nilfs->ns_nsegments
493 - nilfs->ns_first_data_block;
494 nrsvblocks = nilfs->ns_nrsvsegs * nilfs->ns_blocks_per_segment;
495
496 /*
497 * Compute the overhead
498 *
499 * When distributing meta data blocks outside semgent structure,
500 * We must count them as the overhead.
501 */
502 overhead = 0;
503
504 err = nilfs_count_free_blocks(nilfs, &nfreeblocks);
505 if (unlikely(err))
506 return err;
507
508 buf->f_type = NILFS_SUPER_MAGIC;
509 buf->f_bsize = sb->s_blocksize;
510 buf->f_blocks = blocks - overhead;
511 buf->f_bfree = nfreeblocks;
512 buf->f_bavail = (buf->f_bfree >= nrsvblocks) ?
513 (buf->f_bfree - nrsvblocks) : 0;
514 buf->f_files = atomic_read(&sbi->s_inodes_count);
515 buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
516 buf->f_namelen = NILFS_NAME_LEN;
517 return 0;
518}
519
520static struct super_operations nilfs_sops = {
521 .alloc_inode = nilfs_alloc_inode,
522 .destroy_inode = nilfs_destroy_inode,
523 .dirty_inode = nilfs_dirty_inode,
524 /* .write_inode = nilfs_write_inode, */
525 /* .put_inode = nilfs_put_inode, */
526 /* .drop_inode = nilfs_drop_inode, */
527 .delete_inode = nilfs_delete_inode,
528 .put_super = nilfs_put_super,
529 .write_super = nilfs_write_super,
530 .sync_fs = nilfs_sync_fs,
531 /* .write_super_lockfs */
532 /* .unlockfs */
533 .statfs = nilfs_statfs,
534 .remount_fs = nilfs_remount,
535 .clear_inode = nilfs_clear_inode,
536 /* .umount_begin */
537 /* .show_options */
538};
539
540static struct inode *
541nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
542{
543 struct inode *inode;
544
545 if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO &&
546 ino != NILFS_SKETCH_INO)
547 return ERR_PTR(-ESTALE);
548
549 inode = nilfs_iget(sb, ino);
550 if (IS_ERR(inode))
551 return ERR_CAST(inode);
552 if (generation && inode->i_generation != generation) {
553 iput(inode);
554 return ERR_PTR(-ESTALE);
555 }
556
557 return inode;
558}
559
560static struct dentry *
561nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
562 int fh_type)
563{
564 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
565 nilfs_nfs_get_inode);
566}
567
568static struct dentry *
569nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len,
570 int fh_type)
571{
572 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
573 nilfs_nfs_get_inode);
574}
575
576static struct export_operations nilfs_export_ops = {
577 .fh_to_dentry = nilfs_fh_to_dentry,
578 .fh_to_parent = nilfs_fh_to_parent,
579 .get_parent = nilfs_get_parent,
580};
581
/* Mount option tokens; the values index nothing and only need to differ */
enum {
	Opt_err_cont,	/* errors=continue */
	Opt_err_panic,	/* errors=panic */
	Opt_err_ro,	/* errors=remount-ro */
	Opt_barrier,	/* barrier=%s */
	Opt_snapshot,	/* cp=%u */
	Opt_order,	/* order=%s */
	Opt_err,	/* terminator / unrecognized option */
};
587
588static match_table_t tokens = {
589 {Opt_err_cont, "errors=continue"},
590 {Opt_err_panic, "errors=panic"},
591 {Opt_err_ro, "errors=remount-ro"},
592 {Opt_barrier, "barrier=%s"},
593 {Opt_snapshot, "cp=%u"},
594 {Opt_order, "order=%s"},
595 {Opt_err, NULL}
596};
597
598static int match_bool(substring_t *s, int *result)
599{
600 int len = s->to - s->from;
601
602 if (strncmp(s->from, "on", len) == 0)
603 *result = 1;
604 else if (strncmp(s->from, "off", len) == 0)
605 *result = 0;
606 else
607 return 1;
608 return 0;
609}
610
611static int parse_options(char *options, struct super_block *sb)
612{
613 struct nilfs_sb_info *sbi = NILFS_SB(sb);
614 char *p;
615 substring_t args[MAX_OPT_ARGS];
616 int option;
617
618 if (!options)
619 return 1;
620
621 while ((p = strsep(&options, ",")) != NULL) {
622 int token;
623 if (!*p)
624 continue;
625
626 token = match_token(p, tokens, args);
627 switch (token) {
628 case Opt_barrier:
629 if (match_bool(&args[0], &option))
630 return 0;
631 if (option)
632 nilfs_set_opt(sbi, BARRIER);
633 else
634 nilfs_clear_opt(sbi, BARRIER);
635 break;
636 case Opt_order:
637 if (strcmp(args[0].from, "relaxed") == 0)
638 /* Ordered data semantics */
639 nilfs_clear_opt(sbi, STRICT_ORDER);
640 else if (strcmp(args[0].from, "strict") == 0)
641 /* Strict in-order semantics */
642 nilfs_set_opt(sbi, STRICT_ORDER);
643 else
644 return 0;
645 break;
646 case Opt_err_panic:
647 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC);
648 break;
649 case Opt_err_ro:
650 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO);
651 break;
652 case Opt_err_cont:
653 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT);
654 break;
655 case Opt_snapshot:
656 if (match_int(&args[0], &option) || option <= 0)
657 return 0;
658 if (!(sb->s_flags & MS_RDONLY))
659 return 0;
660 sbi->s_snapshot_cno = option;
661 nilfs_set_opt(sbi, SNAPSHOT);
662 break;
663 default:
664 printk(KERN_ERR
665 "NILFS: Unrecognized mount option \"%s\"\n", p);
666 return 0;
667 }
668 }
669 return 1;
670}
671
672static inline void
673nilfs_set_default_options(struct nilfs_sb_info *sbi,
674 struct nilfs_super_block *sbp)
675{
676 sbi->s_mount_opt =
677 NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER;
678}
679
680static int nilfs_setup_super(struct nilfs_sb_info *sbi)
681{
682 struct the_nilfs *nilfs = sbi->s_nilfs;
683 struct nilfs_super_block *sbp = nilfs->ns_sbp[0];
684 int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count);
685 int mnt_count = le16_to_cpu(sbp->s_mnt_count);
686
687 /* nilfs->sem must be locked by the caller. */
688 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
689 printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
690 } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
691 printk(KERN_WARNING
692 "NILFS warning: mounting fs with errors\n");
693#if 0
694 } else if (max_mnt_count >= 0 && mnt_count >= max_mnt_count) {
695 printk(KERN_WARNING
696 "NILFS warning: maximal mount count reached\n");
697#endif
698 }
699 if (!max_mnt_count)
700 sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
701
702 sbp->s_mnt_count = cpu_to_le16(mnt_count + 1);
703 sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS);
704 sbp->s_mtime = cpu_to_le64(get_seconds());
705 return nilfs_commit_super(sbi, 1);
706}
707
708struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
709 u64 pos, int blocksize,
710 struct buffer_head **pbh)
711{
712 unsigned long long sb_index = pos;
713 unsigned long offset;
714
715 offset = do_div(sb_index, blocksize);
716 *pbh = sb_bread(sb, sb_index);
717 if (!*pbh)
718 return NULL;
719 return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset);
720}
721
722int nilfs_store_magic_and_option(struct super_block *sb,
723 struct nilfs_super_block *sbp,
724 char *data)
725{
726 struct nilfs_sb_info *sbi = NILFS_SB(sb);
727
728 sb->s_magic = le16_to_cpu(sbp->s_magic);
729
730 /* FS independent flags */
731#ifdef NILFS_ATIME_DISABLE
732 sb->s_flags |= MS_NOATIME;
733#endif
734
735 nilfs_set_default_options(sbi, sbp);
736
737 sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid);
738 sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid);
739 sbi->s_interval = le32_to_cpu(sbp->s_c_interval);
740 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max);
741
742 return !parse_options(data, sb) ? -EINVAL : 0 ;
743}
744
745/**
746 * nilfs_fill_super() - initialize a super block instance
747 * @sb: super_block
748 * @data: mount options
749 * @silent: silent mode flag
750 * @nilfs: the_nilfs struct
751 *
752 * This function is called exclusively by bd_mount_mutex.
753 * So, the recovery process is protected from other simultaneous mounts.
754 */
755static int
756nilfs_fill_super(struct super_block *sb, void *data, int silent,
757 struct the_nilfs *nilfs)
758{
759 struct nilfs_sb_info *sbi;
760 struct inode *root;
761 __u64 cno;
762 int err;
763
764 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
765 if (!sbi)
766 return -ENOMEM;
767
768 sb->s_fs_info = sbi;
769
770 get_nilfs(nilfs);
771 sbi->s_nilfs = nilfs;
772 sbi->s_super = sb;
773
774 err = init_nilfs(nilfs, sbi, (char *)data);
775 if (err)
776 goto failed_sbi;
777
778 spin_lock_init(&sbi->s_inode_lock);
779 INIT_LIST_HEAD(&sbi->s_dirty_files);
780 INIT_LIST_HEAD(&sbi->s_list);
781
782 /*
783 * Following initialization is overlapped because
784 * nilfs_sb_info structure has been cleared at the beginning.
785 * But we reserve them to keep our interest and make ready
786 * for the future change.
787 */
788 get_random_bytes(&sbi->s_next_generation,
789 sizeof(sbi->s_next_generation));
790 spin_lock_init(&sbi->s_next_gen_lock);
791
792 sb->s_op = &nilfs_sops;
793 sb->s_export_op = &nilfs_export_ops;
794 sb->s_root = NULL;
795 sb->s_time_gran = 1;
796
797 if (!nilfs_loaded(nilfs)) {
798 err = load_nilfs(nilfs, sbi);
799 if (err)
800 goto failed_sbi;
801 }
802 cno = nilfs_last_cno(nilfs);
803
804 if (sb->s_flags & MS_RDONLY) {
805 if (nilfs_test_opt(sbi, SNAPSHOT)) {
806 err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile,
807 sbi->s_snapshot_cno);
808 if (err < 0)
809 goto failed_sbi;
810 if (!err) {
811 printk(KERN_ERR
812 "NILFS: The specified checkpoint is "
813 "not a snapshot "
814 "(checkpoint number=%llu).\n",
815 (unsigned long long)sbi->s_snapshot_cno);
816 err = -EINVAL;
817 goto failed_sbi;
818 }
819 cno = sbi->s_snapshot_cno;
820 } else
821 /* Read-only mount */
822 sbi->s_snapshot_cno = cno;
823 }
824
825 err = nilfs_attach_checkpoint(sbi, cno);
826 if (err) {
827 printk(KERN_ERR "NILFS: error loading a checkpoint"
828 " (checkpoint number=%llu).\n", (unsigned long long)cno);
829 goto failed_sbi;
830 }
831
832 if (!(sb->s_flags & MS_RDONLY)) {
833 err = nilfs_attach_segment_constructor(sbi);
834 if (err)
835 goto failed_checkpoint;
836 }
837
838 root = nilfs_iget(sb, NILFS_ROOT_INO);
839 if (IS_ERR(root)) {
840 printk(KERN_ERR "NILFS: get root inode failed\n");
841 err = PTR_ERR(root);
842 goto failed_segctor;
843 }
844 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
845 iput(root);
846 printk(KERN_ERR "NILFS: corrupt root inode.\n");
847 err = -EINVAL;
848 goto failed_segctor;
849 }
850 sb->s_root = d_alloc_root(root);
851 if (!sb->s_root) {
852 iput(root);
853 printk(KERN_ERR "NILFS: get root dentry failed\n");
854 err = -ENOMEM;
855 goto failed_segctor;
856 }
857
858 if (!(sb->s_flags & MS_RDONLY)) {
859 down_write(&nilfs->ns_sem);
860 nilfs_setup_super(sbi);
861 up_write(&nilfs->ns_sem);
862 }
863
864 err = nilfs_mark_recovery_complete(sbi);
865 if (unlikely(err)) {
866 printk(KERN_ERR "NILFS: recovery failed.\n");
867 goto failed_root;
868 }
869
870 return 0;
871
872 failed_root:
873 dput(sb->s_root);
874 sb->s_root = NULL;
875
876 failed_segctor:
877 nilfs_detach_segment_constructor(sbi);
878
879 failed_checkpoint:
880 nilfs_detach_checkpoint(sbi);
881
882 failed_sbi:
883 put_nilfs(nilfs);
884 sb->s_fs_info = NULL;
885 kfree(sbi);
886 return err;
887}
888
889static int nilfs_remount(struct super_block *sb, int *flags, char *data)
890{
891 struct nilfs_sb_info *sbi = NILFS_SB(sb);
892 struct nilfs_super_block *sbp;
893 struct the_nilfs *nilfs = sbi->s_nilfs;
894 unsigned long old_sb_flags;
895 struct nilfs_mount_options old_opts;
896 int err;
897
898 old_sb_flags = sb->s_flags;
899 old_opts.mount_opt = sbi->s_mount_opt;
900 old_opts.snapshot_cno = sbi->s_snapshot_cno;
901
902 if (!parse_options(data, sb)) {
903 err = -EINVAL;
904 goto restore_opts;
905 }
906 sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
907
908 if ((*flags & MS_RDONLY) &&
909 sbi->s_snapshot_cno != old_opts.snapshot_cno) {
910 printk(KERN_WARNING "NILFS (device %s): couldn't "
911 "remount to a different snapshot. \n",
912 sb->s_id);
913 err = -EINVAL;
914 goto restore_opts;
915 }
916
917 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
918 goto out;
919 if (*flags & MS_RDONLY) {
920 /* Shutting down the segment constructor */
921 nilfs_detach_segment_constructor(sbi);
922 sb->s_flags |= MS_RDONLY;
923
924 sbi->s_snapshot_cno = nilfs_last_cno(nilfs);
925 /* nilfs_set_opt(sbi, SNAPSHOT); */
926
927 /*
928 * Remounting a valid RW partition RDONLY, so set
929 * the RDONLY flag and then mark the partition as valid again.
930 */
931 down_write(&nilfs->ns_sem);
932 sbp = nilfs->ns_sbp[0];
933 if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) &&
934 (nilfs->ns_mount_state & NILFS_VALID_FS))
935 sbp->s_state = cpu_to_le16(nilfs->ns_mount_state);
936 sbp->s_mtime = cpu_to_le64(get_seconds());
937 nilfs_commit_super(sbi, 1);
938 up_write(&nilfs->ns_sem);
939 } else {
940 /*
941 * Mounting a RDONLY partition read-write, so reread and
942 * store the current valid flag. (It may have been changed
943 * by fsck since we originally mounted the partition.)
944 */
945 down(&sb->s_bdev->bd_mount_sem);
946 /* Check existing RW-mount */
947 if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) {
948 printk(KERN_WARNING "NILFS (device %s): couldn't "
949 "remount because a RW-mount exists.\n",
950 sb->s_id);
951 err = -EBUSY;
952 goto rw_remount_failed;
953 }
954 if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
955 printk(KERN_WARNING "NILFS (device %s): couldn't "
956 "remount because the current RO-mount is not "
957 "the latest one.\n",
958 sb->s_id);
959 err = -EINVAL;
960 goto rw_remount_failed;
961 }
962 sb->s_flags &= ~MS_RDONLY;
963 nilfs_clear_opt(sbi, SNAPSHOT);
964 sbi->s_snapshot_cno = 0;
965
966 err = nilfs_attach_segment_constructor(sbi);
967 if (err)
968 goto rw_remount_failed;
969
970 down_write(&nilfs->ns_sem);
971 nilfs_setup_super(sbi);
972 up_write(&nilfs->ns_sem);
973
974 up(&sb->s_bdev->bd_mount_sem);
975 }
976 out:
977 return 0;
978
979 rw_remount_failed:
980 up(&sb->s_bdev->bd_mount_sem);
981 restore_opts:
982 sb->s_flags = old_sb_flags;
983 sbi->s_mount_opt = old_opts.mount_opt;
984 sbi->s_snapshot_cno = old_opts.snapshot_cno;
985 return err;
986}
987
988struct nilfs_super_data {
989 struct block_device *bdev;
990 __u64 cno;
991 int flags;
992};
993
994/**
995 * nilfs_identify - pre-read mount options needed to identify mount instance
996 * @data: mount options
997 * @sd: nilfs_super_data
998 */
999static int nilfs_identify(char *data, struct nilfs_super_data *sd)
1000{
1001 char *p, *options = data;
1002 substring_t args[MAX_OPT_ARGS];
1003 int option, token;
1004 int ret = 0;
1005
1006 do {
1007 p = strsep(&options, ",");
1008 if (p != NULL && *p) {
1009 token = match_token(p, tokens, args);
1010 if (token == Opt_snapshot) {
1011 if (!(sd->flags & MS_RDONLY))
1012 ret++;
1013 else {
1014 ret = match_int(&args[0], &option);
1015 if (!ret) {
1016 if (option > 0)
1017 sd->cno = option;
1018 else
1019 ret++;
1020 }
1021 }
1022 }
1023 if (ret)
1024 printk(KERN_ERR
1025 "NILFS: invalid mount option: %s\n", p);
1026 }
1027 if (!options)
1028 break;
1029 BUG_ON(options == data);
1030 *(options - 1) = ',';
1031 } while (!ret);
1032 return ret;
1033}
1034
1035static int nilfs_set_bdev_super(struct super_block *s, void *data)
1036{
1037 struct nilfs_super_data *sd = data;
1038
1039 s->s_bdev = sd->bdev;
1040 s->s_dev = s->s_bdev->bd_dev;
1041 return 0;
1042}
1043
1044static int nilfs_test_bdev_super(struct super_block *s, void *data)
1045{
1046 struct nilfs_super_data *sd = data;
1047
1048 return s->s_bdev == sd->bdev;
1049}
1050
1051static int nilfs_test_bdev_super2(struct super_block *s, void *data)
1052{
1053 struct nilfs_super_data *sd = data;
1054 int ret;
1055
1056 if (s->s_bdev != sd->bdev)
1057 return 0;
1058
1059 if (!((s->s_flags | sd->flags) & MS_RDONLY))
1060 return 1; /* Reuse an old R/W-mode super_block */
1061
1062 if (s->s_flags & sd->flags & MS_RDONLY) {
1063 if (down_read_trylock(&s->s_umount)) {
1064 ret = s->s_root &&
1065 (sd->cno == NILFS_SB(s)->s_snapshot_cno);
1066 up_read(&s->s_umount);
1067 /*
1068 * This path is locked with sb_lock by sget().
1069 * So, drop_super() causes deadlock.
1070 */
1071 return ret;
1072 }
1073 }
1074 return 0;
1075}
1076
1077static int
1078nilfs_get_sb(struct file_system_type *fs_type, int flags,
1079 const char *dev_name, void *data, struct vfsmount *mnt)
1080{
1081 struct nilfs_super_data sd;
1082 struct super_block *s, *s2;
1083 struct the_nilfs *nilfs = NULL;
1084 int err, need_to_close = 1;
1085
1086 sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
1087 if (IS_ERR(sd.bdev))
1088 return PTR_ERR(sd.bdev);
1089
1090 /*
1091 * To get mount instance using sget() vfs-routine, NILFS needs
1092 * much more information than normal filesystems to identify mount
1093 * instance. For snapshot mounts, not only a mount type (ro-mount
1094 * or rw-mount) but also a checkpoint number is required.
1095 * The results are passed in sget() using nilfs_super_data.
1096 */
1097 sd.cno = 0;
1098 sd.flags = flags;
1099 if (nilfs_identify((char *)data, &sd)) {
1100 err = -EINVAL;
1101 goto failed;
1102 }
1103
1104 /*
1105 * once the super is inserted into the list by sget, s_umount
1106 * will protect the lockfs code from trying to start a snapshot
1107 * while we are mounting
1108 */
1109 down(&sd.bdev->bd_mount_sem);
1110 if (!sd.cno &&
1111 (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
1112 err = (err < 0) ? : -EBUSY;
1113 goto failed_unlock;
1114 }
1115
1116 /*
1117 * Phase-1: search any existent instance and get the_nilfs
1118 */
1119 s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
1120 if (IS_ERR(s))
1121 goto error_s;
1122
1123 if (!s->s_root) {
1124 err = -ENOMEM;
1125 nilfs = alloc_nilfs(sd.bdev);
1126 if (!nilfs)
1127 goto cancel_new;
1128 } else {
1129 struct nilfs_sb_info *sbi = NILFS_SB(s);
1130
1131 /*
1132 * s_umount protects super_block from unmount process;
1133 * It covers pointers of nilfs_sb_info and the_nilfs.
1134 */
1135 nilfs = sbi->s_nilfs;
1136 get_nilfs(nilfs);
1137 up_write(&s->s_umount);
1138
1139 /*
1140 * Phase-2: search specified snapshot or R/W mode super_block
1141 */
1142 if (!sd.cno)
1143 /* trying to get the latest checkpoint. */
1144 sd.cno = nilfs_last_cno(nilfs);
1145
1146 s2 = sget(fs_type, nilfs_test_bdev_super2,
1147 nilfs_set_bdev_super, &sd);
1148 deactivate_super(s);
1149 /*
1150 * Although deactivate_super() invokes close_bdev_exclusive() at
1151 * kill_block_super(). Here, s is an existent mount; we need
1152 * one more close_bdev_exclusive() call.
1153 */
1154 s = s2;
1155 if (IS_ERR(s))
1156 goto error_s;
1157 }
1158
1159 if (!s->s_root) {
1160 char b[BDEVNAME_SIZE];
1161
1162 s->s_flags = flags;
1163 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
1164 sb_set_blocksize(s, block_size(sd.bdev));
1165
1166 err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs);
1167 if (err)
1168 goto cancel_new;
1169
1170 s->s_flags |= MS_ACTIVE;
1171 need_to_close = 0;
1172 } else if (!(s->s_flags & MS_RDONLY)) {
1173 err = -EBUSY;
1174 }
1175
1176 up(&sd.bdev->bd_mount_sem);
1177 put_nilfs(nilfs);
1178 if (need_to_close)
1179 close_bdev_exclusive(sd.bdev, flags);
1180 simple_set_mnt(mnt, s);
1181 return 0;
1182
1183 error_s:
1184 up(&sd.bdev->bd_mount_sem);
1185 if (nilfs)
1186 put_nilfs(nilfs);
1187 close_bdev_exclusive(sd.bdev, flags);
1188 return PTR_ERR(s);
1189
1190 failed_unlock:
1191 up(&sd.bdev->bd_mount_sem);
1192 failed:
1193 close_bdev_exclusive(sd.bdev, flags);
1194
1195 return err;
1196
1197 cancel_new:
1198 /* Abandoning the newly allocated superblock */
1199 up(&sd.bdev->bd_mount_sem);
1200 if (nilfs)
1201 put_nilfs(nilfs);
1202 up_write(&s->s_umount);
1203 deactivate_super(s);
1204 /*
1205 * deactivate_super() invokes close_bdev_exclusive().
1206 * We must finish all post-cleaning before this call;
1207 * put_nilfs() and unlocking bd_mount_sem need the block device.
1208 */
1209 return err;
1210}
1211
1212static int nilfs_test_bdev_super3(struct super_block *s, void *data)
1213{
1214 struct nilfs_super_data *sd = data;
1215 int ret;
1216
1217 if (s->s_bdev != sd->bdev)
1218 return 0;
1219 if (down_read_trylock(&s->s_umount)) {
1220 ret = (s->s_flags & MS_RDONLY) && s->s_root &&
1221 nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
1222 up_read(&s->s_umount);
1223 if (ret)
1224 return 0; /* ignore snapshot mounts */
1225 }
1226 return !((sd->flags ^ s->s_flags) & MS_RDONLY);
1227}
1228
1229static int __false_bdev_super(struct super_block *s, void *data)
1230{
1231#if 0 /* XXX: workaround for lock debug. This is not good idea */
1232 up_write(&s->s_umount);
1233#endif
1234 return -EFAULT;
1235}
1236
1237/**
1238 * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
1239 * fs_type: filesystem type
1240 * bdev: block device
1241 * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
1242 * res: pointer to an integer to store result
1243 *
1244 * This function must be called within a section protected by bd_mount_mutex.
1245 */
1246static int test_exclusive_mount(struct file_system_type *fs_type,
1247 struct block_device *bdev, int flags)
1248{
1249 struct super_block *s;
1250 struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };
1251
1252 s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
1253 if (IS_ERR(s)) {
1254 if (PTR_ERR(s) != -EFAULT)
1255 return PTR_ERR(s);
1256 return 0; /* Not found */
1257 }
1258 up_write(&s->s_umount);
1259 deactivate_super(s);
1260 return 1; /* Found */
1261}
1262
1263struct file_system_type nilfs_fs_type = {
1264 .owner = THIS_MODULE,
1265 .name = "nilfs2",
1266 .get_sb = nilfs_get_sb,
1267 .kill_sb = kill_block_super,
1268 .fs_flags = FS_REQUIRES_DEV,
1269};
1270
1271static int __init init_nilfs_fs(void)
1272{
1273 int err;
1274
1275 err = nilfs_init_inode_cache();
1276 if (err)
1277 goto failed;
1278
1279 err = nilfs_init_transaction_cache();
1280 if (err)
1281 goto failed_inode_cache;
1282
1283 err = nilfs_init_segbuf_cache();
1284 if (err)
1285 goto failed_transaction_cache;
1286
1287 err = nilfs_btree_path_cache_init();
1288 if (err)
1289 goto failed_segbuf_cache;
1290
1291 err = register_filesystem(&nilfs_fs_type);
1292 if (err)
1293 goto failed_btree_path_cache;
1294
1295 return 0;
1296
1297 failed_btree_path_cache:
1298 nilfs_btree_path_cache_destroy();
1299
1300 failed_segbuf_cache:
1301 nilfs_destroy_segbuf_cache();
1302
1303 failed_transaction_cache:
1304 nilfs_destroy_transaction_cache();
1305
1306 failed_inode_cache:
1307 nilfs_destroy_inode_cache();
1308
1309 failed:
1310 return err;
1311}
1312
1313static void __exit exit_nilfs_fs(void)
1314{
1315 nilfs_destroy_segbuf_cache();
1316 nilfs_destroy_transaction_cache();
1317 nilfs_destroy_inode_cache();
1318 nilfs_btree_path_cache_destroy();
1319 unregister_filesystem(&nilfs_fs_type);
1320}
1321
1322module_init(init_nilfs_fs)
1323module_exit(exit_nilfs_fs)
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
new file mode 100644
index 000000000000..33400cf0bbe2
--- /dev/null
+++ b/fs/nilfs2/the_nilfs.c
@@ -0,0 +1,637 @@
1/*
2 * the_nilfs.c - the_nilfs shared structure.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#include <linux/buffer_head.h>
25#include <linux/slab.h>
26#include <linux/blkdev.h>
27#include <linux/backing-dev.h>
28#include <linux/crc32.h>
29#include "nilfs.h"
30#include "segment.h"
31#include "alloc.h"
32#include "cpfile.h"
33#include "sufile.h"
34#include "dat.h"
35#include "seglist.h"
36#include "segbuf.h"
37
38void nilfs_set_last_segment(struct the_nilfs *nilfs,
39 sector_t start_blocknr, u64 seq, __u64 cno)
40{
41 spin_lock(&nilfs->ns_last_segment_lock);
42 nilfs->ns_last_pseg = start_blocknr;
43 nilfs->ns_last_seq = seq;
44 nilfs->ns_last_cno = cno;
45 spin_unlock(&nilfs->ns_last_segment_lock);
46}
47
48/**
49 * alloc_nilfs - allocate the_nilfs structure
50 * @bdev: block device to which the_nilfs is related
51 *
52 * alloc_nilfs() allocates memory for the_nilfs and
53 * initializes its reference count and locks.
54 *
55 * Return Value: On success, pointer to the_nilfs is returned.
56 * On error, NULL is returned.
57 */
58struct the_nilfs *alloc_nilfs(struct block_device *bdev)
59{
60 struct the_nilfs *nilfs;
61
62 nilfs = kzalloc(sizeof(*nilfs), GFP_KERNEL);
63 if (!nilfs)
64 return NULL;
65
66 nilfs->ns_bdev = bdev;
67 atomic_set(&nilfs->ns_count, 1);
68 atomic_set(&nilfs->ns_writer_refcount, -1);
69 atomic_set(&nilfs->ns_ndirtyblks, 0);
70 init_rwsem(&nilfs->ns_sem);
71 mutex_init(&nilfs->ns_writer_mutex);
72 INIT_LIST_HEAD(&nilfs->ns_supers);
73 spin_lock_init(&nilfs->ns_last_segment_lock);
74 nilfs->ns_gc_inodes_h = NULL;
75 init_rwsem(&nilfs->ns_segctor_sem);
76
77 return nilfs;
78}
79
80/**
81 * put_nilfs - release a reference to the_nilfs
82 * @nilfs: the_nilfs structure to be released
83 *
84 * put_nilfs() decrements a reference counter of the_nilfs.
85 * If the reference count reaches zero, the_nilfs is freed.
86 */
87void put_nilfs(struct the_nilfs *nilfs)
88{
89 if (!atomic_dec_and_test(&nilfs->ns_count))
90 return;
91 /*
92 * Increment of ns_count never occur below because the caller
93 * of get_nilfs() holds at least one reference to the_nilfs.
94 * Thus its exclusion control is not required here.
95 */
96 might_sleep();
97 if (nilfs_loaded(nilfs)) {
98 nilfs_mdt_clear(nilfs->ns_sufile);
99 nilfs_mdt_destroy(nilfs->ns_sufile);
100 nilfs_mdt_clear(nilfs->ns_cpfile);
101 nilfs_mdt_destroy(nilfs->ns_cpfile);
102 nilfs_mdt_clear(nilfs->ns_dat);
103 nilfs_mdt_destroy(nilfs->ns_dat);
104 /* XXX: how and when to clear nilfs->ns_gc_dat? */
105 nilfs_mdt_destroy(nilfs->ns_gc_dat);
106 }
107 if (nilfs_init(nilfs)) {
108 nilfs_destroy_gccache(nilfs);
109 brelse(nilfs->ns_sbh[0]);
110 brelse(nilfs->ns_sbh[1]);
111 }
112 kfree(nilfs);
113}
114
115static int nilfs_load_super_root(struct the_nilfs *nilfs,
116 struct nilfs_sb_info *sbi, sector_t sr_block)
117{
118 struct buffer_head *bh_sr;
119 struct nilfs_super_root *raw_sr;
120 struct nilfs_super_block **sbp = nilfs->ns_sbp;
121 unsigned dat_entry_size, segment_usage_size, checkpoint_size;
122 unsigned inode_size;
123 int err;
124
125 err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1);
126 if (unlikely(err))
127 return err;
128
129 down_read(&nilfs->ns_sem);
130 dat_entry_size = le16_to_cpu(sbp[0]->s_dat_entry_size);
131 checkpoint_size = le16_to_cpu(sbp[0]->s_checkpoint_size);
132 segment_usage_size = le16_to_cpu(sbp[0]->s_segment_usage_size);
133 up_read(&nilfs->ns_sem);
134
135 inode_size = nilfs->ns_inode_size;
136
137 err = -ENOMEM;
138 nilfs->ns_dat = nilfs_mdt_new(
139 nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
140 if (unlikely(!nilfs->ns_dat))
141 goto failed;
142
143 nilfs->ns_gc_dat = nilfs_mdt_new(
144 nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
145 if (unlikely(!nilfs->ns_gc_dat))
146 goto failed_dat;
147
148 nilfs->ns_cpfile = nilfs_mdt_new(
149 nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP);
150 if (unlikely(!nilfs->ns_cpfile))
151 goto failed_gc_dat;
152
153 nilfs->ns_sufile = nilfs_mdt_new(
154 nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP);
155 if (unlikely(!nilfs->ns_sufile))
156 goto failed_cpfile;
157
158 err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size);
159 if (unlikely(err))
160 goto failed_sufile;
161
162 err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size);
163 if (unlikely(err))
164 goto failed_sufile;
165
166 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
167 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
168 sizeof(struct nilfs_cpfile_header));
169 nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size,
170 sizeof(struct nilfs_sufile_header));
171
172 err = nilfs_mdt_read_inode_direct(
173 nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size));
174 if (unlikely(err))
175 goto failed_sufile;
176
177 err = nilfs_mdt_read_inode_direct(
178 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size));
179 if (unlikely(err))
180 goto failed_sufile;
181
182 err = nilfs_mdt_read_inode_direct(
183 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size));
184 if (unlikely(err))
185 goto failed_sufile;
186
187 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
188 nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime);
189
190 failed:
191 brelse(bh_sr);
192 return err;
193
194 failed_sufile:
195 nilfs_mdt_destroy(nilfs->ns_sufile);
196
197 failed_cpfile:
198 nilfs_mdt_destroy(nilfs->ns_cpfile);
199
200 failed_gc_dat:
201 nilfs_mdt_destroy(nilfs->ns_gc_dat);
202
203 failed_dat:
204 nilfs_mdt_destroy(nilfs->ns_dat);
205 goto failed;
206}
207
208static void nilfs_init_recovery_info(struct nilfs_recovery_info *ri)
209{
210 memset(ri, 0, sizeof(*ri));
211 INIT_LIST_HEAD(&ri->ri_used_segments);
212}
213
214static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri)
215{
216 nilfs_dispose_segment_list(&ri->ri_used_segments);
217}
218
219/**
220 * load_nilfs - load and recover the nilfs
221 * @nilfs: the_nilfs structure to be released
222 * @sbi: nilfs_sb_info used to recover past segment
223 *
224 * load_nilfs() searches and load the latest super root,
225 * attaches the last segment, and does recovery if needed.
226 * The caller must call this exclusively for simultaneous mounts.
227 */
228int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
229{
230 struct nilfs_recovery_info ri;
231 unsigned int s_flags = sbi->s_super->s_flags;
232 int really_read_only = bdev_read_only(nilfs->ns_bdev);
233 unsigned valid_fs;
234 int err = 0;
235
236 nilfs_init_recovery_info(&ri);
237
238 down_write(&nilfs->ns_sem);
239 valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS);
240 up_write(&nilfs->ns_sem);
241
242 if (!valid_fs && (s_flags & MS_RDONLY)) {
243 printk(KERN_INFO "NILFS: INFO: recovery "
244 "required for readonly filesystem.\n");
245 if (really_read_only) {
246 printk(KERN_ERR "NILFS: write access "
247 "unavailable, cannot proceed.\n");
248 err = -EROFS;
249 goto failed;
250 }
251 printk(KERN_INFO "NILFS: write access will "
252 "be enabled during recovery.\n");
253 sbi->s_super->s_flags &= ~MS_RDONLY;
254 }
255
256 err = nilfs_search_super_root(nilfs, sbi, &ri);
257 if (unlikely(err)) {
258 printk(KERN_ERR "NILFS: error searching super root.\n");
259 goto failed;
260 }
261
262 err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root);
263 if (unlikely(err)) {
264 printk(KERN_ERR "NILFS: error loading super root.\n");
265 goto failed;
266 }
267
268 if (!valid_fs) {
269 err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
270 if (unlikely(err)) {
271 nilfs_mdt_destroy(nilfs->ns_cpfile);
272 nilfs_mdt_destroy(nilfs->ns_sufile);
273 nilfs_mdt_destroy(nilfs->ns_dat);
274 goto failed;
275 }
276 if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED)
277 sbi->s_super->s_dirt = 1;
278 }
279
280 set_nilfs_loaded(nilfs);
281
282 failed:
283 nilfs_clear_recovery_info(&ri);
284 sbi->s_super->s_flags = s_flags;
285 return err;
286}
287
288static unsigned long long nilfs_max_size(unsigned int blkbits)
289{
290 unsigned int max_bits;
291 unsigned long long res = MAX_LFS_FILESIZE; /* page cache limit */
292
293 max_bits = blkbits + NILFS_BMAP_KEY_BIT; /* bmap size limit */
294 if (max_bits < 64)
295 res = min_t(unsigned long long, res, (1ULL << max_bits) - 1);
296 return res;
297}
298
299static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
300 struct nilfs_super_block *sbp)
301{
302 if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) {
303 printk(KERN_ERR "NILFS: revision mismatch "
304 "(superblock rev.=%d.%d, current rev.=%d.%d). "
305 "Please check the version of mkfs.nilfs.\n",
306 le32_to_cpu(sbp->s_rev_level),
307 le16_to_cpu(sbp->s_minor_rev_level),
308 NILFS_CURRENT_REV, NILFS_MINOR_REV);
309 return -EINVAL;
310 }
311 nilfs->ns_sbsize = le16_to_cpu(sbp->s_bytes);
312 if (nilfs->ns_sbsize > BLOCK_SIZE)
313 return -EINVAL;
314
315 nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size);
316 nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino);
317
318 nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
319 if (nilfs->ns_blocks_per_segment < NILFS_SEG_MIN_BLOCKS) {
320 printk(KERN_ERR "NILFS: too short segment. \n");
321 return -EINVAL;
322 }
323
324 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
325 nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
326 nilfs->ns_r_segments_percentage =
327 le32_to_cpu(sbp->s_r_segments_percentage);
328 nilfs->ns_nrsvsegs =
329 max_t(unsigned long, NILFS_MIN_NRSVSEGS,
330 DIV_ROUND_UP(nilfs->ns_nsegments *
331 nilfs->ns_r_segments_percentage, 100));
332 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
333 return 0;
334}
335
336static int nilfs_valid_sb(struct nilfs_super_block *sbp)
337{
338 static unsigned char sum[4];
339 const int sumoff = offsetof(struct nilfs_super_block, s_sum);
340 size_t bytes;
341 u32 crc;
342
343 if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
344 return 0;
345 bytes = le16_to_cpu(sbp->s_bytes);
346 if (bytes > BLOCK_SIZE)
347 return 0;
348 crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
349 sumoff);
350 crc = crc32_le(crc, sum, 4);
351 crc = crc32_le(crc, (unsigned char *)sbp + sumoff + 4,
352 bytes - sumoff - 4);
353 return crc == le32_to_cpu(sbp->s_sum);
354}
355
356static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
357{
358 return offset < ((le64_to_cpu(sbp->s_nsegments) *
359 le32_to_cpu(sbp->s_blocks_per_segment)) <<
360 (le32_to_cpu(sbp->s_log_block_size) + 10));
361}
362
363static void nilfs_release_super_block(struct the_nilfs *nilfs)
364{
365 int i;
366
367 for (i = 0; i < 2; i++) {
368 if (nilfs->ns_sbp[i]) {
369 brelse(nilfs->ns_sbh[i]);
370 nilfs->ns_sbh[i] = NULL;
371 nilfs->ns_sbp[i] = NULL;
372 }
373 }
374}
375
376void nilfs_fall_back_super_block(struct the_nilfs *nilfs)
377{
378 brelse(nilfs->ns_sbh[0]);
379 nilfs->ns_sbh[0] = nilfs->ns_sbh[1];
380 nilfs->ns_sbp[0] = nilfs->ns_sbp[1];
381 nilfs->ns_sbh[1] = NULL;
382 nilfs->ns_sbp[1] = NULL;
383}
384
385void nilfs_swap_super_block(struct the_nilfs *nilfs)
386{
387 struct buffer_head *tsbh = nilfs->ns_sbh[0];
388 struct nilfs_super_block *tsbp = nilfs->ns_sbp[0];
389
390 nilfs->ns_sbh[0] = nilfs->ns_sbh[1];
391 nilfs->ns_sbp[0] = nilfs->ns_sbp[1];
392 nilfs->ns_sbh[1] = tsbh;
393 nilfs->ns_sbp[1] = tsbp;
394}
395
396static int nilfs_load_super_block(struct the_nilfs *nilfs,
397 struct super_block *sb, int blocksize,
398 struct nilfs_super_block **sbpp)
399{
400 struct nilfs_super_block **sbp = nilfs->ns_sbp;
401 struct buffer_head **sbh = nilfs->ns_sbh;
402 u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
403 int valid[2], swp = 0;
404
405 sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
406 &sbh[0]);
407 sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);
408
409 if (!sbp[0]) {
410 if (!sbp[1]) {
411 printk(KERN_ERR "NILFS: unable to read superblock\n");
412 return -EIO;
413 }
414 printk(KERN_WARNING
415 "NILFS warning: unable to read primary superblock\n");
416 } else if (!sbp[1])
417 printk(KERN_WARNING
418 "NILFS warning: unable to read secondary superblock\n");
419
420 valid[0] = nilfs_valid_sb(sbp[0]);
421 valid[1] = nilfs_valid_sb(sbp[1]);
422 swp = valid[1] &&
423 (!valid[0] ||
424 le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime));
425
426 if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) {
427 brelse(sbh[1]);
428 sbh[1] = NULL;
429 sbp[1] = NULL;
430 swp = 0;
431 }
432 if (!valid[swp]) {
433 nilfs_release_super_block(nilfs);
434 printk(KERN_ERR "NILFS: Can't find nilfs on dev %s.\n",
435 sb->s_id);
436 return -EINVAL;
437 }
438
439 if (swp) {
440 printk(KERN_WARNING "NILFS warning: broken superblock. "
441 "using spare superblock.\n");
442 nilfs_swap_super_block(nilfs);
443 }
444
445 nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime);
446 nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0;
447 nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq);
448 *sbpp = sbp[0];
449 return 0;
450}
451
452/**
453 * init_nilfs - initialize a NILFS instance.
454 * @nilfs: the_nilfs structure
455 * @sbi: nilfs_sb_info
456 * @sb: super block
457 * @data: mount options
458 *
459 * init_nilfs() performs common initialization per block device (e.g.
460 * reading the super block, getting disk layout information, initializing
461 * shared fields in the_nilfs). It takes on some portion of the jobs
462 * typically done by a fill_super() routine. This division arises from
463 * the nature that multiple NILFS instances may be simultaneously
464 * mounted on a device.
465 * For multiple mounts on the same device, only the first mount
466 * invokes these tasks.
467 *
468 * Return Value: On success, 0 is returned. On error, a negative error
469 * code is returned.
470 */
471int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
472{
473 struct super_block *sb = sbi->s_super;
474 struct nilfs_super_block *sbp;
475 struct backing_dev_info *bdi;
476 int blocksize;
477 int err;
478
479 down_write(&nilfs->ns_sem);
480 if (nilfs_init(nilfs)) {
481 /* Load values from existing the_nilfs */
482 sbp = nilfs->ns_sbp[0];
483 err = nilfs_store_magic_and_option(sb, sbp, data);
484 if (err)
485 goto out;
486
487 blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
488 if (sb->s_blocksize != blocksize &&
489 !sb_set_blocksize(sb, blocksize)) {
490 printk(KERN_ERR "NILFS: blocksize %d unfit to device\n",
491 blocksize);
492 err = -EINVAL;
493 }
494 sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits);
495 goto out;
496 }
497
498 blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
499 if (!blocksize) {
500 printk(KERN_ERR "NILFS: unable to set blocksize\n");
501 err = -EINVAL;
502 goto out;
503 }
504 err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
505 if (err)
506 goto out;
507
508 err = nilfs_store_magic_and_option(sb, sbp, data);
509 if (err)
510 goto failed_sbh;
511
512 blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size);
513 if (sb->s_blocksize != blocksize) {
514 int hw_blocksize = bdev_hardsect_size(sb->s_bdev);
515
516 if (blocksize < hw_blocksize) {
517 printk(KERN_ERR
518 "NILFS: blocksize %d too small for device "
519 "(sector-size = %d).\n",
520 blocksize, hw_blocksize);
521 err = -EINVAL;
522 goto failed_sbh;
523 }
524 nilfs_release_super_block(nilfs);
525 sb_set_blocksize(sb, blocksize);
526
527 err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
528 if (err)
529 goto out;
530 /* not failed_sbh; sbh is released automatically
531 when reloading fails. */
532 }
533 nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
534
535 err = nilfs_store_disk_layout(nilfs, sbp);
536 if (err)
537 goto failed_sbh;
538
539 sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits);
540
541 nilfs->ns_mount_state = le16_to_cpu(sbp->s_state);
542
543 bdi = nilfs->ns_bdev->bd_inode_backing_dev_info;
544 if (!bdi)
545 bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info;
546 nilfs->ns_bdi = bdi ? : &default_backing_dev_info;
547
548 /* Finding last segment */
549 nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg);
550 nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno);
551 nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq);
552
553 nilfs->ns_seg_seq = nilfs->ns_last_seq;
554 nilfs->ns_segnum =
555 nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg);
556 nilfs->ns_cno = nilfs->ns_last_cno + 1;
557 if (nilfs->ns_segnum >= nilfs->ns_nsegments) {
558 printk(KERN_ERR "NILFS invalid last segment number.\n");
559 err = -EINVAL;
560 goto failed_sbh;
561 }
562 /* Dummy values */
563 nilfs->ns_free_segments_count =
564 nilfs->ns_nsegments - (nilfs->ns_segnum + 1);
565
566 /* Initialize gcinode cache */
567 err = nilfs_init_gccache(nilfs);
568 if (err)
569 goto failed_sbh;
570
571 set_nilfs_init(nilfs);
572 err = 0;
573 out:
574 up_write(&nilfs->ns_sem);
575 return err;
576
577 failed_sbh:
578 nilfs_release_super_block(nilfs);
579 goto out;
580}
581
582int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
583{
584 struct inode *dat = nilfs_dat_inode(nilfs);
585 unsigned long ncleansegs;
586 int err;
587
588 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
589 err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs);
590 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
591 if (likely(!err))
592 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
593 return err;
594}
595
596int nilfs_near_disk_full(struct the_nilfs *nilfs)
597{
598 struct inode *sufile = nilfs->ns_sufile;
599 unsigned long ncleansegs, nincsegs;
600 int ret;
601
602 ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs);
603 if (likely(!ret)) {
604 nincsegs = atomic_read(&nilfs->ns_ndirtyblks) /
605 nilfs->ns_blocks_per_segment + 1;
606 if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs)
607 ret++;
608 }
609 return ret;
610}
611
612int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
613 int snapshot_mount)
614{
615 struct nilfs_sb_info *sbi;
616 int ret = 0;
617
618 down_read(&nilfs->ns_sem);
619 if (cno == 0 || cno > nilfs->ns_cno)
620 goto out_unlock;
621
622 list_for_each_entry(sbi, &nilfs->ns_supers, s_list) {
623 if (sbi->s_snapshot_cno == cno &&
624 (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) {
625 /* exclude read-only mounts */
626 ret++;
627 break;
628 }
629 }
630 /* for protecting recent checkpoints */
631 if (cno >= nilfs_last_cno(nilfs))
632 ret++;
633
634 out_unlock:
635 up_read(&nilfs->ns_sem);
636 return ret;
637}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
new file mode 100644
index 000000000000..30fe58778d05
--- /dev/null
+++ b/fs/nilfs2/the_nilfs.h
@@ -0,0 +1,298 @@
1/*
2 * the_nilfs.h - the_nilfs shared structure.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#ifndef _THE_NILFS_H
25#define _THE_NILFS_H
26
27#include <linux/types.h>
28#include <linux/buffer_head.h>
29#include <linux/fs.h>
30#include <linux/blkdev.h>
31#include <linux/backing-dev.h>
32#include "sb.h"
33
/* Bit numbers used in the_nilfs::ns_flags */
enum {
	/* Information from super_block is set */
	THE_NILFS_INIT = 0,
	/* Roll-back/roll-forward has done and the latest checkpoint
	   was loaded */
	THE_NILFS_LOADED,
	/* 'next' pointer chain has broken */
	THE_NILFS_DISCONTINUED,
};
41
42/**
43 * struct the_nilfs - struct to supervise multiple nilfs mount points
44 * @ns_flags: flags
45 * @ns_count: reference count
46 * @ns_bdev: block device
47 * @ns_bdi: backing dev info
48 * @ns_writer: back pointer to writable nilfs_sb_info
49 * @ns_sem: semaphore for shared states
50 * @ns_writer_mutex: mutex protecting ns_writer attach/detach
51 * @ns_writer_refcount: number of referrers on ns_writer
52 * @ns_sbh: buffer heads of on-disk super blocks
53 * @ns_sbp: pointers to super block data
54 * @ns_sbwtime: previous write time of super blocks
55 * @ns_sbsize: size of valid data in super block
56 * @ns_supers: list of nilfs super block structs
57 * @ns_seg_seq: segment sequence counter
58 * @ns_segnum: index number of the latest full segment.
59 * @ns_nextnum: index number of the full segment index to be used next
60 * @ns_pseg_offset: offset of next partial segment in the current full segment
61 * @ns_cno: next checkpoint number
62 * @ns_ctime: write time of the last segment
63 * @ns_nongc_ctime: write time of the last segment not for cleaner operation
64 * @ns_ndirtyblks: Number of dirty data blocks
65 * @ns_last_segment_lock: lock protecting fields for the latest segment
66 * @ns_last_pseg: start block number of the latest segment
67 * @ns_last_seq: sequence value of the latest segment
68 * @ns_last_cno: checkpoint number of the latest segment
69 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
70 * @ns_free_segments_count: counter of free segments
71 * @ns_segctor_sem: segment constructor semaphore
72 * @ns_dat: DAT file inode
73 * @ns_cpfile: checkpoint file inode
74 * @ns_sufile: segusage file inode
75 * @ns_gc_dat: shadow inode of the DAT file inode for GC
76 * @ns_gc_inodes: dummy inodes to keep live blocks
77 * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks
78 * @ns_blocksize_bits: bit length of block size
79 * @ns_nsegments: number of segments in filesystem
80 * @ns_blocks_per_segment: number of blocks per segment
81 * @ns_r_segments_percentage: reserved segments percentage
82 * @ns_nrsvsegs: number of reserved segments
83 * @ns_first_data_block: block number of first data block
84 * @ns_inode_size: size of on-disk inode
85 * @ns_first_ino: first not-special inode number
86 * @ns_crc_seed: seed value of CRC32 calculation
87 */
88struct the_nilfs {
89 unsigned long ns_flags;
90 atomic_t ns_count;
91
92 struct block_device *ns_bdev;
93 struct backing_dev_info *ns_bdi;
94 struct nilfs_sb_info *ns_writer;
95 struct rw_semaphore ns_sem;
96 struct mutex ns_writer_mutex;
97 atomic_t ns_writer_refcount;
98
99 /*
100 * used for
101 * - loading the latest checkpoint exclusively.
102 * - allocating a new full segment.
103 * - protecting s_dirt in the super_block struct
104 * (see nilfs_write_super) and the following fields.
105 */
106 struct buffer_head *ns_sbh[2];
107 struct nilfs_super_block *ns_sbp[2];
108 time_t ns_sbwtime[2];
109 unsigned ns_sbsize;
110 unsigned ns_mount_state;
111 struct list_head ns_supers;
112
113 /*
114 * Following fields are dedicated to a writable FS-instance.
115 * Except for the period seeking checkpoint, code outside the segment
116 * constructor must lock a segment semaphore while accessing these
117 * fields.
118 * The writable FS-instance is sole during a lifetime of the_nilfs.
119 */
120 u64 ns_seg_seq;
121 __u64 ns_segnum;
122 __u64 ns_nextnum;
123 unsigned long ns_pseg_offset;
124 __u64 ns_cno;
125 time_t ns_ctime;
126 time_t ns_nongc_ctime;
127 atomic_t ns_ndirtyblks;
128
129 /*
130 * The following fields hold information on the latest partial segment
131 * written to disk with a super root. These fields are protected by
132 * ns_last_segment_lock.
133 */
134 spinlock_t ns_last_segment_lock;
135 sector_t ns_last_pseg;
136 u64 ns_last_seq;
137 __u64 ns_last_cno;
138 u64 ns_prot_seq;
139 unsigned long ns_free_segments_count;
140
141 struct rw_semaphore ns_segctor_sem;
142
143 /*
144 * Following fields are lock free except for the period before
145 * the_nilfs is initialized.
146 */
147 struct inode *ns_dat;
148 struct inode *ns_cpfile;
149 struct inode *ns_sufile;
150 struct inode *ns_gc_dat;
151
152 /* GC inode list and hash table head */
153 struct list_head ns_gc_inodes;
154 struct hlist_head *ns_gc_inodes_h;
155
156 /* Disk layout information (static) */
157 unsigned int ns_blocksize_bits;
158 unsigned long ns_nsegments;
159 unsigned long ns_blocks_per_segment;
160 unsigned long ns_r_segments_percentage;
161 unsigned long ns_nrsvsegs;
162 unsigned long ns_first_data_block;
163 int ns_inode_size;
164 int ns_first_ino;
165 u32 ns_crc_seed;
166};
167
168#define NILFS_GCINODE_HASH_BITS 8
169#define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS)
170
171#define THE_NILFS_FNS(bit, name) \
172static inline void set_nilfs_##name(struct the_nilfs *nilfs) \
173{ \
174 set_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
175} \
176static inline void clear_nilfs_##name(struct the_nilfs *nilfs) \
177{ \
178 clear_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
179} \
180static inline int nilfs_##name(struct the_nilfs *nilfs) \
181{ \
182 return test_bit(THE_NILFS_##bit, &(nilfs)->ns_flags); \
183}
184
185THE_NILFS_FNS(INIT, init)
186THE_NILFS_FNS(LOADED, loaded)
187THE_NILFS_FNS(DISCONTINUED, discontinued)
188
189/* Minimum interval of periodical update of superblocks (in seconds) */
190#define NILFS_SB_FREQ 10
191#define NILFS_ALTSB_FREQ 60 /* spare superblock */
192
193void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
194struct the_nilfs *alloc_nilfs(struct block_device *);
195void put_nilfs(struct the_nilfs *);
196int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
197int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
198int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
199int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
200int nilfs_near_disk_full(struct the_nilfs *);
201void nilfs_fall_back_super_block(struct the_nilfs *);
202void nilfs_swap_super_block(struct the_nilfs *);
203
204
205static inline void get_nilfs(struct the_nilfs *nilfs)
206{
207 /* Caller must have at least one reference of the_nilfs. */
208 atomic_inc(&nilfs->ns_count);
209}
210
211static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
212{
213 if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
214 mutex_lock(&nilfs->ns_writer_mutex);
215 return nilfs->ns_writer;
216}
217
218static inline void nilfs_put_writer(struct the_nilfs *nilfs)
219{
220 if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
221 mutex_unlock(&nilfs->ns_writer_mutex);
222}
223
224static inline void
225nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
226{
227 mutex_lock(&nilfs->ns_writer_mutex);
228 nilfs->ns_writer = sbi;
229 mutex_unlock(&nilfs->ns_writer_mutex);
230}
231
232static inline void
233nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
234{
235 mutex_lock(&nilfs->ns_writer_mutex);
236 if (sbi == nilfs->ns_writer)
237 nilfs->ns_writer = NULL;
238 mutex_unlock(&nilfs->ns_writer_mutex);
239}
240
241static inline void
242nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
243 sector_t *seg_start, sector_t *seg_end)
244{
245 *seg_start = (sector_t)nilfs->ns_blocks_per_segment * segnum;
246 *seg_end = *seg_start + nilfs->ns_blocks_per_segment - 1;
247 if (segnum == 0)
248 *seg_start = nilfs->ns_first_data_block;
249}
250
251static inline sector_t
252nilfs_get_segment_start_blocknr(struct the_nilfs *nilfs, __u64 segnum)
253{
254 return (segnum == 0) ? nilfs->ns_first_data_block :
255 (sector_t)nilfs->ns_blocks_per_segment * segnum;
256}
257
258static inline __u64
259nilfs_get_segnum_of_block(struct the_nilfs *nilfs, sector_t blocknr)
260{
261 sector_t segnum = blocknr;
262
263 sector_div(segnum, nilfs->ns_blocks_per_segment);
264 return segnum;
265}
266
267static inline void
268nilfs_terminate_segment(struct the_nilfs *nilfs, sector_t seg_start,
269 sector_t seg_end)
270{
271 /* terminate the current full segment (used in case of I/O-error) */
272 nilfs->ns_pseg_offset = seg_end - seg_start + 1;
273}
274
275static inline void nilfs_shift_to_next_segment(struct the_nilfs *nilfs)
276{
277 /* move forward with a full segment */
278 nilfs->ns_segnum = nilfs->ns_nextnum;
279 nilfs->ns_pseg_offset = 0;
280 nilfs->ns_seg_seq++;
281}
282
283static inline __u64 nilfs_last_cno(struct the_nilfs *nilfs)
284{
285 __u64 cno;
286
287 spin_lock(&nilfs->ns_last_segment_lock);
288 cno = nilfs->ns_last_cno;
289 spin_unlock(&nilfs->ns_last_segment_lock);
290 return cno;
291}
292
293static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n)
294{
295 return n == nilfs->ns_segnum || n == nilfs->ns_nextnum;
296}
297
298#endif /* _THE_NILFS_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a5887df2cd8a..8672b9536039 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1926,7 +1926,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1926 out->f_path.dentry->d_name.len, 1926 out->f_path.dentry->d_name.len,
1927 out->f_path.dentry->d_name.name); 1927 out->f_path.dentry->d_name.name);
1928 1928
1929 inode_double_lock(inode, pipe->inode); 1929 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1930 1930
1931 ret = ocfs2_rw_lock(inode, 1); 1931 ret = ocfs2_rw_lock(inode, 1);
1932 if (ret < 0) { 1932 if (ret < 0) {
@@ -1941,12 +1941,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1941 goto out_unlock; 1941 goto out_unlock;
1942 } 1942 }
1943 1943
1944 if (pipe->inode)
1945 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1944 ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); 1946 ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
1947 if (pipe->inode)
1948 mutex_unlock(&pipe->inode->i_mutex);
1945 1949
1946out_unlock: 1950out_unlock:
1947 ocfs2_rw_unlock(inode, 1); 1951 ocfs2_rw_unlock(inode, 1);
1948out: 1952out:
1949 inode_double_unlock(inode, pipe->inode); 1953 mutex_unlock(&inode->i_mutex);
1950 1954
1951 mlog_exit(ret); 1955 mlog_exit(ret);
1952 return ret; 1956 return ret;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b0ae0be4801f..39e4ad4f59f4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -204,6 +204,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
204 struct file *file = vma->vm_file; 204 struct file *file = vma->vm_file;
205 int flags = vma->vm_flags; 205 int flags = vma->vm_flags;
206 unsigned long ino = 0; 206 unsigned long ino = 0;
207 unsigned long long pgoff = 0;
207 dev_t dev = 0; 208 dev_t dev = 0;
208 int len; 209 int len;
209 210
@@ -211,6 +212,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
211 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 212 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
212 dev = inode->i_sb->s_dev; 213 dev = inode->i_sb->s_dev;
213 ino = inode->i_ino; 214 ino = inode->i_ino;
215 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
214 } 216 }
215 217
216 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 218 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
@@ -220,7 +222,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
220 flags & VM_WRITE ? 'w' : '-', 222 flags & VM_WRITE ? 'w' : '-',
221 flags & VM_EXEC ? 'x' : '-', 223 flags & VM_EXEC ? 'x' : '-',
222 flags & VM_MAYSHARE ? 's' : 'p', 224 flags & VM_MAYSHARE ? 's' : 'p',
223 ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, 225 pgoff,
224 MAJOR(dev), MINOR(dev), ino, &len); 226 MAJOR(dev), MINOR(dev), ino, &len);
225 227
226 /* 228 /*
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 863464d5519c..64a72e2e7650 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -126,6 +126,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
126 struct file *file; 126 struct file *file;
127 dev_t dev = 0; 127 dev_t dev = 0;
128 int flags, len; 128 int flags, len;
129 unsigned long long pgoff = 0;
129 130
130 flags = vma->vm_flags; 131 flags = vma->vm_flags;
131 file = vma->vm_file; 132 file = vma->vm_file;
@@ -134,6 +135,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
134 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 135 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
135 dev = inode->i_sb->s_dev; 136 dev = inode->i_sb->s_dev;
136 ino = inode->i_ino; 137 ino = inode->i_ino;
138 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
137 } 139 }
138 140
139 seq_printf(m, 141 seq_printf(m,
@@ -144,7 +146,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
144 flags & VM_WRITE ? 'w' : '-', 146 flags & VM_WRITE ? 'w' : '-',
145 flags & VM_EXEC ? 'x' : '-', 147 flags & VM_EXEC ? 'x' : '-',
146 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', 148 flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
147 (unsigned long long) vma->vm_pgoff << PAGE_SHIFT, 149 pgoff,
148 MAJOR(dev), MINOR(dev), ino, &len); 150 MAJOR(dev), MINOR(dev), ino, &len);
149 151
150 if (file) { 152 if (file) {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a404fb88e456..3a6b193d8444 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -221,22 +221,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
221 save_mount_options(sb, data); 221 save_mount_options(sb, data);
222 222
223 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL); 223 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
224 sb->s_fs_info = fsi;
224 if (!fsi) { 225 if (!fsi) {
225 err = -ENOMEM; 226 err = -ENOMEM;
226 goto fail; 227 goto fail;
227 } 228 }
228 sb->s_fs_info = fsi;
229 229
230 err = ramfs_parse_options(data, &fsi->mount_opts); 230 err = ramfs_parse_options(data, &fsi->mount_opts);
231 if (err) 231 if (err)
232 goto fail; 232 goto fail;
233 233
234 sb->s_maxbytes = MAX_LFS_FILESIZE; 234 sb->s_maxbytes = MAX_LFS_FILESIZE;
235 sb->s_blocksize = PAGE_CACHE_SIZE; 235 sb->s_blocksize = PAGE_CACHE_SIZE;
236 sb->s_blocksize_bits = PAGE_CACHE_SHIFT; 236 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
237 sb->s_magic = RAMFS_MAGIC; 237 sb->s_magic = RAMFS_MAGIC;
238 sb->s_op = &ramfs_ops; 238 sb->s_op = &ramfs_ops;
239 sb->s_time_gran = 1; 239 sb->s_time_gran = 1;
240
240 inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0); 241 inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0);
241 if (!inode) { 242 if (!inode) {
242 err = -ENOMEM; 243 err = -ENOMEM;
@@ -244,14 +245,16 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
244 } 245 }
245 246
246 root = d_alloc_root(inode); 247 root = d_alloc_root(inode);
248 sb->s_root = root;
247 if (!root) { 249 if (!root) {
248 err = -ENOMEM; 250 err = -ENOMEM;
249 goto fail; 251 goto fail;
250 } 252 }
251 sb->s_root = root; 253
252 return 0; 254 return 0;
253fail: 255fail:
254 kfree(fsi); 256 kfree(fsi);
257 sb->s_fs_info = NULL;
255 iput(inode); 258 iput(inode);
256 return err; 259 return err;
257} 260}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 1e548a4975ba..10ca7d984a8b 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -408,12 +408,17 @@ static void romfs_destroy_inode(struct inode *inode)
408 */ 408 */
409static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) 409static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
410{ 410{
411 struct super_block *sb = dentry->d_sb;
412 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
413
411 buf->f_type = ROMFS_MAGIC; 414 buf->f_type = ROMFS_MAGIC;
412 buf->f_namelen = ROMFS_MAXFN; 415 buf->f_namelen = ROMFS_MAXFN;
413 buf->f_bsize = ROMBSIZE; 416 buf->f_bsize = ROMBSIZE;
414 buf->f_bfree = buf->f_bavail = buf->f_ffree; 417 buf->f_bfree = buf->f_bavail = buf->f_ffree;
415 buf->f_blocks = 418 buf->f_blocks =
416 (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS; 419 (romfs_maxsize(dentry->d_sb) + ROMBSIZE - 1) >> ROMBSBITS;
420 buf->f_fsid.val[0] = (u32)id;
421 buf->f_fsid.val[1] = (u32)(id >> 32);
417 return 0; 422 return 0;
418} 423}
419 424
diff --git a/fs/splice.c b/fs/splice.c
index dd727d43e5b7..c18aa7e03e2b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -737,10 +737,19 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
737 * ->write_end. Most of the time, these expect i_mutex to 737 * ->write_end. Most of the time, these expect i_mutex to
738 * be held. Since this may result in an ABBA deadlock with 738 * be held. Since this may result in an ABBA deadlock with
739 * pipe->inode, we have to order lock acquiry here. 739 * pipe->inode, we have to order lock acquiry here.
740 *
741 * Outer lock must be inode->i_mutex, as pipe_wait() will
742 * release and reacquire pipe->inode->i_mutex, AND inode must
743 * never be a pipe.
740 */ 744 */
741 inode_double_lock(inode, pipe->inode); 745 WARN_ON(S_ISFIFO(inode->i_mode));
746 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
747 if (pipe->inode)
748 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
742 ret = __splice_from_pipe(pipe, &sd, actor); 749 ret = __splice_from_pipe(pipe, &sd, actor);
743 inode_double_unlock(inode, pipe->inode); 750 if (pipe->inode)
751 mutex_unlock(&pipe->inode->i_mutex);
752 mutex_unlock(&inode->i_mutex);
744 753
745 return ret; 754 return ret;
746} 755}
@@ -831,11 +840,17 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
831 }; 840 };
832 ssize_t ret; 841 ssize_t ret;
833 842
834 inode_double_lock(inode, pipe->inode); 843 WARN_ON(S_ISFIFO(inode->i_mode));
844 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
835 ret = file_remove_suid(out); 845 ret = file_remove_suid(out);
836 if (likely(!ret)) 846 if (likely(!ret)) {
847 if (pipe->inode)
848 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
837 ret = __splice_from_pipe(pipe, &sd, pipe_to_file); 849 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
838 inode_double_unlock(inode, pipe->inode); 850 if (pipe->inode)
851 mutex_unlock(&pipe->inode->i_mutex);
852 }
853 mutex_unlock(&inode->i_mutex);
839 if (ret > 0) { 854 if (ret > 0) {
840 unsigned long nr_pages; 855 unsigned long nr_pages;
841 856
diff --git a/fs/super.c b/fs/super.c
index 77cb4ec919b9..786fe7d72790 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -771,6 +771,46 @@ void kill_litter_super(struct super_block *sb)
771 771
772EXPORT_SYMBOL(kill_litter_super); 772EXPORT_SYMBOL(kill_litter_super);
773 773
774static int ns_test_super(struct super_block *sb, void *data)
775{
776 return sb->s_fs_info == data;
777}
778
779static int ns_set_super(struct super_block *sb, void *data)
780{
781 sb->s_fs_info = data;
782 return set_anon_super(sb, NULL);
783}
784
785int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
786 int (*fill_super)(struct super_block *, void *, int),
787 struct vfsmount *mnt)
788{
789 struct super_block *sb;
790
791 sb = sget(fs_type, ns_test_super, ns_set_super, data);
792 if (IS_ERR(sb))
793 return PTR_ERR(sb);
794
795 if (!sb->s_root) {
796 int err;
797 sb->s_flags = flags;
798 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
799 if (err) {
800 up_write(&sb->s_umount);
801 deactivate_super(sb);
802 return err;
803 }
804
805 sb->s_flags |= MS_ACTIVE;
806 }
807
808 simple_set_mnt(mnt, sb);
809 return 0;
810}
811
812EXPORT_SYMBOL(get_sb_ns);
813
774#ifdef CONFIG_BLOCK 814#ifdef CONFIG_BLOCK
775static int set_bdev_super(struct super_block *s, void *data) 815static int set_bdev_super(struct super_block *s, void *data)
776{ 816{