aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorRyusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>2009-04-06 22:01:37 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-07 11:31:15 -0400
commit9ff05123e3bfbb1d2b68ba1d9bf1f7d1dffc1453 (patch)
tree056c7bdc2395c8baf77bc63a54a1f747cbf5b650 /fs
parent64b5a32e0b3680a9655b3f2e668a646068e71d33 (diff)
nilfs2: segment constructor
This adds the segment constructor (also called log writer). The segment constructor collects dirty buffers for every dirty inode, makes summaries of the buffers, assigns disk block addresses to the buffers, and then submits BIOs for the buffers. Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/nilfs2/file.c62
-rw-r--r--fs/nilfs2/seglist.h85
-rw-r--r--fs/nilfs2/segment.c3187
-rw-r--r--fs/nilfs2/segment.h246
4 files changed, 3577 insertions, 3 deletions
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 7ddd42e24f77..8031086db8d5 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -73,10 +73,66 @@ nilfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
73 return ret; 73 return ret;
74} 74}
75 75
76static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) 76static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
77{ 77{
78 if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) 78 struct page *page = vmf->page;
79 return -EPERM; 79 struct inode *inode = vma->vm_file->f_dentry->d_inode;
80 struct nilfs_transaction_info ti;
81 int ret;
82
83 if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs)))
84 return VM_FAULT_SIGBUS; /* -ENOSPC */
85
86 lock_page(page);
87 if (page->mapping != inode->i_mapping ||
88 page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
89 unlock_page(page);
90 return VM_FAULT_NOPAGE; /* make the VM retry the fault */
91 }
92
93 /*
94 * check to see if the page is mapped already (no holes)
95 */
96 if (PageMappedToDisk(page)) {
97 unlock_page(page);
98 goto mapped;
99 }
100 if (page_has_buffers(page)) {
101 struct buffer_head *bh, *head;
102 int fully_mapped = 1;
103
104 bh = head = page_buffers(page);
105 do {
106 if (!buffer_mapped(bh)) {
107 fully_mapped = 0;
108 break;
109 }
110 } while (bh = bh->b_this_page, bh != head);
111
112 if (fully_mapped) {
113 SetPageMappedToDisk(page);
114 unlock_page(page);
115 goto mapped;
116 }
117 }
118 unlock_page(page);
119
120 /*
121 * fill hole blocks
122 */
123 ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
124 /* never returns -ENOMEM, but may return -ENOSPC */
125 if (unlikely(ret))
126 return VM_FAULT_SIGBUS;
127
128 ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
129 if (unlikely(ret)) {
130 nilfs_transaction_abort(inode->i_sb);
131 return ret;
132 }
133 nilfs_transaction_commit(inode->i_sb);
134
135 mapped:
80 SetPageChecked(page); 136 SetPageChecked(page);
81 wait_on_page_writeback(page); 137 wait_on_page_writeback(page);
82 return 0; 138 return 0;
diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h
new file mode 100644
index 000000000000..d39df9144e99
--- /dev/null
+++ b/fs/nilfs2/seglist.h
@@ -0,0 +1,85 @@
1/*
2 * seglist.h - expedient structure and routines to handle a list of segments
3 * (would be removed in a future release)
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * Written by Ryusuke Konishi <ryusuke@osrg.net>
22 *
23 */
24#ifndef _NILFS_SEGLIST_H
25#define _NILFS_SEGLIST_H
26
27#include <linux/fs.h>
28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h>
30#include "sufile.h"
31
32struct nilfs_segment_entry {
33 __u64 segnum;
34
35#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally.
36 It must be cancelled if
37 construction aborted */
38
39 unsigned flags;
40 struct list_head list;
41 struct buffer_head *bh_su;
42 struct nilfs_segment_usage *raw_su;
43};
44
45
46void nilfs_dispose_segment_list(struct list_head *);
47
48static inline struct nilfs_segment_entry *
49nilfs_alloc_segment_entry(__u64 segnum)
50{
51 struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
52
53 if (likely(ent)) {
54 ent->segnum = segnum;
55 ent->flags = 0;
56 ent->bh_su = NULL;
57 ent->raw_su = NULL;
58 INIT_LIST_HEAD(&ent->list);
59 }
60 return ent;
61}
62
63static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent,
64 struct inode *sufile)
65{
66 return nilfs_sufile_get_segment_usage(sufile, ent->segnum,
67 &ent->raw_su, &ent->bh_su);
68}
69
70static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent,
71 struct inode *sufile)
72{
73 if (!ent->bh_su)
74 return;
75 nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su);
76 ent->bh_su = NULL;
77 ent->raw_su = NULL;
78}
79
80static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent)
81{
82 kfree(ent);
83}
84
85#endif /* _NILFS_SEGLIST_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
new file mode 100644
index 000000000000..2c4c088059fd
--- /dev/null
+++ b/fs/nilfs2/segment.c
@@ -0,0 +1,3187 @@
1/*
2 * segment.c - NILFS segment constructor.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#include <linux/pagemap.h>
25#include <linux/buffer_head.h>
26#include <linux/writeback.h>
27#include <linux/bio.h>
28#include <linux/completion.h>
29#include <linux/blkdev.h>
30#include <linux/backing-dev.h>
31#include <linux/freezer.h>
32#include <linux/kthread.h>
33#include <linux/crc32.h>
34#include <linux/pagevec.h>
35#include "nilfs.h"
36#include "btnode.h"
37#include "page.h"
38#include "segment.h"
39#include "sufile.h"
40#include "cpfile.h"
41#include "ifile.h"
42#include "seglist.h"
43#include "segbuf.h"
44
45
46/*
47 * Segment constructor
48 */
49#define SC_N_INODEVEC 16 /* Size of locally allocated inode vector */
50
51#define SC_MAX_SEGDELTA 64 /* Upper limit of the number of segments
52 appended in collection retry loop */
53
54/* Construction mode */
55enum {
56 SC_LSEG_SR = 1, /* Make a logical segment having a super root */
57 SC_LSEG_DSYNC, /* Flush data blocks of a given file and make
58 a logical segment without a super root */
59 SC_FLUSH_FILE, /* Flush data files, leads to segment writes without
60 creating a checkpoint */
61 SC_FLUSH_DAT, /* Flush DAT file. This also creates segments without
62 a checkpoint */
63};
64
65/* Stage numbers of dirty block collection */
66enum {
67 NILFS_ST_INIT = 0,
68 NILFS_ST_GC, /* Collecting dirty blocks for GC */
69 NILFS_ST_FILE,
70 NILFS_ST_SKETCH,
71 NILFS_ST_IFILE,
72 NILFS_ST_CPFILE,
73 NILFS_ST_SUFILE,
74 NILFS_ST_DAT,
75 NILFS_ST_SR, /* Super root */
76 NILFS_ST_DSYNC, /* Data sync blocks */
77 NILFS_ST_DONE,
78};
79
80/* State flags of collection */
81#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
82#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
83#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED)
84
85/* Operations depending on the construction mode and file type */
86struct nilfs_sc_operations {
87 int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
88 struct inode *);
89 int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
90 struct inode *);
91 int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
92 struct inode *);
93 void (*write_data_binfo)(struct nilfs_sc_info *,
94 struct nilfs_segsum_pointer *,
95 union nilfs_binfo *);
96 void (*write_node_binfo)(struct nilfs_sc_info *,
97 struct nilfs_segsum_pointer *,
98 union nilfs_binfo *);
99};
100
101/*
102 * Other definitions
103 */
104static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
105static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
106static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
107static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
108 int);
109
110#define nilfs_cnt32_gt(a, b) \
111 (typecheck(__u32, a) && typecheck(__u32, b) && \
112 ((__s32)(b) - (__s32)(a) < 0))
113#define nilfs_cnt32_ge(a, b) \
114 (typecheck(__u32, a) && typecheck(__u32, b) && \
115 ((__s32)(a) - (__s32)(b) >= 0))
116#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a)
117#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a)
118
119/*
120 * Transaction
121 */
122static struct kmem_cache *nilfs_transaction_cachep;
123
124/**
125 * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info
126 *
127 * nilfs_init_transaction_cache() creates a slab cache for the struct
128 * nilfs_transaction_info.
129 *
130 * Return Value: On success, it returns 0. On error, one of the following
131 * negative error code is returned.
132 *
133 * %-ENOMEM - Insufficient memory available.
134 */
135int nilfs_init_transaction_cache(void)
136{
137 nilfs_transaction_cachep =
138 kmem_cache_create("nilfs2_transaction_cache",
139 sizeof(struct nilfs_transaction_info),
140 0, SLAB_RECLAIM_ACCOUNT, NULL);
141 return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0;
142}
143
144/**
145 * nilfs_detroy_transaction_cache - destroy the cache for transaction info
146 *
147 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
148 * nilfs_transaction_info.
149 */
150void nilfs_destroy_transaction_cache(void)
151{
152 kmem_cache_destroy(nilfs_transaction_cachep);
153}
154
155static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
156{
157 struct nilfs_transaction_info *cur_ti = current->journal_info;
158 void *save = NULL;
159
160 if (cur_ti) {
161 if (cur_ti->ti_magic == NILFS_TI_MAGIC)
162 return ++cur_ti->ti_count;
163 else {
164 /*
165 * If journal_info field is occupied by other FS,
166 * we save it and restore on nilfs_transaction_end().
167 * But this should never happen.
168 */
169 printk(KERN_WARNING
170 "NILFS warning: journal info from a different "
171 "FS\n");
172 save = current->journal_info;
173 }
174 }
175 if (!ti) {
176 ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
177 if (!ti)
178 return -ENOMEM;
179 ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
180 } else {
181 ti->ti_flags = 0;
182 }
183 ti->ti_count = 0;
184 ti->ti_save = save;
185 ti->ti_magic = NILFS_TI_MAGIC;
186 current->journal_info = ti;
187 return 0;
188}
189
190/**
191 * nilfs_transaction_begin - start indivisible file operations.
192 * @sb: super block
193 * @ti: nilfs_transaction_info
194 * @vacancy_check: flags for vacancy rate checks
195 *
196 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
197 * the segment semaphore, to make a segment construction and write tasks
198 * exclusive. The function is used with nilfs_transaction_end() in pairs.
199 * The region enclosed by these two functions can be nested. To avoid a
200 * deadlock, the semaphore is only acquired or released in the outermost call.
201 *
202 * This function allocates a nilfs_transaction_info struct to keep context
203 * information on it. It is initialized and hooked onto the current task in
204 * the outermost call. If a pre-allocated struct is given to @ti, it is used
205 * instead; othewise a new struct is assigned from a slab.
206 *
207 * When @vacancy_check flag is set, this function will check the amount of
208 * free space, and will wait for the GC to reclaim disk space if low capacity.
209 *
210 * Return Value: On success, 0 is returned. On error, one of the following
211 * negative error code is returned.
212 *
213 * %-ENOMEM - Insufficient memory available.
214 *
215 * %-ERESTARTSYS - Interrupted
216 *
217 * %-ENOSPC - No space left on device
218 */
219int nilfs_transaction_begin(struct super_block *sb,
220 struct nilfs_transaction_info *ti,
221 int vacancy_check)
222{
223 struct nilfs_sb_info *sbi;
224 struct the_nilfs *nilfs;
225 int ret = nilfs_prepare_segment_lock(ti);
226
227 if (unlikely(ret < 0))
228 return ret;
229 if (ret > 0)
230 return 0;
231
232 sbi = NILFS_SB(sb);
233 nilfs = sbi->s_nilfs;
234 down_read(&nilfs->ns_segctor_sem);
235 if (vacancy_check && nilfs_near_disk_full(nilfs)) {
236 up_read(&nilfs->ns_segctor_sem);
237 ret = -ENOSPC;
238 goto failed;
239 }
240 return 0;
241
242 failed:
243 ti = current->journal_info;
244 current->journal_info = ti->ti_save;
245 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
246 kmem_cache_free(nilfs_transaction_cachep, ti);
247 return ret;
248}
249
250/**
251 * nilfs_transaction_end - end indivisible file operations.
252 * @sb: super block
253 * @commit: commit flag (0 for no change)
254 *
255 * nilfs_transaction_end() releases the read semaphore which is
256 * acquired by nilfs_transaction_begin(). Its releasing is only done
257 * in outermost call of this function. If the nilfs_transaction_info
258 * was allocated dynamically, it is given back to a slab cache.
259 */
260int nilfs_transaction_end(struct super_block *sb, int commit)
261{
262 struct nilfs_transaction_info *ti = current->journal_info;
263 struct nilfs_sb_info *sbi;
264 struct nilfs_sc_info *sci;
265 int err = 0;
266
267 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
268
269 if (commit)
270 ti->ti_flags |= NILFS_TI_COMMIT;
271 if (ti->ti_count > 0) {
272 ti->ti_count--;
273 return 0;
274 }
275 sbi = NILFS_SB(sb);
276 sci = NILFS_SC(sbi);
277 if (sci != NULL) {
278 if (ti->ti_flags & NILFS_TI_COMMIT)
279 nilfs_segctor_start_timer(sci);
280 if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) >
281 sci->sc_watermark)
282 nilfs_segctor_do_flush(sci, 0);
283 }
284 up_read(&sbi->s_nilfs->ns_segctor_sem);
285 current->journal_info = ti->ti_save;
286
287 if (ti->ti_flags & NILFS_TI_SYNC)
288 err = nilfs_construct_segment(sb);
289 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
290 kmem_cache_free(nilfs_transaction_cachep, ti);
291 return err;
292}
293
294void nilfs_relax_pressure_in_lock(struct super_block *sb)
295{
296 struct nilfs_sb_info *sbi = NILFS_SB(sb);
297 struct nilfs_sc_info *sci = NILFS_SC(sbi);
298 struct the_nilfs *nilfs = sbi->s_nilfs;
299
300 if (!sci || !sci->sc_flush_request)
301 return;
302
303 set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
304 up_read(&nilfs->ns_segctor_sem);
305
306 down_write(&nilfs->ns_segctor_sem);
307 if (sci->sc_flush_request &&
308 test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
309 struct nilfs_transaction_info *ti = current->journal_info;
310
311 ti->ti_flags |= NILFS_TI_WRITER;
312 nilfs_segctor_do_immediate_flush(sci);
313 ti->ti_flags &= ~NILFS_TI_WRITER;
314 }
315 downgrade_write(&nilfs->ns_segctor_sem);
316}
317
318static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
319 struct nilfs_transaction_info *ti,
320 int gcflag)
321{
322 struct nilfs_transaction_info *cur_ti = current->journal_info;
323
324 BUG_ON(cur_ti);
325 BUG_ON(!ti);
326 ti->ti_flags = NILFS_TI_WRITER;
327 ti->ti_count = 0;
328 ti->ti_save = cur_ti;
329 ti->ti_magic = NILFS_TI_MAGIC;
330 INIT_LIST_HEAD(&ti->ti_garbage);
331 current->journal_info = ti;
332
333 for (;;) {
334 down_write(&sbi->s_nilfs->ns_segctor_sem);
335 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags))
336 break;
337
338 nilfs_segctor_do_immediate_flush(NILFS_SC(sbi));
339
340 up_write(&sbi->s_nilfs->ns_segctor_sem);
341 yield();
342 }
343 if (gcflag)
344 ti->ti_flags |= NILFS_TI_GC;
345}
346
347static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi)
348{
349 struct nilfs_transaction_info *ti = current->journal_info;
350
351 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
352 BUG_ON(ti->ti_count > 0);
353
354 up_write(&sbi->s_nilfs->ns_segctor_sem);
355 current->journal_info = ti->ti_save;
356 if (!list_empty(&ti->ti_garbage))
357 nilfs_dispose_list(sbi, &ti->ti_garbage, 0);
358}
359
360static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
361 struct nilfs_segsum_pointer *ssp,
362 unsigned bytes)
363{
364 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
365 unsigned blocksize = sci->sc_super->s_blocksize;
366 void *p;
367
368 if (unlikely(ssp->offset + bytes > blocksize)) {
369 ssp->offset = 0;
370 BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
371 &segbuf->sb_segsum_buffers));
372 ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
373 }
374 p = ssp->bh->b_data + ssp->offset;
375 ssp->offset += bytes;
376 return p;
377}
378
379/**
380 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
381 * @sci: nilfs_sc_info
382 */
383static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
384{
385 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
386 struct buffer_head *sumbh;
387 unsigned sumbytes;
388 unsigned flags = 0;
389 int err;
390
391 if (nilfs_doing_gc())
392 flags = NILFS_SS_GC;
393 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime);
394 if (unlikely(err))
395 return err;
396
397 sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
398 sumbytes = segbuf->sb_sum.sumbytes;
399 sci->sc_finfo_ptr.bh = sumbh; sci->sc_finfo_ptr.offset = sumbytes;
400 sci->sc_binfo_ptr.bh = sumbh; sci->sc_binfo_ptr.offset = sumbytes;
401 sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
402 return 0;
403}
404
405static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
406{
407 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
408 if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
409 return -E2BIG; /* The current segment is filled up
410 (internal code) */
411 sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
412 return nilfs_segctor_reset_segment_buffer(sci);
413}
414
415static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
416{
417 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
418 int err;
419
420 if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
421 err = nilfs_segctor_feed_segment(sci);
422 if (err)
423 return err;
424 segbuf = sci->sc_curseg;
425 }
426 err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root);
427 if (likely(!err))
428 segbuf->sb_sum.flags |= NILFS_SS_SR;
429 return err;
430}
431
432/*
433 * Functions for making segment summary and payloads
434 */
435static int nilfs_segctor_segsum_block_required(
436 struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
437 unsigned binfo_size)
438{
439 unsigned blocksize = sci->sc_super->s_blocksize;
440 /* Size of finfo and binfo is enough small against blocksize */
441
442 return ssp->offset + binfo_size +
443 (!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
444 blocksize;
445}
446
447static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
448 struct inode *inode)
449{
450 sci->sc_curseg->sb_sum.nfinfo++;
451 sci->sc_binfo_ptr = sci->sc_finfo_ptr;
452 nilfs_segctor_map_segsum_entry(
453 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
454 /* skip finfo */
455}
456
457static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
458 struct inode *inode)
459{
460 struct nilfs_finfo *finfo;
461 struct nilfs_inode_info *ii;
462 struct nilfs_segment_buffer *segbuf;
463
464 if (sci->sc_blk_cnt == 0)
465 return;
466
467 ii = NILFS_I(inode);
468 finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
469 sizeof(*finfo));
470 finfo->fi_ino = cpu_to_le64(inode->i_ino);
471 finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
472 finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
473 finfo->fi_cno = cpu_to_le64(ii->i_cno);
474
475 segbuf = sci->sc_curseg;
476 segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
477 sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
478 sci->sc_finfo_ptr = sci->sc_binfo_ptr;
479 sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
480}
481
482static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
483 struct buffer_head *bh,
484 struct inode *inode,
485 unsigned binfo_size)
486{
487 struct nilfs_segment_buffer *segbuf;
488 int required, err = 0;
489
490 retry:
491 segbuf = sci->sc_curseg;
492 required = nilfs_segctor_segsum_block_required(
493 sci, &sci->sc_binfo_ptr, binfo_size);
494 if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
495 nilfs_segctor_end_finfo(sci, inode);
496 err = nilfs_segctor_feed_segment(sci);
497 if (err)
498 return err;
499 goto retry;
500 }
501 if (unlikely(required)) {
502 err = nilfs_segbuf_extend_segsum(segbuf);
503 if (unlikely(err))
504 goto failed;
505 }
506 if (sci->sc_blk_cnt == 0)
507 nilfs_segctor_begin_finfo(sci, inode);
508
509 nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
510 /* Substitution to vblocknr is delayed until update_blocknr() */
511 nilfs_segbuf_add_file_buffer(segbuf, bh);
512 sci->sc_blk_cnt++;
513 failed:
514 return err;
515}
516
517static int nilfs_handle_bmap_error(int err, const char *fname,
518 struct inode *inode, struct super_block *sb)
519{
520 if (err == -EINVAL) {
521 nilfs_error(sb, fname, "broken bmap (inode=%lu)\n",
522 inode->i_ino);
523 err = -EIO;
524 }
525 return err;
526}
527
528/*
529 * Callback functions that enumerate, mark, and collect dirty blocks
530 */
531static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
532 struct buffer_head *bh, struct inode *inode)
533{
534 int err;
535
536 /* BUG_ON(!buffer_dirty(bh)); */
537 /* excluded by scan_dirty_data_buffers() */
538 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
539 if (unlikely(err < 0))
540 return nilfs_handle_bmap_error(err, __func__, inode,
541 sci->sc_super);
542
543 err = nilfs_segctor_add_file_block(sci, bh, inode,
544 sizeof(struct nilfs_binfo_v));
545 if (!err)
546 sci->sc_datablk_cnt++;
547 return err;
548}
549
550static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
551 struct buffer_head *bh,
552 struct inode *inode)
553{
554 int err;
555
556 /* BUG_ON(!buffer_dirty(bh)); */
557 /* excluded by scan_dirty_node_buffers() */
558 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
559 if (unlikely(err < 0))
560 return nilfs_handle_bmap_error(err, __func__, inode,
561 sci->sc_super);
562 return 0;
563}
564
565static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
566 struct buffer_head *bh,
567 struct inode *inode)
568{
569 BUG_ON(!buffer_dirty(bh));
570 return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
571}
572
573static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
574 struct nilfs_segsum_pointer *ssp,
575 union nilfs_binfo *binfo)
576{
577 struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
578 sci, ssp, sizeof(*binfo_v));
579 *binfo_v = binfo->bi_v;
580}
581
582static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
583 struct nilfs_segsum_pointer *ssp,
584 union nilfs_binfo *binfo)
585{
586 __le64 *vblocknr = nilfs_segctor_map_segsum_entry(
587 sci, ssp, sizeof(*vblocknr));
588 *vblocknr = binfo->bi_v.bi_vblocknr;
589}
590
591struct nilfs_sc_operations nilfs_sc_file_ops = {
592 .collect_data = nilfs_collect_file_data,
593 .collect_node = nilfs_collect_file_node,
594 .collect_bmap = nilfs_collect_file_bmap,
595 .write_data_binfo = nilfs_write_file_data_binfo,
596 .write_node_binfo = nilfs_write_file_node_binfo,
597};
598
599static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
600 struct buffer_head *bh, struct inode *inode)
601{
602 int err;
603
604 err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
605 if (unlikely(err < 0))
606 return nilfs_handle_bmap_error(err, __func__, inode,
607 sci->sc_super);
608
609 err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
610 if (!err)
611 sci->sc_datablk_cnt++;
612 return err;
613}
614
615static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
616 struct buffer_head *bh, struct inode *inode)
617{
618 BUG_ON(!buffer_dirty(bh));
619 return nilfs_segctor_add_file_block(sci, bh, inode,
620 sizeof(struct nilfs_binfo_dat));
621}
622
623static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
624 struct nilfs_segsum_pointer *ssp,
625 union nilfs_binfo *binfo)
626{
627 __le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
628 sizeof(*blkoff));
629 *blkoff = binfo->bi_dat.bi_blkoff;
630}
631
632static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
633 struct nilfs_segsum_pointer *ssp,
634 union nilfs_binfo *binfo)
635{
636 struct nilfs_binfo_dat *binfo_dat =
637 nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
638 *binfo_dat = binfo->bi_dat;
639}
640
641struct nilfs_sc_operations nilfs_sc_dat_ops = {
642 .collect_data = nilfs_collect_dat_data,
643 .collect_node = nilfs_collect_file_node,
644 .collect_bmap = nilfs_collect_dat_bmap,
645 .write_data_binfo = nilfs_write_dat_data_binfo,
646 .write_node_binfo = nilfs_write_dat_node_binfo,
647};
648
649struct nilfs_sc_operations nilfs_sc_dsync_ops = {
650 .collect_data = nilfs_collect_file_data,
651 .collect_node = NULL,
652 .collect_bmap = NULL,
653 .write_data_binfo = nilfs_write_file_data_binfo,
654 .write_node_binfo = NULL,
655};
656
657static int nilfs_lookup_dirty_data_buffers(struct inode *inode,
658 struct list_head *listp,
659 struct nilfs_sc_info *sci)
660{
661 struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
662 struct address_space *mapping = inode->i_mapping;
663 struct pagevec pvec;
664 unsigned i, ndirties = 0, nlimit;
665 pgoff_t index = 0;
666 int err = 0;
667
668 nlimit = sci->sc_segbuf_nblocks -
669 (sci->sc_nblk_this_inc + segbuf->sb_sum.nblocks);
670 pagevec_init(&pvec, 0);
671 repeat:
672 if (!pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
673 PAGEVEC_SIZE))
674 return 0;
675
676 for (i = 0; i < pagevec_count(&pvec); i++) {
677 struct buffer_head *bh, *head;
678 struct page *page = pvec.pages[i];
679
680 if (mapping->host) {
681 lock_page(page);
682 if (!page_has_buffers(page))
683 create_empty_buffers(page,
684 1 << inode->i_blkbits, 0);
685 unlock_page(page);
686 }
687
688 bh = head = page_buffers(page);
689 do {
690 if (buffer_dirty(bh)) {
691 if (ndirties > nlimit) {
692 err = -E2BIG;
693 break;
694 }
695 get_bh(bh);
696 list_add_tail(&bh->b_assoc_buffers, listp);
697 ndirties++;
698 }
699 bh = bh->b_this_page;
700 } while (bh != head);
701 }
702 pagevec_release(&pvec);
703 cond_resched();
704
705 if (!err)
706 goto repeat;
707 return err;
708}
709
710static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
711 struct list_head *listp)
712{
713 struct nilfs_inode_info *ii = NILFS_I(inode);
714 struct address_space *mapping = &ii->i_btnode_cache;
715 struct pagevec pvec;
716 struct buffer_head *bh, *head;
717 unsigned int i;
718 pgoff_t index = 0;
719
720 pagevec_init(&pvec, 0);
721
722 while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
723 PAGEVEC_SIZE)) {
724 for (i = 0; i < pagevec_count(&pvec); i++) {
725 bh = head = page_buffers(pvec.pages[i]);
726 do {
727 if (buffer_dirty(bh)) {
728 get_bh(bh);
729 list_add_tail(&bh->b_assoc_buffers,
730 listp);
731 }
732 bh = bh->b_this_page;
733 } while (bh != head);
734 }
735 pagevec_release(&pvec);
736 cond_resched();
737 }
738}
739
740static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
741 struct list_head *head, int force)
742{
743 struct nilfs_inode_info *ii, *n;
744 struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
745 unsigned nv = 0;
746
747 while (!list_empty(head)) {
748 spin_lock(&sbi->s_inode_lock);
749 list_for_each_entry_safe(ii, n, head, i_dirty) {
750 list_del_init(&ii->i_dirty);
751 if (force) {
752 if (unlikely(ii->i_bh)) {
753 brelse(ii->i_bh);
754 ii->i_bh = NULL;
755 }
756 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
757 set_bit(NILFS_I_QUEUED, &ii->i_state);
758 list_add_tail(&ii->i_dirty,
759 &sbi->s_dirty_files);
760 continue;
761 }
762 ivec[nv++] = ii;
763 if (nv == SC_N_INODEVEC)
764 break;
765 }
766 spin_unlock(&sbi->s_inode_lock);
767
768 for (pii = ivec; nv > 0; pii++, nv--)
769 iput(&(*pii)->vfs_inode);
770 }
771}
772
773static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi)
774{
775 struct the_nilfs *nilfs = sbi->s_nilfs;
776 int ret = 0;
777
778 if (nilfs_mdt_fetch_dirty(sbi->s_ifile))
779 ret++;
780 if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
781 ret++;
782 if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
783 ret++;
784 if (ret || nilfs_doing_gc())
785 if (nilfs_mdt_fetch_dirty(nilfs_dat_inode(nilfs)))
786 ret++;
787 return ret;
788}
789
790static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
791{
792 return list_empty(&sci->sc_dirty_files) &&
793 !test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
794 list_empty(&sci->sc_cleaning_segments) &&
795 (!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
796}
797
798static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
799{
800 struct nilfs_sb_info *sbi = sci->sc_sbi;
801 int ret = 0;
802
803 if (nilfs_test_metadata_dirty(sbi))
804 set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
805
806 spin_lock(&sbi->s_inode_lock);
807 if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci))
808 ret++;
809
810 spin_unlock(&sbi->s_inode_lock);
811 return ret;
812}
813
814static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
815{
816 struct nilfs_sb_info *sbi = sci->sc_sbi;
817 struct the_nilfs *nilfs = sbi->s_nilfs;
818
819 nilfs_mdt_clear_dirty(sbi->s_ifile);
820 nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
821 nilfs_mdt_clear_dirty(nilfs->ns_sufile);
822 nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs));
823}
824
825static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
826{
827 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
828 struct buffer_head *bh_cp;
829 struct nilfs_checkpoint *raw_cp;
830 int err;
831
832 /* XXX: this interface will be changed */
833 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
834 &raw_cp, &bh_cp);
835 if (likely(!err)) {
836 /* The following code is duplicated with cpfile. But, it is
837 needed to collect the checkpoint even if it was not newly
838 created */
839 nilfs_mdt_mark_buffer_dirty(bh_cp);
840 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
841 nilfs_cpfile_put_checkpoint(
842 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
843 } else {
844 BUG_ON(err == -EINVAL || err == -ENOENT);
845 }
846 return err;
847}
848
849static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
850{
851 struct nilfs_sb_info *sbi = sci->sc_sbi;
852 struct the_nilfs *nilfs = sbi->s_nilfs;
853 struct buffer_head *bh_cp;
854 struct nilfs_checkpoint *raw_cp;
855 int err;
856
857 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
858 &raw_cp, &bh_cp);
859 if (unlikely(err)) {
860 BUG_ON(err == -EINVAL || err == -ENOENT);
861 goto failed_ibh;
862 }
863 raw_cp->cp_snapshot_list.ssl_next = 0;
864 raw_cp->cp_snapshot_list.ssl_prev = 0;
865 raw_cp->cp_inodes_count =
866 cpu_to_le64(atomic_read(&sbi->s_inodes_count));
867 raw_cp->cp_blocks_count =
868 cpu_to_le64(atomic_read(&sbi->s_blocks_count));
869 raw_cp->cp_nblk_inc =
870 cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
871 raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
872 raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
873 if (sci->sc_sketch_inode && i_size_read(sci->sc_sketch_inode) > 0)
874 nilfs_checkpoint_set_sketch(raw_cp);
875 nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1);
876 nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
877 return 0;
878
879 failed_ibh:
880 return err;
881}
882
883static void nilfs_fill_in_file_bmap(struct inode *ifile,
884 struct nilfs_inode_info *ii)
885
886{
887 struct buffer_head *ibh;
888 struct nilfs_inode *raw_inode;
889
890 if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
891 ibh = ii->i_bh;
892 BUG_ON(!ibh);
893 raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
894 ibh);
895 nilfs_bmap_write(ii->i_bmap, raw_inode);
896 nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
897 }
898}
899
900static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci,
901 struct inode *ifile)
902{
903 struct nilfs_inode_info *ii;
904
905 list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
906 nilfs_fill_in_file_bmap(ifile, ii);
907 set_bit(NILFS_I_COLLECTED, &ii->i_state);
908 }
909 if (sci->sc_sketch_inode) {
910 ii = NILFS_I(sci->sc_sketch_inode);
911 if (test_bit(NILFS_I_DIRTY, &ii->i_state))
912 nilfs_fill_in_file_bmap(ifile, ii);
913 }
914}
915
916/*
917 * CRC calculation routines
918 */
919static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed)
920{
921 struct nilfs_super_root *raw_sr =
922 (struct nilfs_super_root *)bh_sr->b_data;
923 u32 crc;
924
925 BUG_ON(NILFS_SR_BYTES > bh_sr->b_size);
926 crc = crc32_le(seed,
927 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
928 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
929 raw_sr->sr_sum = cpu_to_le32(crc);
930}
931
932static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci,
933 u32 seed)
934{
935 struct nilfs_segment_buffer *segbuf;
936
937 if (sci->sc_super_root)
938 nilfs_fill_in_super_root_crc(sci->sc_super_root, seed);
939
940 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
941 nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
942 nilfs_segbuf_fill_in_data_crc(segbuf, seed);
943 }
944}
945
946static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
947 struct the_nilfs *nilfs)
948{
949 struct buffer_head *bh_sr = sci->sc_super_root;
950 struct nilfs_super_root *raw_sr =
951 (struct nilfs_super_root *)bh_sr->b_data;
952 unsigned isz = nilfs->ns_inode_size;
953
954 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
955 raw_sr->sr_nongc_ctime
956 = cpu_to_le64(nilfs_doing_gc() ?
957 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
958 raw_sr->sr_flags = 0;
959
960 nilfs_mdt_write_inode_direct(
961 nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz));
962 nilfs_mdt_write_inode_direct(
963 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz));
964 nilfs_mdt_write_inode_direct(
965 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz));
966}
967
968static void nilfs_redirty_inodes(struct list_head *head)
969{
970 struct nilfs_inode_info *ii;
971
972 list_for_each_entry(ii, head, i_dirty) {
973 if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
974 clear_bit(NILFS_I_COLLECTED, &ii->i_state);
975 }
976}
977
978static void nilfs_drop_collected_inodes(struct list_head *head)
979{
980 struct nilfs_inode_info *ii;
981
982 list_for_each_entry(ii, head, i_dirty) {
983 if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
984 continue;
985
986 clear_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
987 set_bit(NILFS_I_UPDATED, &ii->i_state);
988 }
989}
990
991static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci,
992 struct inode *sufile)
993
994{
995 struct list_head *head = &sci->sc_cleaning_segments;
996 struct nilfs_segment_entry *ent;
997 int err;
998
999 list_for_each_entry(ent, head, list) {
1000 if (!(ent->flags & NILFS_SLH_FREED))
1001 break;
1002 err = nilfs_sufile_cancel_free(sufile, ent->segnum);
1003 BUG_ON(err);
1004
1005 ent->flags &= ~NILFS_SLH_FREED;
1006 }
1007}
1008
1009static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci,
1010 struct inode *sufile)
1011{
1012 struct list_head *head = &sci->sc_cleaning_segments;
1013 struct nilfs_segment_entry *ent;
1014 int err;
1015
1016 list_for_each_entry(ent, head, list) {
1017 err = nilfs_sufile_free(sufile, ent->segnum);
1018 if (unlikely(err))
1019 return err;
1020 ent->flags |= NILFS_SLH_FREED;
1021 }
1022 return 0;
1023}
1024
1025static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci)
1026{
1027 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
1028}
1029
1030static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
1031 struct inode *inode,
1032 struct list_head *listp,
1033 int (*collect)(struct nilfs_sc_info *,
1034 struct buffer_head *,
1035 struct inode *))
1036{
1037 struct buffer_head *bh, *n;
1038 int err = 0;
1039
1040 if (collect) {
1041 list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
1042 list_del_init(&bh->b_assoc_buffers);
1043 err = collect(sci, bh, inode);
1044 brelse(bh);
1045 if (unlikely(err))
1046 goto dispose_buffers;
1047 }
1048 return 0;
1049 }
1050
1051 dispose_buffers:
1052 while (!list_empty(listp)) {
1053 bh = list_entry(listp->next, struct buffer_head,
1054 b_assoc_buffers);
1055 list_del_init(&bh->b_assoc_buffers);
1056 brelse(bh);
1057 }
1058 return err;
1059}
1060
1061static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
1062 struct inode *inode,
1063 struct nilfs_sc_operations *sc_ops)
1064{
1065 LIST_HEAD(data_buffers);
1066 LIST_HEAD(node_buffers);
1067 int err, err2;
1068
1069 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1070 err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers,
1071 sci);
1072 if (err) {
1073 err2 = nilfs_segctor_apply_buffers(
1074 sci, inode, &data_buffers,
1075 err == -E2BIG ? sc_ops->collect_data : NULL);
1076 if (err == -E2BIG)
1077 err = err2;
1078 goto break_or_fail;
1079 }
1080 }
1081 nilfs_lookup_dirty_node_buffers(inode, &node_buffers);
1082
1083 if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
1084 err = nilfs_segctor_apply_buffers(
1085 sci, inode, &data_buffers, sc_ops->collect_data);
1086 if (unlikely(err)) {
1087 /* dispose node list */
1088 nilfs_segctor_apply_buffers(
1089 sci, inode, &node_buffers, NULL);
1090 goto break_or_fail;
1091 }
1092 sci->sc_stage.flags |= NILFS_CF_NODE;
1093 }
1094 /* Collect node */
1095 err = nilfs_segctor_apply_buffers(
1096 sci, inode, &node_buffers, sc_ops->collect_node);
1097 if (unlikely(err))
1098 goto break_or_fail;
1099
1100 nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
1101 err = nilfs_segctor_apply_buffers(
1102 sci, inode, &node_buffers, sc_ops->collect_bmap);
1103 if (unlikely(err))
1104 goto break_or_fail;
1105
1106 nilfs_segctor_end_finfo(sci, inode);
1107 sci->sc_stage.flags &= ~NILFS_CF_NODE;
1108
1109 break_or_fail:
1110 return err;
1111}
1112
1113static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
1114 struct inode *inode)
1115{
1116 LIST_HEAD(data_buffers);
1117 int err, err2;
1118
1119 err = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, sci);
1120 err2 = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
1121 (!err || err == -E2BIG) ?
1122 nilfs_collect_file_data : NULL);
1123 if (err == -E2BIG)
1124 err = err2;
1125 if (!err)
1126 nilfs_segctor_end_finfo(sci, inode);
1127 return err;
1128}
1129
/*
 * nilfs_segctor_collect_blocks - collect blocks, stage by stage
 * @sci: segment constructor object
 * @mode: construction mode (SC_LSEG_SR, SC_LSEG_DSYNC, SC_FLUSH_FILE or
 *        SC_FLUSH_DAT are the values tested here)
 *
 * The function is a state machine driven by sci->sc_stage.scnt.  It enters
 * the switch at the recorded stage and falls through the later stages,
 * advancing scnt as each one completes.  When a step fails the function
 * returns the error and leaves scnt (and, for the GC and dirty-file stages,
 * the saved list cursor) as it is, so the switch re-enters at that stage on
 * the next call.
 *
 * Mode-specific shortcuts visible below:
 *  - SC_LSEG_DSYNC jumps straight to the DSYNC stage, but only when
 *    NILFS_SC_UNCLOSED is not set.
 *  - SC_FLUSH_DAT jumps straight to the DAT stage and finishes after it.
 *  - SC_FLUSH_FILE finishes after the dirty-file stage.
 *  - Only SC_LSEG_SR scans the sketch inode and appends a super root.
 *
 * Returns 0 when the stage reached NILFS_ST_DONE (and also when the DSYNC
 * stage finds its inode without NILFS_I_BUSY, in which case the stage is
 * left unchanged), or the error of the failing step.
 */
1130 static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1131 {
1132 struct nilfs_sb_info *sbi = sci->sc_sbi;
1133 struct the_nilfs *nilfs = sbi->s_nilfs;
1134 struct list_head *head;
1135 struct nilfs_inode_info *ii;
1136 int err = 0;
1137
1138 switch (sci->sc_stage.scnt) {
1139 case NILFS_ST_INIT:
1140 /* Pre-processes */
1141 sci->sc_stage.flags = 0;
1142
/* A new log starts only if no unclosed one is pending. */
1143 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
1144 sci->sc_nblk_inc = 0;
1145 sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
1146 if (mode == SC_LSEG_DSYNC) {
1147 sci->sc_stage.scnt = NILFS_ST_DSYNC;
1148 goto dsync_mode;
1149 }
1150 }
1151
/* Reset the resume cursors of the GC and dirty-file stages. */
1152 sci->sc_stage.dirty_file_ptr = NULL;
1153 sci->sc_stage.gc_inode_ptr = NULL;
1154 if (mode == SC_FLUSH_DAT) {
1155 sci->sc_stage.scnt = NILFS_ST_DAT;
1156 goto dat_stage;
1157 }
1158 sci->sc_stage.scnt++; /* Fall through */
1159 case NILFS_ST_GC:
/*
 * GC inodes: continue after the saved cursor.  On failure the entry
 * preceding the failed one is saved, so the failed inode is scanned
 * again on re-entry.
 */
1160 if (nilfs_doing_gc()) {
1161 head = &sci->sc_gc_inodes;
1162 ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
1163 head, i_dirty);
1164 list_for_each_entry_continue(ii, head, i_dirty) {
1165 err = nilfs_segctor_scan_file(
1166 sci, &ii->vfs_inode,
1167 &nilfs_sc_file_ops);
1168 if (unlikely(err)) {
1169 sci->sc_stage.gc_inode_ptr = list_entry(
1170 ii->i_dirty.prev,
1171 struct nilfs_inode_info,
1172 i_dirty);
1173 goto break_or_fail;
1174 }
1175 set_bit(NILFS_I_COLLECTED, &ii->i_state);
1176 }
1177 sci->sc_stage.gc_inode_ptr = NULL;
1178 }
1179 sci->sc_stage.scnt++; /* Fall through */
1180 case NILFS_ST_FILE:
/*
 * Dirty regular files: same cursor scheme as the GC stage.
 * NILFS_I_DIRTY is cleared before each inode is scanned.
 */
1181 head = &sci->sc_dirty_files;
1182 ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
1183 i_dirty);
1184 list_for_each_entry_continue(ii, head, i_dirty) {
1185 clear_bit(NILFS_I_DIRTY, &ii->i_state);
1186
1187 err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
1188 &nilfs_sc_file_ops);
1189 if (unlikely(err)) {
1190 sci->sc_stage.dirty_file_ptr =
1191 list_entry(ii->i_dirty.prev,
1192 struct nilfs_inode_info,
1193 i_dirty);
1194 goto break_or_fail;
1195 }
1196 /* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
1197 /* XXX: required ? */
1198 }
1199 sci->sc_stage.dirty_file_ptr = NULL;
1200 if (mode == SC_FLUSH_FILE) {
1201 sci->sc_stage.scnt = NILFS_ST_DONE;
1202 return 0;
1203 }
1204 sci->sc_stage.scnt++; /* Fall through */
1205 case NILFS_ST_SKETCH:
/*
 * Sketch inode: only in SC_LSEG_SR mode.  If it is flagged dirty its
 * ctime/mtime seconds are set to the segment creation time and the
 * inode is marked dirty before it is scanned.
 */
1206 if (mode == SC_LSEG_SR && sci->sc_sketch_inode) {
1207 ii = NILFS_I(sci->sc_sketch_inode);
1208 if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
1209 sci->sc_sketch_inode->i_ctime.tv_sec
1210 = sci->sc_seg_ctime;
1211 sci->sc_sketch_inode->i_mtime.tv_sec
1212 = sci->sc_seg_ctime;
1213 err = nilfs_mark_inode_dirty(
1214 sci->sc_sketch_inode);
1215 if (unlikely(err))
1216 goto break_or_fail;
1217 }
1218 err = nilfs_segctor_scan_file(sci,
1219 sci->sc_sketch_inode,
1220 &nilfs_sc_file_ops);
1221 if (unlikely(err))
1222 goto break_or_fail;
1223 }
1224 sci->sc_stage.scnt++;
1225 sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
1226 /* Fall through */
1227 case NILFS_ST_IFILE:
1228 err = nilfs_segctor_scan_file(sci, sbi->s_ifile,
1229 &nilfs_sc_file_ops);
1230 if (unlikely(err))
1231 break;
/*
 * NOTE(review): the stage counter is advanced before the checkpoint is
 * created, so a failure of the creation leaves scnt at the next stage.
 */
1232 sci->sc_stage.scnt++;
1233 /* Creating a checkpoint */
1234 err = nilfs_segctor_create_checkpoint(sci);
1235 if (unlikely(err))
1236 break;
1237 /* Fall through */
1238 case NILFS_ST_CPFILE:
1239 err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
1240 &nilfs_sc_file_ops);
1241 if (unlikely(err))
1242 break;
1243 sci->sc_stage.scnt++; /* Fall through */
1244 case NILFS_ST_SUFILE:
/* Free the segments on the cleaning list, then scan the sufile. */
1245 err = nilfs_segctor_prepare_free_segments(sci,
1246 nilfs->ns_sufile);
1247 if (unlikely(err))
1248 break;
1249 err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
1250 &nilfs_sc_file_ops);
1251 if (unlikely(err))
1252 break;
1253 sci->sc_stage.scnt++; /* Fall through */
1254 case NILFS_ST_DAT:
1255 dat_stage:
/* The DAT inode is scanned with its own operation table. */
1256 err = nilfs_segctor_scan_file(sci, nilfs_dat_inode(nilfs),
1257 &nilfs_sc_dat_ops);
1258 if (unlikely(err))
1259 break;
1260 if (mode == SC_FLUSH_DAT) {
1261 sci->sc_stage.scnt = NILFS_ST_DONE;
1262 return 0;
1263 }
1264 sci->sc_stage.scnt++; /* Fall through */
1265 case NILFS_ST_SR:
1266 if (mode == SC_LSEG_SR) {
1267 /* Appending a super root */
1268 err = nilfs_segctor_add_super_root(sci);
1269 if (unlikely(err))
1270 break;
1271 }
1272 /* End of a logical segment */
1273 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1274 sci->sc_stage.scnt = NILFS_ST_DONE;
1275 return 0;
1276 case NILFS_ST_DSYNC:
1277 dsync_mode:
/*
 * Data-sync of a single file.  The target inode is taken from
 * sc_stage.dirty_file_ptr, which is not assigned in this function on
 * this path, so it is presumably set by the caller (TODO confirm).
 * If the inode is not NILFS_I_BUSY the stage is left as is and err
 * (still 0 here) is returned.
 */
1278 sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
1279 ii = sci->sc_stage.dirty_file_ptr;
1280 if (!test_bit(NILFS_I_BUSY, &ii->i_state))
1281 break;
1282
1283 err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
1284 if (unlikely(err))
1285 break;
1286 sci->sc_stage.dirty_file_ptr = NULL;
1287 sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
1288 sci->sc_stage.scnt = NILFS_ST_DONE;
1289 return 0;
1290 case NILFS_ST_DONE:
1291 return 0;
1292 default:
1293 BUG();
1294 }
1295
1296 break_or_fail:
1297 return err;
1298 }
1299
1300static int nilfs_segctor_terminate_segment(struct nilfs_sc_info *sci,
1301 struct nilfs_segment_buffer *segbuf,
1302 struct inode *sufile)
1303{
1304 struct nilfs_segment_entry *ent = segbuf->sb_segent;
1305 int err;
1306
1307 err = nilfs_open_segment_entry(ent, sufile);
1308 if (unlikely(err))
1309 return err;
1310 nilfs_mdt_mark_buffer_dirty(ent->bh_su);
1311 nilfs_mdt_mark_dirty(sufile);
1312 nilfs_close_segment_entry(ent, sufile);
1313
1314 list_add_tail(&ent->list, &sci->sc_active_segments);
1315 segbuf->sb_segent = NULL;
1316 return 0;
1317}
1318
1319static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum)
1320{
1321 struct buffer_head *bh_su;
1322 struct nilfs_segment_usage *raw_su;
1323 int err;
1324
1325 err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su);
1326 if (unlikely(err))
1327 return err;
1328 nilfs_mdt_mark_buffer_dirty(bh_su);
1329 nilfs_mdt_mark_dirty(sufile);
1330 nilfs_sufile_put_segment_usage(sufile, segnum, bh_su);
1331 return 0;
1332}
1333
1334static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
1335 struct the_nilfs *nilfs)
1336{
1337 struct nilfs_segment_buffer *segbuf, *n;
1338 struct inode *sufile = nilfs->ns_sufile;
1339 __u64 nextnum;
1340 int err;
1341
1342 if (list_empty(&sci->sc_segbufs)) {
1343 segbuf = nilfs_segbuf_new(sci->sc_super);
1344 if (unlikely(!segbuf))
1345 return -ENOMEM;
1346 list_add(&segbuf->sb_list, &sci->sc_segbufs);
1347 } else
1348 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1349
1350 err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
1351 nilfs->ns_pseg_offset, nilfs);
1352 if (unlikely(err))
1353 return err;
1354
1355 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1356 err = nilfs_segctor_terminate_segment(sci, segbuf, sufile);
1357 if (unlikely(err))
1358 return err;
1359
1360 nilfs_shift_to_next_segment(nilfs);
1361 err = nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
1362 }
1363 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1364
1365 err = nilfs_touch_segusage(sufile, segbuf->sb_segnum);
1366 if (unlikely(err))
1367 return err;
1368
1369 if (nilfs->ns_segnum == nilfs->ns_nextnum) {
1370 /* Start from the head of a new full segment */
1371 err = nilfs_sufile_alloc(sufile, &nextnum);
1372 if (unlikely(err))
1373 return err;
1374 } else
1375 nextnum = nilfs->ns_nextnum;
1376
1377 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1378 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
1379
1380 /* truncating segment buffers */
1381 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
1382 sb_list) {
1383 list_del_init(&segbuf->sb_list);
1384 nilfs_segbuf_free(segbuf);
1385 }
1386 return err;
1387}
1388
1389static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1390 struct the_nilfs *nilfs, int nadd)
1391{
1392 struct nilfs_segment_buffer *segbuf, *prev, *n;
1393 struct inode *sufile = nilfs->ns_sufile;
1394 __u64 nextnextnum;
1395 LIST_HEAD(list);
1396 int err, ret, i;
1397
1398 prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
1399 /*
1400 * Since the segment specified with nextnum might be allocated during
1401 * the previous construction, the buffer including its segusage may
1402 * not be dirty. The following call ensures that the buffer is dirty
1403 * and will pin the buffer on memory until the sufile is written.
1404 */
1405 err = nilfs_touch_segusage(sufile, prev->sb_nextnum);
1406 if (unlikely(err))
1407 return err;
1408
1409 for (i = 0; i < nadd; i++) {
1410 /* extend segment info */
1411 err = -ENOMEM;
1412 segbuf = nilfs_segbuf_new(sci->sc_super);
1413 if (unlikely(!segbuf))
1414 goto failed;
1415
1416 /* map this buffer to region of segment on-disk */
1417 err = nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1418 if (unlikely(err))
1419 goto failed_segbuf;
1420
1421 sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;
1422
1423 /* allocate the next next full segment */
1424 err = nilfs_sufile_alloc(sufile, &nextnextnum);
1425 if (unlikely(err))
1426 goto failed_segbuf;
1427
1428 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
1429 nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);
1430
1431 list_add_tail(&segbuf->sb_list, &list);
1432 prev = segbuf;
1433 }
1434 list_splice(&list, sci->sc_segbufs.prev);
1435 return 0;
1436
1437 failed_segbuf:
1438 nilfs_segbuf_free(segbuf);
1439 failed:
1440 list_for_each_entry_safe(segbuf, n, &list, sb_list) {
1441 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1442 BUG_ON(ret);
1443 list_del_init(&segbuf->sb_list);
1444 nilfs_segbuf_free(segbuf);
1445 }
1446 return err;
1447}
1448
1449static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
1450 struct the_nilfs *nilfs)
1451{
1452 struct nilfs_segment_buffer *segbuf;
1453 int ret, done = 0;
1454
1455 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1456 if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
1457 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
1458 BUG_ON(ret);
1459 }
1460 if (segbuf->sb_io_error) {
1461 /* Case 1: The first segment failed */
1462 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
1463 /* Case 1a: Partial segment appended into an existing
1464 segment */
1465 nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
1466 segbuf->sb_fseg_end);
1467 else /* Case 1b: New full segment */
1468 set_nilfs_discontinued(nilfs);
1469 done++;
1470 }
1471
1472 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1473 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum);
1474 BUG_ON(ret);
1475 if (!done && segbuf->sb_io_error) {
1476 if (segbuf->sb_segnum != nilfs->ns_nextnum)
1477 /* Case 2: extended segment (!= next) failed */
1478 nilfs_sufile_set_error(nilfs->ns_sufile,
1479 segbuf->sb_segnum);
1480 done++;
1481 }
1482 }
1483}
1484
1485static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci)
1486{
1487 struct nilfs_segment_buffer *segbuf;
1488
1489 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list)
1490 nilfs_segbuf_clear(segbuf);
1491 sci->sc_super_root = NULL;
1492}
1493
1494static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci)
1495{
1496 struct nilfs_segment_buffer *segbuf;
1497
1498 while (!list_empty(&sci->sc_segbufs)) {
1499 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1500 list_del_init(&segbuf->sb_list);
1501 nilfs_segbuf_free(segbuf);
1502 }
1503 /* sci->sc_curseg = NULL; */
1504}
1505
1506static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
1507 struct the_nilfs *nilfs, int err)
1508{
1509 if (unlikely(err)) {
1510 nilfs_segctor_free_incomplete_segments(sci, nilfs);
1511 nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
1512 }
1513 nilfs_segctor_clear_segment_buffers(sci);
1514}
1515
1516static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
1517 struct inode *sufile)
1518{
1519 struct nilfs_segment_buffer *segbuf;
1520 struct buffer_head *bh_su;
1521 struct nilfs_segment_usage *raw_su;
1522 unsigned long live_blocks;
1523 int ret;
1524
1525 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1526 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1527 &raw_su, &bh_su);
1528 BUG_ON(ret); /* always succeed because bh_su is dirty */
1529 live_blocks = segbuf->sb_sum.nblocks +
1530 (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
1531 raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime);
1532 raw_su->su_nblocks = cpu_to_le32(live_blocks);
1533 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
1534 bh_su);
1535 }
1536}
1537
1538static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci,
1539 struct inode *sufile)
1540{
1541 struct nilfs_segment_buffer *segbuf;
1542 struct buffer_head *bh_su;
1543 struct nilfs_segment_usage *raw_su;
1544 int ret;
1545
1546 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1547 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1548 &raw_su, &bh_su);
1549 BUG_ON(ret); /* always succeed because bh_su is dirty */
1550 raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start -
1551 segbuf->sb_fseg_start);
1552 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);
1553
1554 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1555 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1556 &raw_su, &bh_su);
1557 BUG_ON(ret); /* always succeed */
1558 raw_su->su_nblocks = 0;
1559 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
1560 bh_su);
1561 }
1562}
1563
1564static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
1565 struct nilfs_segment_buffer *last,
1566 struct inode *sufile)
1567{
1568 struct nilfs_segment_buffer *segbuf = last, *n;
1569 int ret;
1570
1571 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs,
1572 sb_list) {
1573 list_del_init(&segbuf->sb_list);
1574 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
1575 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1576 BUG_ON(ret);
1577 nilfs_segbuf_free(segbuf);
1578 }
1579}
1580
1581
1582static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1583 struct the_nilfs *nilfs, int mode)
1584{
1585 struct nilfs_cstage prev_stage = sci->sc_stage;
1586 int err, nadd = 1;
1587
1588 /* Collection retry loop */
1589 for (;;) {
1590 sci->sc_super_root = NULL;
1591 sci->sc_nblk_this_inc = 0;
1592 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1593
1594 err = nilfs_segctor_reset_segment_buffer(sci);
1595 if (unlikely(err))
1596 goto failed;
1597
1598 err = nilfs_segctor_collect_blocks(sci, mode);
1599 sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
1600 if (!err)
1601 break;
1602
1603 if (unlikely(err != -E2BIG))
1604 goto failed;
1605
1606 /* The current segment is filled up */
1607 if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
1608 break;
1609
1610 nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
1611 nilfs_segctor_clear_segment_buffers(sci);
1612
1613 err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
1614 if (unlikely(err))
1615 return err;
1616
1617 nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
1618 sci->sc_stage = prev_stage;
1619 }
1620 nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
1621 return 0;
1622
1623 failed:
1624 return err;
1625}
1626
1627static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
1628 struct buffer_head *new_bh)
1629{
1630 BUG_ON(!list_empty(&new_bh->b_assoc_buffers));
1631
1632 list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
1633 /* The caller must release old_bh */
1634}
1635
1636static int
1637nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1638 struct nilfs_segment_buffer *segbuf,
1639 int mode)
1640{
1641 struct inode *inode = NULL;
1642 sector_t blocknr;
1643 unsigned long nfinfo = segbuf->sb_sum.nfinfo;
1644 unsigned long nblocks = 0, ndatablk = 0;
1645 struct nilfs_sc_operations *sc_op = NULL;
1646 struct nilfs_segsum_pointer ssp;
1647 struct nilfs_finfo *finfo = NULL;
1648 union nilfs_binfo binfo;
1649 struct buffer_head *bh, *bh_org;
1650 ino_t ino = 0;
1651 int err = 0;
1652
1653 if (!nfinfo)
1654 goto out;
1655
1656 blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
1657 ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
1658 ssp.offset = sizeof(struct nilfs_segment_summary);
1659
1660 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1661 if (bh == sci->sc_super_root)
1662 break;
1663 if (!finfo) {
1664 finfo = nilfs_segctor_map_segsum_entry(
1665 sci, &ssp, sizeof(*finfo));
1666 ino = le64_to_cpu(finfo->fi_ino);
1667 nblocks = le32_to_cpu(finfo->fi_nblocks);
1668 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1669
1670 if (buffer_nilfs_node(bh))
1671 inode = NILFS_BTNC_I(bh->b_page->mapping);
1672 else
1673 inode = NILFS_AS_I(bh->b_page->mapping);
1674
1675 if (mode == SC_LSEG_DSYNC)
1676 sc_op = &nilfs_sc_dsync_ops;
1677 else if (ino == NILFS_DAT_INO)
1678 sc_op = &nilfs_sc_dat_ops;
1679 else /* file blocks */
1680 sc_op = &nilfs_sc_file_ops;
1681 }
1682 bh_org = bh;
1683 get_bh(bh_org);
1684 err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
1685 &binfo);
1686 if (bh != bh_org)
1687 nilfs_list_replace_buffer(bh_org, bh);
1688 brelse(bh_org);
1689 if (unlikely(err))
1690 goto failed_bmap;
1691
1692 if (ndatablk > 0)
1693 sc_op->write_data_binfo(sci, &ssp, &binfo);
1694 else
1695 sc_op->write_node_binfo(sci, &ssp, &binfo);
1696
1697 blocknr++;
1698 if (--nblocks == 0) {
1699 finfo = NULL;
1700 if (--nfinfo == 0)
1701 break;
1702 } else if (ndatablk > 0)
1703 ndatablk--;
1704 }
1705 out:
1706 return 0;
1707
1708 failed_bmap:
1709 err = nilfs_handle_bmap_error(err, __func__, inode, sci->sc_super);
1710 return err;
1711}
1712
1713static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1714{
1715 struct nilfs_segment_buffer *segbuf;
1716 int err;
1717
1718 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1719 err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
1720 if (unlikely(err))
1721 return err;
1722 nilfs_segbuf_fill_in_segsum(segbuf);
1723 }
1724 return 0;
1725}
1726
1727static int
1728nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1729{
1730 struct page *clone_page;
1731 struct buffer_head *bh, *head, *bh2;
1732 void *kaddr;
1733
1734 bh = head = page_buffers(page);
1735
1736 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1737 if (unlikely(!clone_page))
1738 return -ENOMEM;
1739
1740 bh2 = page_buffers(clone_page);
1741 kaddr = kmap_atomic(page, KM_USER0);
1742 do {
1743 if (list_empty(&bh->b_assoc_buffers))
1744 continue;
1745 get_bh(bh2);
1746 page_cache_get(clone_page); /* for each bh */
1747 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1748 bh2->b_blocknr = bh->b_blocknr;
1749 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1750 list_add_tail(&bh->b_assoc_buffers, out);
1751 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1752 kunmap_atomic(kaddr, KM_USER0);
1753
1754 if (!TestSetPageWriteback(clone_page))
1755 inc_zone_page_state(clone_page, NR_WRITEBACK);
1756 unlock_page(clone_page);
1757
1758 return 0;
1759}
1760
1761static int nilfs_test_page_to_be_frozen(struct page *page)
1762{
1763 struct address_space *mapping = page->mapping;
1764
1765 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1766 return 0;
1767
1768 if (page_mapped(page)) {
1769 ClearPageChecked(page);
1770 return 1;
1771 }
1772 return PageChecked(page);
1773}
1774
/*
 * nilfs_begin_page_io - put a file page under writeback
 * @page: page to start I/O on (may be NULL)
 * @out: list receiving original buffers if the page has to be copied
 *
 * NULL pages and pages already under writeback are ignored; the latter
 * happens for split b-tree node pages, for which this function may be
 * called more than once.  Otherwise the page's dirty flag is cleared for
 * I/O and writeback is set under the page lock.  Pages that must be frozen
 * additionally get their buffers replaced by copies.
 *
 * Returns 0 on success or the error from the buffer copy.
 */
static int nilfs_begin_page_io(struct page *page, struct list_head *out)
{
	if (!page)
		return 0;
	if (PageWriteback(page))
		return 0;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);

	if (!nilfs_test_page_to_be_frozen(page))
		return 0;

	return nilfs_copy_replace_page_buffers(page, out);
}
1794
1795static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1796 struct page **failed_page)
1797{
1798 struct nilfs_segment_buffer *segbuf;
1799 struct page *bd_page = NULL, *fs_page = NULL;
1800 struct list_head *list = &sci->sc_copied_buffers;
1801 int err;
1802
1803 *failed_page = NULL;
1804 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1805 struct buffer_head *bh;
1806
1807 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1808 b_assoc_buffers) {
1809 if (bh->b_page != bd_page) {
1810 if (bd_page) {
1811 lock_page(bd_page);
1812 clear_page_dirty_for_io(bd_page);
1813 set_page_writeback(bd_page);
1814 unlock_page(bd_page);
1815 }
1816 bd_page = bh->b_page;
1817 }
1818 }
1819
1820 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1821 b_assoc_buffers) {
1822 if (bh == sci->sc_super_root) {
1823 if (bh->b_page != bd_page) {
1824 lock_page(bd_page);
1825 clear_page_dirty_for_io(bd_page);
1826 set_page_writeback(bd_page);
1827 unlock_page(bd_page);
1828 bd_page = bh->b_page;
1829 }
1830 break;
1831 }
1832 if (bh->b_page != fs_page) {
1833 err = nilfs_begin_page_io(fs_page, list);
1834 if (unlikely(err)) {
1835 *failed_page = fs_page;
1836 goto out;
1837 }
1838 fs_page = bh->b_page;
1839 }
1840 }
1841 }
1842 if (bd_page) {
1843 lock_page(bd_page);
1844 clear_page_dirty_for_io(bd_page);
1845 set_page_writeback(bd_page);
1846 unlock_page(bd_page);
1847 }
1848 err = nilfs_begin_page_io(fs_page, list);
1849 if (unlikely(err))
1850 *failed_page = fs_page;
1851 out:
1852 return err;
1853}
1854
1855static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1856 struct backing_dev_info *bdi)
1857{
1858 struct nilfs_segment_buffer *segbuf;
1859 struct nilfs_write_info wi;
1860 int err, res;
1861
1862 wi.sb = sci->sc_super;
1863 wi.bh_sr = sci->sc_super_root;
1864 wi.bdi = bdi;
1865
1866 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1867 nilfs_segbuf_prepare_write(segbuf, &wi);
1868 err = nilfs_segbuf_write(segbuf, &wi);
1869
1870 res = nilfs_segbuf_wait(segbuf, &wi);
1871 err = unlikely(err) ? : res;
1872 if (unlikely(err))
1873 return err;
1874 }
1875 return 0;
1876}
1877
1878static int nilfs_page_has_uncleared_buffer(struct page *page)
1879{
1880 struct buffer_head *head, *bh;
1881
1882 head = bh = page_buffers(page);
1883 do {
1884 if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers))
1885 return 1;
1886 bh = bh->b_this_page;
1887 } while (bh != head);
1888 return 0;
1889}
1890
1891static void __nilfs_end_page_io(struct page *page, int err)
1892{
1893 /* BUG_ON(err > 0); */
1894 if (!err) {
1895 if (!nilfs_page_buffers_clean(page))
1896 __set_page_dirty_nobuffers(page);
1897 ClearPageError(page);
1898 } else {
1899 __set_page_dirty_nobuffers(page);
1900 SetPageError(page);
1901 }
1902
1903 if (buffer_nilfs_allocated(page_buffers(page))) {
1904 if (TestClearPageWriteback(page))
1905 dec_zone_page_state(page, NR_WRITEBACK);
1906 } else
1907 end_page_writeback(page);
1908}
1909
/*
 * nilfs_end_page_io - finish writeback of a file page
 * @page: page whose I/O ended (may be NULL)
 * @err: zero on success, non-zero on failure
 *
 * B-tree node pages can be split across a segment, so this function may be
 * called several times for the same page.  The completion is therefore
 * postponed while such a page still has a dirty buffer linked on a list,
 * which guarantees that cleanup was done for all of its buffers first.
 */
static void nilfs_end_page_io(struct page *page, int err)
{
	int is_node_page;

	if (!page)
		return;

	is_node_page = buffer_nilfs_node(page_buffers(page));
	if (is_node_page && nilfs_page_has_uncleared_buffer(page))
		return;

	__nilfs_end_page_io(page, err);
}
1925
1926static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1927{
1928 struct buffer_head *bh, *head;
1929 struct page *page;
1930
1931 while (!list_empty(list)) {
1932 bh = list_entry(list->next, struct buffer_head,
1933 b_assoc_buffers);
1934 page = bh->b_page;
1935 page_cache_get(page);
1936 head = bh = page_buffers(page);
1937 do {
1938 if (!list_empty(&bh->b_assoc_buffers)) {
1939 list_del_init(&bh->b_assoc_buffers);
1940 if (!err) {
1941 set_buffer_uptodate(bh);
1942 clear_buffer_dirty(bh);
1943 clear_buffer_nilfs_volatile(bh);
1944 }
1945 brelse(bh); /* for b_assoc_buffers */
1946 }
1947 } while ((bh = bh->b_this_page) != head);
1948
1949 __nilfs_end_page_io(page, err);
1950 page_cache_release(page);
1951 }
1952}
1953
1954static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1955 struct page *failed_page, int err)
1956{
1957 struct nilfs_segment_buffer *segbuf;
1958 struct page *bd_page = NULL, *fs_page = NULL;
1959
1960 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1961 struct buffer_head *bh;
1962
1963 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1964 b_assoc_buffers) {
1965 if (bh->b_page != bd_page) {
1966 if (bd_page)
1967 end_page_writeback(bd_page);
1968 bd_page = bh->b_page;
1969 }
1970 }
1971
1972 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1973 b_assoc_buffers) {
1974 if (bh == sci->sc_super_root) {
1975 if (bh->b_page != bd_page) {
1976 end_page_writeback(bd_page);
1977 bd_page = bh->b_page;
1978 }
1979 break;
1980 }
1981 if (bh->b_page != fs_page) {
1982 nilfs_end_page_io(fs_page, err);
1983 if (unlikely(fs_page == failed_page))
1984 goto done;
1985 fs_page = bh->b_page;
1986 }
1987 }
1988 }
1989 if (bd_page)
1990 end_page_writeback(bd_page);
1991
1992 nilfs_end_page_io(fs_page, err);
1993 done:
1994 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1995}
1996
1997static void nilfs_set_next_segment(struct the_nilfs *nilfs,
1998 struct nilfs_segment_buffer *segbuf)
1999{
2000 nilfs->ns_segnum = segbuf->sb_segnum;
2001 nilfs->ns_nextnum = segbuf->sb_nextnum;
2002 nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
2003 + segbuf->sb_sum.nblocks;
2004 nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
2005 nilfs->ns_ctime = segbuf->sb_sum.ctime;
2006}
2007
2008static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
2009{
2010 struct nilfs_segment_buffer *segbuf;
2011 struct page *bd_page = NULL, *fs_page = NULL;
2012 struct nilfs_sb_info *sbi = sci->sc_sbi;
2013 struct the_nilfs *nilfs = sbi->s_nilfs;
2014 int update_sr = (sci->sc_super_root != NULL);
2015
2016 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
2017 struct buffer_head *bh;
2018
2019 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
2020 b_assoc_buffers) {
2021 set_buffer_uptodate(bh);
2022 clear_buffer_dirty(bh);
2023 if (bh->b_page != bd_page) {
2024 if (bd_page)
2025 end_page_writeback(bd_page);
2026 bd_page = bh->b_page;
2027 }
2028 }
2029 /*
2030 * We assume that the buffers which belong to the same page
2031 * continue over the buffer list.
2032 * Under this assumption, the last BHs of pages is
2033 * identifiable by the discontinuity of bh->b_page
2034 * (page != fs_page).
2035 *
2036 * For B-tree node blocks, however, this assumption is not
2037 * guaranteed. The cleanup code of B-tree node pages needs
2038 * special care.
2039 */
2040 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
2041 b_assoc_buffers) {
2042 set_buffer_uptodate(bh);
2043 clear_buffer_dirty(bh);
2044 clear_buffer_nilfs_volatile(bh);
2045 if (bh == sci->sc_super_root) {
2046 if (bh->b_page != bd_page) {
2047 end_page_writeback(bd_page);
2048 bd_page = bh->b_page;
2049 }
2050 break;
2051 }
2052 if (bh->b_page != fs_page) {
2053 nilfs_end_page_io(fs_page, 0);
2054 fs_page = bh->b_page;
2055 }
2056 }
2057
2058 if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) {
2059 if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) {
2060 set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
2061 sci->sc_lseg_stime = jiffies;
2062 }
2063 if (NILFS_SEG_LOGEND(&segbuf->sb_sum))
2064 clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
2065 }
2066 }
2067 /*
2068 * Since pages may continue over multiple segment buffers,
2069 * end of the last page must be checked outside of the loop.
2070 */
2071 if (bd_page)
2072 end_page_writeback(bd_page);
2073
2074 nilfs_end_page_io(fs_page, 0);
2075
2076 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
2077
2078 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
2079
2080 if (nilfs_doing_gc()) {
2081 nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
2082 if (update_sr)
2083 nilfs_commit_gcdat_inode(nilfs);
2084 } else {
2085 nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
2086 set_nilfs_cond_nongc_write(nilfs);
2087 wake_up(&nilfs->ns_cleanerd_wq);
2088 }
2089
2090 sci->sc_nblk_inc += sci->sc_nblk_this_inc;
2091
2092 segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
2093 nilfs_set_next_segment(nilfs, segbuf);
2094
2095 if (update_sr) {
2096 nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
2097 segbuf->sb_sum.seg_seq, nilfs->ns_cno);
2098
2099 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2100 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2101 } else
2102 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2103}
2104
2105static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
2106 struct nilfs_sb_info *sbi)
2107{
2108 struct nilfs_inode_info *ii, *n;
2109 __u64 cno = sbi->s_nilfs->ns_cno;
2110
2111 spin_lock(&sbi->s_inode_lock);
2112 retry:
2113 list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) {
2114 if (!ii->i_bh) {
2115 struct buffer_head *ibh;
2116 int err;
2117
2118 spin_unlock(&sbi->s_inode_lock);
2119 err = nilfs_ifile_get_inode_block(
2120 sbi->s_ifile, ii->vfs_inode.i_ino, &ibh);
2121 if (unlikely(err)) {
2122 nilfs_warning(sbi->s_super, __func__,
2123 "failed to get inode block.\n");
2124 return err;
2125 }
2126 nilfs_mdt_mark_buffer_dirty(ibh);
2127 nilfs_mdt_mark_dirty(sbi->s_ifile);
2128 spin_lock(&sbi->s_inode_lock);
2129 if (likely(!ii->i_bh))
2130 ii->i_bh = ibh;
2131 else
2132 brelse(ibh);
2133 goto retry;
2134 }
2135 ii->i_cno = cno;
2136
2137 clear_bit(NILFS_I_QUEUED, &ii->i_state);
2138 set_bit(NILFS_I_BUSY, &ii->i_state);
2139 list_del(&ii->i_dirty);
2140 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
2141 }
2142 spin_unlock(&sbi->s_inode_lock);
2143
2144 NILFS_I(sbi->s_ifile)->i_cno = cno;
2145
2146 return 0;
2147}
2148
2149static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
2150 struct nilfs_sb_info *sbi)
2151{
2152 struct nilfs_transaction_info *ti = current->journal_info;
2153 struct nilfs_inode_info *ii, *n;
2154 __u64 cno = sbi->s_nilfs->ns_cno;
2155
2156 spin_lock(&sbi->s_inode_lock);
2157 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
2158 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
2159 test_bit(NILFS_I_DIRTY, &ii->i_state)) {
2160 /* The current checkpoint number (=nilfs->ns_cno) is
2161 changed between check-in and check-out only if the
2162 super root is written out. So, we can update i_cno
2163 for the inodes that remain in the dirty list. */
2164 ii->i_cno = cno;
2165 continue;
2166 }
2167 clear_bit(NILFS_I_BUSY, &ii->i_state);
2168 brelse(ii->i_bh);
2169 ii->i_bh = NULL;
2170 list_del(&ii->i_dirty);
2171 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2172 }
2173 spin_unlock(&sbi->s_inode_lock);
2174}
2175
2176/*
2177 * Nasty routines to manipulate active flags on sufile.
2178 * These would be removed in a future release.
2179 */
2180static void nilfs_segctor_reactivate_segments(struct nilfs_sc_info *sci,
2181 struct the_nilfs *nilfs)
2182{
2183 struct nilfs_segment_buffer *segbuf, *last;
2184 struct nilfs_segment_entry *ent, *n;
2185 struct inode *sufile = nilfs->ns_sufile;
2186 struct list_head *head;
2187
2188 last = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
2189 nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) {
2190 ent = segbuf->sb_segent;
2191 if (!ent)
2192 break; /* ignore unmapped segments (should check it?)*/
2193 nilfs_segment_usage_set_active(ent->raw_su);
2194 nilfs_close_segment_entry(ent, sufile);
2195 }
2196
2197 head = &sci->sc_active_segments;
2198 list_for_each_entry_safe(ent, n, head, list) {
2199 nilfs_segment_usage_set_active(ent->raw_su);
2200 nilfs_close_segment_entry(ent, sufile);
2201 }
2202
2203 down_write(&nilfs->ns_sem);
2204 head = &nilfs->ns_used_segments;
2205 list_for_each_entry(ent, head, list) {
2206 nilfs_segment_usage_set_volatile_active(ent->raw_su);
2207 }
2208 up_write(&nilfs->ns_sem);
2209}
2210
2211static int nilfs_segctor_deactivate_segments(struct nilfs_sc_info *sci,
2212 struct the_nilfs *nilfs)
2213{
2214 struct nilfs_segment_buffer *segbuf, *last;
2215 struct nilfs_segment_entry *ent;
2216 struct inode *sufile = nilfs->ns_sufile;
2217 struct list_head *head;
2218 int err;
2219
2220 last = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
2221 nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) {
2222 /*
2223 * Deactivate ongoing full segments. The last segment is kept
2224 * active because it is a start point of recovery, and is not
2225 * relocatable until the super block points to a newer
2226 * checkpoint.
2227 */
2228 ent = segbuf->sb_segent;
2229 if (!ent)
2230 break; /* ignore unmapped segments (should check it?)*/
2231 err = nilfs_open_segment_entry(ent, sufile);
2232 if (unlikely(err))
2233 goto failed;
2234 nilfs_segment_usage_clear_active(ent->raw_su);
2235 BUG_ON(!buffer_dirty(ent->bh_su));
2236 }
2237
2238 head = &sci->sc_active_segments;
2239 list_for_each_entry(ent, head, list) {
2240 err = nilfs_open_segment_entry(ent, sufile);
2241 if (unlikely(err))
2242 goto failed;
2243 nilfs_segment_usage_clear_active(ent->raw_su);
2244 BUG_ON(!buffer_dirty(ent->bh_su));
2245 }
2246
2247 down_write(&nilfs->ns_sem);
2248 head = &nilfs->ns_used_segments;
2249 list_for_each_entry(ent, head, list) {
2250 /* clear volatile active for segments of older generations */
2251 nilfs_segment_usage_clear_volatile_active(ent->raw_su);
2252 }
2253 up_write(&nilfs->ns_sem);
2254 return 0;
2255
2256 failed:
2257 nilfs_segctor_reactivate_segments(sci, nilfs);
2258 return err;
2259}
2260
2261static void nilfs_segctor_bead_completed_segments(struct nilfs_sc_info *sci)
2262{
2263 struct nilfs_segment_buffer *segbuf, *last;
2264 struct nilfs_segment_entry *ent;
2265
2266 /* move each segbuf->sb_segent to the list of used active segments */
2267 last = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
2268 nilfs_for_each_segbuf_before(segbuf, last, &sci->sc_segbufs) {
2269 ent = segbuf->sb_segent;
2270 if (!ent)
2271 break; /* ignore unmapped segments (should check it?)*/
2272 list_add_tail(&ent->list, &sci->sc_active_segments);
2273 segbuf->sb_segent = NULL;
2274 }
2275}
2276
2277static void
2278__nilfs_segctor_commit_deactivate_segments(struct nilfs_sc_info *sci,
2279 struct the_nilfs *nilfs)
2280
2281{
2282 struct nilfs_segment_entry *ent;
2283
2284 list_splice_init(&sci->sc_active_segments,
2285 nilfs->ns_used_segments.prev);
2286
2287 list_for_each_entry(ent, &nilfs->ns_used_segments, list) {
2288 nilfs_segment_usage_set_volatile_active(ent->raw_su);
2289 /* These segments are kept open */
2290 }
2291}
2292
2293/*
2294 * Main procedure of segment constructor
2295 */
2296static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2297{
2298 struct nilfs_sb_info *sbi = sci->sc_sbi;
2299 struct the_nilfs *nilfs = sbi->s_nilfs;
2300 struct page *failed_page;
2301 int err, has_sr = 0;
2302
2303 sci->sc_stage.scnt = NILFS_ST_INIT;
2304
2305 err = nilfs_segctor_check_in_files(sci, sbi);
2306 if (unlikely(err))
2307 goto out;
2308
2309 if (nilfs_test_metadata_dirty(sbi))
2310 set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2311
2312 if (nilfs_segctor_clean(sci))
2313 goto out;
2314
2315 do {
2316 sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;
2317
2318 err = nilfs_segctor_begin_construction(sci, nilfs);
2319 if (unlikely(err))
2320 goto out;
2321
2322 /* Update time stamp */
2323 sci->sc_seg_ctime = get_seconds();
2324
2325 err = nilfs_segctor_collect(sci, nilfs, mode);
2326 if (unlikely(err))
2327 goto failed;
2328
2329 has_sr = (sci->sc_super_root != NULL);
2330
2331 /* Avoid empty segment */
2332 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2333 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
2334 BUG_ON(mode == SC_LSEG_SR);
2335 nilfs_segctor_end_construction(sci, nilfs, 1);
2336 goto out;
2337 }
2338
2339 err = nilfs_segctor_assign(sci, mode);
2340 if (unlikely(err))
2341 goto failed;
2342
2343 if (has_sr) {
2344 err = nilfs_segctor_deactivate_segments(sci, nilfs);
2345 if (unlikely(err))
2346 goto failed;
2347 }
2348 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2349 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile);
2350
2351 if (has_sr) {
2352 err = nilfs_segctor_fill_in_checkpoint(sci);
2353 if (unlikely(err))
2354 goto failed_to_make_up;
2355
2356 nilfs_segctor_fill_in_super_root(sci, nilfs);
2357 }
2358 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2359
2360 /* Write partial segments */
2361 err = nilfs_segctor_prepare_write(sci, &failed_page);
2362 if (unlikely(err))
2363 goto failed_to_write;
2364
2365 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed);
2366
2367 err = nilfs_segctor_write(sci, nilfs->ns_bdi);
2368 if (unlikely(err))
2369 goto failed_to_write;
2370
2371 nilfs_segctor_complete_write(sci);
2372
2373 /* Commit segments */
2374 nilfs_segctor_bead_completed_segments(sci);
2375 if (has_sr) {
2376 down_write(&nilfs->ns_sem);
2377 nilfs_update_last_segment(sbi, 1);
2378 __nilfs_segctor_commit_deactivate_segments(sci, nilfs);
2379 up_write(&nilfs->ns_sem);
2380 nilfs_segctor_commit_free_segments(sci);
2381 nilfs_segctor_clear_metadata_dirty(sci);
2382 }
2383
2384 nilfs_segctor_end_construction(sci, nilfs, 0);
2385
2386 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2387
2388 /* Clearing sketch data */
2389 if (has_sr && sci->sc_sketch_inode) {
2390 if (i_size_read(sci->sc_sketch_inode) == 0)
2391 clear_bit(NILFS_I_DIRTY,
2392 &NILFS_I(sci->sc_sketch_inode)->i_state);
2393 i_size_write(sci->sc_sketch_inode, 0);
2394 }
2395 out:
2396 nilfs_segctor_destroy_segment_buffers(sci);
2397 nilfs_segctor_check_out_files(sci, sbi);
2398 return err;
2399
2400 failed_to_write:
2401 nilfs_segctor_abort_write(sci, failed_page, err);
2402 nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile);
2403
2404 failed_to_make_up:
2405 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2406 nilfs_redirty_inodes(&sci->sc_dirty_files);
2407 if (has_sr)
2408 nilfs_segctor_reactivate_segments(sci, nilfs);
2409
2410 failed:
2411 if (nilfs_doing_gc())
2412 nilfs_redirty_inodes(&sci->sc_gc_inodes);
2413 nilfs_segctor_end_construction(sci, nilfs, err);
2414 goto out;
2415}
2416
2417/**
2418 * nilfs_secgtor_start_timer - set timer of background write
2419 * @sci: nilfs_sc_info
2420 *
2421 * If the timer has already been set, it ignores the new request.
2422 * This function MUST be called within a section locking the segment
2423 * semaphore.
2424 */
2425static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2426{
2427 spin_lock(&sci->sc_state_lock);
2428 if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2429 sci->sc_timer->expires = jiffies + sci->sc_interval;
2430 add_timer(sci->sc_timer);
2431 sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2432 }
2433 spin_unlock(&sci->sc_state_lock);
2434}
2435
2436static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2437{
2438 spin_lock(&sci->sc_state_lock);
2439 if (!(sci->sc_flush_request & (1 << bn))) {
2440 unsigned long prev_req = sci->sc_flush_request;
2441
2442 sci->sc_flush_request |= (1 << bn);
2443 if (!prev_req)
2444 wake_up(&sci->sc_wait_daemon);
2445 }
2446 spin_unlock(&sci->sc_state_lock);
2447}
2448
2449/**
2450 * nilfs_flush_segment - trigger a segment construction for resource control
2451 * @sb: super block
2452 * @ino: inode number of the file to be flushed out.
2453 */
2454void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2455{
2456 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2457 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2458
2459 if (!sci || nilfs_doing_construction())
2460 return;
2461 nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
2462 /* assign bit 0 to data files */
2463}
2464
2465int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
2466 __u64 *segnum, size_t nsegs)
2467{
2468 struct nilfs_segment_entry *ent;
2469 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
2470 struct inode *sufile = nilfs->ns_sufile;
2471 LIST_HEAD(list);
2472 __u64 *pnum;
2473 const char *flag_name;
2474 size_t i;
2475 int err, err2 = 0;
2476
2477 for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) {
2478 ent = nilfs_alloc_segment_entry(*pnum);
2479 if (unlikely(!ent)) {
2480 err = -ENOMEM;
2481 goto failed;
2482 }
2483 list_add_tail(&ent->list, &list);
2484
2485 err = nilfs_open_segment_entry(ent, sufile);
2486 if (unlikely(err))
2487 goto failed;
2488
2489 if (unlikely(le32_to_cpu(ent->raw_su->su_flags) !=
2490 (1UL << NILFS_SEGMENT_USAGE_DIRTY))) {
2491 if (nilfs_segment_usage_clean(ent->raw_su))
2492 flag_name = "clean";
2493 else if (nilfs_segment_usage_active(ent->raw_su))
2494 flag_name = "active";
2495 else if (nilfs_segment_usage_volatile_active(
2496 ent->raw_su))
2497 flag_name = "volatile active";
2498 else if (!nilfs_segment_usage_dirty(ent->raw_su))
2499 flag_name = "non-dirty";
2500 else
2501 flag_name = "erroneous";
2502
2503 printk(KERN_ERR
2504 "NILFS: %s segment is requested to be cleaned "
2505 "(segnum=%llu)\n",
2506 flag_name, (unsigned long long)ent->segnum);
2507 err2 = -EINVAL;
2508 }
2509 nilfs_close_segment_entry(ent, sufile);
2510 }
2511 if (unlikely(err2)) {
2512 err = err2;
2513 goto failed;
2514 }
2515 list_splice(&list, sci->sc_cleaning_segments.prev);
2516 return 0;
2517
2518 failed:
2519 nilfs_dispose_segment_list(&list);
2520 return err;
2521}
2522
2523void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
2524{
2525 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
2526}
2527
2528struct nilfs_segctor_wait_request {
2529 wait_queue_t wq;
2530 __u32 seq;
2531 int err;
2532 atomic_t done;
2533};
2534
2535static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
2536{
2537 struct nilfs_segctor_wait_request wait_req;
2538 int err = 0;
2539
2540 spin_lock(&sci->sc_state_lock);
2541 init_wait(&wait_req.wq);
2542 wait_req.err = 0;
2543 atomic_set(&wait_req.done, 0);
2544 wait_req.seq = ++sci->sc_seq_request;
2545 spin_unlock(&sci->sc_state_lock);
2546
2547 init_waitqueue_entry(&wait_req.wq, current);
2548 add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
2549 set_current_state(TASK_INTERRUPTIBLE);
2550 wake_up(&sci->sc_wait_daemon);
2551
2552 for (;;) {
2553 if (atomic_read(&wait_req.done)) {
2554 err = wait_req.err;
2555 break;
2556 }
2557 if (!signal_pending(current)) {
2558 schedule();
2559 continue;
2560 }
2561 err = -ERESTARTSYS;
2562 break;
2563 }
2564 finish_wait(&sci->sc_wait_request, &wait_req.wq);
2565 return err;
2566}
2567
2568static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2569{
2570 struct nilfs_segctor_wait_request *wrq, *n;
2571 unsigned long flags;
2572
2573 spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
2574 list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
2575 wq.task_list) {
2576 if (!atomic_read(&wrq->done) &&
2577 nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
2578 wrq->err = err;
2579 atomic_set(&wrq->done, 1);
2580 }
2581 if (atomic_read(&wrq->done)) {
2582 wrq->wq.func(&wrq->wq,
2583 TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
2584 0, NULL);
2585 }
2586 }
2587 spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
2588}
2589
2590/**
2591 * nilfs_construct_segment - construct a logical segment
2592 * @sb: super block
2593 *
2594 * Return Value: On success, 0 is retured. On errors, one of the following
2595 * negative error code is returned.
2596 *
2597 * %-EROFS - Read only filesystem.
2598 *
2599 * %-EIO - I/O error
2600 *
2601 * %-ENOSPC - No space left on device (only in a panic state).
2602 *
2603 * %-ERESTARTSYS - Interrupted.
2604 *
2605 * %-ENOMEM - Insufficient memory available.
2606 */
2607int nilfs_construct_segment(struct super_block *sb)
2608{
2609 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2610 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2611 struct nilfs_transaction_info *ti;
2612 int err;
2613
2614 if (!sci)
2615 return -EROFS;
2616
2617 /* A call inside transactions causes a deadlock. */
2618 BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
2619
2620 err = nilfs_segctor_sync(sci);
2621 return err;
2622}
2623
2624/**
2625 * nilfs_construct_dsync_segment - construct a data-only logical segment
2626 * @sb: super block
2627 * @inode: the inode whose data blocks should be written out
2628 *
2629 * Return Value: On success, 0 is retured. On errors, one of the following
2630 * negative error code is returned.
2631 *
2632 * %-EROFS - Read only filesystem.
2633 *
2634 * %-EIO - I/O error
2635 *
2636 * %-ENOSPC - No space left on device (only in a panic state).
2637 *
2638 * %-ERESTARTSYS - Interrupted.
2639 *
2640 * %-ENOMEM - Insufficient memory available.
2641 */
2642int nilfs_construct_dsync_segment(struct super_block *sb,
2643 struct inode *inode)
2644{
2645 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2646 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2647 struct nilfs_inode_info *ii;
2648 struct nilfs_transaction_info ti;
2649 int err = 0;
2650
2651 if (!sci)
2652 return -EROFS;
2653
2654 nilfs_transaction_lock(sbi, &ti, 0);
2655
2656 ii = NILFS_I(inode);
2657 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2658 nilfs_test_opt(sbi, STRICT_ORDER) ||
2659 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2660 nilfs_discontinued(sbi->s_nilfs)) {
2661 nilfs_transaction_unlock(sbi);
2662 err = nilfs_segctor_sync(sci);
2663 return err;
2664 }
2665
2666 spin_lock(&sbi->s_inode_lock);
2667 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2668 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2669 spin_unlock(&sbi->s_inode_lock);
2670 nilfs_transaction_unlock(sbi);
2671 return 0;
2672 }
2673 spin_unlock(&sbi->s_inode_lock);
2674 sci->sc_stage.dirty_file_ptr = ii;
2675
2676 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2677
2678 nilfs_transaction_unlock(sbi);
2679 return err;
2680}
2681
2682struct nilfs_segctor_req {
2683 int mode;
2684 __u32 seq_accepted;
2685 int sc_err; /* construction failure */
2686 int sb_err; /* super block writeback failure */
2687};
2688
/* Bits of sc_flush_request */
#define FLUSH_FILE_BIT	(0x1)			/* data file only */
#define FLUSH_DAT_BIT	(1 << NILFS_DAT_INO)	/* DAT only */
2691
2692static void nilfs_segctor_accept(struct nilfs_sc_info *sci,
2693 struct nilfs_segctor_req *req)
2694{
2695 BUG_ON(!sci);
2696
2697 req->sc_err = req->sb_err = 0;
2698 spin_lock(&sci->sc_state_lock);
2699 req->seq_accepted = sci->sc_seq_request;
2700 spin_unlock(&sci->sc_state_lock);
2701
2702 if (sci->sc_timer)
2703 del_timer_sync(sci->sc_timer);
2704}
2705
2706static void nilfs_segctor_notify(struct nilfs_sc_info *sci,
2707 struct nilfs_segctor_req *req)
2708{
2709 /* Clear requests (even when the construction failed) */
2710 spin_lock(&sci->sc_state_lock);
2711
2712 sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
2713
2714 if (req->mode == SC_LSEG_SR) {
2715 sci->sc_seq_done = req->seq_accepted;
2716 nilfs_segctor_wakeup(sci, req->sc_err ? : req->sb_err);
2717 sci->sc_flush_request = 0;
2718 } else if (req->mode == SC_FLUSH_FILE)
2719 sci->sc_flush_request &= ~FLUSH_FILE_BIT;
2720 else if (req->mode == SC_FLUSH_DAT)
2721 sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2722
2723 spin_unlock(&sci->sc_state_lock);
2724}
2725
2726static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
2727 struct nilfs_segctor_req *req)
2728{
2729 struct nilfs_sb_info *sbi = sci->sc_sbi;
2730 struct the_nilfs *nilfs = sbi->s_nilfs;
2731 int err = 0;
2732
2733 if (nilfs_discontinued(nilfs))
2734 req->mode = SC_LSEG_SR;
2735 if (!nilfs_segctor_confirm(sci)) {
2736 err = nilfs_segctor_do_construct(sci, req->mode);
2737 req->sc_err = err;
2738 }
2739 if (likely(!err)) {
2740 if (req->mode != SC_FLUSH_DAT)
2741 atomic_set(&nilfs->ns_ndirtyblks, 0);
2742 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2743 nilfs_discontinued(nilfs)) {
2744 down_write(&nilfs->ns_sem);
2745 req->sb_err = nilfs_commit_super(sbi);
2746 up_write(&nilfs->ns_sem);
2747 }
2748 }
2749 return err;
2750}
2751
/*
 * nilfs_construction_timeout - timer callback of the segctord timer
 * @data: the task_struct pointer to wake, cast to unsigned long
 */
static void nilfs_construction_timeout(unsigned long data)
{
	wake_up_process((struct task_struct *)data);
}
2757
2758static void
2759nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2760{
2761 struct nilfs_inode_info *ii, *n;
2762
2763 list_for_each_entry_safe(ii, n, head, i_dirty) {
2764 if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
2765 continue;
2766 hlist_del_init(&ii->vfs_inode.i_hash);
2767 list_del_init(&ii->i_dirty);
2768 nilfs_clear_gcinode(&ii->vfs_inode);
2769 }
2770}
2771
2772int nilfs_clean_segments(struct super_block *sb, void __user *argp)
2773{
2774 struct nilfs_sb_info *sbi = NILFS_SB(sb);
2775 struct nilfs_sc_info *sci = NILFS_SC(sbi);
2776 struct the_nilfs *nilfs = sbi->s_nilfs;
2777 struct nilfs_transaction_info ti;
2778 struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
2779 int err;
2780
2781 if (unlikely(!sci))
2782 return -EROFS;
2783
2784 nilfs_transaction_lock(sbi, &ti, 1);
2785
2786 err = nilfs_init_gcdat_inode(nilfs);
2787 if (unlikely(err))
2788 goto out_unlock;
2789 err = nilfs_ioctl_prepare_clean_segments(nilfs, argp);
2790 if (unlikely(err))
2791 goto out_unlock;
2792
2793 list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);
2794
2795 for (;;) {
2796 nilfs_segctor_accept(sci, &req);
2797 err = nilfs_segctor_construct(sci, &req);
2798 nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
2799 nilfs_segctor_notify(sci, &req);
2800
2801 if (likely(!err))
2802 break;
2803
2804 nilfs_warning(sb, __func__,
2805 "segment construction failed. (err=%d)", err);
2806 set_current_state(TASK_INTERRUPTIBLE);
2807 schedule_timeout(sci->sc_interval);
2808 }
2809
2810 out_unlock:
2811 nilfs_clear_gcdat_inode(nilfs);
2812 nilfs_transaction_unlock(sbi);
2813 return err;
2814}
2815
2816static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2817{
2818 struct nilfs_sb_info *sbi = sci->sc_sbi;
2819 struct nilfs_transaction_info ti;
2820 struct nilfs_segctor_req req = { .mode = mode };
2821
2822 nilfs_transaction_lock(sbi, &ti, 0);
2823
2824 nilfs_segctor_accept(sci, &req);
2825 nilfs_segctor_construct(sci, &req);
2826 nilfs_segctor_notify(sci, &req);
2827
2828 /*
2829 * Unclosed segment should be retried. We do this using sc_timer.
2830 * Timeout of sc_timer will invoke complete construction which leads
2831 * to close the current logical segment.
2832 */
2833 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2834 nilfs_segctor_start_timer(sci);
2835
2836 nilfs_transaction_unlock(sbi);
2837}
2838
2839static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
2840{
2841 int mode = 0;
2842 int err;
2843
2844 spin_lock(&sci->sc_state_lock);
2845 mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
2846 SC_FLUSH_DAT : SC_FLUSH_FILE;
2847 spin_unlock(&sci->sc_state_lock);
2848
2849 if (mode) {
2850 err = nilfs_segctor_do_construct(sci, mode);
2851
2852 spin_lock(&sci->sc_state_lock);
2853 sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
2854 ~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
2855 spin_unlock(&sci->sc_state_lock);
2856 }
2857 clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
2858}
2859
2860static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2861{
2862 if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2863 time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
2864 if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
2865 return SC_FLUSH_FILE;
2866 else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
2867 return SC_FLUSH_DAT;
2868 }
2869 return SC_LSEG_SR;
2870}
2871
2872/**
2873 * nilfs_segctor_thread - main loop of the segment constructor thread.
2874 * @arg: pointer to a struct nilfs_sc_info.
2875 *
2876 * nilfs_segctor_thread() initializes a timer and serves as a daemon
2877 * to execute segment constructions.
2878 */
2879static int nilfs_segctor_thread(void *arg)
2880{
2881 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2882 struct timer_list timer;
2883 int timeout = 0;
2884
2885 init_timer(&timer);
2886 timer.data = (unsigned long)current;
2887 timer.function = nilfs_construction_timeout;
2888 sci->sc_timer = &timer;
2889
2890 /* start sync. */
2891 sci->sc_task = current;
2892 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
2893 printk(KERN_INFO
2894 "segctord starting. Construction interval = %lu seconds, "
2895 "CP frequency < %lu seconds\n",
2896 sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
2897
2898 spin_lock(&sci->sc_state_lock);
2899 loop:
2900 for (;;) {
2901 int mode;
2902
2903 if (sci->sc_state & NILFS_SEGCTOR_QUIT)
2904 goto end_thread;
2905
2906 if (timeout || sci->sc_seq_request != sci->sc_seq_done)
2907 mode = SC_LSEG_SR;
2908 else if (!sci->sc_flush_request)
2909 break;
2910 else
2911 mode = nilfs_segctor_flush_mode(sci);
2912
2913 spin_unlock(&sci->sc_state_lock);
2914 nilfs_segctor_thread_construct(sci, mode);
2915 spin_lock(&sci->sc_state_lock);
2916 timeout = 0;
2917 }
2918
2919
2920 if (freezing(current)) {
2921 spin_unlock(&sci->sc_state_lock);
2922 refrigerator();
2923 spin_lock(&sci->sc_state_lock);
2924 } else {
2925 DEFINE_WAIT(wait);
2926 int should_sleep = 1;
2927
2928 prepare_to_wait(&sci->sc_wait_daemon, &wait,
2929 TASK_INTERRUPTIBLE);
2930
2931 if (sci->sc_seq_request != sci->sc_seq_done)
2932 should_sleep = 0;
2933 else if (sci->sc_flush_request)
2934 should_sleep = 0;
2935 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2936 should_sleep = time_before(jiffies,
2937 sci->sc_timer->expires);
2938
2939 if (should_sleep) {
2940 spin_unlock(&sci->sc_state_lock);
2941 schedule();
2942 spin_lock(&sci->sc_state_lock);
2943 }
2944 finish_wait(&sci->sc_wait_daemon, &wait);
2945 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2946 time_after_eq(jiffies, sci->sc_timer->expires));
2947 }
2948 goto loop;
2949
2950 end_thread:
2951 spin_unlock(&sci->sc_state_lock);
2952 del_timer_sync(sci->sc_timer);
2953 sci->sc_timer = NULL;
2954
2955 /* end sync. */
2956 sci->sc_task = NULL;
2957 wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
2958 return 0;
2959}
2960
2961static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
2962{
2963 struct task_struct *t;
2964
2965 t = kthread_run(nilfs_segctor_thread, sci, "segctord");
2966 if (IS_ERR(t)) {
2967 int err = PTR_ERR(t);
2968
2969 printk(KERN_ERR "NILFS: error %d creating segctord thread\n",
2970 err);
2971 return err;
2972 }
2973 wait_event(sci->sc_wait_task, sci->sc_task != NULL);
2974 return 0;
2975}
2976
2977static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2978{
2979 sci->sc_state |= NILFS_SEGCTOR_QUIT;
2980
2981 while (sci->sc_task) {
2982 wake_up(&sci->sc_wait_daemon);
2983 spin_unlock(&sci->sc_state_lock);
2984 wait_event(sci->sc_wait_task, sci->sc_task == NULL);
2985 spin_lock(&sci->sc_state_lock);
2986 }
2987}
2988
2989static int nilfs_segctor_init(struct nilfs_sc_info *sci,
2990 struct nilfs_recovery_info *ri)
2991{
2992 int err;
2993 struct inode *inode = nilfs_iget(sci->sc_super, NILFS_SKETCH_INO);
2994
2995 sci->sc_sketch_inode = IS_ERR(inode) ? NULL : inode;
2996 if (sci->sc_sketch_inode)
2997 i_size_write(sci->sc_sketch_inode, 0);
2998
2999 sci->sc_seq_done = sci->sc_seq_request;
3000 if (ri)
3001 list_splice_init(&ri->ri_used_segments,
3002 sci->sc_active_segments.prev);
3003
3004 err = nilfs_segctor_start_thread(sci);
3005 if (err) {
3006 if (ri)
3007 list_splice_init(&sci->sc_active_segments,
3008 ri->ri_used_segments.prev);
3009 if (sci->sc_sketch_inode) {
3010 iput(sci->sc_sketch_inode);
3011 sci->sc_sketch_inode = NULL;
3012 }
3013 }
3014 return err;
3015}
3016
3017/*
3018 * Setup & clean-up functions
3019 */
3020static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
3021{
3022 struct nilfs_sc_info *sci;
3023
3024 sci = kzalloc(sizeof(*sci), GFP_KERNEL);
3025 if (!sci)
3026 return NULL;
3027
3028 sci->sc_sbi = sbi;
3029 sci->sc_super = sbi->s_super;
3030
3031 init_waitqueue_head(&sci->sc_wait_request);
3032 init_waitqueue_head(&sci->sc_wait_daemon);
3033 init_waitqueue_head(&sci->sc_wait_task);
3034 spin_lock_init(&sci->sc_state_lock);
3035 INIT_LIST_HEAD(&sci->sc_dirty_files);
3036 INIT_LIST_HEAD(&sci->sc_segbufs);
3037 INIT_LIST_HEAD(&sci->sc_gc_inodes);
3038 INIT_LIST_HEAD(&sci->sc_active_segments);
3039 INIT_LIST_HEAD(&sci->sc_cleaning_segments);
3040 INIT_LIST_HEAD(&sci->sc_copied_buffers);
3041
3042 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
3043 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
3044 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
3045
3046 if (sbi->s_interval)
3047 sci->sc_interval = sbi->s_interval;
3048 if (sbi->s_watermark)
3049 sci->sc_watermark = sbi->s_watermark;
3050 return sci;
3051}
3052
3053static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
3054{
3055 int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
3056
3057 /* The segctord thread was stopped and its timer was removed.
3058 But some tasks remain. */
3059 do {
3060 struct nilfs_sb_info *sbi = sci->sc_sbi;
3061 struct nilfs_transaction_info ti;
3062 struct nilfs_segctor_req req = { .mode = SC_LSEG_SR };
3063
3064 nilfs_transaction_lock(sbi, &ti, 0);
3065 nilfs_segctor_accept(sci, &req);
3066 ret = nilfs_segctor_construct(sci, &req);
3067 nilfs_segctor_notify(sci, &req);
3068 nilfs_transaction_unlock(sbi);
3069
3070 } while (ret && retrycount-- > 0);
3071}
3072
3073/**
3074 * nilfs_segctor_destroy - destroy the segment constructor.
3075 * @sci: nilfs_sc_info
3076 *
3077 * nilfs_segctor_destroy() kills the segctord thread and frees
3078 * the nilfs_sc_info struct.
3079 * Caller must hold the segment semaphore.
3080 */
3081static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
3082{
3083 struct nilfs_sb_info *sbi = sci->sc_sbi;
3084 int flag;
3085
3086 up_write(&sbi->s_nilfs->ns_segctor_sem);
3087
3088 spin_lock(&sci->sc_state_lock);
3089 nilfs_segctor_kill_thread(sci);
3090 flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
3091 || sci->sc_seq_request != sci->sc_seq_done);
3092 spin_unlock(&sci->sc_state_lock);
3093
3094 if (flag || nilfs_segctor_confirm(sci))
3095 nilfs_segctor_write_out(sci);
3096
3097 BUG_ON(!list_empty(&sci->sc_copied_buffers));
3098
3099 if (!list_empty(&sci->sc_dirty_files)) {
3100 nilfs_warning(sbi->s_super, __func__,
3101 "dirty file(s) after the final construction\n");
3102 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1);
3103 }
3104 if (!list_empty(&sci->sc_active_segments))
3105 nilfs_dispose_segment_list(&sci->sc_active_segments);
3106
3107 if (!list_empty(&sci->sc_cleaning_segments))
3108 nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
3109
3110 BUG_ON(!list_empty(&sci->sc_segbufs));
3111
3112 if (sci->sc_sketch_inode) {
3113 iput(sci->sc_sketch_inode);
3114 sci->sc_sketch_inode = NULL;
3115 }
3116 down_write(&sbi->s_nilfs->ns_segctor_sem);
3117
3118 kfree(sci);
3119}
3120
3121/**
3122 * nilfs_attach_segment_constructor - attach a segment constructor
3123 * @sbi: nilfs_sb_info
3124 * @ri: nilfs_recovery_info
3125 *
3126 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info,
3127 * initilizes it, and starts the segment constructor.
3128 *
3129 * Return Value: On success, 0 is returned. On error, one of the following
3130 * negative error code is returned.
3131 *
3132 * %-ENOMEM - Insufficient memory available.
3133 */
3134int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi,
3135 struct nilfs_recovery_info *ri)
3136{
3137 struct the_nilfs *nilfs = sbi->s_nilfs;
3138 int err;
3139
3140 /* Each field of nilfs_segctor is cleared through the initialization
3141 of super-block info */
3142 sbi->s_sc_info = nilfs_segctor_new(sbi);
3143 if (!sbi->s_sc_info)
3144 return -ENOMEM;
3145
3146 nilfs_attach_writer(nilfs, sbi);
3147 err = nilfs_segctor_init(NILFS_SC(sbi), ri);
3148 if (err) {
3149 nilfs_detach_writer(nilfs, sbi);
3150 kfree(sbi->s_sc_info);
3151 sbi->s_sc_info = NULL;
3152 }
3153 return err;
3154}
3155
3156/**
3157 * nilfs_detach_segment_constructor - destroy the segment constructor
3158 * @sbi: nilfs_sb_info
3159 *
3160 * nilfs_detach_segment_constructor() kills the segment constructor daemon,
3161 * frees the struct nilfs_sc_info, and destroy the dirty file list.
3162 */
3163void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi)
3164{
3165 struct the_nilfs *nilfs = sbi->s_nilfs;
3166 LIST_HEAD(garbage_list);
3167
3168 down_write(&nilfs->ns_segctor_sem);
3169 if (NILFS_SC(sbi)) {
3170 nilfs_segctor_destroy(NILFS_SC(sbi));
3171 sbi->s_sc_info = NULL;
3172 }
3173
3174 /* Force to free the list of dirty files */
3175 spin_lock(&sbi->s_inode_lock);
3176 if (!list_empty(&sbi->s_dirty_files)) {
3177 list_splice_init(&sbi->s_dirty_files, &garbage_list);
3178 nilfs_warning(sbi->s_super, __func__,
3179 "Non empty dirty list after the last "
3180 "segment construction\n");
3181 }
3182 spin_unlock(&sbi->s_inode_lock);
3183 up_write(&nilfs->ns_segctor_sem);
3184
3185 nilfs_dispose_list(sbi, &garbage_list, 1);
3186 nilfs_detach_writer(nilfs, sbi);
3187}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
new file mode 100644
index 000000000000..615654b8c329
--- /dev/null
+++ b/fs/nilfs2/segment.h
@@ -0,0 +1,246 @@
1/*
2 * segment.h - NILFS Segment constructor prototypes and definitions
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23#ifndef _NILFS_SEGMENT_H
24#define _NILFS_SEGMENT_H
25
26#include <linux/types.h>
27#include <linux/fs.h>
28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h>
30#include "sb.h"
31
32/**
33 * struct nilfs_recovery_info - Recovery information
34 * @ri_need_recovery: Recovery status
35 * @ri_super_root: Block number of the last super root
36 * @ri_cno: Number of the last checkpoint
37 * @ri_lsegs_start: Region for roll-forwarding (start block number)
38 * @ri_lsegs_end: Region for roll-forwarding (end block number)
39 * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start
40 * @ri_used_segments: List of segments to be marked active
41 * @ri_pseg_start: Block number of the last partial segment
42 * @ri_seq: Sequence number on the last partial segment
43 * @ri_segnum: Segment number on the last partial segment
44 * @ri_nextnum: Next segment number on the last partial segment
45 */
46struct nilfs_recovery_info {
47 int ri_need_recovery;
48 sector_t ri_super_root;
49 __u64 ri_cno;
50
51 sector_t ri_lsegs_start;
52 sector_t ri_lsegs_end;
53 u64 ri_lsegs_start_seq;
54 struct list_head ri_used_segments;
55 sector_t ri_pseg_start;
56 u64 ri_seq;
57 __u64 ri_segnum;
58 __u64 ri_nextnum;
59};
60
61/* ri_need_recovery */
62#define NILFS_RECOVERY_SR_UPDATED 1 /* The super root was updated */
63#define NILFS_RECOVERY_ROLLFORWARD_DONE 2 /* Rollforward was carried out */
64
65/**
66 * struct nilfs_cstage - Context of collection stage
67 * @scnt: Stage count
68 * @flags: State flags
69 * @dirty_file_ptr: Pointer into the dirty_files list, or inode of a target file
70 * @gc_inode_ptr: Pointer into the list of gc-inodes
71 */
72struct nilfs_cstage {
73 int scnt;
74 unsigned flags;
75 struct nilfs_inode_info *dirty_file_ptr;
76 struct nilfs_inode_info *gc_inode_ptr;
77};
78
79struct nilfs_segment_buffer;
80
/**
 * struct nilfs_segsum_pointer - Position of an entry within a buffer
 * @bh: Buffer head of the buffer holding the entry
 * @offset: Offset in bytes (presumably from the start of @bh's data;
 *	confirm against the users in segment.c)
 *
 * struct nilfs_sc_info uses this type for sc_finfo_ptr and sc_binfo_ptr,
 * which point to the current finfo and binfo structs in the segment
 * summary.
 */
81struct nilfs_segsum_pointer {
82 struct buffer_head *bh;
83 unsigned offset; /* offset in bytes */
84};
85
86/**
87 * struct nilfs_sc_info - Segment constructor information
88 * @sc_super: Back pointer to super_block struct
89 * @sc_sbi: Back pointer to nilfs_sb_info struct
90 * @sc_nblk_inc: Block count of current generation
91 * @sc_dirty_files: List of files to be written
92 * @sc_gc_inodes: List of GC inodes having blocks to be written
93 * @sc_active_segments: List of active segments that were already written out
94 * @sc_cleaning_segments: List of segments to be freed through construction
95 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
96 * @sc_segbufs: List of segment buffers
97 * @sc_segbuf_nblocks: Number of available blocks in segment buffers
98 * @sc_curseg: Current segment buffer
99 * @sc_super_root: Pointer to the super root buffer
100 * @sc_stage: Collection stage
101 * @sc_finfo_ptr: Pointer to the current finfo struct in the segment summary
102 * @sc_binfo_ptr: Pointer to the current binfo struct in the segment summary
103 * @sc_blk_cnt: Block count of a file
104 * @sc_datablk_cnt: Data block count of a file
105 * @sc_nblk_this_inc: Number of blocks included in the current logical segment
106 * @sc_seg_ctime: Creation time
107 * @sc_flags: Internal flags
108 * @sc_sketch_inode: Inode of the sketch file
109 * @sc_state_lock: Spinlock for sc_state and so on
110 * @sc_state: Segctord state flags
111 * @sc_flush_request: Inode bitmap of metadata files to be flushed
112 * @sc_wait_request: Client request queue
113 * @sc_wait_daemon: Daemon wait queue
114 * @sc_wait_task: Start/end wait queue to control segctord task
115 * @sc_seq_request: Request counter
116 * @sc_seq_done: Completion counter
117 * @sc_sync: Request of explicit sync operation
118 * @sc_interval: Timeout value of background construction
119 * @sc_mjcp_freq: Frequency of creating checkpoints
120 * @sc_lseg_stime: Start time of the latest logical segment
121 * @sc_watermark: Watermark for the number of dirty buffers
122 * @sc_timer: Timer for segctord
123 * @sc_task: Current thread of segctord
124 */
125struct nilfs_sc_info {
126 struct super_block *sc_super;
127 struct nilfs_sb_info *sc_sbi;
128
129 unsigned long sc_nblk_inc;
130
131 struct list_head sc_dirty_files;
132 struct list_head sc_gc_inodes;
133 struct list_head sc_active_segments;
134 struct list_head sc_cleaning_segments;
135 struct list_head sc_copied_buffers;
136
137 /* Segment buffers */
138 struct list_head sc_segbufs;
139 unsigned long sc_segbuf_nblocks;
140 struct nilfs_segment_buffer *sc_curseg;
141 struct buffer_head *sc_super_root;
142
143 struct nilfs_cstage sc_stage;
144
145 struct nilfs_segsum_pointer sc_finfo_ptr;
146 struct nilfs_segsum_pointer sc_binfo_ptr;
147 unsigned long sc_blk_cnt;
148 unsigned long sc_datablk_cnt;
149 unsigned long sc_nblk_this_inc;
150 time_t sc_seg_ctime;
151
152 unsigned long sc_flags;
153
154 /*
155 * Pointer to an inode of the sketch.
156 * This pointer is kept only while it contains data.
157 * We protect it with a semaphore of the segment constructor.
158 */
159 struct inode *sc_sketch_inode;
160
161 spinlock_t sc_state_lock;
162 unsigned long sc_state;
163 unsigned long sc_flush_request;
164
165 wait_queue_head_t sc_wait_request;
166 wait_queue_head_t sc_wait_daemon;
167 wait_queue_head_t sc_wait_task;
168
169 __u32 sc_seq_request;
170 __u32 sc_seq_done;
171
172 int sc_sync;
173 unsigned long sc_interval;
174 unsigned long sc_mjcp_freq;
175 unsigned long sc_lseg_stime; /* in 1/HZ seconds */
176 unsigned long sc_watermark;
177
178 struct timer_list *sc_timer;
179 struct task_struct *sc_task;
180};
181
182/* sc_flags: identifiers of the internal flags in nilfs_sc_info.sc_flags */
183enum {
184 NILFS_SC_DIRTY, /* One or more dirty meta-data blocks exist */
185 NILFS_SC_UNCLOSED, /* Logical segment is not closed */
186 NILFS_SC_SUPER_ROOT, /* The latest segment has a super root */
187 NILFS_SC_PRIOR_FLUSH, /* Requesting immediate flush without making a
188 checkpoint */
189};
190
191/* sc_state */
192#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */
193#define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */
194
195/*
196 * Constant parameters
197 */
198#define NILFS_SC_CLEANUP_RETRY 3 /* Retry count of construction when
199 destroying segctord */
200
201/*
202 * Default values of timeout, in seconds.
203 */
204#define NILFS_SC_DEFAULT_TIMEOUT 5 /* Timeout value of dirty blocks.
205 It triggers construction of a
206 logical segment with a super root */
207#define NILFS_SC_DEFAULT_SR_FREQ 30 /* Maximum frequency of super root
208 creation */
209#define NILFS_SC_DEFAULT_SB_FREQ 30 /* Minimum interval of periodical
210 update of superblock (reserved) */
211
212/*
213 * The default threshold amount of data, in block counts.
214 */
215#define NILFS_SC_DEFAULT_WATERMARK 3600
216
217
218/* segment.c */
219extern int nilfs_init_transaction_cache(void);
220extern void nilfs_destroy_transaction_cache(void);
221extern void nilfs_relax_pressure_in_lock(struct super_block *);
222
223extern int nilfs_construct_segment(struct super_block *);
224extern int nilfs_construct_dsync_segment(struct super_block *,
225 struct inode *);
226extern void nilfs_flush_segment(struct super_block *, ino_t);
227extern int nilfs_clean_segments(struct super_block *, void __user *);
228
229extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *,
230 __u64 *, size_t);
231extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *);
232
233extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *,
234 struct nilfs_recovery_info *);
235extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
236
237/* recovery.c */
238extern int nilfs_read_super_root_block(struct super_block *, sector_t,
239 struct buffer_head **, int);
240extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *,
241 struct nilfs_recovery_info *);
242extern int nilfs_recover_logical_segments(struct the_nilfs *,
243 struct nilfs_sb_info *,
244 struct nilfs_recovery_info *);
245
246#endif /* _NILFS_SEGMENT_H */