author    Jaegeuk Kim <jaegeuk.kim@samsung.com>  2012-11-02 04:08:50 -0400
committer Jaegeuk Kim <jaegeuk.kim@samsung.com>  2012-12-10 23:43:40 -0500
commit    e05df3b115e7308afbca652769b54e4549fcc723 (patch)
tree      8622a461c42f436d46b3877b8c6d67fe4d660586 /fs/f2fs/node.c
parent    127e670abfa7fa150f6550d620ded930f5bdb4e7 (diff)
f2fs: add node operations
This adds specific functions to manage NAT pages, a cache for NAT entries, free nids, direct/indirect node blocks for indexing data, and an address space for node pages.

- The key information of an NAT entry consists of a node id and a block address.

- An NAT page is composed of block addresses covered by a certain range of NAT entries, and it is maintained by the address space of meta_inode.

- A radix tree structure is used to cache NAT entries. The index for the tree is a node id.

- When there is no free nid, F2FS should scan NAT entries to find a new one. In order to avoid scanning frequently, F2FS manages a list containing a number of free nids in memory. Only when the free nids in the list are exhausted is the scanning process, build_free_nids(), triggered.

- F2FS has direct and indirect node blocks for indexing data. This patch adds functions related to node block management, such as getting, allocating, and truncating node blocks to index data.

- In order to cache node blocks in memory, F2FS has a node_inode with an address space for node pages. This patch also adds the address space operations for node_inode.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
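As a rough orientation to the direct/indirect indexing described above, the following is a minimal userspace sketch, not the kernel code itself: it only shows how a file block offset selects the node-tree depth (inode, direct, indirect, or double-indirect). The per-block counts are illustrative assumptions standing in for ADDRS_PER_INODE, ADDRS_PER_BLOCK, and NIDS_PER_BLOCK from the on-disk layout; get_node_path() in the diff below performs the same walk but also records the offsets at every level.

#include <stdio.h>

/* Illustrative assumptions; the real values come from the f2fs on-disk layout. */
#define DIRECT_IN_INODE   923    /* data pointers held directly in the inode  */
#define ADDRS_PER_BLOCK  1018    /* data pointers per direct node block       */
#define NIDS_PER_BLOCK   1018    /* node pointers per indirect node block     */

/* Return the node-tree depth needed to reach file block 'blk'. */
static int node_level(long blk)
{
	const long direct   = 2L * ADDRS_PER_BLOCK;                  /* two direct nodes   */
	const long indirect = 2L * ADDRS_PER_BLOCK * NIDS_PER_BLOCK; /* two indirect nodes */

	if (blk < DIRECT_IN_INODE)
		return 0;                /* addressed by the inode itself         */
	blk -= DIRECT_IN_INODE;
	if (blk < direct)
		return 1;                /* inode -> direct node                  */
	blk -= direct;
	if (blk < indirect)
		return 2;                /* inode -> indirect node -> direct node */
	return 3;                        /* double-indirect path (maximum depth)  */
}

int main(void)
{
	long samples[] = { 0, 1000, 5000, 3000000 };
	for (int i = 0; i < 4; i++)
		printf("block %ld -> level %d\n", samples[i], node_level(samples[i]));
	return 0;
}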
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r--   fs/f2fs/node.c   1763
1 files changed, 1763 insertions, 0 deletions
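Before the diff itself, here is a second minimal sketch, again only illustrative and not the kernel's implementation: the nid -> block address table (the NAT) with a small in-memory cache in front of it, mirroring the lookup order used by get_node_info() in the patch (cache first, then the NAT block; the journal-in-summary shortcut is omitted). A trivial direct-mapped array stands in for the kernel's radix tree, its locking, and its dirty-entry tracking.

#include <stdint.h>
#include <stdio.h>

#define NAT_ENTRIES 16                 /* assumed size of one toy "NAT block" */

struct nat_entry { uint32_t nid; uint32_t blk_addr; int cached; };

static uint32_t nat_block[NAT_ENTRIES];       /* stand-in for the on-disk NAT block */
static struct nat_entry cache[NAT_ENTRIES];   /* stand-in for the radix-tree cache  */

static uint32_t lookup_blkaddr(uint32_t nid)
{
	struct nat_entry *e = &cache[nid % NAT_ENTRIES];

	/* 1) check the in-memory cache first */
	if (e->cached && e->nid == nid)
		return e->blk_addr;

	/* 2) miss: read the NAT block entry and populate the cache */
	*e = (struct nat_entry){ .nid = nid,
				 .blk_addr = nat_block[nid % NAT_ENTRIES],
				 .cached = 1 };
	return e->blk_addr;
}

int main(void)
{
	nat_block[3] = 4096;                          /* pretend nid 3 lives at block 4096 */
	printf("nid 3 -> %u\n", lookup_blkaddr(3));   /* miss: read from the "NAT block"   */
	printf("nid 3 -> %u\n", lookup_blkaddr(3));   /* hit: served from the cache        */
	printf("nid 5 -> %u\n", lookup_blkaddr(5));   /* 0 plays the role of NULL_ADDR: nid 5 is free */
	return 0;
}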
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
new file mode 100644
index 000000000000..216f04dc1177
--- /dev/null
+++ b/fs/f2fs/node.c
@@ -0,0 +1,1763 @@
1/**
2 * fs/f2fs/node.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/mpage.h>
14#include <linux/backing-dev.h>
15#include <linux/blkdev.h>
16#include <linux/pagevec.h>
17#include <linux/swap.h>
18
19#include "f2fs.h"
20#include "node.h"
21#include "segment.h"
22
23static struct kmem_cache *nat_entry_slab;
24static struct kmem_cache *free_nid_slab;
25
26static void clear_node_page_dirty(struct page *page)
27{
28 struct address_space *mapping = page->mapping;
29 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
30 unsigned long flags;
31
32 if (PageDirty(page)) {
33 spin_lock_irqsave(&mapping->tree_lock, flags);
34 radix_tree_tag_clear(&mapping->page_tree,
35 page_index(page),
36 PAGECACHE_TAG_DIRTY);
37 spin_unlock_irqrestore(&mapping->tree_lock, flags);
38
39 clear_page_dirty_for_io(page);
40 dec_page_count(sbi, F2FS_DIRTY_NODES);
41 }
42 ClearPageUptodate(page);
43}
44
45static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
46{
47 pgoff_t index = current_nat_addr(sbi, nid);
48 return get_meta_page(sbi, index);
49}
50
51static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
52{
53 struct page *src_page;
54 struct page *dst_page;
55 pgoff_t src_off;
56 pgoff_t dst_off;
57 void *src_addr;
58 void *dst_addr;
59 struct f2fs_nm_info *nm_i = NM_I(sbi);
60
61 src_off = current_nat_addr(sbi, nid);
62 dst_off = next_nat_addr(sbi, src_off);
63
64 /* get current nat block page with lock */
65 src_page = get_meta_page(sbi, src_off);
66
67 /* Dirty src_page means that it is already the new target NAT page. */
68 if (PageDirty(src_page))
69 return src_page;
70
71 dst_page = grab_meta_page(sbi, dst_off);
72
73 src_addr = page_address(src_page);
74 dst_addr = page_address(dst_page);
75 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
76 set_page_dirty(dst_page);
77 f2fs_put_page(src_page, 1);
78
79 set_to_next_nat(nm_i, nid);
80
81 return dst_page;
82}
83
84/**
85 * Readahead NAT pages
86 */
87static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
88{
89 struct address_space *mapping = sbi->meta_inode->i_mapping;
90 struct f2fs_nm_info *nm_i = NM_I(sbi);
91 struct page *page;
92 pgoff_t index;
93 int i;
94
95 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
96 if (nid >= nm_i->max_nid)
97 nid = 0;
98 index = current_nat_addr(sbi, nid);
99
100 page = grab_cache_page(mapping, index);
101 if (!page)
102 continue;
103 if (f2fs_readpage(sbi, page, index, READ)) {
104 f2fs_put_page(page, 1);
105 continue;
106 }
107 page_cache_release(page);
108 }
109}
110
111static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
112{
113 return radix_tree_lookup(&nm_i->nat_root, n);
114}
115
116static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
117 nid_t start, unsigned int nr, struct nat_entry **ep)
118{
119 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
120}
121
122static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
123{
124 list_del(&e->list);
125 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
126 nm_i->nat_cnt--;
127 kmem_cache_free(nat_entry_slab, e);
128}
129
130int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
131{
132 struct f2fs_nm_info *nm_i = NM_I(sbi);
133 struct nat_entry *e;
134 int is_cp = 1;
135
136 read_lock(&nm_i->nat_tree_lock);
137 e = __lookup_nat_cache(nm_i, nid);
138 if (e && !e->checkpointed)
139 is_cp = 0;
140 read_unlock(&nm_i->nat_tree_lock);
141 return is_cp;
142}
143
144static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
145{
146 struct nat_entry *new;
147
148 new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
149 if (!new)
150 return NULL;
151 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
152 kmem_cache_free(nat_entry_slab, new);
153 return NULL;
154 }
155 memset(new, 0, sizeof(struct nat_entry));
156 nat_set_nid(new, nid);
157 list_add_tail(&new->list, &nm_i->nat_entries);
158 nm_i->nat_cnt++;
159 return new;
160}
161
162static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
163 struct f2fs_nat_entry *ne)
164{
165 struct nat_entry *e;
166retry:
167 write_lock(&nm_i->nat_tree_lock);
168 e = __lookup_nat_cache(nm_i, nid);
169 if (!e) {
170 e = grab_nat_entry(nm_i, nid);
171 if (!e) {
172 write_unlock(&nm_i->nat_tree_lock);
173 goto retry;
174 }
175 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
176 nat_set_ino(e, le32_to_cpu(ne->ino));
177 nat_set_version(e, ne->version);
178 e->checkpointed = true;
179 }
180 write_unlock(&nm_i->nat_tree_lock);
181}
182
183static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
184 block_t new_blkaddr)
185{
186 struct f2fs_nm_info *nm_i = NM_I(sbi);
187 struct nat_entry *e;
188retry:
189 write_lock(&nm_i->nat_tree_lock);
190 e = __lookup_nat_cache(nm_i, ni->nid);
191 if (!e) {
192 e = grab_nat_entry(nm_i, ni->nid);
193 if (!e) {
194 write_unlock(&nm_i->nat_tree_lock);
195 goto retry;
196 }
197 e->ni = *ni;
198 e->checkpointed = true;
199 BUG_ON(ni->blk_addr == NEW_ADDR);
200 } else if (new_blkaddr == NEW_ADDR) {
201 /*
202 * When a nid is reallocated,
203 * the previous nat entry may remain in the nat cache.
204 * So, reinitialize it with the new information.
205 */
206 e->ni = *ni;
207 BUG_ON(ni->blk_addr != NULL_ADDR);
208 }
209
210 if (new_blkaddr == NEW_ADDR)
211 e->checkpointed = false;
212
213 /* sanity check */
214 BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
215 BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
216 new_blkaddr == NULL_ADDR);
217 BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
218 new_blkaddr == NEW_ADDR);
219 BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
220 nat_get_blkaddr(e) != NULL_ADDR &&
221 new_blkaddr == NEW_ADDR);
222
223 /* increment the version number as the node is removed */
224 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
225 unsigned char version = nat_get_version(e);
226 nat_set_version(e, inc_node_version(version));
227 }
228
229 /* change address */
230 nat_set_blkaddr(e, new_blkaddr);
231 __set_nat_cache_dirty(nm_i, e);
232 write_unlock(&nm_i->nat_tree_lock);
233}
234
235static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
236{
237 struct f2fs_nm_info *nm_i = NM_I(sbi);
238
239 if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
240 return 0;
241
242 write_lock(&nm_i->nat_tree_lock);
243 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
244 struct nat_entry *ne;
245 ne = list_first_entry(&nm_i->nat_entries,
246 struct nat_entry, list);
247 __del_from_nat_cache(nm_i, ne);
248 nr_shrink--;
249 }
250 write_unlock(&nm_i->nat_tree_lock);
251 return nr_shrink;
252}
253
254/**
255 * This function always returns success.
256 */
257void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
258{
259 struct f2fs_nm_info *nm_i = NM_I(sbi);
260 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
261 struct f2fs_summary_block *sum = curseg->sum_blk;
262 nid_t start_nid = START_NID(nid);
263 struct f2fs_nat_block *nat_blk;
264 struct page *page = NULL;
265 struct f2fs_nat_entry ne;
266 struct nat_entry *e;
267 int i;
268
269 ni->nid = nid;
270
271 /* Check nat cache */
272 read_lock(&nm_i->nat_tree_lock);
273 e = __lookup_nat_cache(nm_i, nid);
274 if (e) {
275 ni->ino = nat_get_ino(e);
276 ni->blk_addr = nat_get_blkaddr(e);
277 ni->version = nat_get_version(e);
278 }
279 read_unlock(&nm_i->nat_tree_lock);
280 if (e)
281 return;
282
283 /* Check current segment summary */
284 mutex_lock(&curseg->curseg_mutex);
285 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
286 if (i >= 0) {
287 ne = nat_in_journal(sum, i);
288 node_info_from_raw_nat(ni, &ne);
289 }
290 mutex_unlock(&curseg->curseg_mutex);
291 if (i >= 0)
292 goto cache;
293
294 /* Fill node_info from nat page */
295 page = get_current_nat_page(sbi, start_nid);
296 nat_blk = (struct f2fs_nat_block *)page_address(page);
297 ne = nat_blk->entries[nid - start_nid];
298 node_info_from_raw_nat(ni, &ne);
299 f2fs_put_page(page, 1);
300cache:
301 /* cache nat entry */
302 cache_nat_entry(NM_I(sbi), nid, &ne);
303}
304
305/**
306 * The maximum depth is four.
307 * Offset[0] will have raw inode offset.
308 */
309static int get_node_path(long block, int offset[4], unsigned int noffset[4])
310{
311 const long direct_index = ADDRS_PER_INODE;
312 const long direct_blks = ADDRS_PER_BLOCK;
313 const long dptrs_per_blk = NIDS_PER_BLOCK;
314 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
315 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
316 int n = 0;
317 int level = 0;
318
319 noffset[0] = 0;
320
321 if (block < direct_index) {
322 offset[n++] = block;
323 level = 0;
324 goto got;
325 }
326 block -= direct_index;
327 if (block < direct_blks) {
328 offset[n++] = NODE_DIR1_BLOCK;
329 noffset[n] = 1;
330 offset[n++] = block;
331 level = 1;
332 goto got;
333 }
334 block -= direct_blks;
335 if (block < direct_blks) {
336 offset[n++] = NODE_DIR2_BLOCK;
337 noffset[n] = 2;
338 offset[n++] = block;
339 level = 1;
340 goto got;
341 }
342 block -= direct_blks;
343 if (block < indirect_blks) {
344 offset[n++] = NODE_IND1_BLOCK;
345 noffset[n] = 3;
346 offset[n++] = block / direct_blks;
347 noffset[n] = 4 + offset[n - 1];
348 offset[n++] = block % direct_blks;
349 level = 2;
350 goto got;
351 }
352 block -= indirect_blks;
353 if (block < indirect_blks) {
354 offset[n++] = NODE_IND2_BLOCK;
355 noffset[n] = 4 + dptrs_per_blk;
356 offset[n++] = block / direct_blks;
357 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
358 offset[n++] = block % direct_blks;
359 level = 2;
360 goto got;
361 }
362 block -= indirect_blks;
363 if (block < dindirect_blks) {
364 offset[n++] = NODE_DIND_BLOCK;
365 noffset[n] = 5 + (dptrs_per_blk * 2);
366 offset[n++] = block / indirect_blks;
367 noffset[n] = 6 + (dptrs_per_blk * 2) +
368 offset[n - 1] * (dptrs_per_blk + 1);
369 offset[n++] = (block / direct_blks) % dptrs_per_blk;
370 noffset[n] = 7 + (dptrs_per_blk * 2) +
371 offset[n - 2] * (dptrs_per_blk + 1) +
372 offset[n - 1];
373 offset[n++] = block % direct_blks;
374 level = 3;
375 goto got;
376 } else {
377 BUG();
378 }
379got:
380 return level;
381}
382
383/*
384 * Caller should call f2fs_put_dnode(dn).
385 */
386int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
387{
388 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
389 struct page *npage[4];
390 struct page *parent;
391 int offset[4];
392 unsigned int noffset[4];
393 nid_t nids[4];
394 int level, i;
395 int err = 0;
396
397 level = get_node_path(index, offset, noffset);
398
399 nids[0] = dn->inode->i_ino;
400 npage[0] = get_node_page(sbi, nids[0]);
401 if (IS_ERR(npage[0]))
402 return PTR_ERR(npage[0]);
403
404 parent = npage[0];
405 nids[1] = get_nid(parent, offset[0], true);
406 dn->inode_page = npage[0];
407 dn->inode_page_locked = true;
408
409 /* get indirect or direct nodes */
410 for (i = 1; i <= level; i++) {
411 bool done = false;
412
413 if (!nids[i] && !ro) {
414 mutex_lock_op(sbi, NODE_NEW);
415
416 /* alloc new node */
417 if (!alloc_nid(sbi, &(nids[i]))) {
418 mutex_unlock_op(sbi, NODE_NEW);
419 err = -ENOSPC;
420 goto release_pages;
421 }
422
423 dn->nid = nids[i];
424 npage[i] = new_node_page(dn, noffset[i]);
425 if (IS_ERR(npage[i])) {
426 alloc_nid_failed(sbi, nids[i]);
427 mutex_unlock_op(sbi, NODE_NEW);
428 err = PTR_ERR(npage[i]);
429 goto release_pages;
430 }
431
432 set_nid(parent, offset[i - 1], nids[i], i == 1);
433 alloc_nid_done(sbi, nids[i]);
434 mutex_unlock_op(sbi, NODE_NEW);
435 done = true;
436 } else if (ro && i == level && level > 1) {
437 npage[i] = get_node_page_ra(parent, offset[i - 1]);
438 if (IS_ERR(npage[i])) {
439 err = PTR_ERR(npage[i]);
440 goto release_pages;
441 }
442 done = true;
443 }
444 if (i == 1) {
445 dn->inode_page_locked = false;
446 unlock_page(parent);
447 } else {
448 f2fs_put_page(parent, 1);
449 }
450
451 if (!done) {
452 npage[i] = get_node_page(sbi, nids[i]);
453 if (IS_ERR(npage[i])) {
454 err = PTR_ERR(npage[i]);
455 f2fs_put_page(npage[0], 0);
456 goto release_out;
457 }
458 }
459 if (i < level) {
460 parent = npage[i];
461 nids[i + 1] = get_nid(parent, offset[i], false);
462 }
463 }
464 dn->nid = nids[level];
465 dn->ofs_in_node = offset[level];
466 dn->node_page = npage[level];
467 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
468 return 0;
469
470release_pages:
471 f2fs_put_page(parent, 1);
472 if (i > 1)
473 f2fs_put_page(npage[0], 0);
474release_out:
475 dn->inode_page = NULL;
476 dn->node_page = NULL;
477 return err;
478}
479
480static void truncate_node(struct dnode_of_data *dn)
481{
482 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
483 struct node_info ni;
484
485 get_node_info(sbi, dn->nid, &ni);
486 BUG_ON(ni.blk_addr == NULL_ADDR);
487
488 if (ni.blk_addr != NULL_ADDR)
489 invalidate_blocks(sbi, ni.blk_addr);
490
491 /* Deallocate node address */
492 dec_valid_node_count(sbi, dn->inode, 1);
493 set_node_addr(sbi, &ni, NULL_ADDR);
494
495 if (dn->nid == dn->inode->i_ino) {
496 remove_orphan_inode(sbi, dn->nid);
497 dec_valid_inode_count(sbi);
498 } else {
499 sync_inode_page(dn);
500 }
501
502 clear_node_page_dirty(dn->node_page);
503 F2FS_SET_SB_DIRT(sbi);
504
505 f2fs_put_page(dn->node_page, 1);
506 dn->node_page = NULL;
507}
508
509static int truncate_dnode(struct dnode_of_data *dn)
510{
511 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
512 struct page *page;
513
514 if (dn->nid == 0)
515 return 1;
516
517 /* get direct node */
518 page = get_node_page(sbi, dn->nid);
519 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
520 return 1;
521 else if (IS_ERR(page))
522 return PTR_ERR(page);
523
524 /* Make dnode_of_data for parameter */
525 dn->node_page = page;
526 dn->ofs_in_node = 0;
527 truncate_data_blocks(dn);
528 truncate_node(dn);
529 return 1;
530}
531
532static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
533 int ofs, int depth)
534{
535 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
536 struct dnode_of_data rdn = *dn;
537 struct page *page;
538 struct f2fs_node *rn;
539 nid_t child_nid;
540 unsigned int child_nofs;
541 int freed = 0;
542 int i, ret;
543
544 if (dn->nid == 0)
545 return NIDS_PER_BLOCK + 1;
546
547 page = get_node_page(sbi, dn->nid);
548 if (IS_ERR(page))
549 return PTR_ERR(page);
550
551 rn = (struct f2fs_node *)page_address(page);
552 if (depth < 3) {
553 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
554 child_nid = le32_to_cpu(rn->in.nid[i]);
555 if (child_nid == 0)
556 continue;
557 rdn.nid = child_nid;
558 ret = truncate_dnode(&rdn);
559 if (ret < 0)
560 goto out_err;
561 set_nid(page, i, 0, false);
562 }
563 } else {
564 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
565 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
566 child_nid = le32_to_cpu(rn->in.nid[i]);
567 if (child_nid == 0) {
568 child_nofs += NIDS_PER_BLOCK + 1;
569 continue;
570 }
571 rdn.nid = child_nid;
572 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
573 if (ret == (NIDS_PER_BLOCK + 1)) {
574 set_nid(page, i, 0, false);
575 child_nofs += ret;
576 } else if (ret < 0 && ret != -ENOENT) {
577 goto out_err;
578 }
579 }
580 freed = child_nofs;
581 }
582
583 if (!ofs) {
584 /* remove current indirect node */
585 dn->node_page = page;
586 truncate_node(dn);
587 freed++;
588 } else {
589 f2fs_put_page(page, 1);
590 }
591 return freed;
592
593out_err:
594 f2fs_put_page(page, 1);
595 return ret;
596}
597
598static int truncate_partial_nodes(struct dnode_of_data *dn,
599 struct f2fs_inode *ri, int *offset, int depth)
600{
601 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
602 struct page *pages[2];
603 nid_t nid[3];
604 nid_t child_nid;
605 int err = 0;
606 int i;
607 int idx = depth - 2;
608
609 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
610 if (!nid[0])
611 return 0;
612
613 /* get indirect nodes in the path */
614 for (i = 0; i < depth - 1; i++) {
615 /* reference count will be increased */
616 pages[i] = get_node_page(sbi, nid[i]);
617 if (IS_ERR(pages[i])) {
618 depth = i + 1;
619 err = PTR_ERR(pages[i]);
620 goto fail;
621 }
622 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
623 }
624
625 /* free direct nodes linked to a partial indirect node */
626 for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
627 child_nid = get_nid(pages[idx], i, false);
628 if (!child_nid)
629 continue;
630 dn->nid = child_nid;
631 err = truncate_dnode(dn);
632 if (err < 0)
633 goto fail;
634 set_nid(pages[idx], i, 0, false);
635 }
636
637 if (offset[depth - 1] == 0) {
638 dn->node_page = pages[idx];
639 dn->nid = nid[idx];
640 truncate_node(dn);
641 } else {
642 f2fs_put_page(pages[idx], 1);
643 }
644 offset[idx]++;
645 offset[depth - 1] = 0;
646fail:
647 for (i = depth - 3; i >= 0; i--)
648 f2fs_put_page(pages[i], 1);
649 return err;
650}
651
652/**
653 * All the block addresses of data and nodes should be nullified.
654 */
655int truncate_inode_blocks(struct inode *inode, pgoff_t from)
656{
657 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
658 int err = 0, cont = 1;
659 int level, offset[4], noffset[4];
660 unsigned int nofs;
661 struct f2fs_node *rn;
662 struct dnode_of_data dn;
663 struct page *page;
664
665 level = get_node_path(from, offset, noffset);
666
667 page = get_node_page(sbi, inode->i_ino);
668 if (IS_ERR(page))
669 return PTR_ERR(page);
670
671 set_new_dnode(&dn, inode, page, NULL, 0);
672 unlock_page(page);
673
674 rn = page_address(page);
675 switch (level) {
676 case 0:
677 case 1:
678 nofs = noffset[1];
679 break;
680 case 2:
681 nofs = noffset[1];
682 if (!offset[level - 1])
683 goto skip_partial;
684 err = truncate_partial_nodes(&dn, &rn->i, offset, level);
685 if (err < 0 && err != -ENOENT)
686 goto fail;
687 nofs += 1 + NIDS_PER_BLOCK;
688 break;
689 case 3:
690 nofs = 5 + 2 * NIDS_PER_BLOCK;
691 if (!offset[level - 1])
692 goto skip_partial;
693 err = truncate_partial_nodes(&dn, &rn->i, offset, level);
694 if (err < 0 && err != -ENOENT)
695 goto fail;
696 break;
697 default:
698 BUG();
699 }
700
701skip_partial:
702 while (cont) {
703 dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
704 switch (offset[0]) {
705 case NODE_DIR1_BLOCK:
706 case NODE_DIR2_BLOCK:
707 err = truncate_dnode(&dn);
708 break;
709
710 case NODE_IND1_BLOCK:
711 case NODE_IND2_BLOCK:
712 err = truncate_nodes(&dn, nofs, offset[1], 2);
713 break;
714
715 case NODE_DIND_BLOCK:
716 err = truncate_nodes(&dn, nofs, offset[1], 3);
717 cont = 0;
718 break;
719
720 default:
721 BUG();
722 }
723 if (err < 0 && err != -ENOENT)
724 goto fail;
725 if (offset[1] == 0 &&
726 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
727 lock_page(page);
728 wait_on_page_writeback(page);
729 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
730 set_page_dirty(page);
731 unlock_page(page);
732 }
733 offset[1] = 0;
734 offset[0]++;
735 nofs += err;
736 }
737fail:
738 f2fs_put_page(page, 0);
739 return err > 0 ? 0 : err;
740}
741
742int remove_inode_page(struct inode *inode)
743{
744 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
745 struct page *page;
746 nid_t ino = inode->i_ino;
747 struct dnode_of_data dn;
748
749 mutex_lock_op(sbi, NODE_TRUNC);
750 page = get_node_page(sbi, ino);
751 if (IS_ERR(page)) {
752 mutex_unlock_op(sbi, NODE_TRUNC);
753 return PTR_ERR(page);
754 }
755
756 if (F2FS_I(inode)->i_xattr_nid) {
757 nid_t nid = F2FS_I(inode)->i_xattr_nid;
758 struct page *npage = get_node_page(sbi, nid);
759
760 if (IS_ERR(npage)) {
761 mutex_unlock_op(sbi, NODE_TRUNC);
762 return PTR_ERR(npage);
763 }
764
765 F2FS_I(inode)->i_xattr_nid = 0;
766 set_new_dnode(&dn, inode, page, npage, nid);
767 dn.inode_page_locked = 1;
768 truncate_node(&dn);
769 }
770 if (inode->i_blocks == 1) {
771 /* internally calls f2fs_put_page() */
772 set_new_dnode(&dn, inode, page, page, ino);
773 truncate_node(&dn);
774 } else if (inode->i_blocks == 0) {
775 struct node_info ni;
776 get_node_info(sbi, inode->i_ino, &ni);
777
778 /* called after f2fs_new_inode() is failed */
779 BUG_ON(ni.blk_addr != NULL_ADDR);
780 f2fs_put_page(page, 1);
781 } else {
782 BUG();
783 }
784 mutex_unlock_op(sbi, NODE_TRUNC);
785 return 0;
786}
787
788int new_inode_page(struct inode *inode, struct dentry *dentry)
789{
790 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
791 struct page *page;
792 struct dnode_of_data dn;
793
794 /* allocate inode page for new inode */
795 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
796 mutex_lock_op(sbi, NODE_NEW);
797 page = new_node_page(&dn, 0);
798 init_dent_inode(dentry, page);
799 mutex_unlock_op(sbi, NODE_NEW);
800 if (IS_ERR(page))
801 return PTR_ERR(page);
802 f2fs_put_page(page, 1);
803 return 0;
804}
805
806struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
807{
808 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
809 struct address_space *mapping = sbi->node_inode->i_mapping;
810 struct node_info old_ni, new_ni;
811 struct page *page;
812 int err;
813
814 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
815 return ERR_PTR(-EPERM);
816
817 page = grab_cache_page(mapping, dn->nid);
818 if (!page)
819 return ERR_PTR(-ENOMEM);
820
821 get_node_info(sbi, dn->nid, &old_ni);
822
823 SetPageUptodate(page);
824 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
825
826 /* Reinitialize old_ni with new node page */
827 BUG_ON(old_ni.blk_addr != NULL_ADDR);
828 new_ni = old_ni;
829 new_ni.ino = dn->inode->i_ino;
830
831 if (!inc_valid_node_count(sbi, dn->inode, 1)) {
832 err = -ENOSPC;
833 goto fail;
834 }
835 set_node_addr(sbi, &new_ni, NEW_ADDR);
836
837 dn->node_page = page;
838 sync_inode_page(dn);
839 set_page_dirty(page);
840 set_cold_node(dn->inode, page);
841 if (ofs == 0)
842 inc_valid_inode_count(sbi);
843
844 return page;
845
846fail:
847 f2fs_put_page(page, 1);
848 return ERR_PTR(err);
849}
850
851static int read_node_page(struct page *page, int type)
852{
853 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
854 struct node_info ni;
855
856 get_node_info(sbi, page->index, &ni);
857
858 if (ni.blk_addr == NULL_ADDR)
859 return -ENOENT;
860 return f2fs_readpage(sbi, page, ni.blk_addr, type);
861}
862
863/**
864 * Readahead a node page
865 */
866void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
867{
868 struct address_space *mapping = sbi->node_inode->i_mapping;
869 struct page *apage;
870
871 apage = find_get_page(mapping, nid);
872 if (apage && PageUptodate(apage))
873 goto release_out;
874 f2fs_put_page(apage, 0);
875
876 apage = grab_cache_page(mapping, nid);
877 if (!apage)
878 return;
879
880 if (read_node_page(apage, READA))
881 goto unlock_out;
882
883 page_cache_release(apage);
884 return;
885
886unlock_out:
887 unlock_page(apage);
888release_out:
889 page_cache_release(apage);
890}
891
892struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
893{
894 int err;
895 struct page *page;
896 struct address_space *mapping = sbi->node_inode->i_mapping;
897
898 page = grab_cache_page(mapping, nid);
899 if (!page)
900 return ERR_PTR(-ENOMEM);
901
902 err = read_node_page(page, READ_SYNC);
903 if (err) {
904 f2fs_put_page(page, 1);
905 return ERR_PTR(err);
906 }
907
908 BUG_ON(nid != nid_of_node(page));
909 mark_page_accessed(page);
910 return page;
911}
912
913/**
914 * Return a locked page for the desired node page.
915 * Also, readahead MAX_RA_NODE node pages.
916 */
917struct page *get_node_page_ra(struct page *parent, int start)
918{
919 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
920 struct address_space *mapping = sbi->node_inode->i_mapping;
921 int i, end;
922 int err = 0;
923 nid_t nid;
924 struct page *page;
925
926 /* First, try getting the desired direct node. */
927 nid = get_nid(parent, start, false);
928 if (!nid)
929 return ERR_PTR(-ENOENT);
930
931 page = find_get_page(mapping, nid);
932 if (page && PageUptodate(page))
933 goto page_hit;
934 f2fs_put_page(page, 0);
935
936repeat:
937 page = grab_cache_page(mapping, nid);
938 if (!page)
939 return ERR_PTR(-ENOMEM);
940
941 err = read_node_page(page, READA);
942 if (err) {
943 f2fs_put_page(page, 1);
944 return ERR_PTR(err);
945 }
946
947 /* Then, try readahead for siblings of the desired node */
948 end = start + MAX_RA_NODE;
949 end = min(end, NIDS_PER_BLOCK);
950 for (i = start + 1; i < end; i++) {
951 nid = get_nid(parent, i, false);
952 if (!nid)
953 continue;
954 ra_node_page(sbi, nid);
955 }
956
957page_hit:
958 lock_page(page);
959 if (PageError(page)) {
960 f2fs_put_page(page, 1);
961 return ERR_PTR(-EIO);
962 }
963
964 /* Has the page been truncated? */
965 if (page->mapping != mapping) {
966 f2fs_put_page(page, 1);
967 goto repeat;
968 }
969 return page;
970}
971
972void sync_inode_page(struct dnode_of_data *dn)
973{
974 if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
975 update_inode(dn->inode, dn->node_page);
976 } else if (dn->inode_page) {
977 if (!dn->inode_page_locked)
978 lock_page(dn->inode_page);
979 update_inode(dn->inode, dn->inode_page);
980 if (!dn->inode_page_locked)
981 unlock_page(dn->inode_page);
982 } else {
983 f2fs_write_inode(dn->inode, NULL);
984 }
985}
986
987int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
988 struct writeback_control *wbc)
989{
990 struct address_space *mapping = sbi->node_inode->i_mapping;
991 pgoff_t index, end;
992 struct pagevec pvec;
993 int step = ino ? 2 : 0;
994 int nwritten = 0, wrote = 0;
995
996 pagevec_init(&pvec, 0);
997
998next_step:
999 index = 0;
1000 end = LONG_MAX;
1001
1002 while (index <= end) {
1003 int i, nr_pages;
1004 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1005 PAGECACHE_TAG_DIRTY,
1006 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1007 if (nr_pages == 0)
1008 break;
1009
1010 for (i = 0; i < nr_pages; i++) {
1011 struct page *page = pvec.pages[i];
1012
1013 /*
1014 * flushing sequence with step:
1015 * 0. indirect nodes
1016 * 1. dentry dnodes
1017 * 2. file dnodes
1018 */
1019 if (step == 0 && IS_DNODE(page))
1020 continue;
1021 if (step == 1 && (!IS_DNODE(page) ||
1022 is_cold_node(page)))
1023 continue;
1024 if (step == 2 && (!IS_DNODE(page) ||
1025 !is_cold_node(page)))
1026 continue;
1027
1028 /*
1029 * In fsync mode,
1030 * we should not skip writing node pages.
1031 */
1032 if (ino && ino_of_node(page) == ino)
1033 lock_page(page);
1034 else if (!trylock_page(page))
1035 continue;
1036
1037 if (unlikely(page->mapping != mapping)) {
1038continue_unlock:
1039 unlock_page(page);
1040 continue;
1041 }
1042 if (ino && ino_of_node(page) != ino)
1043 goto continue_unlock;
1044
1045 if (!PageDirty(page)) {
1046 /* someone wrote it for us */
1047 goto continue_unlock;
1048 }
1049
1050 if (!clear_page_dirty_for_io(page))
1051 goto continue_unlock;
1052
1053 /* called by fsync() */
1054 if (ino && IS_DNODE(page)) {
1055 int mark = !is_checkpointed_node(sbi, ino);
1056 set_fsync_mark(page, 1);
1057 if (IS_INODE(page))
1058 set_dentry_mark(page, mark);
1059 nwritten++;
1060 } else {
1061 set_fsync_mark(page, 0);
1062 set_dentry_mark(page, 0);
1063 }
1064 mapping->a_ops->writepage(page, wbc);
1065 wrote++;
1066
1067 if (--wbc->nr_to_write == 0)
1068 break;
1069 }
1070 pagevec_release(&pvec);
1071 cond_resched();
1072
1073 if (wbc->nr_to_write == 0) {
1074 step = 2;
1075 break;
1076 }
1077 }
1078
1079 if (step < 2) {
1080 step++;
1081 goto next_step;
1082 }
1083
1084 if (wrote)
1085 f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
1086
1087 return nwritten;
1088}
1089
1090static int f2fs_write_node_page(struct page *page,
1091 struct writeback_control *wbc)
1092{
1093 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1094 nid_t nid;
1095 unsigned int nofs;
1096 block_t new_addr;
1097 struct node_info ni;
1098
1099 if (wbc->for_reclaim) {
1100 dec_page_count(sbi, F2FS_DIRTY_NODES);
1101 wbc->pages_skipped++;
1102 set_page_dirty(page);
1103 return AOP_WRITEPAGE_ACTIVATE;
1104 }
1105
1106 wait_on_page_writeback(page);
1107
1108 mutex_lock_op(sbi, NODE_WRITE);
1109
1110 /* get old block addr of this node page */
1111 nid = nid_of_node(page);
1112 nofs = ofs_of_node(page);
1113 BUG_ON(page->index != nid);
1114
1115 get_node_info(sbi, nid, &ni);
1116
1117 /* This page is already truncated */
1118 if (ni.blk_addr == NULL_ADDR)
1119 return 0;
1120
1121 set_page_writeback(page);
1122
1123 /* insert node offset */
1124 write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
1125 set_node_addr(sbi, &ni, new_addr);
1126 dec_page_count(sbi, F2FS_DIRTY_NODES);
1127
1128 mutex_unlock_op(sbi, NODE_WRITE);
1129 unlock_page(page);
1130 return 0;
1131}
1132
1133static int f2fs_write_node_pages(struct address_space *mapping,
1134 struct writeback_control *wbc)
1135{
1136 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1137 struct block_device *bdev = sbi->sb->s_bdev;
1138 long nr_to_write = wbc->nr_to_write;
1139
1140 if (wbc->for_kupdate)
1141 return 0;
1142
1143 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1144 return 0;
1145
1146 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1147 write_checkpoint(sbi, false, false);
1148 return 0;
1149 }
1150
1151 /* if mounting failed, skip writing node pages */
1152 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1153 sync_node_pages(sbi, 0, wbc);
1154 wbc->nr_to_write = nr_to_write -
1155 (bio_get_nr_vecs(bdev) - wbc->nr_to_write);
1156 return 0;
1157}
1158
1159static int f2fs_set_node_page_dirty(struct page *page)
1160{
1161 struct address_space *mapping = page->mapping;
1162 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1163
1164 SetPageUptodate(page);
1165 if (!PageDirty(page)) {
1166 __set_page_dirty_nobuffers(page);
1167 inc_page_count(sbi, F2FS_DIRTY_NODES);
1168 SetPagePrivate(page);
1169 return 1;
1170 }
1171 return 0;
1172}
1173
1174static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
1175{
1176 struct inode *inode = page->mapping->host;
1177 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1178 if (PageDirty(page))
1179 dec_page_count(sbi, F2FS_DIRTY_NODES);
1180 ClearPagePrivate(page);
1181}
1182
1183static int f2fs_release_node_page(struct page *page, gfp_t wait)
1184{
1185 ClearPagePrivate(page);
1186 return 0;
1187}
1188
1189/**
1190 * Structure of the f2fs node operations
1191 */
1192const struct address_space_operations f2fs_node_aops = {
1193 .writepage = f2fs_write_node_page,
1194 .writepages = f2fs_write_node_pages,
1195 .set_page_dirty = f2fs_set_node_page_dirty,
1196 .invalidatepage = f2fs_invalidate_node_page,
1197 .releasepage = f2fs_release_node_page,
1198};
1199
1200static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
1201{
1202 struct list_head *this;
1203 struct free_nid *i = NULL;
1204 list_for_each(this, head) {
1205 i = list_entry(this, struct free_nid, list);
1206 if (i->nid == n)
1207 break;
1208 i = NULL;
1209 }
1210 return i;
1211}
1212
1213static void __del_from_free_nid_list(struct free_nid *i)
1214{
1215 list_del(&i->list);
1216 kmem_cache_free(free_nid_slab, i);
1217}
1218
1219static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1220{
1221 struct free_nid *i;
1222
1223 if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
1224 return 0;
1225retry:
1226 i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1227 if (!i) {
1228 cond_resched();
1229 goto retry;
1230 }
1231 i->nid = nid;
1232 i->state = NID_NEW;
1233
1234 spin_lock(&nm_i->free_nid_list_lock);
1235 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
1236 spin_unlock(&nm_i->free_nid_list_lock);
1237 kmem_cache_free(free_nid_slab, i);
1238 return 0;
1239 }
1240 list_add_tail(&i->list, &nm_i->free_nid_list);
1241 nm_i->fcnt++;
1242 spin_unlock(&nm_i->free_nid_list_lock);
1243 return 1;
1244}
1245
1246static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1247{
1248 struct free_nid *i;
1249 spin_lock(&nm_i->free_nid_list_lock);
1250 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1251 if (i && i->state == NID_NEW) {
1252 __del_from_free_nid_list(i);
1253 nm_i->fcnt--;
1254 }
1255 spin_unlock(&nm_i->free_nid_list_lock);
1256}
1257
1258static int scan_nat_page(struct f2fs_nm_info *nm_i,
1259 struct page *nat_page, nid_t start_nid)
1260{
1261 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1262 block_t blk_addr;
1263 int fcnt = 0;
1264 int i;
1265
1266 /* 0 nid should not be used */
1267 if (start_nid == 0)
1268 ++start_nid;
1269
1270 i = start_nid % NAT_ENTRY_PER_BLOCK;
1271
1272 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1273 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1274 BUG_ON(blk_addr == NEW_ADDR);
1275 if (blk_addr == NULL_ADDR)
1276 fcnt += add_free_nid(nm_i, start_nid);
1277 }
1278 return fcnt;
1279}
1280
1281static void build_free_nids(struct f2fs_sb_info *sbi)
1282{
1283 struct free_nid *fnid, *next_fnid;
1284 struct f2fs_nm_info *nm_i = NM_I(sbi);
1285 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1286 struct f2fs_summary_block *sum = curseg->sum_blk;
1287 nid_t nid = 0;
1288 bool is_cycled = false;
1289 int fcnt = 0;
1290 int i;
1291
1292 nid = nm_i->next_scan_nid;
1293 nm_i->init_scan_nid = nid;
1294
1295 ra_nat_pages(sbi, nid);
1296
1297 while (1) {
1298 struct page *page = get_current_nat_page(sbi, nid);
1299
1300 fcnt += scan_nat_page(nm_i, page, nid);
1301 f2fs_put_page(page, 1);
1302
1303 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1304
1305 if (nid >= nm_i->max_nid) {
1306 nid = 0;
1307 is_cycled = true;
1308 }
1309 if (fcnt > MAX_FREE_NIDS)
1310 break;
1311 if (is_cycled && nm_i->init_scan_nid <= nid)
1312 break;
1313 }
1314
1315 nm_i->next_scan_nid = nid;
1316
1317 /* find free nids from current sum_pages */
1318 mutex_lock(&curseg->curseg_mutex);
1319 for (i = 0; i < nats_in_cursum(sum); i++) {
1320 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1321 nid = le32_to_cpu(nid_in_journal(sum, i));
1322 if (addr == NULL_ADDR)
1323 add_free_nid(nm_i, nid);
1324 else
1325 remove_free_nid(nm_i, nid);
1326 }
1327 mutex_unlock(&curseg->curseg_mutex);
1328
1329 /* remove nids that are already allocated from the free nid list */
1330 list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
1331 struct nat_entry *ne;
1332
1333 read_lock(&nm_i->nat_tree_lock);
1334 ne = __lookup_nat_cache(nm_i, fnid->nid);
1335 if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
1336 remove_free_nid(nm_i, fnid->nid);
1337 read_unlock(&nm_i->nat_tree_lock);
1338 }
1339}
1340
1341/*
1342 * If this function returns success, the caller can obtain a new nid
1343 * from the second parameter of this function.
1344 * The returned nid can be used as an ino as well as a nid when an inode is created.
1345 */
1346bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1347{
1348 struct f2fs_nm_info *nm_i = NM_I(sbi);
1349 struct free_nid *i = NULL;
1350 struct list_head *this;
1351retry:
1352 mutex_lock(&nm_i->build_lock);
1353 if (!nm_i->fcnt) {
1354 /* scan NAT in order to build free nid list */
1355 build_free_nids(sbi);
1356 if (!nm_i->fcnt) {
1357 mutex_unlock(&nm_i->build_lock);
1358 return false;
1359 }
1360 }
1361 mutex_unlock(&nm_i->build_lock);
1362
1363 /*
1364 * We check fcnt again since the previous check is racy, as
1365 * we didn't hold free_nid_list_lock. So another thread
1366 * could have consumed all of the free nids.
1367 */
1368 spin_lock(&nm_i->free_nid_list_lock);
1369 if (!nm_i->fcnt) {
1370 spin_unlock(&nm_i->free_nid_list_lock);
1371 goto retry;
1372 }
1373
1374 BUG_ON(list_empty(&nm_i->free_nid_list));
1375 list_for_each(this, &nm_i->free_nid_list) {
1376 i = list_entry(this, struct free_nid, list);
1377 if (i->state == NID_NEW)
1378 break;
1379 }
1380
1381 BUG_ON(i->state != NID_NEW);
1382 *nid = i->nid;
1383 i->state = NID_ALLOC;
1384 nm_i->fcnt--;
1385 spin_unlock(&nm_i->free_nid_list_lock);
1386 return true;
1387}
1388
1389/**
1390 * alloc_nid() should be called prior to this function.
1391 */
1392void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1393{
1394 struct f2fs_nm_info *nm_i = NM_I(sbi);
1395 struct free_nid *i;
1396
1397 spin_lock(&nm_i->free_nid_list_lock);
1398 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1399 if (i) {
1400 BUG_ON(i->state != NID_ALLOC);
1401 __del_from_free_nid_list(i);
1402 }
1403 spin_unlock(&nm_i->free_nid_list_lock);
1404}
1405
1406/**
1407 * alloc_nid() should be called prior to this function.
1408 */
1409void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1410{
1411 alloc_nid_done(sbi, nid);
1412 add_free_nid(NM_I(sbi), nid);
1413}
1414
1415void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1416 struct f2fs_summary *sum, struct node_info *ni,
1417 block_t new_blkaddr)
1418{
1419 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1420 set_node_addr(sbi, ni, new_blkaddr);
1421 clear_node_page_dirty(page);
1422}
1423
1424int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1425{
1426 struct address_space *mapping = sbi->node_inode->i_mapping;
1427 struct f2fs_node *src, *dst;
1428 nid_t ino = ino_of_node(page);
1429 struct node_info old_ni, new_ni;
1430 struct page *ipage;
1431
1432 ipage = grab_cache_page(mapping, ino);
1433 if (!ipage)
1434 return -ENOMEM;
1435
1436 /* Should not use this inode from free nid list */
1437 remove_free_nid(NM_I(sbi), ino);
1438
1439 get_node_info(sbi, ino, &old_ni);
1440 SetPageUptodate(ipage);
1441 fill_node_footer(ipage, ino, ino, 0, true);
1442
1443 src = (struct f2fs_node *)page_address(page);
1444 dst = (struct f2fs_node *)page_address(ipage);
1445
1446 memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
1447 dst->i.i_size = 0;
1448 dst->i.i_blocks = 1;
1449 dst->i.i_links = 1;
1450 dst->i.i_xattr_nid = 0;
1451
1452 new_ni = old_ni;
1453 new_ni.ino = ino;
1454
1455 set_node_addr(sbi, &new_ni, NEW_ADDR);
1456 inc_valid_inode_count(sbi);
1457
1458 f2fs_put_page(ipage, 1);
1459 return 0;
1460}
1461
1462int restore_node_summary(struct f2fs_sb_info *sbi,
1463 unsigned int segno, struct f2fs_summary_block *sum)
1464{
1465 struct f2fs_node *rn;
1466 struct f2fs_summary *sum_entry;
1467 struct page *page;
1468 block_t addr;
1469 int i, last_offset;
1470
1471 /* alloc a temporary page for reading node blocks */
1472 page = alloc_page(GFP_NOFS | __GFP_ZERO);
1473 if (IS_ERR(page))
1474 return PTR_ERR(page);
1475 lock_page(page);
1476
1477 /* scan the node segment */
1478 last_offset = sbi->blocks_per_seg;
1479 addr = START_BLOCK(sbi, segno);
1480 sum_entry = &sum->entries[0];
1481
1482 for (i = 0; i < last_offset; i++, sum_entry++) {
1483 if (f2fs_readpage(sbi, page, addr, READ_SYNC))
1484 goto out;
1485
1486 rn = (struct f2fs_node *)page_address(page);
1487 sum_entry->nid = rn->footer.nid;
1488 sum_entry->version = 0;
1489 sum_entry->ofs_in_node = 0;
1490 addr++;
1491
1492 /*
1493 * In order to read the next node page,
1494 * we must clear the PageUptodate flag.
1495 */
1496 ClearPageUptodate(page);
1497 }
1498out:
1499 unlock_page(page);
1500 __free_pages(page, 0);
1501 return 0;
1502}
1503
1504static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1505{
1506 struct f2fs_nm_info *nm_i = NM_I(sbi);
1507 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1508 struct f2fs_summary_block *sum = curseg->sum_blk;
1509 int i;
1510
1511 mutex_lock(&curseg->curseg_mutex);
1512
1513 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1514 mutex_unlock(&curseg->curseg_mutex);
1515 return false;
1516 }
1517
1518 for (i = 0; i < nats_in_cursum(sum); i++) {
1519 struct nat_entry *ne;
1520 struct f2fs_nat_entry raw_ne;
1521 nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1522
1523 raw_ne = nat_in_journal(sum, i);
1524retry:
1525 write_lock(&nm_i->nat_tree_lock);
1526 ne = __lookup_nat_cache(nm_i, nid);
1527 if (ne) {
1528 __set_nat_cache_dirty(nm_i, ne);
1529 write_unlock(&nm_i->nat_tree_lock);
1530 continue;
1531 }
1532 ne = grab_nat_entry(nm_i, nid);
1533 if (!ne) {
1534 write_unlock(&nm_i->nat_tree_lock);
1535 goto retry;
1536 }
1537 nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
1538 nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
1539 nat_set_version(ne, raw_ne.version);
1540 __set_nat_cache_dirty(nm_i, ne);
1541 write_unlock(&nm_i->nat_tree_lock);
1542 }
1543 update_nats_in_cursum(sum, -i);
1544 mutex_unlock(&curseg->curseg_mutex);
1545 return true;
1546}
1547
1548/**
1549 * This function is called during the checkpointing process.
1550 */
1551void flush_nat_entries(struct f2fs_sb_info *sbi)
1552{
1553 struct f2fs_nm_info *nm_i = NM_I(sbi);
1554 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1555 struct f2fs_summary_block *sum = curseg->sum_blk;
1556 struct list_head *cur, *n;
1557 struct page *page = NULL;
1558 struct f2fs_nat_block *nat_blk = NULL;
1559 nid_t start_nid = 0, end_nid = 0;
1560 bool flushed;
1561
1562 flushed = flush_nats_in_journal(sbi);
1563
1564 if (!flushed)
1565 mutex_lock(&curseg->curseg_mutex);
1566
1567 /* 1) flush dirty nat caches */
1568 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
1569 struct nat_entry *ne;
1570 nid_t nid;
1571 struct f2fs_nat_entry raw_ne;
1572 int offset = -1;
1573 block_t old_blkaddr, new_blkaddr;
1574
1575 ne = list_entry(cur, struct nat_entry, list);
1576 nid = nat_get_nid(ne);
1577
1578 if (nat_get_blkaddr(ne) == NEW_ADDR)
1579 continue;
1580 if (flushed)
1581 goto to_nat_page;
1582
1583 /* if there is room for nat entries in curseg->sum_blk */
1584 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1585 if (offset >= 0) {
1586 raw_ne = nat_in_journal(sum, offset);
1587 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1588 goto flush_now;
1589 }
1590to_nat_page:
1591 if (!page || (start_nid > nid || nid > end_nid)) {
1592 if (page) {
1593 f2fs_put_page(page, 1);
1594 page = NULL;
1595 }
1596 start_nid = START_NID(nid);
1597 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1598
1599 /*
1600 * get the nat block page with the dirty flag set, its
1601 * reference count increased, mapped, and locked
1602 */
1603 page = get_next_nat_page(sbi, start_nid);
1604 nat_blk = page_address(page);
1605 }
1606
1607 BUG_ON(!nat_blk);
1608 raw_ne = nat_blk->entries[nid - start_nid];
1609 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1610flush_now:
1611 new_blkaddr = nat_get_blkaddr(ne);
1612
1613 raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
1614 raw_ne.block_addr = cpu_to_le32(new_blkaddr);
1615 raw_ne.version = nat_get_version(ne);
1616
1617 if (offset < 0) {
1618 nat_blk->entries[nid - start_nid] = raw_ne;
1619 } else {
1620 nat_in_journal(sum, offset) = raw_ne;
1621 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1622 }
1623
1624 if (nat_get_blkaddr(ne) == NULL_ADDR) {
1625 write_lock(&nm_i->nat_tree_lock);
1626 __del_from_nat_cache(nm_i, ne);
1627 write_unlock(&nm_i->nat_tree_lock);
1628
1629 /* We can reuse this freed nid at this point */
1630 add_free_nid(NM_I(sbi), nid);
1631 } else {
1632 write_lock(&nm_i->nat_tree_lock);
1633 __clear_nat_cache_dirty(nm_i, ne);
1634 ne->checkpointed = true;
1635 write_unlock(&nm_i->nat_tree_lock);
1636 }
1637 }
1638 if (!flushed)
1639 mutex_unlock(&curseg->curseg_mutex);
1640 f2fs_put_page(page, 1);
1641
1642 /* 2) shrink nat caches if necessary */
1643 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1644}
1645
1646static int init_node_manager(struct f2fs_sb_info *sbi)
1647{
1648 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1649 struct f2fs_nm_info *nm_i = NM_I(sbi);
1650 unsigned char *version_bitmap;
1651 unsigned int nat_segs, nat_blocks;
1652
1653 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1654
1655 /* segment_count_nat includes the pair segment, so divide by 2. */
1656 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1657 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1658 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1659 nm_i->fcnt = 0;
1660 nm_i->nat_cnt = 0;
1661
1662 INIT_LIST_HEAD(&nm_i->free_nid_list);
1663 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1664 INIT_LIST_HEAD(&nm_i->nat_entries);
1665 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1666
1667 mutex_init(&nm_i->build_lock);
1668 spin_lock_init(&nm_i->free_nid_list_lock);
1669 rwlock_init(&nm_i->nat_tree_lock);
1670
1671 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1672 nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1673 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1674
1675 nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
1676 if (!nm_i->nat_bitmap)
1677 return -ENOMEM;
1678 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1679 if (!version_bitmap)
1680 return -EFAULT;
1681
1682 /* copy version bitmap */
1683 memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
1684 return 0;
1685}
1686
1687int build_node_manager(struct f2fs_sb_info *sbi)
1688{
1689 int err;
1690
1691 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1692 if (!sbi->nm_info)
1693 return -ENOMEM;
1694
1695 err = init_node_manager(sbi);
1696 if (err)
1697 return err;
1698
1699 build_free_nids(sbi);
1700 return 0;
1701}
1702
1703void destroy_node_manager(struct f2fs_sb_info *sbi)
1704{
1705 struct f2fs_nm_info *nm_i = NM_I(sbi);
1706 struct free_nid *i, *next_i;
1707 struct nat_entry *natvec[NATVEC_SIZE];
1708 nid_t nid = 0;
1709 unsigned int found;
1710
1711 if (!nm_i)
1712 return;
1713
1714 /* destroy free nid list */
1715 spin_lock(&nm_i->free_nid_list_lock);
1716 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1717 BUG_ON(i->state == NID_ALLOC);
1718 __del_from_free_nid_list(i);
1719 nm_i->fcnt--;
1720 }
1721 BUG_ON(nm_i->fcnt);
1722 spin_unlock(&nm_i->free_nid_list_lock);
1723
1724 /* destroy nat cache */
1725 write_lock(&nm_i->nat_tree_lock);
1726 while ((found = __gang_lookup_nat_cache(nm_i,
1727 nid, NATVEC_SIZE, natvec))) {
1728 unsigned idx;
1729 for (idx = 0; idx < found; idx++) {
1730 struct nat_entry *e = natvec[idx];
1731 nid = nat_get_nid(e) + 1;
1732 __del_from_nat_cache(nm_i, e);
1733 }
1734 }
1735 BUG_ON(nm_i->nat_cnt);
1736 write_unlock(&nm_i->nat_tree_lock);
1737
1738 kfree(nm_i->nat_bitmap);
1739 sbi->nm_info = NULL;
1740 kfree(nm_i);
1741}
1742
1743int create_node_manager_caches(void)
1744{
1745 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1746 sizeof(struct nat_entry), NULL);
1747 if (!nat_entry_slab)
1748 return -ENOMEM;
1749
1750 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1751 sizeof(struct free_nid), NULL);
1752 if (!free_nid_slab) {
1753 kmem_cache_destroy(nat_entry_slab);
1754 return -ENOMEM;
1755 }
1756 return 0;
1757}
1758
1759void destroy_node_manager_caches(void)
1760{
1761 kmem_cache_destroy(free_nid_slab);
1762 kmem_cache_destroy(nat_entry_slab);
1763}