author     Jaegeuk Kim <jaegeuk.kim@samsung.com>	2013-04-25 03:05:51 -0400
committer  Jaegeuk Kim <jaegeuk.kim@samsung.com>	2013-04-28 22:19:21 -0400
commit     55008d845d233396ed374473da4613cee691aa03
tree       f869d0a791bee739b31714a49b8d8e092de44bf6
parent     d70b4f53b950676228297f0b204f2e1512c1ff6c
f2fs: enhance alloc_nid and build_free_nids flows
In order to avoid build_free_nids lock contention, let's change the order of
function calls as follows.

At first, check whether there are enough free nids.
- If available, just get a free nid with spin_lock without any other overhead.
- Otherwise, conduct build_free_nids
  : scan NAT pages, journal NAT entries, and NAT cache entries.

We should take care not to serve free nids produced intermediately by
build_free_nids; stable free nids are available only after build_free_nids
completes (the reordered flow is sketched below).
Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
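
To make the new ordering concrete, below is a small userspace model of the
alloc_nid()/build_free_nids() interaction described above. It is only a
sketch: names and types are stand-ins, the free list is reduced to an array,
and the list lock (a spinlock in f2fs) is modeled with a mutex so static
initializers suffice.

/*
 * Userspace model of the reordered alloc_nid() flow; illustrative
 * only, not f2fs code. Compile with: cc -pthread model.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define POOL_MAX 64

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;  /* ~ free_nid_list_lock */
static pthread_mutex_t build_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ nm_i->build_lock */
static int pool[POOL_MAX], pool_cnt;    /* ~ free_nid_list + nm_i->fcnt */
static int on_build;                    /* ~ sbi->on_build_free_nids */
static int next_id = 1, max_id = 1000;  /* ~ next_scan_nid / max_nid */

/* ~ build_free_nids(): refill the pool; entries become visible one by
 * one, which is why consumers must wait until the whole build is done */
static void build_free_ids(void)
{
	pthread_mutex_lock(&pool_lock);
	while (pool_cnt < POOL_MAX && next_id < max_id)
		pool[pool_cnt++] = next_id++;
	pthread_mutex_unlock(&pool_lock);
}

/* ~ alloc_nid(): try the cheap path first, rebuild only on a miss */
static bool alloc_id(int *id)
{
retry:
	if (next_id >= max_id && pool_cnt == 0)
		return false;	/* id space exhausted (racy read; fine for a sketch) */

	pthread_mutex_lock(&pool_lock);
	/* fast path, but never serve ids an in-flight build made */
	if (pool_cnt && !on_build) {
		*id = pool[--pool_cnt];
		pthread_mutex_unlock(&pool_lock);
		return true;
	}
	pthread_mutex_unlock(&pool_lock);

	/* slow path: rebuild the pool under build_lock, then retry */
	pthread_mutex_lock(&build_lock);
	on_build = 1;
	build_free_ids();
	on_build = 0;
	pthread_mutex_unlock(&build_lock);
	goto retry;
}

int main(void)
{
	int id;

	for (int i = 0; i < 3 && alloc_id(&id); i++)
		printf("got id %d\n", id);
	return 0;
}

As in the patch, the fast path takes only the list lock, and the on_build
flag keeps it from consuming entries that a concurrent rebuild has published
only halfway; callers see stable free ids once the rebuild finishes and the
retry runs.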
-rw-r--r--  fs/f2fs/f2fs.h |  2
-rw-r--r--  fs/f2fs/node.c | 82
2 files changed, 37 insertions(+), 47 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6283c8d77c2e..20aab02f2a42 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -190,7 +190,6 @@ static inline void set_raw_extent(struct extent_info *ext,
 struct f2fs_nm_info {
 	block_t nat_blkaddr;		/* base disk address of NAT */
 	nid_t max_nid;			/* maximum possible node ids */
-	nid_t init_scan_nid;		/* the first nid to be scanned */
 	nid_t next_scan_nid;		/* the next nid to be scanned */
 
 	/* NAT cache management */
@@ -360,6 +359,7 @@ struct f2fs_sb_info {
 	struct mutex writepages;		/* mutex for writepages() */
 	unsigned char next_lock_num;		/* round-robin global locks */
 	int por_doing;				/* recovery is doing or not */
+	int on_build_free_nids;			/* build_free_nids is doing */
 
 	/* for orphan inode management */
 	struct list_head orphan_inode_list;	/* orphan inode list */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c8f48d436487..aede91071f71 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1309,14 +1309,14 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
 	struct f2fs_summary_block *sum = curseg->sum_blk;
-	nid_t nid = 0;
-	bool is_cycled = false;
-	int fcnt = 0;
-	int i;
+	int fcnt = 0, i = 0;
+	nid_t nid = nm_i->next_scan_nid;
 
-	nid = nm_i->next_scan_nid;
-	nm_i->init_scan_nid = nid;
+	/* Enough entries */
+	if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
+		return;
 
+	/* readahead nat pages to be scanned */
 	ra_nat_pages(sbi, nid);
 
 	while (1) {
@@ -1326,19 +1326,15 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
 		f2fs_put_page(page, 1);
 
 		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
-
-		if (nid >= nm_i->max_nid) {
+		if (nid >= nm_i->max_nid)
 			nid = 0;
-			is_cycled = true;
-		}
-		if (fcnt > MAX_FREE_NIDS)
-			break;
-		if (is_cycled && nm_i->init_scan_nid <= nid)
+
+		if (i++ == FREE_NID_PAGES)
 			break;
 	}
 
-	/* go to the next nat page in order to reuse free nids first */
-	nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK;
+	/* go to the next free nat pages to find free nids abundantly */
+	nm_i->next_scan_nid = nid;
 
 	/* find free nids from current sum_pages */
 	mutex_lock(&curseg->curseg_mutex);
@@ -1375,41 +1371,36 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 	struct free_nid *i = NULL;
 	struct list_head *this;
 retry:
-	mutex_lock(&nm_i->build_lock);
-	if (!nm_i->fcnt) {
-		/* scan NAT in order to build free nid list */
-		build_free_nids(sbi);
-		if (!nm_i->fcnt) {
-			mutex_unlock(&nm_i->build_lock);
-			return false;
-		}
-	}
-	mutex_unlock(&nm_i->build_lock);
+	if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
+		return false;
 
-	/*
-	 * We check fcnt again since previous check is racy as
-	 * we didn't hold free_nid_list_lock. So other thread
-	 * could consume all of free nids.
-	 */
 	spin_lock(&nm_i->free_nid_list_lock);
-	if (!nm_i->fcnt) {
-		spin_unlock(&nm_i->free_nid_list_lock);
-		goto retry;
-	}
 
-	BUG_ON(list_empty(&nm_i->free_nid_list));
-	list_for_each(this, &nm_i->free_nid_list) {
-		i = list_entry(this, struct free_nid, list);
-		if (i->state == NID_NEW)
-			break;
-	}
+	/* We should not use stale free nids created by build_free_nids */
+	if (nm_i->fcnt && !sbi->on_build_free_nids) {
+		BUG_ON(list_empty(&nm_i->free_nid_list));
+		list_for_each(this, &nm_i->free_nid_list) {
+			i = list_entry(this, struct free_nid, list);
+			if (i->state == NID_NEW)
+				break;
+		}
 
-	BUG_ON(i->state != NID_NEW);
-	*nid = i->nid;
-	i->state = NID_ALLOC;
-	nm_i->fcnt--;
+		BUG_ON(i->state != NID_NEW);
+		*nid = i->nid;
+		i->state = NID_ALLOC;
+		nm_i->fcnt--;
+		spin_unlock(&nm_i->free_nid_list_lock);
+		return true;
+	}
 	spin_unlock(&nm_i->free_nid_list_lock);
-	return true;
+
+	/* Let's scan nat pages and its caches to get free nids */
+	mutex_lock(&nm_i->build_lock);
+	sbi->on_build_free_nids = 1;
+	build_free_nids(sbi);
+	sbi->on_build_free_nids = 0;
+	mutex_unlock(&nm_i->build_lock);
+	goto retry;
 }
 
 /*
@@ -1696,7 +1687,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 	spin_lock_init(&nm_i->free_nid_list_lock);
 	rwlock_init(&nm_i->nat_tree_lock);
 
-	nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
 	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
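
A detail worth spelling out from the scan loop above: the statement
nid += NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK) rounds nid up to the
first entry of the next NAT block, so every iteration consumes exactly one
NAT page, and the new i++ == FREE_NID_PAGES test bounds the scan to a fixed
number of pages rather than the old is_cycled wraparound check. A standalone
check of that arithmetic follows; the value 455 assumes 4KB blocks
(PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry) = 4096 / 9).

/*
 * Round-up used by build_free_nids() to jump between NAT blocks.
 * NAT_ENTRY_PER_BLOCK = 455 is assumed (f2fs with 4KB blocks).
 */
#include <stdio.h>

#define NAT_ENTRY_PER_BLOCK 455

int main(void)
{
	unsigned int nid = 1000;	/* arbitrary starting nid */
	int i;

	for (i = 0; i < 3; i++) {
		/* advance to the first nid of the next NAT block */
		nid += NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK);
		printf("next scan starts at nid %u (NAT block %u)\n",
		       nid, nid / NAT_ENTRY_PER_BLOCK);
	}
	return 0;
}

Starting from nid 1000, this prints 1365, 1820, and 2275: successive
block-aligned nids, one NAT page apart.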