diff options
author | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2011-05-15 05:05:54 -0400 |
---|---|---|
committer | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2011-05-16 03:31:40 -0400 |
commit | debf12d54182b324a01c4276b003669c94b7b531 (patch) | |
tree | 972640a41690e8946bd89636765cbaa6a5aa63f5 /fs/ubifs | |
parent | 074bcb9b5ce698bd7b02ddb469da9cba21fe83fd (diff) |
UBIFS: substitute the replay tree with a replay list
This patch simplifies replay even further - it removes the replay tree and
adds the replay list instead. Indeed, we just do not need to use a tree here -
all we need to do is to add all nodes to the list and then sort it. Using
RB-tree is an overkill - more code and slower. And since we replay buds in
order, we expect the nodes to follow in _mostly_ sorted order, so the merge
sort becomes much cheaper in average than an RB-tree.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Diffstat (limited to 'fs/ubifs')
-rw-r--r-- | fs/ubifs/replay.c | 172 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 2 |
2 files changed, 74 insertions, 100 deletions
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 08f5036a9056..47915a789927 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -33,22 +33,24 @@ | |||
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include "ubifs.h" | 35 | #include "ubifs.h" |
36 | #include <linux/list_sort.h> | ||
36 | 37 | ||
37 | /** | 38 | /** |
38 | * struct replay_entry - replay tree entry. | 39 | * struct replay_entry - replay list entry. |
39 | * @lnum: logical eraseblock number of the node | 40 | * @lnum: logical eraseblock number of the node |
40 | * @offs: node offset | 41 | * @offs: node offset |
41 | * @len: node length | 42 | * @len: node length |
42 | * @deletion: non-zero if this entry corresponds to a node deletion | 43 | * @deletion: non-zero if this entry corresponds to a node deletion |
43 | * @sqnum: node sequence number | 44 | * @sqnum: node sequence number |
44 | * @rb: links the replay tree | 45 | * @list: links the replay list |
45 | * @key: node key | 46 | * @key: node key |
46 | * @nm: directory entry name | 47 | * @nm: directory entry name |
47 | * @old_size: truncation old size | 48 | * @old_size: truncation old size |
48 | * @new_size: truncation new size | 49 | * @new_size: truncation new size |
49 | * | 50 | * |
50 | * UBIFS journal replay must compare node sequence numbers, which means it must | 51 | * The replay process first scans all buds and builds the replay list, then |
51 | * build a tree of node information to insert into the TNC. | 52 | * sorts the replay list in nodes sequence number order, and then inserts all |
53 | * the replay entries to the TNC. | ||
52 | */ | 54 | */ |
53 | struct replay_entry { | 55 | struct replay_entry { |
54 | int lnum; | 56 | int lnum; |
@@ -56,7 +58,7 @@ struct replay_entry { | |||
56 | int len; | 58 | int len; |
57 | unsigned int deletion:1; | 59 | unsigned int deletion:1; |
58 | unsigned long long sqnum; | 60 | unsigned long long sqnum; |
59 | struct rb_node rb; | 61 | struct list_head list; |
60 | union ubifs_key key; | 62 | union ubifs_key key; |
61 | union { | 63 | union { |
62 | struct qstr nm; | 64 | struct qstr nm; |
@@ -263,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | |||
263 | } | 265 | } |
264 | 266 | ||
265 | /** | 267 | /** |
266 | * destroy_replay_tree - destroy the replay. | 268 | * replay_entries_cmp - compare 2 replay entries. |
267 | * @c: UBIFS file-system description object | 269 | * @priv: UBIFS file-system description object |
270 | * @a: first replay entry | ||
271 | * @a: second replay entry | ||
268 | * | 272 | * |
269 | * Destroy the replay tree. | 273 | * This is a comparios function for 'list_sort()' which compares 2 replay |
274 | * entries @a and @b by comparing their sequence numer. Returns %1 if @a has | ||
275 | * greater sequence number and %-1 otherwise. | ||
270 | */ | 276 | */ |
271 | static void destroy_replay_tree(struct ubifs_info *c) | 277 | static int replay_entries_cmp(void *priv, struct list_head *a, |
278 | struct list_head *b) | ||
272 | { | 279 | { |
273 | struct rb_node *this = c->replay_tree.rb_node; | 280 | struct replay_entry *ra, *rb; |
274 | struct replay_entry *r; | 281 | |
275 | 282 | cond_resched(); | |
276 | while (this) { | 283 | if (a == b) |
277 | if (this->rb_left) { | 284 | return 0; |
278 | this = this->rb_left; | 285 | |
279 | continue; | 286 | ra = list_entry(a, struct replay_entry, list); |
280 | } else if (this->rb_right) { | 287 | rb = list_entry(b, struct replay_entry, list); |
281 | this = this->rb_right; | 288 | ubifs_assert(ra->sqnum != rb->sqnum); |
282 | continue; | 289 | if (ra->sqnum > rb->sqnum) |
283 | } | 290 | return 1; |
284 | r = rb_entry(this, struct replay_entry, rb); | 291 | return -1; |
285 | this = rb_parent(this); | ||
286 | if (this) { | ||
287 | if (this->rb_left == &r->rb) | ||
288 | this->rb_left = NULL; | ||
289 | else | ||
290 | this->rb_right = NULL; | ||
291 | } | ||
292 | if (is_hash_key(c, &r->key)) | ||
293 | kfree(r->nm.name); | ||
294 | kfree(r); | ||
295 | } | ||
296 | c->replay_tree = RB_ROOT; | ||
297 | } | 292 | } |
298 | 293 | ||
299 | /** | 294 | /** |
300 | * apply_replay_tree - apply the replay tree to the TNC. | 295 | * apply_replay_list - apply the replay list to the TNC. |
301 | * @c: UBIFS file-system description object | 296 | * @c: UBIFS file-system description object |
302 | * | 297 | * |
303 | * Apply the replay tree. | 298 | * Apply all entries in the replay list to the TNC. Returns zero in case of |
304 | * Returns zero in case of success and a negative error code in case of | 299 | * success and a negative error code in case of failure. |
305 | * failure. | ||
306 | */ | 300 | */ |
307 | static int apply_replay_tree(struct ubifs_info *c) | 301 | static int apply_replay_list(struct ubifs_info *c) |
308 | { | 302 | { |
309 | struct rb_node *this = rb_first(&c->replay_tree); | 303 | struct replay_entry *r; |
304 | int err; | ||
310 | 305 | ||
311 | while (this) { | 306 | list_sort(c, &c->replay_list, &replay_entries_cmp); |
312 | struct replay_entry *r; | ||
313 | int err; | ||
314 | 307 | ||
308 | list_for_each_entry(r, &c->replay_list, list) { | ||
315 | cond_resched(); | 309 | cond_resched(); |
316 | 310 | ||
317 | r = rb_entry(this, struct replay_entry, rb); | ||
318 | err = apply_replay_entry(c, r); | 311 | err = apply_replay_entry(c, r); |
319 | if (err) | 312 | if (err) |
320 | return err; | 313 | return err; |
321 | this = rb_next(this); | ||
322 | } | 314 | } |
315 | |||
323 | return 0; | 316 | return 0; |
324 | } | 317 | } |
325 | 318 | ||
326 | /** | 319 | /** |
327 | * insert_node - insert a node to the replay tree. | 320 | * destroy_replay_list - destroy the replay. |
321 | * @c: UBIFS file-system description object | ||
322 | * | ||
323 | * Destroy the replay list. | ||
324 | */ | ||
325 | static void destroy_replay_list(struct ubifs_info *c) | ||
326 | { | ||
327 | struct replay_entry *r, *tmp; | ||
328 | |||
329 | list_for_each_entry_safe(r, tmp, &c->replay_list, list) { | ||
330 | if (is_hash_key(c, &r->key)) | ||
331 | kfree(r->nm.name); | ||
332 | list_del(&r->list); | ||
333 | kfree(r); | ||
334 | } | ||
335 | } | ||
336 | |||
337 | /** | ||
338 | * insert_node - insert a node to the replay list | ||
328 | * @c: UBIFS file-system description object | 339 | * @c: UBIFS file-system description object |
329 | * @lnum: node logical eraseblock number | 340 | * @lnum: node logical eraseblock number |
330 | * @offs: node offset | 341 | * @offs: node offset |
@@ -336,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) | |||
336 | * @old_size: truncation old size | 347 | * @old_size: truncation old size |
337 | * @new_size: truncation new size | 348 | * @new_size: truncation new size |
338 | * | 349 | * |
339 | * This function inserts a scanned non-direntry node to the replay tree. The | 350 | * This function inserts a scanned non-direntry node to the replay list. The |
340 | * replay tree is an RB-tree containing @struct replay_entry elements which are | 351 | * replay list contains @struct replay_entry elements, and we sort this list in |
341 | * indexed by the sequence number. The replay tree is applied at the very end | 352 | * sequence number order before applying it. The replay list is applied at the |
342 | * of the replay process. Since the tree is sorted in sequence number order, | 353 | * very end of the replay process. Since the list is sorted in sequence number |
343 | * the older modifications are applied first. This function returns zero in | 354 | * order, the older modifications are applied first. This function returns zero |
344 | * case of success and a negative error code in case of failure. | 355 | * in case of success and a negative error code in case of failure. |
345 | */ | 356 | */ |
346 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | 357 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, |
347 | union ubifs_key *key, unsigned long long sqnum, | 358 | union ubifs_key *key, unsigned long long sqnum, |
348 | int deletion, int *used, loff_t old_size, | 359 | int deletion, int *used, loff_t old_size, |
349 | loff_t new_size) | 360 | loff_t new_size) |
350 | { | 361 | { |
351 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
352 | struct replay_entry *r; | 362 | struct replay_entry *r; |
353 | 363 | ||
364 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
365 | |||
354 | if (key_inum(c, key) >= c->highest_inum) | 366 | if (key_inum(c, key) >= c->highest_inum) |
355 | c->highest_inum = key_inum(c, key); | 367 | c->highest_inum = key_inum(c, key); |
356 | 368 | ||
357 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
358 | while (*p) { | ||
359 | parent = *p; | ||
360 | r = rb_entry(parent, struct replay_entry, rb); | ||
361 | if (sqnum < r->sqnum) { | ||
362 | p = &(*p)->rb_left; | ||
363 | continue; | ||
364 | } else if (sqnum > r->sqnum) { | ||
365 | p = &(*p)->rb_right; | ||
366 | continue; | ||
367 | } | ||
368 | ubifs_err("duplicate sqnum in replay"); | ||
369 | return -EINVAL; | ||
370 | } | ||
371 | |||
372 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | 369 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); |
373 | if (!r) | 370 | if (!r) |
374 | return -ENOMEM; | 371 | return -ENOMEM; |
@@ -384,13 +381,12 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
384 | r->old_size = old_size; | 381 | r->old_size = old_size; |
385 | r->new_size = new_size; | 382 | r->new_size = new_size; |
386 | 383 | ||
387 | rb_link_node(&r->rb, parent, p); | 384 | list_add_tail(&r->list, &c->replay_list); |
388 | rb_insert_color(&r->rb, &c->replay_tree); | ||
389 | return 0; | 385 | return 0; |
390 | } | 386 | } |
391 | 387 | ||
392 | /** | 388 | /** |
393 | * insert_dent - insert a directory entry node into the replay tree. | 389 | * insert_dent - insert a directory entry node into the replay list. |
394 | * @c: UBIFS file-system description object | 390 | * @c: UBIFS file-system description object |
395 | * @lnum: node logical eraseblock number | 391 | * @lnum: node logical eraseblock number |
396 | * @offs: node offset | 392 | * @offs: node offset |
@@ -402,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
402 | * @deletion: non-zero if this is a deletion | 398 | * @deletion: non-zero if this is a deletion |
403 | * @used: number of bytes in use in a LEB | 399 | * @used: number of bytes in use in a LEB |
404 | * | 400 | * |
405 | * This function inserts a scanned directory entry node to the replay tree. | 401 | * This function inserts a scanned directory entry node or an extended |
406 | * Returns zero in case of success and a negative error code in case of | 402 | * attribute entry to the replay list. Returns zero in case of success and a |
407 | * failure. | 403 | * negative error code in case of failure. |
408 | * | ||
409 | * This function is also used for extended attribute entries because they are | ||
410 | * implemented as directory entry nodes. | ||
411 | */ | 404 | */ |
412 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | 405 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, |
413 | union ubifs_key *key, const char *name, int nlen, | 406 | union ubifs_key *key, const char *name, int nlen, |
414 | unsigned long long sqnum, int deletion, int *used) | 407 | unsigned long long sqnum, int deletion, int *used) |
415 | { | 408 | { |
416 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
417 | struct replay_entry *r; | 409 | struct replay_entry *r; |
418 | char *nbuf; | 410 | char *nbuf; |
419 | 411 | ||
412 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
420 | if (key_inum(c, key) >= c->highest_inum) | 413 | if (key_inum(c, key) >= c->highest_inum) |
421 | c->highest_inum = key_inum(c, key); | 414 | c->highest_inum = key_inum(c, key); |
422 | 415 | ||
423 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
424 | while (*p) { | ||
425 | parent = *p; | ||
426 | r = rb_entry(parent, struct replay_entry, rb); | ||
427 | if (sqnum < r->sqnum) { | ||
428 | p = &(*p)->rb_left; | ||
429 | continue; | ||
430 | } | ||
431 | if (sqnum > r->sqnum) { | ||
432 | p = &(*p)->rb_right; | ||
433 | continue; | ||
434 | } | ||
435 | ubifs_err("duplicate sqnum in replay"); | ||
436 | return -EINVAL; | ||
437 | } | ||
438 | |||
439 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | 416 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); |
440 | if (!r) | 417 | if (!r) |
441 | return -ENOMEM; | 418 | return -ENOMEM; |
419 | |||
442 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); | 420 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); |
443 | if (!nbuf) { | 421 | if (!nbuf) { |
444 | kfree(r); | 422 | kfree(r); |
@@ -458,9 +436,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | |||
458 | nbuf[nlen] = '\0'; | 436 | nbuf[nlen] = '\0'; |
459 | r->nm.name = nbuf; | 437 | r->nm.name = nbuf; |
460 | 438 | ||
461 | ubifs_assert(!*p); | 439 | list_add_tail(&r->list, &c->replay_list); |
462 | rb_link_node(&r->rb, parent, p); | ||
463 | rb_insert_color(&r->rb, &c->replay_tree); | ||
464 | return 0; | 440 | return 0; |
465 | } | 441 | } |
466 | 442 | ||
@@ -1017,7 +993,7 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1017 | if (err) | 993 | if (err) |
1018 | goto out; | 994 | goto out; |
1019 | 995 | ||
1020 | err = apply_replay_tree(c); | 996 | err = apply_replay_list(c); |
1021 | if (err) | 997 | if (err) |
1022 | goto out; | 998 | goto out; |
1023 | 999 | ||
@@ -1039,7 +1015,7 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1039 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, | 1015 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, |
1040 | (unsigned long)c->highest_inum); | 1016 | (unsigned long)c->highest_inum); |
1041 | out: | 1017 | out: |
1042 | destroy_replay_tree(c); | 1018 | destroy_replay_list(c); |
1043 | destroy_bud_list(c); | 1019 | destroy_bud_list(c); |
1044 | c->replaying = 0; | 1020 | c->replaying = 0; |
1045 | return err; | 1021 | return err; |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 26a7ebe56d2e..a2f9d4e3519f 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -1205,7 +1205,6 @@ struct ubifs_debug_info; | |||
1205 | * @replaying: %1 during journal replay | 1205 | * @replaying: %1 during journal replay |
1206 | * @mounting: %1 while mounting | 1206 | * @mounting: %1 while mounting |
1207 | * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode | 1207 | * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode |
1208 | * @replay_tree: temporary tree used during journal replay | ||
1209 | * @replay_list: temporary list used during journal replay | 1208 | * @replay_list: temporary list used during journal replay |
1210 | * @replay_buds: list of buds to replay | 1209 | * @replay_buds: list of buds to replay |
1211 | * @cs_sqnum: sequence number of first node in the log (commit start node) | 1210 | * @cs_sqnum: sequence number of first node in the log (commit start node) |
@@ -1435,7 +1434,6 @@ struct ubifs_info { | |||
1435 | unsigned int replaying:1; | 1434 | unsigned int replaying:1; |
1436 | unsigned int mounting:1; | 1435 | unsigned int mounting:1; |
1437 | unsigned int remounting_rw:1; | 1436 | unsigned int remounting_rw:1; |
1438 | struct rb_root replay_tree; | ||
1439 | struct list_head replay_list; | 1437 | struct list_head replay_list; |
1440 | struct list_head replay_buds; | 1438 | struct list_head replay_buds; |
1441 | unsigned long long cs_sqnum; | 1439 | unsigned long long cs_sqnum; |