1 files changed, 739 insertions, 0 deletions
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
new file mode 100644
index 000000000000..76c242fbe1b0
--- /dev/null
+++ b/fs/logfs/gc.c
@@ -0,0 +1,739 @@
+/*
+ * fs/logfs/gc.c        - garbage collection code
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
+ */
+#include "logfs.h"
+#include <linux/sched.h>
+#include <linux/slab.h>
+/*
+ * Wear leveling needs to kick in when the difference between low erase
+ * counts and high erase counts gets too big.  A good value for "too big"
+ * may be somewhat below 10% of maximum erase count for the device.
+ * Why not 397, to pick a nice round number with no specific meaning? :)
+ *
+ * WL_RATELIMIT is the minimum time between two wear level events.  A huge
+ * number of segments may fulfil the requirements for wear leveling at the
+ * same time.  If that happens we don't want to cause a latency from hell,
+ * but just gently pick one segment every so often and minimize overhead.
+ */
+#define WL_DELTA 397
+#define WL_RATELIMIT 100
+#define MAX_OBJ_ALIASES 2600
+#define SCAN_RATIO 512  /* number of scanned segments per gc'd segment */
+#define LIST_SIZE 64    /* base size of candidate lists */
+#define SCAN_ROUNDS 128 /* maximum number of complete medium scans */
+#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
+static int no_free_segments(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        return super->s_free_list.count;
+}
+/* journal has distance -1, top-most ifile layer distance 0 */
+static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
+{
+        struct logfs_super *super = logfs_super(sb);
+        u8 gc_level = (__force u8)__gc_level;
+        switch (gc_level) {
+        case 0: /* fall through */
+        case 1: /* fall through */
+        case 2: /* fall through */
+        case 3:
+                /* file data or indirect blocks */
+                return super->s_ifile_levels + super->s_iblock_levels - gc_level;
+        case 6: /* fall through */
+        case 7: /* fall through */
+        case 8: /* fall through */
+        case 9:
+                /* inode file data or indirect blocks */
+                return super->s_ifile_levels - (gc_level - 6);
+        default:
+                printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
+                                gc_level);
+                WARN_ON(1);
+                return super->s_ifile_levels + super->s_iblock_levels;
+        }
+}
+static int segment_is_reserved(struct super_block *sb, u32 segno)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct logfs_area *area;
+        void *reserved;
+        int i;
+        /* Some segments are reserved.  Just pretend they were all valid */
+        reserved = btree_lookup32(&super->s_reserved_segments, segno);
+        if (reserved)
+                return 1;
+        /* Currently open segments */
+        for_each_area(i) {
+                area = super->s_area[i];
+                if (area->a_is_open && area->a_segno == segno)
+                        return 1;
+        }
+        return 0;
+}
+static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
+{
+        BUG();
+}
+/*
+ * Returns the bytes consumed by valid objects in this segment.  Object headers
+ * are counted, the segment header is not.
+ */
+static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
+                gc_level_t *gc_level)
+{
+        struct logfs_segment_entry se;
+        u32 ec_level;
+        logfs_get_segment_entry(sb, segno, &se);
+        if (se.ec_level == cpu_to_be32(BADSEG) ||
+                        se.valid == cpu_to_be32(RESERVED))
+                return RESERVED;
+        ec_level = be32_to_cpu(se.ec_level);
+        *ec = ec_level >> 4;
+        *gc_level = GC_LEVEL(ec_level & 0xf);
+        return be32_to_cpu(se.valid);
+}
+static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
+                u64 bix, gc_level_t gc_level)
+{
+        struct inode *inode;
+        int err, cookie;
+        inode = logfs_safe_iget(sb, ino, &cookie);
+        err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
+        BUG_ON(err);
+        logfs_safe_iput(inode, cookie);
+}
+static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct logfs_segment_header sh;
+        struct logfs_object_header oh;
+        u64 ofs, ino, bix;
+        u32 seg_ofs, logical_segno, cleaned = 0;
+        int err, len, valid;
+        gc_level_t gc_level;
+        LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
+        btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
+        err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
+        BUG_ON(err);
+        gc_level = GC_LEVEL(sh.level);
+        logical_segno = be32_to_cpu(sh.segno);
+        if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
+                logfs_mark_segment_bad(sb, segno);
+                cleaned = -1;
+                goto out;
+        }
+        for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
+                        seg_ofs + sizeof(oh) < super->s_segsize; ) {
+                ofs = dev_ofs(sb, logical_segno, seg_ofs);
+                err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
+                                &oh);
+                BUG_ON(err);
+                if (!memchr_inv(&oh, 0xff, sizeof(oh)))
+                        break;
+                if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
+                        logfs_mark_segment_bad(sb, segno);
+                        cleaned = super->s_segsize - 1;
+                        goto out;
+                }
+                ino = be64_to_cpu(oh.ino);
+                bix = be64_to_cpu(oh.bix);
+                len = sizeof(oh) + be16_to_cpu(oh.len);
+                valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
+                if (valid == 1) {
+                        logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
+                        cleaned += len;
+                } else if (valid == 2) {
+                        /* Will be invalid upon journal commit */
+                        cleaned += len;
+                }
+                seg_ofs += len;
+        }
+out:
+        btree_remove32(&super->s_reserved_segments, segno);
+        return cleaned;
+}
+static struct gc_candidate *add_list(struct gc_candidate *cand,
+                struct candidate_list *list)
+{
+        struct rb_node **p = &list->rb_tree.rb_node;
+        struct rb_node *parent = NULL;
+        struct gc_candidate *cur;
+        int comp;
+        cand->list = list;
+        while (*p) {
+                parent = *p;
+                cur = rb_entry(parent, struct gc_candidate, rb_node);
+                if (list->sort_by_ec)
+                        comp = cand->erase_count < cur->erase_count;
+                else
+                        comp = cand->valid < cur->valid;
+                if (comp)
+                        p = &parent->rb_left;
+                else
+                        p = &parent->rb_right;
+        }
+        rb_link_node(&cand->rb_node, parent, p);
+        rb_insert_color(&cand->rb_node, &list->rb_tree);
+        if (list->count <= list->maxcount) {
+                list->count++;
+                return NULL;
+        }
+        cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
+        rb_erase(&cand->rb_node, &list->rb_tree);
+        cand->list = NULL;
+        return cand;
+}
+static void remove_from_list(struct gc_candidate *cand)
+{
+        struct candidate_list *list = cand->list;
+        rb_erase(&cand->rb_node, &list->rb_tree);
+        list->count--;
+}
+static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
+{
+        struct logfs_super *super = logfs_super(sb);
+        btree_remove32(&super->s_cand_tree, cand->segno);
+        kfree(cand);
+}
+u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
+{
+        struct gc_candidate *cand;
+        u32 segno;
+        BUG_ON(list->count == 0);
+        cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
+        remove_from_list(cand);
+        segno = cand->segno;
+        if (ec)
+                *ec = cand->erase_count;
+        free_candidate(sb, cand);
+        return segno;
+}
+/*
+ * We have several lists to manage segments with.  The reserve_list is used to
+ * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
+ * list.
+ * The free_list contains free segments for normal usage.  It usually gets the
+ * second pick after the reserve_list.  But when the free_list is running short
+ * it is more important to keep the free_list full than to keep a reserve.
+ *
+ * Segments that are not free are put onto a per-level low_list.  If we have
+ * to run garbage collection, we pick a candidate from there.  All segments on
+ * those lists should have at least some free space so GC will make progress.
+ *
+ * And last we have the ec_list, which is used to pick segments for wear
+ * leveling.
+ *
+ * If all appropriate lists are full, we simply free the candidate and forget
+ * about that segment for a while.  We have better candidates for each purpose.
+ */
+static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
+{
+        struct logfs_super *super = logfs_super(sb);
+        u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
+        if (cand->valid == 0) {
+                /* 100% free segments */
+                log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
+                                cand->segno, cand->erase_count,
+                                dev_ofs(sb, cand->segno, 0));
+                cand = add_list(cand, &super->s_reserve_list);
+                if (cand) {
+                        log_gc_noisy("add free segment %x (ec %x) at %llx\n",
+                                        cand->segno, cand->erase_count,
+                                        dev_ofs(sb, cand->segno, 0));
+                        cand = add_list(cand, &super->s_free_list);
+                }
+        } else {
+                /* good candidates for Garbage Collection */
+                if (cand->valid < full)
+                        cand = add_list(cand, &super->s_low_list[cand->dist]);
+                /* good candidates for wear leveling,
+                 * segments that were recently written get ignored */
+                if (cand)
+                        cand = add_list(cand, &super->s_ec_list);
+        }
+        if (cand)
+                free_candidate(sb, cand);
+}
+static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
+                u8 dist)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct gc_candidate *cand;
+        cand = kmalloc(sizeof(*cand), GFP_NOFS);
+        if (!cand)
+                return -ENOMEM;
+        cand->segno = segno;
+        cand->valid = valid;
+        cand->erase_count = ec;
+        cand->dist = dist;
+        btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
+        __add_candidate(sb, cand);
+        return 0;
+}
+static void remove_segment_from_lists(struct super_block *sb, u32 segno)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct gc_candidate *cand;
+        cand = btree_lookup32(&super->s_cand_tree, segno);
+        if (cand) {
+                remove_from_list(cand);
+                free_candidate(sb, cand);
+        }
+}
+static void scan_segment(struct super_block *sb, u32 segno)
+{
+        u32 valid, ec = 0;
+        gc_level_t gc_level = 0;
+        u8 dist;
+        if (segment_is_reserved(sb, segno))
+                return;
+        remove_segment_from_lists(sb, segno);
+        valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
+        if (valid == RESERVED)
+                return;
+        dist = root_distance(sb, gc_level);
+        add_candidate(sb, segno, valid, ec, dist);
+}
+static struct gc_candidate *first_in_list(struct candidate_list *list)
+{
+        if (list->count == 0)
+                return NULL;
+        return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
+}
+/*
+ * Find the best segment for garbage collection.  Main criterion is
+ * the segment requiring the least effort to clean.  Secondary
+ * criterion is to GC on the lowest level available.
+ *
+ * So we search the least effort segment on the lowest level first,
+ * then move up and pick another segment iff is requires significantly
+ * less effort.  Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
+ */
+static struct gc_candidate *get_candidate(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        int i, max_dist;
+        struct gc_candidate *cand = NULL, *this;
+        max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
+        for (i = max_dist; i >= 0; i--) {
+                this = first_in_list(&super->s_low_list[i]);
+                if (!this)
+                        continue;
+                if (!cand)
+                        cand = this;
+                if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
+                        cand = this;
+        }
+        return cand;
+}
+static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
+{
+        struct logfs_super *super = logfs_super(sb);
+        gc_level_t gc_level;
+        u32 cleaned, valid, segno, ec;
+        u8 dist;
+        if (!cand) {
+                log_gc("GC attempted, but no candidate found\n");
+                return 0;
+        }
+        segno = cand->segno;
+        dist = cand->dist;
+        valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
+        free_candidate(sb, cand);
+        log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
+                        segno, (u64)segno << super->s_segshift,
+                        dist, no_free_segments(sb), valid,
+                        super->s_free_bytes);
+        cleaned = logfs_gc_segment(sb, segno, dist);
+        log_gc("GC segment #%02x complete - now %x valid\n", segno,
+                        valid - cleaned);
+        BUG_ON(cleaned != valid);
+        return 1;
+}
+static int logfs_gc_once(struct super_block *sb)
+{
+        struct gc_candidate *cand;
+        cand = get_candidate(sb);
+        if (cand)
+                remove_from_list(cand);
+        return __logfs_gc_once(sb, cand);
+}
+/* returns 1 if a wrap occurs, 0 otherwise */
+static int logfs_scan_some(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        u32 segno;
+        int i, ret = 0;
+        segno = super->s_sweeper;
+        for (i = SCAN_RATIO; i > 0; i--) {
+                segno++;
+                if (segno >= super->s_no_segs) {
+                        segno = 0;
+                        ret = 1;
+                        /* Break out of the loop.  We want to read a single
+                         * block from the segment size on next invocation if
+                         * SCAN_RATIO is set to match block size
+                         */
+                        break;
+                }
+                scan_segment(sb, segno);
+        }
+        super->s_sweeper = segno;
+        return ret;
+}
+/*
+ * In principle, this function should loop forever, looking for GC candidates
+ * and moving data.  LogFS is designed in such a way that this loop is
+ * guaranteed to terminate.
+ *
+ * Limiting the loop to some iterations serves purely to catch cases when
+ * these guarantees have failed.  An actual endless loop is an obvious bug
+ * and should be reported as such.
+ */
+static void __logfs_gc_pass(struct super_block *sb, int target)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct logfs_block *block;
+        int round, progress, last_progress = 0;
+        /*
+         * Doing too many changes to the segfile at once would result
+         * in a large number of aliases.  Write the journal before
+         * things get out of hand.
+         */
+        if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
+                logfs_write_anchor(sb);
+        if (no_free_segments(sb) >= target &&
+                        super->s_no_object_aliases < MAX_OBJ_ALIASES)
+                return;
+        log_gc("__logfs_gc_pass(%x)\n", target);
+        for (round = 0; round < SCAN_ROUNDS; ) {
+                if (no_free_segments(sb) >= target)
+                        goto write_alias;
+                /* Sync in-memory state with on-medium state in case they
+                 * diverged */
+                logfs_write_anchor(sb);
+                round += logfs_scan_some(sb);
+                if (no_free_segments(sb) >= target)
+                        goto write_alias;
+                progress = logfs_gc_once(sb);
+                if (progress)
+                        last_progress = round;
+                else if (round - last_progress > 2)
+                        break;
+                continue;
+                /*
+                 * The goto logic is nasty, I just don't know a better way to
+                 * code it.  GC is supposed to ensure two things:
+                 * 1. Enough free segments are available.
+                 * 2. The number of aliases is bounded.
+                 * When 1. is achieved, we take a look at 2. and write back
+                 * some alias-containing blocks, if necessary.  However, after
+                 * each such write we need to go back to 1., as writes can
+                 * consume free segments.
+                 */
+write_alias:
+                if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
+                        return;
+                if (list_empty(&super->s_object_alias)) {
+                        /* All aliases are still in btree */
+                        return;
+                }
+                log_gc("Write back one alias\n");
+                block = list_entry(super->s_object_alias.next,
+                                struct logfs_block, alias_list);
+                block->ops->write_block(block);
+                /*
+                 * To round off the nasty goto logic, we reset round here.  It
+                 * is a safety-net for GC not making any progress and limited
+                 * to something reasonably small.  If incremented it for every
+                 * single alias, the loop could terminate rather quickly.
+                 */
+                round = 0;
+        }
+        LOGFS_BUG(sb);
+}
+static int wl_ratelimit(struct super_block *sb, u64 *next_event)
+{
+        struct logfs_super *super = logfs_super(sb);
+        if (*next_event < super->s_gec) {
+                *next_event = super->s_gec + WL_RATELIMIT;
+                return 0;
+        }
+        return 1;
+}
+static void logfs_wl_pass(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct gc_candidate *wl_cand, *free_cand;
+        if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
+                return;
+        wl_cand = first_in_list(&super->s_ec_list);
+        if (!wl_cand)
+                return;
+        free_cand = first_in_list(&super->s_free_list);
+        if (!free_cand)
+                return;
+        if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
+                remove_from_list(wl_cand);
+                __logfs_gc_once(sb, wl_cand);
+        }
+}
+/*
+ * The journal needs wear leveling as well.  But moving the journal is an
+ * expensive operation so we try to avoid it as much as possible.  And if we
+ * have to do it, we move the whole journal, not individual segments.
+ *
+ * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
+ * calculations.  First we check whether moving the journal would be a
+ * significant improvement.  That means that a) the current journal segments
+ * have more wear than the future journal segments and b) the current journal
+ * segments have more wear than normal ostore segments.
+ * Rationale for b) is that we don't have to move the journal if it is aging
+ * less than the ostore, even if the reserve segments age even less (they are
+ * excluded from wear leveling, after all).
+ * Next we check that the superblocks have less wear than the journal.  Since
+ * moving the journal requires writing the superblocks, we have to protect the
+ * superblocks even more than the journal.
+ *
+ * Also we double the acceptable wear difference, compared to ostore wear
+ * leveling.  Journal data is read and rewritten rapidly, comparatively.  So
+ * soft errors have much less time to accumulate and we allow the journal to
+ * be a bit worse than the ostore.
+ */
+static void logfs_journal_wl_pass(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct gc_candidate *cand;
+        u32 min_journal_ec = -1, max_reserve_ec = 0;
+        int i;
+        if (wl_ratelimit(sb, &super->s_wl_gec_journal))
+                return;
+        if (super->s_reserve_list.count < super->s_no_journal_segs) {
+                /* Reserve is not full enough to move complete journal */
+                return;
+        }
+        journal_for_each(i)
+                if (super->s_journal_seg[i])
+                        min_journal_ec = min(min_journal_ec,
+                                        super->s_journal_ec[i]);
+        cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
+                        struct gc_candidate, rb_node);
+        max_reserve_ec = cand->erase_count;
+        for (i = 0; i < 2; i++) {
+                struct logfs_segment_entry se;
+                u32 segno = seg_no(sb, super->s_sb_ofs[i]);
+                u32 ec;
+                logfs_get_segment_entry(sb, segno, &se);
+                ec = be32_to_cpu(se.ec_level) >> 4;
+                max_reserve_ec = max(max_reserve_ec, ec);
+        }
+        if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
+                do_logfs_journal_wl_pass(sb);
+        }
+}
+void logfs_gc_pass(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        //BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
+        /* Write journal before free space is getting saturated with dirty
+         * objects.
+         */
+        if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
+                        + LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
+                logfs_write_anchor(sb);
+        __logfs_gc_pass(sb, super->s_total_levels);
+        logfs_wl_pass(sb);
+        logfs_journal_wl_pass(sb);
+}
+static int check_area(struct super_block *sb, int i)
+{
+        struct logfs_super *super = logfs_super(sb);
+        struct logfs_area *area = super->s_area[i];
+        struct logfs_object_header oh;
+        u32 segno = area->a_segno;
+        u32 ofs = area->a_used_bytes;
+        __be32 crc;
+        int err;
+        if (!area->a_is_open)
+                return 0;
+        for (ofs = area->a_used_bytes;
+             ofs <= super->s_segsize - sizeof(oh);
+             ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
+                err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
+                if (err)
+                        return err;
+                if (!memchr_inv(&oh, 0xff, sizeof(oh)))
+                        break;
+                crc = logfs_crc32(&oh, sizeof(oh) - 4, 4);
+                if (crc != oh.crc) {
+                        printk(KERN_INFO "interrupted header at %llx\n",
+                                        dev_ofs(sb, segno, ofs));
+                        return 0;
+                }
+        }
+        if (ofs != area->a_used_bytes) {
+                printk(KERN_INFO "%x bytes unaccounted data found at %llx\n",
+                                ofs - area->a_used_bytes,
+                                dev_ofs(sb, segno, area->a_used_bytes));
+                area->a_used_bytes = ofs;
+        }
+        return 0;
+}
+int logfs_check_areas(struct super_block *sb)
+{
+        int i, err;
+        for_each_area(i) {
+                err = check_area(sb, i);
+                if (err)
+                        return err;
+        }
+        return 0;
+}
+static void logfs_init_candlist(struct candidate_list *list, int maxcount,
+                int sort_by_ec)
+{
+        list->count = 0;
+        list->maxcount = maxcount;
+        list->sort_by_ec = sort_by_ec;
+        list->rb_tree = RB_ROOT;
+}
+int logfs_init_gc(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        int i;
+        btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
+        logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
+        logfs_init_candlist(&super->s_reserve_list,
+                        super->s_bad_seg_reserve, 1);
+        for_each_area(i)
+                logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
+        logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
+        return 0;
+}
+static void logfs_cleanup_list(struct super_block *sb,
+                struct candidate_list *list)
+{
+        struct gc_candidate *cand;
+        while (list->count) {
+                cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
+                                rb_node);
+                remove_from_list(cand);
+                free_candidate(sb, cand);
+        }
+        BUG_ON(list->rb_tree.rb_node);
+}
+void logfs_cleanup_gc(struct super_block *sb)
+{
+        struct logfs_super *super = logfs_super(sb);
+        int i;
+        if (!super->s_free_list.count)
+                return;
+        /*
+         * FIXME: The btree may still contain a single empty node.  So we
+         * call the grim visitor to clean up that mess.  Btree code should
+         * do it for us, really.
+         */
+        btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
+        logfs_cleanup_list(sb, &super->s_free_list);
+        logfs_cleanup_list(sb, &super->s_reserve_list);
+        for_each_area(i)
+                logfs_cleanup_list(sb, &super->s_low_list[i]);
+        logfs_cleanup_list(sb, &super->s_ec_list);
+}