/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>
#include <linux/swap.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/crc32c.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "print-tree.h"
#include "async-thread.h"
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"

static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);

static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);

/*
 * end_io_wq structs are used to do processing in task context when an IO is
 * complete. This is used during reads to verify checksums, and it is used
 * by writes to insert metadata for new file extents after IO is complete.
 */
struct end_io_wq {
	struct bio *bio;
	bio_end_io_t *end_io;
	void *private;
	struct btrfs_fs_info *info;
	int error;
	int metadata;
	struct list_head list;
	struct btrfs_work work;
};

/*
 * async submit bios are used to offload expensive checksumming
 * onto the worker threads.  They checksum file and metadata bios
 * just before they are sent down the IO stack.
 */
struct async_submit_bio {
	struct inode *inode;
	struct bio *bio;
	struct list_head list;
	extent_submit_bio_hook_t *submit_bio_start;
	extent_submit_bio_hook_t *submit_bio_done;
	int rw;
	int mirror_num;
	unsigned long bio_flags;
	struct btrfs_work work;
};
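/*
 * Rough life cycle of the two structs above, as wired up later in this
 * file: btrfs_bio_wq_end_io() hooks an end_io_wq between a bio and its
 * original bi_end_io so completion work can run in a worker thread, and
 * btrfs_wq_submit_bio() queues an async_submit_bio whose
 * submit_bio_start/submit_bio_done pair is run by the ordered worker code.
 */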
/*
 * These are used to set the lockdep class on the extent buffer locks.
 * The class is set by the readpage_end_io_hook after the buffer has
 * passed csum validation but before the pages are unlocked.
 *
 * The lockdep class is also set by btrfs_init_new_buffer on freshly
 * allocated blocks.
 *
 * The class is based on the level in the tree block, which allows lockdep
 * to know that lower nodes nest inside the locks of higher nodes.
 *
 * We also add a check to make sure the highest level of the tree is
 * the same as our lockdep setup here.  If BTRFS_MAX_LEVEL changes, this
 * code needs update as well.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# if BTRFS_MAX_LEVEL != 8
#  error
# endif
static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
	/* leaf */
	"btrfs-extent-00",
	"btrfs-extent-01",
	"btrfs-extent-02",
	"btrfs-extent-03",
	"btrfs-extent-04",
	"btrfs-extent-05",
	"btrfs-extent-06",
	"btrfs-extent-07",
	/* highest possible level */
	"btrfs-extent-08",
};
#endif

/*
 * extents on the btree inode are pretty simple, there's one extent
 * that covers the entire device
 */
static struct extent_map *btree_get_extent(struct inode *inode,
		struct page *page, size_t page_offset, u64 start, u64 len,
		int create)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	int ret;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (em) {
		em->bdev =
			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
		read_unlock(&em_tree->lock);
		goto out;
	}
	read_unlock(&em_tree->lock);

	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		em = ERR_PTR(-ENOMEM);
		goto out;
	}
	em->start = 0;
	em->len = (u64)-1;
	em->block_len = (u64)-1;
	em->block_start = 0;
	em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;

	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	if (ret == -EEXIST) {
		u64 failed_start = em->start;
		u64 failed_len = em->len;

		free_extent_map(em);
		em = lookup_extent_mapping(em_tree, start, len);
		if (em) {
			ret = 0;
		} else {
			em = lookup_extent_mapping(em_tree, failed_start,
						   failed_len);
			ret = -EIO;
		}
	} else if (ret) {
		free_extent_map(em);
		em = NULL;
	}
	write_unlock(&em_tree->lock);

	if (ret)
		em = ERR_PTR(ret);
out:
	return em;
}

u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
{
	return crc32c(seed, data, len);
}

void btrfs_csum_final(u32 crc, char *result)
{
	*(__le32 *)result = ~cpu_to_le32(crc);
}
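/*
 * The two helpers above are the whole metadata checksum API: seed crc32c
 * with ~0, feed it each mapped chunk of the buffer, then btrfs_csum_final()
 * inverts the crc and stores it little-endian.  csum_tree_block() below is
 * the canonical caller.
 */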
/*
 * compute the csum for a btree block, and either verify it or write it
 * into the csum field of the block.
 */
static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
			   int verify)
{
	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
	char *result = NULL;
	unsigned long len;
	unsigned long cur_len;
	unsigned long offset = BTRFS_CSUM_SIZE;
	char *map_token = NULL;
	char *kaddr;
	unsigned long map_start;
	unsigned long map_len;
	int err;
	u32 crc = ~(u32)0;
	unsigned long inline_result;

	len = buf->len - offset;
	while (len > 0) {
		err = map_private_extent_buffer(buf, offset, 32,
					&map_token, &kaddr,
					&map_start, &map_len, KM_USER0);
		if (err)
			return 1;
		cur_len = min(len, map_len - (offset - map_start));
		crc = btrfs_csum_data(root, kaddr + offset - map_start,
				      crc, cur_len);
		len -= cur_len;
		offset += cur_len;
		unmap_extent_buffer(buf, map_token, KM_USER0);
	}
	if (csum_size > sizeof(inline_result)) {
		result = kzalloc(csum_size, GFP_NOFS);
		if (!result)
			return 1;
	} else {
		result = (char *)&inline_result;
	}

	btrfs_csum_final(crc, result);

	if (verify) {
		if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
			u32 val;
			u32 found = 0;

			memcpy(&found, result, csum_size);
			read_extent_buffer(buf, &val, 0, csum_size);
			if (printk_ratelimit()) {
				printk(KERN_INFO "btrfs: %s checksum verify "
				       "failed on %llu wanted %X found %X "
				       "level %d\n",
				       root->fs_info->sb->s_id,
				       (unsigned long long)buf->start, val,
				       found, btrfs_header_level(buf));
			}
			if (result != (char *)&inline_result)
				kfree(result);
			return 1;
		}
	} else {
		write_extent_buffer(buf, result, 0, csum_size);
	}
	if (result != (char *)&inline_result)
		kfree(result);
	return 0;
}

/*
 * we can't consider a given block up to date unless the transid of the
 * block matches the transid in the parent node's pointer.  This is how we
 * detect blocks that either didn't get written at all or got written
 * in the wrong place.
 */
static int verify_parent_transid(struct extent_io_tree *io_tree,
				 struct extent_buffer *eb, u64 parent_transid)
{
	int ret;

	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
		return 0;

	lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS);
	if (extent_buffer_uptodate(io_tree, eb) &&
	    btrfs_header_generation(eb) == parent_transid) {
		ret = 0;
		goto out;
	}
	if (printk_ratelimit()) {
		printk("parent transid verify failed on %llu wanted %llu "
		       "found %llu\n",
		       (unsigned long long)eb->start,
		       (unsigned long long)parent_transid,
		       (unsigned long long)btrfs_header_generation(eb));
	}
	ret = 1;
	clear_extent_buffer_uptodate(io_tree, eb);
out:
	unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
		      GFP_NOFS);
	return ret;
}

/*
 * helper to read a given tree block, doing retries as required when
 * the checksums don't match and we have alternate mirrors to try.
 */
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
					  struct extent_buffer *eb,
					  u64 start, u64 parent_transid)
{
	struct extent_io_tree *io_tree;
	int ret;
	int num_copies = 0;
	int mirror_num = 0;

	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
	while (1) {
		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
					       btree_get_extent, mirror_num);
		if (!ret &&
		    !verify_parent_transid(io_tree, eb, parent_transid))
			return ret;

		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
					      eb->start, eb->len);
		if (num_copies == 1)
			return ret;

		mirror_num++;
		if (mirror_num > num_copies)
			return ret;
	}
	return -EIO;
}
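/*
 * Note on mirror_num in the retry loop above: 0 lets the lower layers pick
 * any copy; after a checksum or transid failure we walk the explicit
 * mirrors 1..num_copies before giving up and returning the last error.
 */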
/*
 * checksum a dirty tree block before IO.  This has extra checks to make sure
 * we only fill in the checksum field in the first page of a multi-page block
 */
static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
	struct extent_io_tree *tree;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	int ret;

	tree = &BTRFS_I(page->mapping->host)->io_tree;

	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;
	len = page->private >> 2;
	WARN_ON(len == 0);

	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
					     btrfs_header_generation(eb));
	BUG_ON(ret);
	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		WARN_ON(1);
		goto err;
	}
	if (eb->first_page != page) {
		WARN_ON(1);
		goto err;
	}
	if (!PageUptodate(page)) {
		WARN_ON(1);
		goto err;
	}
	found_level = btrfs_header_level(eb);

	csum_tree_block(root, eb, 0);
err:
	free_extent_buffer(eb);
out:
	return 0;
}

static int check_tree_block_fsid(struct btrfs_root *root,
				 struct extent_buffer *eb)
{
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	u8 fsid[BTRFS_UUID_SIZE];
	int ret = 1;

	read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
			   BTRFS_FSID_SIZE);
	while (fs_devices) {
		if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
			ret = 0;
			break;
		}
		fs_devices = fs_devices->seed;
	}
	return ret;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
{
	lockdep_set_class_and_name(&eb->lock, &btrfs_eb_class[level],
				   btrfs_eb_name[level]);
}
#endif

static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
				      struct extent_state *state)
{
	struct extent_io_tree *tree;
	u64 found_start;
	int found_level;
	unsigned long len;
	struct extent_buffer *eb;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	int ret = 0;

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	if (page->private == EXTENT_PAGE_PRIVATE)
		goto out;
	if (!page->private)
		goto out;

	len = page->private >> 2;
	WARN_ON(len == 0);

	eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);

	found_start = btrfs_header_bytenr(eb);
	if (found_start != start) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "btrfs bad tree block start "
			       "%llu %llu\n",
			       (unsigned long long)found_start,
			       (unsigned long long)eb->start);
		}
		ret = -EIO;
		goto err;
	}
	if (eb->first_page != page) {
		printk(KERN_INFO "btrfs bad first page %lu %lu\n",
		       eb->first_page->index, page->index);
		WARN_ON(1);
		ret = -EIO;
		goto err;
	}
	if (check_tree_block_fsid(root, eb)) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "btrfs bad fsid on block %llu\n",
			       (unsigned long long)eb->start);
		}
		ret = -EIO;
		goto err;
	}
	found_level = btrfs_header_level(eb);

	btrfs_set_buffer_lockdep_class(eb, found_level);

	ret = csum_tree_block(root, eb, 1);
	if (ret)
		ret = -EIO;

	end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
	end = eb->start + end - 1;
err:
	free_extent_buffer(eb);
out:
	return ret;
}

static void end_workqueue_bio(struct bio *bio, int err)
{
	struct end_io_wq *end_io_wq = bio->bi_private;
	struct btrfs_fs_info *fs_info;

	fs_info = end_io_wq->info;
	end_io_wq->error = err;
	end_io_wq->work.func = end_workqueue_fn;
	end_io_wq->work.flags = 0;

	if (bio->bi_rw & (1 << BIO_RW)) {
		if (end_io_wq->metadata)
			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
					   &end_io_wq->work);
		else
			btrfs_queue_worker(&fs_info->endio_write_workers,
					   &end_io_wq->work);
	} else {
		if (end_io_wq->metadata)
			btrfs_queue_worker(&fs_info->endio_meta_workers,
					   &end_io_wq->work);
		else
			btrfs_queue_worker(&fs_info->endio_workers,
					   &end_io_wq->work);
	}
}
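/*
 * Routing done by end_workqueue_bio(), in table form:
 *
 *	write, metadata		-> endio_meta_write_workers
 *	write, data		-> endio_write_workers
 *	read,  metadata		-> endio_meta_workers
 *	read,  data		-> endio_workers
 *
 * end_workqueue_fn() (declared at the top of this file) then finishes
 * the bio with the original end_io callback saved in the end_io_wq.
 */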
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
			int metadata)
{
	struct end_io_wq *end_io_wq;

	end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
	if (!end_io_wq)
		return -ENOMEM;

	end_io_wq->private = bio->bi_private;
	end_io_wq->end_io = bio->bi_end_io;
	end_io_wq->info = info;
	end_io_wq->error = 0;
	end_io_wq->bio = bio;
	end_io_wq->metadata = metadata;

	bio->bi_private = end_io_wq;
	bio->bi_end_io = end_workqueue_bio;
	return 0;
}

unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
{
	unsigned long limit = min_t(unsigned long,
				    info->workers.max_workers,
				    info->fs_devices->open_devices);
	return 256 * limit;
}

int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
{
	return atomic_read(&info->nr_async_bios) >
		btrfs_async_submit_limit(info);
}

static void run_one_async_start(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info;
	struct async_submit_bio *async;

	async = container_of(work, struct async_submit_bio, work);
	fs_info = BTRFS_I(async->inode)->root->fs_info;
	async->submit_bio_start(async->inode, async->rw, async->bio,
				async->mirror_num, async->bio_flags);
}

static void run_one_async_done(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info;
	struct async_submit_bio *async;
	int limit;

	async = container_of(work, struct async_submit_bio, work);
	fs_info = BTRFS_I(async->inode)->root->fs_info;

	limit = btrfs_async_submit_limit(fs_info);
	limit = limit * 2 / 3;

	atomic_dec(&fs_info->nr_async_submits);

	if (atomic_read(&fs_info->nr_async_submits) < limit &&
	    waitqueue_active(&fs_info->async_submit_wait))
		wake_up(&fs_info->async_submit_wait);

	async->submit_bio_done(async->inode, async->rw, async->bio,
			       async->mirror_num, async->bio_flags);
}

static void run_one_async_free(struct btrfs_work *work)
{
	struct async_submit_bio *async;

	async = container_of(work, struct async_submit_bio, work);
	kfree(async);
}

int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
			int rw, struct bio *bio, int mirror_num,
			unsigned long bio_flags,
			extent_submit_bio_hook_t *submit_bio_start,
			extent_submit_bio_hook_t *submit_bio_done)
{
	struct async_submit_bio *async;

	async = kmalloc(sizeof(*async), GFP_NOFS);
	if (!async)
		return -ENOMEM;

	async->inode = inode;
	async->rw = rw;
	async->bio = bio;
	async->mirror_num = mirror_num;
	async->submit_bio_start = submit_bio_start;
	async->submit_bio_done = submit_bio_done;

	async->work.func = run_one_async_start;
	async->work.ordered_func = run_one_async_done;
	async->work.ordered_free = run_one_async_free;

	async->work.flags = 0;
	async->bio_flags = bio_flags;

	atomic_inc(&fs_info->nr_async_submits);

	if (rw & (1 << BIO_RW_SYNCIO))
		btrfs_set_work_high_prio(&async->work);

	btrfs_queue_worker(&fs_info->workers, &async->work);

	while (atomic_read(&fs_info->async_submit_draining) &&
	       atomic_read(&fs_info->nr_async_submits)) {
		wait_event(fs_info->async_submit_wait,
			   (atomic_read(&fs_info->nr_async_submits) == 0));
	}

	return 0;
}

static int btree_csum_one_bio(struct bio *bio)
{
	struct bio_vec *bvec = bio->bi_io_vec;
	int bio_index = 0;
	struct btrfs_root *root;

	WARN_ON(bio->bi_vcnt <= 0);
	while (bio_index < bio->bi_vcnt) {
		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
		csum_dirty_buffer(root, bvec->bv_page);
		bio_index++;
		bvec++;
	}
	return 0;
}
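/*
 * The async submit path above is an ordered two phase pipeline:
 * run_one_async_start() does the expensive csum work in parallel, while
 * run_one_async_done() runs in queueing order so bios reach the devices
 * in the order they were submitted; run_one_async_free() releases the
 * async_submit_bio once both phases have run.
 */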
static int __btree_submit_bio_start(struct inode *inode, int rw,
				    struct bio *bio, int mirror_num,
				    unsigned long bio_flags)
{
	/*
	 * when we're called for a write, we're already in the async
	 * submission context.  Just jump into btrfs_map_bio
	 */
	btree_csum_one_bio(bio);
	return 0;
}

static int __btree_submit_bio_done(struct inode *inode, int rw,
				   struct bio *bio, int mirror_num,
				   unsigned long bio_flags)
{
	/*
	 * when we're called for a write, we're already in the async
	 * submission context.  Just jump into btrfs_map_bio
	 */
	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
}

static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
				 int mirror_num, unsigned long bio_flags)
{
	int ret;

	ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
				  bio, 1);
	BUG_ON(ret);

	if (!(rw & (1 << BIO_RW))) {
		/*
		 * called for a read, do the setup so that checksum validation
		 * can happen in the async kernel threads
		 */
		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
				     mirror_num, 0);
	}

	/*
	 * kthread helpers are used to submit writes so that checksumming
	 * can happen in parallel across all CPUs
	 */
	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
				   inode, rw, bio, mirror_num, 0,
				   __btree_submit_bio_start,
				   __btree_submit_bio_done);
}

static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_io_tree *tree;
	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
	struct extent_buffer *eb;
	int was_dirty;

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	if (!(current->flags & PF_MEMALLOC)) {
		return extent_write_full_page(tree, page,
					      btree_get_extent, wbc);
	}

	redirty_page_for_writepage(wbc, page);
	eb = btrfs_find_tree_block(root, page_offset(page),
				   PAGE_CACHE_SIZE);
	WARN_ON(!eb);

	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
	if (!was_dirty) {
		spin_lock(&root->fs_info->delalloc_lock);
		root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
		spin_unlock(&root->fs_info->delalloc_lock);
	}
	free_extent_buffer(eb);

	unlock_page(page);
	return 0;
}

static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(mapping->host)->io_tree;
	if (wbc->sync_mode == WB_SYNC_NONE) {
		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
		u64 num_dirty;
		unsigned long thresh = 32 * 1024 * 1024;

		if (wbc->for_kupdate)
			return 0;

		/* this is a bit racy, but that's ok */
		num_dirty = root->fs_info->dirty_metadata_bytes;
		if (num_dirty < thresh)
			return 0;
	}
	return extent_writepages(tree, mapping, btree_get_extent, wbc);
}

static int btree_readpage(struct file *file, struct page *page)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->io_tree;
	return extent_read_full_page(tree, page, btree_get_extent);
}

static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
	struct extent_io_tree *tree;
	struct extent_map_tree *map;
	int ret;

	if (PageWriteback(page) || PageDirty(page))
		return 0;

	tree = &BTRFS_I(page->mapping->host)->io_tree;
	map = &BTRFS_I(page->mapping->host)->extent_tree;

	ret = try_release_extent_state(map, tree, page, gfp_flags);
	if (!ret)
		return 0;

	ret = try_release_extent_buffer(tree, page);
	if (ret == 1) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		page_cache_release(page);
	}

	return ret;
}

static void btree_invalidatepage(struct page *page, unsigned long offset)
{
	struct extent_io_tree *tree;
	tree = &BTRFS_I(page->mapping->host)->io_tree;
	extent_invalidatepage(tree, page, offset);
	btree_releasepage(page, GFP_NOFS);
	if (PagePrivate(page)) {
		printk(KERN_WARNING "btrfs warning page private not zero "
		       "on page %llu\n",
		       (unsigned long long)page_offset(page));
		ClearPagePrivate(page);
		set_page_private(page, 0);
		page_cache_release(page);
	}
}
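/*
 * Address space operations for the btree inode.  There is no readpages
 * hook here; readahead of tree blocks is driven explicitly through
 * readahead_tree_block() further down in this file.
 */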
static const struct address_space_operations btree_aops = {
	.readpage	= btree_readpage,
	.writepage	= btree_writepage,
	.writepages	= btree_writepages,
	.releasepage	= btree_releasepage,
	.invalidatepage	= btree_invalidatepage,
	.sync_page	= block_sync_page,
};

int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
			 u64 parent_transid)
{
	struct extent_buffer *buf = NULL;
	struct inode *btree_inode = root->fs_info->btree_inode;
	int ret = 0;

	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!buf)
		return 0;
	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
				 buf, 0, 0, btree_get_extent, 0);
	free_extent_buffer(buf);
	return ret;
}

struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
					    u64 bytenr, u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_buffer *eb;
	eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
				bytenr, blocksize, GFP_NOFS);
	return eb;
}

struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
						   u64 bytenr, u32 blocksize)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_buffer *eb;

	eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
				 bytenr, blocksize, NULL, GFP_NOFS);
	return eb;
}

int btrfs_write_tree_block(struct extent_buffer *buf)
{
	return filemap_fdatawrite_range(buf->first_page->mapping, buf->start,
					buf->start + buf->len - 1);
}

int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
{
	return filemap_fdatawait_range(buf->first_page->mapping,
				       buf->start, buf->start + buf->len - 1);
}

struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
				      u32 blocksize, u64 parent_transid)
{
	struct extent_buffer *buf = NULL;
	struct inode *btree_inode = root->fs_info->btree_inode;
	struct extent_io_tree *io_tree;
	int ret;

	io_tree = &BTRFS_I(btree_inode)->io_tree;

	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
	if (!buf)
		return NULL;

	ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);

	if (ret == 0)
		set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
	return buf;
}

int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		     struct extent_buffer *buf)
{
	struct inode *btree_inode = root->fs_info->btree_inode;
	if (btrfs_header_generation(buf) ==
	    root->fs_info->running_transaction->transid) {
		btrfs_assert_tree_locked(buf);

		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
			spin_lock(&root->fs_info->delalloc_lock);
			if (root->fs_info->dirty_metadata_bytes >= buf->len)
				root->fs_info->dirty_metadata_bytes -= buf->len;
			else
				WARN_ON(1);
			spin_unlock(&root->fs_info->delalloc_lock);
		}

		/* ugh, clear_extent_buffer_dirty needs to lock the page */
		btrfs_set_lock_blocking(buf);
		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
					  buf);
	}
	return 0;
}
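/*
 * __setup_root() below only fills in the in-memory btrfs_root; it reads
 * nothing from disk.  Callers such as find_and_setup_root() are expected
 * to load root->node from disk afterwards.
 */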
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
			u32 stripesize, struct btrfs_root *root,
			struct btrfs_fs_info *fs_info,
			u64 objectid)
{
	root->node = NULL;
	root->commit_root = NULL;
	root->sectorsize = sectorsize;
	root->nodesize = nodesize;
	root->leafsize = leafsize;
	root->stripesize = stripesize;
	root->ref_cows = 0;
	root->track_dirty = 0;
	root->in_radix = 0;
	root->clean_orphans = 0;

	root->fs_info = fs_info;
	root->objectid = objectid;
	root->last_trans = 0;
	root->highest_objectid = 0;
	root->name = NULL;
	root->in_sysfs = 0;
	root->inode_tree.rb_node = NULL;

	INIT_LIST_HEAD(&root->dirty_list);
	INIT_LIST_HEAD(&root->orphan_list);
	INIT_LIST_HEAD(&root->root_list);
	spin_lock_init(&root->node_lock);
	spin_lock_init(&root->list_lock);
	spin_lock_init(&root->inode_lock);
	mutex_init(&root->objectid_mutex);
	mutex_init(&root->log_mutex);
	init_waitqueue_head(&root->log_writer_wait);
	init_waitqueue_head(&root->log_commit_wait[0]);
	init_waitqueue_head(&root->log_commit_wait[1]);
	atomic_set(&root->log_commit[0], 0);
	atomic_set(&root->log_commit[1], 0);
	atomic_set(&root->log_writers, 0);
	root->log_batch = 0;
	root->log_transid = 0;
	root->last_log_commit = 0;
	extent_io_tree_init(&root->dirty_log_pages,
			    fs_info->btree_inode->i_mapping, GFP_NOFS);

	memset(&root->root_key, 0, sizeof(root->root_key));
	memset(&root->root_item, 0, sizeof(root->root_item));
	memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
	memset(&root->root_kobj, 0, sizeof(root->root_kobj));
	root->defrag_trans_start = fs_info->generation;
	init_completion(&root->kobj_unregister);
	root->defrag_running = 0;
	root->root_key.objectid = objectid;
	root->anon_super.s_root = NULL;
	root->anon_super.s_dev = 0;
	INIT_LIST_HEAD(&root->anon_super.s_list);
	INIT_LIST_HEAD(&root->anon_super.s_instances);
	init_rwsem(&root->anon_super.s_umount);

	return 0;
}

static int find_and_setup_root(struct btrfs_root *tree_root,
			       struct btrfs_fs_info *fs_info,
			       u64 objectid,
			       struct btrfs_root *root)
{
	int ret;
	u32 blocksize;
	u64 generation;

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, objectid);
	ret = btrfs_find_last_root(tree_root, objectid,
				   &root->root_item, &root->root_key);
	if (ret > 0)
		return -ENOENT;
	BUG_ON(ret);

	generation = btrfs_root_generation(&root->root_item);
	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
				     blocksize, generation);
	BUG_ON(!root->node);
	root->commit_root = btrfs_root_node(root);
	return 0;
}

int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info)
{
	struct extent_buffer *eb;
	struct btrfs_root *log_root_tree = fs_info->log_root_tree;
	u64 start = 0;
	u64 end = 0;
	int ret;

	if (!log_root_tree)
		return 0;

	while (1) {
		ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
					    0, &start, &end,
					    EXTENT_DIRTY | EXTENT_NEW);
		if (ret)
			break;

		clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
				  EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
	}
	eb = fs_info->log_root_tree->node;

	WARN_ON(btrfs_header_level(eb) != 0);
	WARN_ON(btrfs_header_nritems(eb) != 0);

	ret = btrfs_free_reserved_extent(fs_info->tree_root,
					 eb->start, eb->len);
	BUG_ON(ret);

	free_extent_buffer(eb);
	kfree(fs_info->log_root_tree);
	fs_info->log_root_tree = NULL;
	return 0;
}
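/*
 * alloc_log_tree() builds a root for the tree log.  It reuses the node,
 * leaf and sector sizes of the existing tree_root, and keys the new root
 * with BTRFS_TREE_LOG_OBJECTID for both objectid and offset.
 */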
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct extent_buffer *leaf;

	root = kzalloc(sizeof(*root), GFP_NOFS);
	if (!root)
		return ERR_PTR(-ENOMEM);

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, BTRFS_TREE_LOG_OBJECTID);

	root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
	root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
	/*
	 * log trees do not get reference counted because they go away
	 * before a real commit is actually done.  They do store pointers
	 * to file data extents, and those reference counts still get
	 * updated (along with back refs to the log tree).
	 */
	root->ref_cows = 0;

	leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
				      BTRFS_TREE_LOG_OBJECTID,

/* drivers/rtc/rtc-s3c.c
 *
 * Copyright (c) 2004,2006 Simtec Electronics
 *	Ben Dooks, <ben@simtec.co.uk>
 *	http://armlinux.simtec.co.uk/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * S3C2410/S3C2440/S3C24XX Internal RTC Driver
*/

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/rtc.h>
#include <linux/bcd.h>
#include <linux/clk.h>
#include <linux/log2.h>

#include <mach/hardware.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/plat-s3c/regs-rtc.h>

/* I have yet to find an S3C implementation with more than one
 * of these rtc blocks in */

static struct resource *s3c_rtc_mem;

static void __iomem *s3c_rtc_base;
static int s3c_rtc_alarmno = NO_IRQ;
static int s3c_rtc_tickno  = NO_IRQ;

static DEFINE_SPINLOCK(s3c_rtc_pie_lock);

/* IRQ Handlers */

static irqreturn_t s3c_rtc_alarmirq(int irq, void *id)
{
	struct rtc_device *rdev = id;

	rtc_update_irq(rdev, 1, RTC_AF | RTC_IRQF);
	return IRQ_HANDLED;
}

static irqreturn_t s3c_rtc_tickirq(int irq, void *id)
{
	struct rtc_device *rdev = id;

	rtc_update_irq(rdev, 1, RTC_PF | RTC_IRQF);
	return IRQ_HANDLED;
}

/* Update control registers */
static void s3c_rtc_setaie(int to)
{
	unsigned int tmp;

	pr_debug("%s: aie=%d\n", __func__, to);

	tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;

	if (to)
		tmp |= S3C2410_RTCALM_ALMEN;

	writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
}

static int s3c_rtc_setpie(struct device *dev, int enabled)
{
	unsigned int tmp;

	pr_debug("%s: pie=%d\n", __func__, enabled);

	spin_lock_irq(&s3c_rtc_pie_lock);
	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & ~S3C2410_TICNT_ENABLE;

	if (enabled)
		tmp |= S3C2410_TICNT_ENABLE;

	writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
	spin_unlock_irq(&s3c_rtc_pie_lock);

	return 0;
}

static int s3c_rtc_setfreq(struct device *dev, int freq)
{
	unsigned int tmp;

	/* the tick divides a fixed 128Hz clock, so only power-of-two
	 * frequencies are exact; this also guards the division below
	 * against freq == 0 */
	if (!is_power_of_2(freq))
		return -EINVAL;

	spin_lock_irq(&s3c_rtc_pie_lock);

	tmp = readb(s3c_rtc_base + S3C2410_TICNT) & S3C2410_TICNT_ENABLE;
	tmp |= (128 / freq) - 1;

	writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
	spin_unlock_irq(&s3c_rtc_pie_lock);

	return 0;
}

/* Time read/write */

static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
{
	unsigned int have_retried = 0;
	void __iomem *base = s3c_rtc_base;

 retry_get_time:
	rtc_tm->tm_min  = readb(base + S3C2410_RTCMIN);
	rtc_tm->tm_hour = readb(base + S3C2410_RTCHOUR);
	rtc_tm->tm_mday = readb(base + S3C2410_RTCDATE);
	rtc_tm->tm_mon  = readb(base + S3C2410_RTCMON);
	rtc_tm->tm_year = readb(base + S3C2410_RTCYEAR);
	rtc_tm->tm_sec  = readb(base + S3C2410_RTCSEC);

	/* the only way to work out whether the system was mid-update
	 * when we read it is to check the second counter, and if it
	 * is zero, then we re-try the entire read
	 */

	if (rtc_tm->tm_sec == 0 && !have_retried) {
		have_retried = 1;
		goto retry_get_time;
	}

	pr_debug("read time %02x.%02x.%02x %02x/%02x/%02x\n",
		 rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
		 rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);

	BCD_TO_BIN(rtc_tm->tm_sec);
	BCD_TO_BIN(rtc_tm->tm_min);
	BCD_TO_BIN(rtc_tm->tm_hour);
	BCD_TO_BIN(rtc_tm->tm_mday);
	BCD_TO_BIN(rtc_tm->tm_mon);
	BCD_TO_BIN(rtc_tm->tm_year);

	rtc_tm->tm_year += 100;
	rtc_tm->tm_mon -= 1;

	return 0;
}
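/*
 * All time and alarm registers on this block hold BCD values, hence the
 * BCD_TO_BIN/BIN2BCD conversions throughout.  The hardware only stores a
 * two-digit year, so tm_year is biased by 100: the driver covers
 * 2000-2099, and s3c_rtc_settime() below rejects anything outside that.
 */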
static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
{
	void __iomem *base = s3c_rtc_base;
	int year = tm->tm_year - 100;

	pr_debug("set time %02d.%02d.%02d %02d/%02d/%02d\n",
		 tm->tm_year, tm->tm_mon, tm->tm_mday,
		 tm->tm_hour, tm->tm_min, tm->tm_sec);

	/* we get around y2k by simply not supporting it */

	if (year < 0 || year >= 100) {
		dev_err(dev, "rtc only supports 100 years\n");
		return -EINVAL;
	}

	writeb(BIN2BCD(tm->tm_sec),  base + S3C2410_RTCSEC);
	writeb(BIN2BCD(tm->tm_min),  base + S3C2410_RTCMIN);
	writeb(BIN2BCD(tm->tm_hour), base + S3C2410_RTCHOUR);
	writeb(BIN2BCD(tm->tm_mday), base + S3C2410_RTCDATE);
	writeb(BIN2BCD(tm->tm_mon + 1), base + S3C2410_RTCMON);
	writeb(BIN2BCD(year), base + S3C2410_RTCYEAR);

	return 0;
}

static int s3c_rtc_getalarm(struct device *dev, struct rtc_wkalrm *alrm)
{
	struct rtc_time *alm_tm = &alrm->time;
	void __iomem *base = s3c_rtc_base;
	unsigned int alm_en;

	alm_tm->tm_sec  = readb(base + S3C2410_ALMSEC);
	alm_tm->tm_min  = readb(base + S3C2410_ALMMIN);
	alm_tm->tm_hour = readb(base + S3C2410_ALMHOUR);
	alm_tm->tm_mon  = readb(base + S3C2410_ALMMON);
	alm_tm->tm_mday = readb(base + S3C2410_ALMDATE);
	alm_tm->tm_year = readb(base + S3C2410_ALMYEAR);

	alm_en = readb(base + S3C2410_RTCALM);

	alrm->enabled = (alm_en & S3C2410_RTCALM_ALMEN) ? 1 : 0;

	pr_debug("read alarm %02x %02x.%02x.%02x %02x/%02x/%02x\n",
		 alm_en,
		 alm_tm->tm_year, alm_tm->tm_mon, alm_tm->tm_mday,
		 alm_tm->tm_hour, alm_tm->tm_min, alm_tm->tm_sec);

	/* decode the alarm enable field */

	if (alm_en & S3C2410_RTCALM_SECEN)
		BCD_TO_BIN(alm_tm->tm_sec);
	else
		alm_tm->tm_sec = 0xff;

	if (alm_en & S3C2410_RTCALM_MINEN)
		BCD_TO_BIN(alm_tm->tm_min);
	else
		alm_tm->tm_min = 0xff;

	if (alm_en & S3C2410_RTCALM_HOUREN)
		BCD_TO_BIN(alm_tm->tm_hour);
	else
		alm_tm->tm_hour = 0xff;

	if (alm_en & S3C2410_RTCALM_DAYEN)
		BCD_TO_BIN(alm_tm->tm_mday);
	else
		alm_tm->tm_mday = 0xff;

	if (alm_en & S3C2410_RTCALM_MONEN) {
		BCD_TO_BIN(alm_tm->tm_mon);
		alm_tm->tm_mon -= 1;
	} else {
		alm_tm->tm_mon = 0xff;
	}

	if (alm_en & S3C2410_RTCALM_YEAREN)
		BCD_TO_BIN(alm_tm->tm_year);
	else
		alm_tm->tm_year = 0xffff;

	return 0;
}

static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
{
	struct rtc_time *tm = &alrm->time;
	void __iomem *base = s3c_rtc_base;
	unsigned int alrm_en;

	pr_debug("s3c_rtc_setalarm: %d, %02x/%02x/%02x %02x.%02x.%02x\n",
		 alrm->enabled,
		 tm->tm_mday & 0xff, tm->tm_mon & 0xff, tm->tm_year & 0xff,
		 tm->tm_hour & 0xff, tm->tm_min & 0xff, tm->tm_sec);

	alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
	writeb(0x00, base + S3C2410_RTCALM);

	if (tm->tm_sec < 60 && tm->tm_sec >= 0) {
		alrm_en |= S3C2410_RTCALM_SECEN;
		writeb(BIN2BCD(tm->tm_sec), base + S3C2410_ALMSEC);
	}

	if (tm->tm_min < 60 && tm->tm_min >= 0) {
		alrm_en |= S3C2410_RTCALM_MINEN;
		writeb(BIN2BCD(tm->tm_min), base + S3C2410_ALMMIN);
	}

	if (tm->tm_hour < 24 && tm->tm_hour >= 0) {
		alrm_en |= S3C2410_RTCALM_HOUREN;
		writeb(BIN2BCD(tm->tm_hour), base + S3C2410_ALMHOUR);
	}

	pr_debug("setting S3C2410_RTCALM to %08x\n", alrm_en);

	writeb(alrm_en, base + S3C2410_RTCALM);

	s3c_rtc_setaie(alrm->enabled);

	if (alrm->enabled)
		enable_irq_wake(s3c_rtc_alarmno);
	else
		disable_irq_wake(s3c_rtc_alarmno);

	return 0;
}

static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
{
	unsigned int ticnt = readb(s3c_rtc_base + S3C2410_TICNT);

	seq_printf(seq, "periodic_IRQ\t: %s\n",
		   (ticnt & S3C2410_TICNT_ENABLE) ? "yes" : "no");

	return 0;
}
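/*
 * s3c_rtc_open()/s3c_rtc_release() below grab and drop both interrupt
 * lines per open of the rtc character device.  Note that release only
 * shuts off the periodic tick; the alarm enable is deliberately left
 * alone, since it may be needed for wakeup.
 */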
"yes" : "no" ); return 0; } static int s3c_rtc_open(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct rtc_device *rtc_dev = platform_get_drvdata(pdev); int ret; ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq, IRQF_DISABLED, "s3c2410-rtc alarm", rtc_dev); if (ret) { dev_err(dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret); return ret; } ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq, IRQF_DISABLED, "s3c2410-rtc tick", rtc_dev); if (ret) { dev_err(dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret); goto tick_err; } return ret; tick_err: free_irq(s3c_rtc_alarmno, rtc_dev); return ret; } static void s3c_rtc_release(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct rtc_device *rtc_dev = platform_get_drvdata(pdev); /* do not clear AIE here, it may be needed for wake */ s3c_rtc_setpie(dev, 0); free_irq(s3c_rtc_alarmno, rtc_dev); free_irq(s3c_rtc_tickno, rtc_dev); } static const struct rtc_class_ops s3c_rtcops = { .open = s3c_rtc_open, .release = s3c_rtc_release, .read_time = s3c_rtc_gettime, .set_time = s3c_rtc_settime, .read_alarm = s3c_rtc_getalarm, .set_alarm = s3c_rtc_setalarm, .irq_set_freq = s3c_rtc_setfreq, .irq_set_state = s3c_rtc_setpie, .proc = s3c_rtc_proc, }; static void s3c_rtc_enable(struct platform_device *pdev, int en) { void __iomem *base = s3c_rtc_base; unsigned int tmp; if (s3c_rtc_base == NULL) return; if (!en) { tmp = readb(base + S3C2410_RTCCON); writeb(tmp & ~S3C2410_RTCCON_RTCEN, base + S3C2410_RTCCON); tmp = readb(base + S3C2410_TICNT); writeb(tmp & ~S3C2410_TICNT_ENABLE, base + S3C2410_TICNT); } else { /* re-enable the device, and check it is ok */ if ((readb(base+S3C2410_RTCCON) & S3C2410_RTCCON_RTCEN) == 0){ dev_info(&pdev->dev, "rtc disabled, re-enabling\n"); tmp = readb(base + S3C2410_RTCCON); writeb(tmp|S3C2410_RTCCON_RTCEN, base+S3C2410_RTCCON); } if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CNTSEL)){ dev_info(&pdev->dev, "removing RTCCON_CNTSEL\n"); tmp = readb(base + S3C2410_RTCCON); writeb(tmp& ~S3C2410_RTCCON_CNTSEL, base+S3C2410_RTCCON); } if ((readb(base + S3C2410_RTCCON) & S3C2410_RTCCON_CLKRST)){ dev_info(&pdev->dev, "removing RTCCON_CLKRST\n"); tmp = readb(base + S3C2410_RTCCON); writeb(tmp & ~S3C2410_RTCCON_CLKRST, base+S3C2410_RTCCON); } } } static int __devexit s3c_rtc_remove(struct platform_device *dev) { struct rtc_device *rtc = platform_get_drvdata(dev); platform_set_drvdata(dev, NULL); rtc_device_unregister(rtc); s3c_rtc_setpie(&dev->dev, 0); s3c_rtc_setaie(0); iounmap(s3c_rtc_base); release_resource(s3c_rtc_mem); kfree(s3c_rtc_mem); return 0; } static int __devinit s3c_rtc_probe(struct platform_device *pdev) { struct rtc_device *rtc; struct resource *res; int ret; pr_debug("%s: probe=%p\n", __func__, pdev);