diff options
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 45 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 12 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 257 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 12 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 448 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.h | 31 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 7 |
8 files changed, 678 insertions, 135 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d0b0e43a6a8b..93b1aa932014 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -153,6 +153,7 @@ struct btrfs_inode { | |||
| 153 | unsigned ordered_data_close:1; | 153 | unsigned ordered_data_close:1; |
| 154 | unsigned orphan_meta_reserved:1; | 154 | unsigned orphan_meta_reserved:1; |
| 155 | unsigned dummy_inode:1; | 155 | unsigned dummy_inode:1; |
| 156 | unsigned in_defrag:1; | ||
| 156 | 157 | ||
| 157 | /* | 158 | /* |
| 158 | * always compress this one file | 159 | * always compress this one file |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 026fc47b42cf..332323e19dd1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -1074,6 +1074,11 @@ struct btrfs_fs_info { | |||
| 1074 | /* all metadata allocations go through this cluster */ | 1074 | /* all metadata allocations go through this cluster */ |
| 1075 | struct btrfs_free_cluster meta_alloc_cluster; | 1075 | struct btrfs_free_cluster meta_alloc_cluster; |
| 1076 | 1076 | ||
| 1077 | /* auto defrag inodes go here */ | ||
| 1078 | spinlock_t defrag_inodes_lock; | ||
| 1079 | struct rb_root defrag_inodes; | ||
| 1080 | atomic_t defrag_running; | ||
| 1081 | |||
| 1077 | spinlock_t ref_cache_lock; | 1082 | spinlock_t ref_cache_lock; |
| 1078 | u64 total_ref_cache_size; | 1083 | u64 total_ref_cache_size; |
| 1079 | 1084 | ||
| @@ -1205,6 +1210,38 @@ struct btrfs_root { | |||
| 1205 | struct super_block anon_super; | 1210 | struct super_block anon_super; |
| 1206 | }; | 1211 | }; |
| 1207 | 1212 | ||
/*
 * Arguments describing one defrag request over a byte range of a file.
 * Passed to btrfs_defrag_file(); the auto-defrag worker builds one with
 * everything zeroed except len.
 */
struct btrfs_ioctl_defrag_range_args {
	/* start of the defrag operation (byte offset into the file) */
	__u64 start;

	/* number of bytes to defrag, use (u64)-1 to say all */
	__u64 len;

	/*
	 * flags for the operation, which can include turning
	 * on compression for this one defrag
	 * (BTRFS_DEFRAG_RANGE_COMPRESS)
	 */
	__u64 flags;

	/*
	 * any extent bigger than this will be considered
	 * already defragged.  Use 0 to take the kernel default
	 * (256K in btrfs_defrag_file).  Use 1 to say every single
	 * extent must be rewritten
	 */
	__u32 extent_thresh;

	/*
	 * which compression method to use if turning on compression
	 * for this defrag operation.  If unspecified, zlib will
	 * be used
	 */
	__u32 compress_type;

	/* spare for later */
	__u32 unused[4];
};
| 1243 | |||
| 1244 | |||
| 1208 | /* | 1245 | /* |
| 1209 | * inode items have the data typically returned from stat and store other | 1246 | * inode items have the data typically returned from stat and store other |
| 1210 | * info about object characteristics. There is one for every file and dir in | 1247 | * info about object characteristics. There is one for every file and dir in |
| @@ -1302,6 +1339,7 @@ struct btrfs_root { | |||
| 1302 | #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) | 1339 | #define BTRFS_MOUNT_CLEAR_CACHE (1 << 13) |
| 1303 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) | 1340 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) |
| 1304 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | 1341 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) |
| 1342 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | ||
| 1305 | 1343 | ||
| 1306 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1344 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
| 1307 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1345 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
| @@ -2528,8 +2566,13 @@ extern const struct dentry_operations btrfs_dentry_operations; | |||
| 2528 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2566 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
| 2529 | void btrfs_update_iflags(struct inode *inode); | 2567 | void btrfs_update_iflags(struct inode *inode); |
| 2530 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 2568 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
| 2531 | 2569 | int btrfs_defrag_file(struct inode *inode, struct file *file, | |
| 2570 | struct btrfs_ioctl_defrag_range_args *range, | ||
| 2571 | u64 newer_than, unsigned long max_pages); | ||
| 2532 | /* file.c */ | 2572 | /* file.c */ |
| 2573 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||
| 2574 | struct inode *inode); | ||
| 2575 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | ||
| 2533 | int btrfs_sync_file(struct file *file, int datasync); | 2576 | int btrfs_sync_file(struct file *file, int datasync); |
| 2534 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2577 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 2535 | int skip_pinned); | 2578 | int skip_pinned); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 16d335b342a2..b2588a552658 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1475,6 +1475,7 @@ static int cleaner_kthread(void *arg) | |||
| 1475 | btrfs_run_delayed_iputs(root); | 1475 | btrfs_run_delayed_iputs(root); |
| 1476 | btrfs_clean_old_snapshots(root); | 1476 | btrfs_clean_old_snapshots(root); |
| 1477 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1477 | mutex_unlock(&root->fs_info->cleaner_mutex); |
| 1478 | btrfs_run_defrag_inodes(root->fs_info); | ||
| 1478 | } | 1479 | } |
| 1479 | 1480 | ||
| 1480 | if (freezing(current)) { | 1481 | if (freezing(current)) { |
| @@ -1616,6 +1617,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1616 | spin_lock_init(&fs_info->ref_cache_lock); | 1617 | spin_lock_init(&fs_info->ref_cache_lock); |
| 1617 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1618 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
| 1618 | spin_lock_init(&fs_info->delayed_iput_lock); | 1619 | spin_lock_init(&fs_info->delayed_iput_lock); |
| 1620 | spin_lock_init(&fs_info->defrag_inodes_lock); | ||
| 1619 | 1621 | ||
| 1620 | init_completion(&fs_info->kobj_unregister); | 1622 | init_completion(&fs_info->kobj_unregister); |
| 1621 | fs_info->tree_root = tree_root; | 1623 | fs_info->tree_root = tree_root; |
| @@ -1638,9 +1640,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1638 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1640 | atomic_set(&fs_info->async_delalloc_pages, 0); |
| 1639 | atomic_set(&fs_info->async_submit_draining, 0); | 1641 | atomic_set(&fs_info->async_submit_draining, 0); |
| 1640 | atomic_set(&fs_info->nr_async_bios, 0); | 1642 | atomic_set(&fs_info->nr_async_bios, 0); |
| 1643 | atomic_set(&fs_info->defrag_running, 0); | ||
| 1641 | fs_info->sb = sb; | 1644 | fs_info->sb = sb; |
| 1642 | fs_info->max_inline = 8192 * 1024; | 1645 | fs_info->max_inline = 8192 * 1024; |
| 1643 | fs_info->metadata_ratio = 0; | 1646 | fs_info->metadata_ratio = 0; |
| 1647 | fs_info->defrag_inodes = RB_ROOT; | ||
| 1644 | 1648 | ||
| 1645 | fs_info->thread_pool_size = min_t(unsigned long, | 1649 | fs_info->thread_pool_size = min_t(unsigned long, |
| 1646 | num_online_cpus() + 2, 8); | 1650 | num_online_cpus() + 2, 8); |
| @@ -2501,6 +2505,14 @@ int close_ctree(struct btrfs_root *root) | |||
| 2501 | smp_mb(); | 2505 | smp_mb(); |
| 2502 | 2506 | ||
| 2503 | btrfs_scrub_cancel(root); | 2507 | btrfs_scrub_cancel(root); |
| 2508 | |||
| 2509 | /* wait for any defraggers to finish */ | ||
| 2510 | wait_event(fs_info->transaction_wait, | ||
| 2511 | (atomic_read(&fs_info->defrag_running) == 0)); | ||
| 2512 | |||
| 2513 | /* clear out the rbtree of defraggable inodes */ | ||
| 2514 | btrfs_run_defrag_inodes(root->fs_info); | ||
| 2515 | |||
| 2504 | btrfs_put_block_group_cache(fs_info); | 2516 | btrfs_put_block_group_cache(fs_info); |
| 2505 | 2517 | ||
| 2506 | /* | 2518 | /* |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 58ddc4442159..c6a22d783c35 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -40,6 +40,263 @@ | |||
| 40 | #include "locking.h" | 40 | #include "locking.h" |
| 41 | #include "compat.h" | 41 | #include "compat.h" |
| 42 | 42 | ||
/*
 * when auto defrag is enabled we
 * queue up these defrag structs to remember which
 * inodes need defragging passes
 *
 * Records are linked into the fs_info->defrag_inodes rbtree, ordered by
 * ino, and are allocated with kzalloc so unset fields start at zero.
 */
struct inode_defrag {
	/* linkage into fs_info->defrag_inodes */
	struct rb_node rb_node;
	/* objectid */
	u64 ino;
	/*
	 * transid where the defrag was added, we search for
	 * extents newer than this
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/*
	 * last offset we were able to defrag; the next pass over this
	 * inode starts from here
	 */
	u64 last_offset;

	/*
	 * if we've wrapped around back to zero once already; set when a
	 * pass that started mid-file is requeued from offset 0
	 */
	int cycled;
};
| 67 | |||
| 68 | /* pop a record for an inode into the defrag tree. The lock | ||
| 69 | * must be held already | ||
| 70 | * | ||
| 71 | * If you're inserting a record for an older transid than an | ||
| 72 | * existing record, the transid already in the tree is lowered | ||
| 73 | * | ||
| 74 | * If an existing record is found the defrag item you | ||
| 75 | * pass in is freed | ||
| 76 | */ | ||
| 77 | static int __btrfs_add_inode_defrag(struct inode *inode, | ||
| 78 | struct inode_defrag *defrag) | ||
| 79 | { | ||
| 80 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 81 | struct inode_defrag *entry; | ||
| 82 | struct rb_node **p; | ||
| 83 | struct rb_node *parent = NULL; | ||
| 84 | |||
| 85 | p = &root->fs_info->defrag_inodes.rb_node; | ||
| 86 | while (*p) { | ||
| 87 | parent = *p; | ||
| 88 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
| 89 | |||
| 90 | if (defrag->ino < entry->ino) | ||
| 91 | p = &parent->rb_left; | ||
| 92 | else if (defrag->ino > entry->ino) | ||
| 93 | p = &parent->rb_right; | ||
| 94 | else { | ||
| 95 | /* if we're reinserting an entry for | ||
| 96 | * an old defrag run, make sure to | ||
| 97 | * lower the transid of our existing record | ||
| 98 | */ | ||
| 99 | if (defrag->transid < entry->transid) | ||
| 100 | entry->transid = defrag->transid; | ||
| 101 | if (defrag->last_offset > entry->last_offset) | ||
| 102 | entry->last_offset = defrag->last_offset; | ||
| 103 | goto exists; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | BTRFS_I(inode)->in_defrag = 1; | ||
| 107 | rb_link_node(&defrag->rb_node, parent, p); | ||
| 108 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | ||
| 109 | return 0; | ||
| 110 | |||
| 111 | exists: | ||
| 112 | kfree(defrag); | ||
| 113 | return 0; | ||
| 114 | |||
| 115 | } | ||
| 116 | |||
| 117 | /* | ||
| 118 | * insert a defrag record for this inode if auto defrag is | ||
| 119 | * enabled | ||
| 120 | */ | ||
| 121 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | ||
| 122 | struct inode *inode) | ||
| 123 | { | ||
| 124 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 125 | struct inode_defrag *defrag; | ||
| 126 | int ret = 0; | ||
| 127 | u64 transid; | ||
| 128 | |||
| 129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | ||
| 130 | return 0; | ||
| 131 | |||
| 132 | if (root->fs_info->closing) | ||
| 133 | return 0; | ||
| 134 | |||
| 135 | if (BTRFS_I(inode)->in_defrag) | ||
| 136 | return 0; | ||
| 137 | |||
| 138 | if (trans) | ||
| 139 | transid = trans->transid; | ||
| 140 | else | ||
| 141 | transid = BTRFS_I(inode)->root->last_trans; | ||
| 142 | |||
| 143 | defrag = kzalloc(sizeof(*defrag), GFP_NOFS); | ||
| 144 | if (!defrag) | ||
| 145 | return -ENOMEM; | ||
| 146 | |||
| 147 | defrag->ino = inode->i_ino; | ||
| 148 | defrag->transid = transid; | ||
| 149 | defrag->root = root->root_key.objectid; | ||
| 150 | |||
| 151 | spin_lock(&root->fs_info->defrag_inodes_lock); | ||
| 152 | if (!BTRFS_I(inode)->in_defrag) | ||
| 153 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
| 154 | spin_unlock(&root->fs_info->defrag_inodes_lock); | ||
| 155 | return ret; | ||
| 156 | } | ||
| 157 | |||
| 158 | /* | ||
| 159 | * must be called with the defrag_inodes lock held | ||
| 160 | */ | ||
| 161 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino, | ||
| 162 | struct rb_node **next) | ||
| 163 | { | ||
| 164 | struct inode_defrag *entry = NULL; | ||
| 165 | struct rb_node *p; | ||
| 166 | struct rb_node *parent = NULL; | ||
| 167 | |||
| 168 | p = info->defrag_inodes.rb_node; | ||
| 169 | while (p) { | ||
| 170 | parent = p; | ||
| 171 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
| 172 | |||
| 173 | if (ino < entry->ino) | ||
| 174 | p = parent->rb_left; | ||
| 175 | else if (ino > entry->ino) | ||
| 176 | p = parent->rb_right; | ||
| 177 | else | ||
| 178 | return entry; | ||
| 179 | } | ||
| 180 | |||
| 181 | if (next) { | ||
| 182 | while (parent && ino > entry->ino) { | ||
| 183 | parent = rb_next(parent); | ||
| 184 | entry = rb_entry(parent, struct inode_defrag, rb_node); | ||
| 185 | } | ||
| 186 | *next = parent; | ||
| 187 | } | ||
| 188 | return NULL; | ||
| 189 | } | ||
| 190 | |||
| 191 | /* | ||
| 192 | * run through the list of inodes in the FS that need | ||
| 193 | * defragging | ||
| 194 | */ | ||
| 195 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
| 196 | { | ||
| 197 | struct inode_defrag *defrag; | ||
| 198 | struct btrfs_root *inode_root; | ||
| 199 | struct inode *inode; | ||
| 200 | struct rb_node *n; | ||
| 201 | struct btrfs_key key; | ||
| 202 | struct btrfs_ioctl_defrag_range_args range; | ||
| 203 | u64 first_ino = 0; | ||
| 204 | int num_defrag; | ||
| 205 | int defrag_batch = 1024; | ||
| 206 | |||
| 207 | memset(&range, 0, sizeof(range)); | ||
| 208 | range.len = (u64)-1; | ||
| 209 | |||
| 210 | atomic_inc(&fs_info->defrag_running); | ||
| 211 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 212 | while(1) { | ||
| 213 | n = NULL; | ||
| 214 | |||
| 215 | /* find an inode to defrag */ | ||
| 216 | defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n); | ||
| 217 | if (!defrag) { | ||
| 218 | if (n) | ||
| 219 | defrag = rb_entry(n, struct inode_defrag, rb_node); | ||
| 220 | else if (first_ino) { | ||
| 221 | first_ino = 0; | ||
| 222 | continue; | ||
| 223 | } else { | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 228 | /* remove it from the rbtree */ | ||
| 229 | first_ino = defrag->ino + 1; | ||
| 230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | ||
| 231 | |||
| 232 | if (fs_info->closing) | ||
| 233 | goto next_free; | ||
| 234 | |||
| 235 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 236 | |||
| 237 | /* get the inode */ | ||
| 238 | key.objectid = defrag->root; | ||
| 239 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 240 | key.offset = (u64)-1; | ||
| 241 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 242 | if (IS_ERR(inode_root)) | ||
| 243 | goto next; | ||
| 244 | |||
| 245 | key.objectid = defrag->ino; | ||
| 246 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 247 | key.offset = 0; | ||
| 248 | |||
| 249 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
| 250 | if (IS_ERR(inode)) | ||
| 251 | goto next; | ||
| 252 | |||
| 253 | /* do a chunk of defrag */ | ||
| 254 | BTRFS_I(inode)->in_defrag = 0; | ||
| 255 | range.start = defrag->last_offset; | ||
| 256 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
| 257 | defrag_batch); | ||
| 258 | /* | ||
| 259 | * if we filled the whole defrag batch, there | ||
| 260 | * must be more work to do. Queue this defrag | ||
| 261 | * again | ||
| 262 | */ | ||
| 263 | if (num_defrag == defrag_batch) { | ||
| 264 | defrag->last_offset = range.start; | ||
| 265 | __btrfs_add_inode_defrag(inode, defrag); | ||
| 266 | /* | ||
| 267 | * we don't want to kfree defrag, we added it back to | ||
| 268 | * the rbtree | ||
| 269 | */ | ||
| 270 | defrag = NULL; | ||
| 271 | } else if (defrag->last_offset && !defrag->cycled) { | ||
| 272 | /* | ||
| 273 | * we didn't fill our defrag batch, but | ||
| 274 | * we didn't start at zero. Make sure we loop | ||
| 275 | * around to the start of the file. | ||
| 276 | */ | ||
| 277 | defrag->last_offset = 0; | ||
| 278 | defrag->cycled = 1; | ||
| 279 | __btrfs_add_inode_defrag(inode, defrag); | ||
| 280 | defrag = NULL; | ||
| 281 | } | ||
| 282 | |||
| 283 | iput(inode); | ||
| 284 | next: | ||
| 285 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 286 | next_free: | ||
| 287 | kfree(defrag); | ||
| 288 | } | ||
| 289 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 290 | |||
| 291 | atomic_dec(&fs_info->defrag_running); | ||
| 292 | |||
| 293 | /* | ||
| 294 | * during unmount, we use the transaction_wait queue to | ||
| 295 | * wait for the defragger to stop | ||
| 296 | */ | ||
| 297 | wake_up(&fs_info->transaction_wait); | ||
| 298 | return 0; | ||
| 299 | } | ||
| 43 | 300 | ||
| 44 | /* simple helper to fault in pages and copy. This should go away | 301 | /* simple helper to fault in pages and copy. This should go away |
| 45 | * and be replaced with calls into generic code. | 302 | * and be replaced with calls into generic code. |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d378f8b70ef7..bb51bb1fa44f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -342,6 +342,10 @@ static noinline int compress_file_range(struct inode *inode, | |||
| 342 | int will_compress; | 342 | int will_compress; |
| 343 | int compress_type = root->fs_info->compress_type; | 343 | int compress_type = root->fs_info->compress_type; |
| 344 | 344 | ||
| 345 | /* if this is a small write inside eof, kick off a defragbot */ | ||
| 346 | if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) | ||
| 347 | btrfs_add_inode_defrag(NULL, inode); | ||
| 348 | |||
| 345 | actual_end = min_t(u64, isize, end + 1); | 349 | actual_end = min_t(u64, isize, end + 1); |
| 346 | again: | 350 | again: |
| 347 | will_compress = 0; | 351 | will_compress = 0; |
| @@ -799,6 +803,10 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 799 | disk_num_bytes = num_bytes; | 803 | disk_num_bytes = num_bytes; |
| 800 | ret = 0; | 804 | ret = 0; |
| 801 | 805 | ||
| 806 | /* if this is a small write inside eof, kick off defrag */ | ||
| 807 | if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024) | ||
| 808 | btrfs_add_inode_defrag(trans, inode); | ||
| 809 | |||
| 802 | if (start == 0) { | 810 | if (start == 0) { |
| 803 | /* lets try to make an inline extent */ | 811 | /* lets try to make an inline extent */ |
| 804 | ret = cow_file_range_inline(trans, root, inode, | 812 | ret = cow_file_range_inline(trans, root, inode, |
| @@ -5371,6 +5379,9 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
| 5371 | if (IS_ERR(trans)) | 5379 | if (IS_ERR(trans)) |
| 5372 | return ERR_CAST(trans); | 5380 | return ERR_CAST(trans); |
| 5373 | 5381 | ||
| 5382 | if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024) | ||
| 5383 | btrfs_add_inode_defrag(trans, inode); | ||
| 5384 | |||
| 5374 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 5385 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
| 5375 | 5386 | ||
| 5376 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 5387 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
| @@ -6682,6 +6693,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 6682 | ei->ordered_data_close = 0; | 6693 | ei->ordered_data_close = 0; |
| 6683 | ei->orphan_meta_reserved = 0; | 6694 | ei->orphan_meta_reserved = 0; |
| 6684 | ei->dummy_inode = 0; | 6695 | ei->dummy_inode = 0; |
| 6696 | ei->in_defrag = 0; | ||
| 6685 | ei->force_compress = BTRFS_COMPRESS_NONE; | 6697 | ei->force_compress = BTRFS_COMPRESS_NONE; |
| 6686 | 6698 | ||
| 6687 | ei->delayed_node = NULL; | 6699 | ei->delayed_node = NULL; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c4f17e4e2c9c..85e818ce00c5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -656,6 +656,106 @@ out_unlock: | |||
| 656 | return error; | 656 | return error; |
| 657 | } | 657 | } |
| 658 | 658 | ||
| 659 | /* | ||
| 660 | * When we're defragging a range, we don't want to kick it off again | ||
| 661 | * if it is really just waiting for delalloc to send it down. | ||
| 662 | * If we find a nice big extent or delalloc range for the bytes in the | ||
| 663 | * file you want to defrag, we return 0 to let you know to skip this | ||
| 664 | * part of the file | ||
| 665 | */ | ||
| 666 | static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) | ||
| 667 | { | ||
| 668 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 669 | struct extent_map *em = NULL; | ||
| 670 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 671 | u64 end; | ||
| 672 | |||
| 673 | read_lock(&em_tree->lock); | ||
| 674 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | ||
| 675 | read_unlock(&em_tree->lock); | ||
| 676 | |||
| 677 | if (em) { | ||
| 678 | end = extent_map_end(em); | ||
| 679 | free_extent_map(em); | ||
| 680 | if (end - offset > thresh) | ||
| 681 | return 0; | ||
| 682 | } | ||
| 683 | /* if we already have a nice delalloc here, just stop */ | ||
| 684 | thresh /= 2; | ||
| 685 | end = count_range_bits(io_tree, &offset, offset + thresh, | ||
| 686 | thresh, EXTENT_DELALLOC, 1); | ||
| 687 | if (end >= thresh) | ||
| 688 | return 0; | ||
| 689 | return 1; | ||
| 690 | } | ||
| 691 | |||
| 692 | /* | ||
| 693 | * helper function to walk through a file and find extents | ||
| 694 | * newer than a specific transid, and smaller than thresh. | ||
| 695 | * | ||
| 696 | * This is used by the defragging code to find new and small | ||
| 697 | * extents | ||
| 698 | */ | ||
| 699 | static int find_new_extents(struct btrfs_root *root, | ||
| 700 | struct inode *inode, u64 newer_than, | ||
| 701 | u64 *off, int thresh) | ||
| 702 | { | ||
| 703 | struct btrfs_path *path; | ||
| 704 | struct btrfs_key min_key; | ||
| 705 | struct btrfs_key max_key; | ||
| 706 | struct extent_buffer *leaf; | ||
| 707 | struct btrfs_file_extent_item *extent; | ||
| 708 | int type; | ||
| 709 | int ret; | ||
| 710 | |||
| 711 | path = btrfs_alloc_path(); | ||
| 712 | if (!path) | ||
| 713 | return -ENOMEM; | ||
| 714 | |||
| 715 | min_key.objectid = inode->i_ino; | ||
| 716 | min_key.type = BTRFS_EXTENT_DATA_KEY; | ||
| 717 | min_key.offset = *off; | ||
| 718 | |||
| 719 | max_key.objectid = inode->i_ino; | ||
| 720 | max_key.type = (u8)-1; | ||
| 721 | max_key.offset = (u64)-1; | ||
| 722 | |||
| 723 | path->keep_locks = 1; | ||
| 724 | |||
| 725 | while(1) { | ||
| 726 | ret = btrfs_search_forward(root, &min_key, &max_key, | ||
| 727 | path, 0, newer_than); | ||
| 728 | if (ret != 0) | ||
| 729 | goto none; | ||
| 730 | if (min_key.objectid != inode->i_ino) | ||
| 731 | goto none; | ||
| 732 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 733 | goto none; | ||
| 734 | |||
| 735 | leaf = path->nodes[0]; | ||
| 736 | extent = btrfs_item_ptr(leaf, path->slots[0], | ||
| 737 | struct btrfs_file_extent_item); | ||
| 738 | |||
| 739 | type = btrfs_file_extent_type(leaf, extent); | ||
| 740 | if (type == BTRFS_FILE_EXTENT_REG && | ||
| 741 | btrfs_file_extent_num_bytes(leaf, extent) < thresh && | ||
| 742 | check_defrag_in_cache(inode, min_key.offset, thresh)) { | ||
| 743 | *off = min_key.offset; | ||
| 744 | btrfs_free_path(path); | ||
| 745 | return 0; | ||
| 746 | } | ||
| 747 | |||
| 748 | if (min_key.offset == (u64)-1) | ||
| 749 | goto none; | ||
| 750 | |||
| 751 | min_key.offset++; | ||
| 752 | btrfs_release_path(path); | ||
| 753 | } | ||
| 754 | none: | ||
| 755 | btrfs_free_path(path); | ||
| 756 | return -ENOENT; | ||
| 757 | } | ||
| 758 | |||
| 659 | static int should_defrag_range(struct inode *inode, u64 start, u64 len, | 759 | static int should_defrag_range(struct inode *inode, u64 start, u64 len, |
| 660 | int thresh, u64 *last_len, u64 *skip, | 760 | int thresh, u64 *last_len, u64 *skip, |
| 661 | u64 *defrag_end) | 761 | u64 *defrag_end) |
| @@ -665,10 +765,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
| 665 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 765 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 666 | int ret = 1; | 766 | int ret = 1; |
| 667 | 767 | ||
| 668 | |||
| 669 | if (thresh == 0) | ||
| 670 | thresh = 256 * 1024; | ||
| 671 | |||
| 672 | /* | 768 | /* |
| 673 | * make sure that once we start defragging and extent, we keep on | 769 | * make sure that once we start defragging and extent, we keep on |
| 674 | * defragging it | 770 | * defragging it |
| @@ -727,27 +823,176 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
| 727 | return ret; | 823 | return ret; |
| 728 | } | 824 | } |
| 729 | 825 | ||
| 730 | static int btrfs_defrag_file(struct file *file, | 826 | /* |
| 731 | struct btrfs_ioctl_defrag_range_args *range) | 827 | * it doesn't do much good to defrag one or two pages |
| 828 | * at a time. This pulls in a nice chunk of pages | ||
| 829 | * to COW and defrag. | ||
| 830 | * | ||
| 831 | * It also makes sure the delalloc code has enough | ||
| 832 | * dirty data to avoid making new small extents as part | ||
| 833 | * of the defrag | ||
| 834 | * | ||
| 835 | * It's a good idea to start RA on this range | ||
| 836 | * before calling this. | ||
| 837 | */ | ||
| 838 | static int cluster_pages_for_defrag(struct inode *inode, | ||
| 839 | struct page **pages, | ||
| 840 | unsigned long start_index, | ||
| 841 | int num_pages) | ||
| 732 | { | 842 | { |
| 733 | struct inode *inode = fdentry(file)->d_inode; | 843 | unsigned long file_end; |
| 734 | struct btrfs_root *root = BTRFS_I(inode)->root; | 844 | u64 isize = i_size_read(inode); |
| 735 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 845 | u64 page_start; |
| 846 | u64 page_end; | ||
| 847 | int ret; | ||
| 848 | int i; | ||
| 849 | int i_done; | ||
| 736 | struct btrfs_ordered_extent *ordered; | 850 | struct btrfs_ordered_extent *ordered; |
| 737 | struct page *page; | 851 | struct extent_state *cached_state = NULL; |
| 852 | |||
| 853 | if (isize == 0) | ||
| 854 | return 0; | ||
| 855 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
| 856 | |||
| 857 | ret = btrfs_delalloc_reserve_space(inode, | ||
| 858 | num_pages << PAGE_CACHE_SHIFT); | ||
| 859 | if (ret) | ||
| 860 | return ret; | ||
| 861 | again: | ||
| 862 | ret = 0; | ||
| 863 | i_done = 0; | ||
| 864 | |||
| 865 | /* step one, lock all the pages */ | ||
| 866 | for (i = 0; i < num_pages; i++) { | ||
| 867 | struct page *page; | ||
| 868 | page = grab_cache_page(inode->i_mapping, | ||
| 869 | start_index + i); | ||
| 870 | if (!page) | ||
| 871 | break; | ||
| 872 | |||
| 873 | if (!PageUptodate(page)) { | ||
| 874 | btrfs_readpage(NULL, page); | ||
| 875 | lock_page(page); | ||
| 876 | if (!PageUptodate(page)) { | ||
| 877 | unlock_page(page); | ||
| 878 | page_cache_release(page); | ||
| 879 | ret = -EIO; | ||
| 880 | break; | ||
| 881 | } | ||
| 882 | } | ||
| 883 | isize = i_size_read(inode); | ||
| 884 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | ||
| 885 | if (!isize || page->index > file_end || | ||
| 886 | page->mapping != inode->i_mapping) { | ||
| 887 | /* whoops, we blew past eof, skip this page */ | ||
| 888 | unlock_page(page); | ||
| 889 | page_cache_release(page); | ||
| 890 | break; | ||
| 891 | } | ||
| 892 | pages[i] = page; | ||
| 893 | i_done++; | ||
| 894 | } | ||
| 895 | if (!i_done || ret) | ||
| 896 | goto out; | ||
| 897 | |||
| 898 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) | ||
| 899 | goto out; | ||
| 900 | |||
| 901 | /* | ||
| 902 | * so now we have a nice long stream of locked | ||
| 903 | * and up to date pages, lets wait on them | ||
| 904 | */ | ||
| 905 | for (i = 0; i < i_done; i++) | ||
| 906 | wait_on_page_writeback(pages[i]); | ||
| 907 | |||
| 908 | page_start = page_offset(pages[0]); | ||
| 909 | page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; | ||
| 910 | |||
| 911 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | ||
| 912 | page_start, page_end - 1, 0, &cached_state, | ||
| 913 | GFP_NOFS); | ||
| 914 | ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1); | ||
| 915 | if (ordered && | ||
| 916 | ordered->file_offset + ordered->len > page_start && | ||
| 917 | ordered->file_offset < page_end) { | ||
| 918 | btrfs_put_ordered_extent(ordered); | ||
| 919 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
| 920 | page_start, page_end - 1, | ||
| 921 | &cached_state, GFP_NOFS); | ||
| 922 | for (i = 0; i < i_done; i++) { | ||
| 923 | unlock_page(pages[i]); | ||
| 924 | page_cache_release(pages[i]); | ||
| 925 | } | ||
| 926 | btrfs_wait_ordered_range(inode, page_start, | ||
| 927 | page_end - page_start); | ||
| 928 | goto again; | ||
| 929 | } | ||
| 930 | if (ordered) | ||
| 931 | btrfs_put_ordered_extent(ordered); | ||
| 932 | |||
| 933 | clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, | ||
| 934 | page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 935 | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, | ||
| 936 | GFP_NOFS); | ||
| 937 | |||
| 938 | if (i_done != num_pages) { | ||
| 939 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
| 940 | btrfs_delalloc_release_space(inode, | ||
| 941 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | ||
| 942 | } | ||
| 943 | |||
| 944 | |||
| 945 | btrfs_set_extent_delalloc(inode, page_start, page_end - 1, | ||
| 946 | &cached_state); | ||
| 947 | |||
| 948 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | ||
| 949 | page_start, page_end - 1, &cached_state, | ||
| 950 | GFP_NOFS); | ||
| 951 | |||
| 952 | for (i = 0; i < i_done; i++) { | ||
| 953 | clear_page_dirty_for_io(pages[i]); | ||
| 954 | ClearPageChecked(pages[i]); | ||
| 955 | set_page_extent_mapped(pages[i]); | ||
| 956 | set_page_dirty(pages[i]); | ||
| 957 | unlock_page(pages[i]); | ||
| 958 | page_cache_release(pages[i]); | ||
| 959 | } | ||
| 960 | return i_done; | ||
| 961 | out: | ||
| 962 | for (i = 0; i < i_done; i++) { | ||
| 963 | unlock_page(pages[i]); | ||
| 964 | page_cache_release(pages[i]); | ||
| 965 | } | ||
| 966 | btrfs_delalloc_release_space(inode, num_pages << PAGE_CACHE_SHIFT); | ||
| 967 | return ret; | ||
| 968 | |||
| 969 | } | ||
| 970 | |||
| 971 | int btrfs_defrag_file(struct inode *inode, struct file *file, | ||
| 972 | struct btrfs_ioctl_defrag_range_args *range, | ||
| 973 | u64 newer_than, unsigned long max_to_defrag) | ||
| 974 | { | ||
| 975 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 738 | struct btrfs_super_block *disk_super; | 976 | struct btrfs_super_block *disk_super; |
| 977 | struct file_ra_state *ra = NULL; | ||
| 739 | unsigned long last_index; | 978 | unsigned long last_index; |
| 740 | unsigned long ra_pages = root->fs_info->bdi.ra_pages; | ||
| 741 | unsigned long total_read = 0; | ||
| 742 | u64 features; | 979 | u64 features; |
| 743 | u64 page_start; | ||
| 744 | u64 page_end; | ||
| 745 | u64 last_len = 0; | 980 | u64 last_len = 0; |
| 746 | u64 skip = 0; | 981 | u64 skip = 0; |
| 747 | u64 defrag_end = 0; | 982 | u64 defrag_end = 0; |
| 983 | u64 newer_off = range->start; | ||
| 984 | int newer_left = 0; | ||
| 748 | unsigned long i; | 985 | unsigned long i; |
| 749 | int ret; | 986 | int ret; |
| 987 | int defrag_count = 0; | ||
| 750 | int compress_type = BTRFS_COMPRESS_ZLIB; | 988 | int compress_type = BTRFS_COMPRESS_ZLIB; |
| 989 | int extent_thresh = range->extent_thresh; | ||
| 990 | int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; | ||
| 991 | u64 new_align = ~((u64)128 * 1024 - 1); | ||
| 992 | struct page **pages = NULL; | ||
| 993 | |||
| 994 | if (extent_thresh == 0) | ||
| 995 | extent_thresh = 256 * 1024; | ||
| 751 | 996 | ||
| 752 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { | 997 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { |
| 753 | if (range->compress_type > BTRFS_COMPRESS_TYPES) | 998 | if (range->compress_type > BTRFS_COMPRESS_TYPES) |
| @@ -759,6 +1004,27 @@ static int btrfs_defrag_file(struct file *file, | |||
| 759 | if (inode->i_size == 0) | 1004 | if (inode->i_size == 0) |
| 760 | return 0; | 1005 | return 0; |
| 761 | 1006 | ||
| 1007 | /* | ||
| 1008 | * if we were not given a file, allocate a readahead | ||
| 1009 | * context | ||
| 1010 | */ | ||
| 1011 | if (!file) { | ||
| 1012 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | ||
| 1013 | if (!ra) | ||
| 1014 | return -ENOMEM; | ||
| 1015 | file_ra_state_init(ra, inode->i_mapping); | ||
| 1016 | } else { | ||
| 1017 | ra = &file->f_ra; | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | pages = kmalloc(sizeof(struct page *) * newer_cluster, | ||
| 1021 | GFP_NOFS); | ||
| 1022 | if (!pages) { | ||
| 1023 | ret = -ENOMEM; | ||
| 1024 | goto out_ra; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | /* find the last page to defrag */ | ||
| 762 | if (range->start + range->len > range->start) { | 1028 | if (range->start + range->len > range->start) { |
| 763 | last_index = min_t(u64, inode->i_size - 1, | 1029 | last_index = min_t(u64, inode->i_size - 1, |
| 764 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; | 1030 | range->start + range->len - 1) >> PAGE_CACHE_SHIFT; |
| @@ -766,11 +1032,37 @@ static int btrfs_defrag_file(struct file *file, | |||
| 766 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | 1032 | last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; |
| 767 | } | 1033 | } |
| 768 | 1034 | ||
| 769 | i = range->start >> PAGE_CACHE_SHIFT; | 1035 | if (newer_than) { |
| 770 | while (i <= last_index) { | 1036 | ret = find_new_extents(root, inode, newer_than, |
| 771 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | 1037 | &newer_off, 64 * 1024); |
| 1038 | if (!ret) { | ||
| 1039 | range->start = newer_off; | ||
| 1040 | /* | ||
| 1041 | * we always align our defrag to help keep | ||
| 1042 | * the extents in the file evenly spaced | ||
| 1043 | */ | ||
| 1044 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | ||
| 1045 | newer_left = newer_cluster; | ||
| 1046 | } else | ||
| 1047 | goto out_ra; | ||
| 1048 | } else { | ||
| 1049 | i = range->start >> PAGE_CACHE_SHIFT; | ||
| 1050 | } | ||
| 1051 | if (!max_to_defrag) | ||
| 1052 | max_to_defrag = last_index - 1; | ||
| 1053 | |||
| 1054 | while (i <= last_index && defrag_count < max_to_defrag) { | ||
| 1055 | /* | ||
| 1056 | * make sure we stop running if someone unmounts | ||
| 1057 | * the FS | ||
| 1058 | */ | ||
| 1059 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) | ||
| 1060 | break; | ||
| 1061 | |||
| 1062 | if (!newer_than && | ||
| 1063 | !should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | ||
| 772 | PAGE_CACHE_SIZE, | 1064 | PAGE_CACHE_SIZE, |
| 773 | range->extent_thresh, | 1065 | extent_thresh, |
| 774 | &last_len, &skip, | 1066 | &last_len, &skip, |
| 775 | &defrag_end)) { | 1067 | &defrag_end)) { |
| 776 | unsigned long next; | 1068 | unsigned long next; |
| @@ -782,92 +1074,39 @@ static int btrfs_defrag_file(struct file *file, | |||
| 782 | i = max(i + 1, next); | 1074 | i = max(i + 1, next); |
| 783 | continue; | 1075 | continue; |
| 784 | } | 1076 | } |
| 785 | |||
| 786 | if (total_read % ra_pages == 0) { | ||
| 787 | btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, | ||
| 788 | min(last_index, i + ra_pages - 1)); | ||
| 789 | } | ||
| 790 | total_read++; | ||
| 791 | mutex_lock(&inode->i_mutex); | ||
| 792 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 1077 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
| 793 | BTRFS_I(inode)->force_compress = compress_type; | 1078 | BTRFS_I(inode)->force_compress = compress_type; |
| 794 | 1079 | ||
| 795 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 1080 | btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster); |
| 796 | if (ret) | ||
| 797 | goto err_unlock; | ||
| 798 | again: | ||
| 799 | if (inode->i_size == 0 || | ||
| 800 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | ||
| 801 | ret = 0; | ||
| 802 | goto err_reservations; | ||
| 803 | } | ||
| 804 | 1081 | ||
| 805 | page = grab_cache_page(inode->i_mapping, i); | 1082 | ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster); |
| 806 | if (!page) { | 1083 | if (ret < 0) |
| 807 | ret = -ENOMEM; | 1084 | goto out_ra; |
| 808 | goto err_reservations; | ||
| 809 | } | ||
| 810 | |||
| 811 | if (!PageUptodate(page)) { | ||
| 812 | btrfs_readpage(NULL, page); | ||
| 813 | lock_page(page); | ||
| 814 | if (!PageUptodate(page)) { | ||
| 815 | unlock_page(page); | ||
| 816 | page_cache_release(page); | ||
| 817 | ret = -EIO; | ||
| 818 | goto err_reservations; | ||
| 819 | } | ||
| 820 | } | ||
| 821 | |||
| 822 | if (page->mapping != inode->i_mapping) { | ||
| 823 | unlock_page(page); | ||
| 824 | page_cache_release(page); | ||
| 825 | goto again; | ||
| 826 | } | ||
| 827 | |||
| 828 | wait_on_page_writeback(page); | ||
| 829 | 1085 | ||
| 830 | if (PageDirty(page)) { | 1086 | defrag_count += ret; |
| 831 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 1087 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); |
| 832 | goto loop_unlock; | 1088 | i += ret; |
| 833 | } | ||
| 834 | 1089 | ||
| 835 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 1090 | if (newer_than) { |
| 836 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1091 | if (newer_off == (u64)-1) |
| 837 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 1092 | break; |
| 838 | 1093 | ||
| 839 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1094 | newer_off = max(newer_off + 1, |
| 840 | if (ordered) { | 1095 | (u64)i << PAGE_CACHE_SHIFT); |
| 841 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 1096 | |
| 842 | unlock_page(page); | 1097 | ret = find_new_extents(root, inode, |
| 843 | page_cache_release(page); | 1098 | newer_than, &newer_off, |
| 844 | btrfs_start_ordered_extent(inode, ordered, 1); | 1099 | 64 * 1024); |
| 845 | btrfs_put_ordered_extent(ordered); | 1100 | if (!ret) { |
| 846 | goto again; | 1101 | range->start = newer_off; |
| 1102 | i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; | ||
| 1103 | newer_left = newer_cluster; | ||
| 1104 | } else { | ||
| 1105 | break; | ||
| 1106 | } | ||
| 1107 | } else { | ||
| 1108 | i++; | ||
| 847 | } | 1109 | } |
| 848 | set_page_extent_mapped(page); | ||
| 849 | |||
| 850 | /* | ||
| 851 | * this makes sure page_mkwrite is called on the | ||
| 852 | * page if it is dirtied again later | ||
| 853 | */ | ||
| 854 | clear_page_dirty_for_io(page); | ||
| 855 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, | ||
| 856 | page_end, EXTENT_DIRTY | EXTENT_DELALLOC | | ||
| 857 | EXTENT_DO_ACCOUNTING, GFP_NOFS); | ||
| 858 | |||
| 859 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
| 860 | ClearPageChecked(page); | ||
| 861 | set_page_dirty(page); | ||
| 862 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 863 | |||
| 864 | loop_unlock: | ||
| 865 | unlock_page(page); | ||
| 866 | page_cache_release(page); | ||
| 867 | mutex_unlock(&inode->i_mutex); | ||
| 868 | |||
| 869 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | ||
| 870 | i++; | ||
| 871 | } | 1110 | } |
| 872 | 1111 | ||
| 873 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1112 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) |
| @@ -899,12 +1138,14 @@ loop_unlock: | |||
| 899 | btrfs_set_super_incompat_flags(disk_super, features); | 1138 | btrfs_set_super_incompat_flags(disk_super, features); |
| 900 | } | 1139 | } |
| 901 | 1140 | ||
| 902 | return 0; | 1141 | if (!file) |
| 1142 | kfree(ra); | ||
| 1143 | return defrag_count; | ||
| 903 | 1144 | ||
| 904 | err_reservations: | 1145 | out_ra: |
| 905 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 1146 | if (!file) |
| 906 | err_unlock: | 1147 | kfree(ra); |
| 907 | mutex_unlock(&inode->i_mutex); | 1148 | kfree(pages); |
| 908 | return ret; | 1149 | return ret; |
| 909 | } | 1150 | } |
| 910 | 1151 | ||
| @@ -1756,7 +1997,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1756 | /* the rest are all set to zero by kzalloc */ | 1997 | /* the rest are all set to zero by kzalloc */ |
| 1757 | range->len = (u64)-1; | 1998 | range->len = (u64)-1; |
| 1758 | } | 1999 | } |
| 1759 | ret = btrfs_defrag_file(file, range); | 2000 | ret = btrfs_defrag_file(fdentry(file)->d_inode, file, |
| 2001 | range, 0, 0); | ||
| 2002 | if (ret > 0) | ||
| 2003 | ret = 0; | ||
| 1760 | kfree(range); | 2004 | kfree(range); |
| 1761 | break; | 2005 | break; |
| 1762 | default: | 2006 | default: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index e5e0ee2cad4e..ad1ea789fcb4 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -181,37 +181,6 @@ struct btrfs_ioctl_clone_range_args { | |||
| 181 | #define BTRFS_DEFRAG_RANGE_COMPRESS 1 | 181 | #define BTRFS_DEFRAG_RANGE_COMPRESS 1 |
| 182 | #define BTRFS_DEFRAG_RANGE_START_IO 2 | 182 | #define BTRFS_DEFRAG_RANGE_START_IO 2 |
| 183 | 183 | ||
| 184 | struct btrfs_ioctl_defrag_range_args { | ||
| 185 | /* start of the defrag operation */ | ||
| 186 | __u64 start; | ||
| 187 | |||
| 188 | /* number of bytes to defrag, use (u64)-1 to say all */ | ||
| 189 | __u64 len; | ||
| 190 | |||
| 191 | /* | ||
| 192 | * flags for the operation, which can include turning | ||
| 193 | * on compression for this one defrag | ||
| 194 | */ | ||
| 195 | __u64 flags; | ||
| 196 | |||
| 197 | /* | ||
| 198 | * any extent bigger than this will be considered | ||
| 199 | * already defragged. Use 0 to take the kernel default | ||
| 200 | * Use 1 to say every single extent must be rewritten | ||
| 201 | */ | ||
| 202 | __u32 extent_thresh; | ||
| 203 | |||
| 204 | /* | ||
| 205 | * which compression method to use if turning on compression | ||
| 206 | * for this defrag operation. If unspecified, zlib will | ||
| 207 | * be used | ||
| 208 | */ | ||
| 209 | __u32 compress_type; | ||
| 210 | |||
| 211 | /* spare for later */ | ||
| 212 | __u32 unused[4]; | ||
| 213 | }; | ||
| 214 | |||
| 215 | struct btrfs_ioctl_space_info { | 184 | struct btrfs_ioctl_space_info { |
| 216 | __u64 flags; | 185 | __u64 flags; |
| 217 | __u64 total_bytes; | 186 | __u64 total_bytes; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index cd0c7cd2c8fb..28e3cb2607ff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -160,7 +160,7 @@ enum { | |||
| 160 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, | 160 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
| 161 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 161 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
| 162 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 162 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
| 163 | Opt_enospc_debug, Opt_subvolrootid, Opt_err, | 163 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, |
| 164 | }; | 164 | }; |
| 165 | 165 | ||
| 166 | static match_table_t tokens = { | 166 | static match_table_t tokens = { |
| @@ -191,6 +191,7 @@ static match_table_t tokens = { | |||
| 191 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, | 191 | {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, |
| 192 | {Opt_enospc_debug, "enospc_debug"}, | 192 | {Opt_enospc_debug, "enospc_debug"}, |
| 193 | {Opt_subvolrootid, "subvolrootid=%d"}, | 193 | {Opt_subvolrootid, "subvolrootid=%d"}, |
| 194 | {Opt_defrag, "autodefrag"}, | ||
| 194 | {Opt_err, NULL}, | 195 | {Opt_err, NULL}, |
| 195 | }; | 196 | }; |
| 196 | 197 | ||
| @@ -369,6 +370,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 369 | case Opt_enospc_debug: | 370 | case Opt_enospc_debug: |
| 370 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); | 371 | btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); |
| 371 | break; | 372 | break; |
| 373 | case Opt_defrag: | ||
| 374 | printk(KERN_INFO "btrfs: enabling auto defrag"); | ||
| 375 | btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); | ||
| 376 | break; | ||
| 372 | case Opt_err: | 377 | case Opt_err: |
| 373 | printk(KERN_INFO "btrfs: unrecognized mount option " | 378 | printk(KERN_INFO "btrfs: unrecognized mount option " |
| 374 | "'%s'\n", p); | 379 | "'%s'\n", p); |
