diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 296 |
1 files changed, 1 insertions, 295 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d4ae948606e8..11e1fd59acbd 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -21,21 +21,7 @@ | |||
21 | * mballoc.c contains the multiblocks allocation routines | 21 | * mballoc.c contains the multiblocks allocation routines |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/time.h> | 24 | #include "mballoc.h" |
25 | #include <linux/fs.h> | ||
26 | #include <linux/namei.h> | ||
27 | #include <linux/quotaops.h> | ||
28 | #include <linux/buffer_head.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/swap.h> | ||
31 | #include <linux/proc_fs.h> | ||
32 | #include <linux/pagemap.h> | ||
33 | #include <linux/seq_file.h> | ||
34 | #include <linux/version.h> | ||
35 | #include "ext4_jbd2.h" | ||
36 | #include "ext4.h" | ||
37 | #include "group.h" | ||
38 | |||
39 | /* | 25 | /* |
40 | * MUSTDO: | 26 | * MUSTDO: |
41 | * - test ext4_ext_search_left() and ext4_ext_search_right() | 27 | * - test ext4_ext_search_left() and ext4_ext_search_right() |
@@ -345,286 +331,6 @@ | |||
345 | * | 331 | * |
346 | */ | 332 | */ |
347 | 333 | ||
348 | /* | ||
349 | * with AGGRESSIVE_CHECK allocator runs consistency checks over | ||
350 | * structures. these checks slow things down a lot | ||
351 | */ | ||
352 | #define AGGRESSIVE_CHECK__ | ||
353 | |||
354 | /* | ||
355 | * with DOUBLE_CHECK defined mballoc creates persistent in-core | ||
356 | * bitmaps, maintains and uses them to check for double allocations | ||
357 | */ | ||
358 | #define DOUBLE_CHECK__ | ||
359 | |||
360 | /* | ||
361 | */ | ||
362 | #define MB_DEBUG__ | ||
363 | #ifdef MB_DEBUG | ||
364 | #define mb_debug(fmt, a...) printk(fmt, ##a) | ||
365 | #else | ||
366 | #define mb_debug(fmt, a...) | ||
367 | #endif | ||
368 | |||
369 | /* | ||
370 | * with EXT4_MB_HISTORY mballoc stores last N allocations in memory | ||
371 | * and you can monitor it in /proc/fs/ext4/<dev>/mb_history | ||
372 | */ | ||
373 | #define EXT4_MB_HISTORY | ||
374 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ | ||
375 | #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ | ||
376 | #define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ | ||
377 | #define EXT4_MB_HISTORY_FREE 8 /* free */ | ||
378 | |||
379 | #define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ | ||
380 | EXT4_MB_HISTORY_PREALLOC) | ||
381 | |||
382 | /* | ||
383 | * How long mballoc can look for a best extent (in found extents) | ||
384 | */ | ||
385 | #define MB_DEFAULT_MAX_TO_SCAN 200 | ||
386 | |||
387 | /* | ||
388 | * How long mballoc must look for a best extent | ||
389 | */ | ||
390 | #define MB_DEFAULT_MIN_TO_SCAN 10 | ||
391 | |||
392 | /* | ||
393 | * How many groups mballoc will scan looking for the best chunk | ||
394 | */ | ||
395 | #define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5 | ||
396 | |||
397 | /* | ||
398 | * with 'ext4_mb_stats' allocator will collect stats that will be | ||
399 | * shown at umount. The collecting costs though! | ||
400 | */ | ||
401 | #define MB_DEFAULT_STATS 1 | ||
402 | |||
403 | /* | ||
404 | * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served | ||
405 | * by the stream allocator, whose purpose is to pack requests | ||
406 | * as close to each other as possible to produce smooth I/O traffic. | ||
407 | * We use locality group prealloc space for stream requests. | ||
408 | * We can tune the same via /proc/fs/ext4/<partition>/stream_req | ||
409 | */ | ||
410 | #define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ | ||
411 | |||
412 | /* | ||
413 | * for which requests use 2^N search using buddies | ||
414 | */ | ||
415 | #define MB_DEFAULT_ORDER2_REQS 2 | ||
416 | |||
417 | /* | ||
418 | * default group prealloc size 512 blocks | ||
419 | */ | ||
420 | #define MB_DEFAULT_GROUP_PREALLOC 512 | ||
421 | |||
422 | static struct kmem_cache *ext4_pspace_cachep; | ||
423 | static struct kmem_cache *ext4_ac_cachep; | ||
424 | |||
425 | #ifdef EXT4_BB_MAX_BLOCKS | ||
426 | #undef EXT4_BB_MAX_BLOCKS | ||
427 | #endif | ||
428 | #define EXT4_BB_MAX_BLOCKS 30 | ||
429 | |||
430 | struct ext4_free_metadata { | ||
431 | ext4_group_t group; | ||
432 | unsigned short num; | ||
433 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | ||
434 | struct list_head list; | ||
435 | }; | ||
436 | |||
437 | struct ext4_group_info { | ||
438 | unsigned long bb_state; | ||
439 | unsigned long bb_tid; | ||
440 | struct ext4_free_metadata *bb_md_cur; | ||
441 | unsigned short bb_first_free; | ||
442 | unsigned short bb_free; | ||
443 | unsigned short bb_fragments; | ||
444 | struct list_head bb_prealloc_list; | ||
445 | #ifdef DOUBLE_CHECK | ||
446 | void *bb_bitmap; | ||
447 | #endif | ||
448 | unsigned short bb_counters[]; | ||
449 | }; | ||
450 | |||
451 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | ||
452 | #define EXT4_GROUP_INFO_LOCKED_BIT 1 | ||
453 | |||
454 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | ||
455 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | ||
456 | |||
457 | |||
458 | struct ext4_prealloc_space { | ||
459 | struct list_head pa_inode_list; | ||
460 | struct list_head pa_group_list; | ||
461 | union { | ||
462 | struct list_head pa_tmp_list; | ||
463 | struct rcu_head pa_rcu; | ||
464 | } u; | ||
465 | spinlock_t pa_lock; | ||
466 | atomic_t pa_count; | ||
467 | unsigned pa_deleted; | ||
468 | ext4_fsblk_t pa_pstart; /* phys. block */ | ||
469 | ext4_lblk_t pa_lstart; /* log. block */ | ||
470 | unsigned short pa_len; /* len of preallocated chunk */ | ||
471 | unsigned short pa_free; /* how many blocks are free */ | ||
472 | unsigned short pa_linear; /* consumed in one direction | ||
473 | * strictly, for grp prealloc */ | ||
474 | spinlock_t *pa_obj_lock; | ||
475 | struct inode *pa_inode; /* hack, for history only */ | ||
476 | }; | ||
477 | |||
478 | |||
479 | struct ext4_free_extent { | ||
480 | ext4_lblk_t fe_logical; | ||
481 | ext4_grpblk_t fe_start; | ||
482 | ext4_group_t fe_group; | ||
483 | int fe_len; | ||
484 | }; | ||
485 | |||
486 | /* | ||
487 | * Locality group: | ||
488 | * we try to group all related changes together | ||
489 | * so that writeback can flush/allocate them together as well | ||
490 | */ | ||
491 | struct ext4_locality_group { | ||
492 | /* for allocator */ | ||
493 | struct mutex lg_mutex; /* to serialize allocates */ | ||
494 | struct list_head lg_prealloc_list;/* list of preallocations */ | ||
495 | spinlock_t lg_prealloc_lock; | ||
496 | }; | ||
497 | |||
498 | struct ext4_allocation_context { | ||
499 | struct inode *ac_inode; | ||
500 | struct super_block *ac_sb; | ||
501 | |||
502 | /* original request */ | ||
503 | struct ext4_free_extent ac_o_ex; | ||
504 | |||
505 | /* goal request (after normalization) */ | ||
506 | struct ext4_free_extent ac_g_ex; | ||
507 | |||
508 | /* the best found extent */ | ||
509 | struct ext4_free_extent ac_b_ex; | ||
510 | |||
511 | /* copy of the best found extent taken before preallocation efforts */ | ||
512 | struct ext4_free_extent ac_f_ex; | ||
513 | |||
514 | /* number of iterations done. we have to track to limit searching */ | ||
515 | unsigned long ac_ex_scanned; | ||
516 | __u16 ac_groups_scanned; | ||
517 | __u16 ac_found; | ||
518 | __u16 ac_tail; | ||
519 | __u16 ac_buddy; | ||
520 | __u16 ac_flags; /* allocation hints */ | ||
521 | __u8 ac_status; | ||
522 | __u8 ac_criteria; | ||
523 | __u8 ac_repeats; | ||
524 | __u8 ac_2order; /* if request is to allocate 2^N blocks and | ||
525 | * N > 0, the field stores N, otherwise 0 */ | ||
526 | __u8 ac_op; /* operation, for history only */ | ||
527 | struct page *ac_bitmap_page; | ||
528 | struct page *ac_buddy_page; | ||
529 | struct ext4_prealloc_space *ac_pa; | ||
530 | struct ext4_locality_group *ac_lg; | ||
531 | }; | ||
532 | |||
533 | #define AC_STATUS_CONTINUE 1 | ||
534 | #define AC_STATUS_FOUND 2 | ||
535 | #define AC_STATUS_BREAK 3 | ||
536 | |||
537 | struct ext4_mb_history { | ||
538 | struct ext4_free_extent orig; /* orig allocation */ | ||
539 | struct ext4_free_extent goal; /* goal allocation */ | ||
540 | struct ext4_free_extent result; /* result allocation */ | ||
541 | unsigned pid; | ||
542 | unsigned ino; | ||
543 | __u16 found; /* how many extents have been found */ | ||
544 | __u16 groups; /* how many groups have been scanned */ | ||
545 | __u16 tail; /* what tail broke some buddy */ | ||
546 | __u16 buddy; /* buddy the tail ^^^ broke */ | ||
547 | __u16 flags; | ||
548 | __u8 cr:3; /* which phase the result extent was found at */ | ||
549 | __u8 op:4; | ||
550 | __u8 merged:1; | ||
551 | }; | ||
552 | |||
553 | struct ext4_buddy { | ||
554 | struct page *bd_buddy_page; | ||
555 | void *bd_buddy; | ||
556 | struct page *bd_bitmap_page; | ||
557 | void *bd_bitmap; | ||
558 | struct ext4_group_info *bd_info; | ||
559 | struct super_block *bd_sb; | ||
560 | __u16 bd_blkbits; | ||
561 | ext4_group_t bd_group; | ||
562 | }; | ||
563 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | ||
564 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | ||
565 | |||
566 | #ifndef EXT4_MB_HISTORY | ||
567 | static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) | ||
568 | { | ||
569 | return; | ||
570 | } | ||
571 | #else | ||
572 | static void ext4_mb_store_history(struct ext4_allocation_context *ac); | ||
573 | #endif | ||
574 | |||
575 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | ||
576 | |||
577 | static struct proc_dir_entry *proc_root_ext4; | ||
578 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | ||
579 | |||
580 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | ||
581 | ext4_group_t group); | ||
582 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
583 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
584 | static void ext4_mb_return_to_preallocation(struct inode *inode, | ||
585 | struct ext4_buddy *e4b, sector_t block, | ||
586 | int count); | ||
587 | static void ext4_mb_put_pa(struct ext4_allocation_context *, | ||
588 | struct super_block *, struct ext4_prealloc_space *pa); | ||
589 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | ||
590 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | ||
591 | |||
592 | |||
593 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | ||
594 | { | ||
595 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
596 | |||
597 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
598 | } | ||
599 | |||
600 | static inline void ext4_unlock_group(struct super_block *sb, | ||
601 | ext4_group_t group) | ||
602 | { | ||
603 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
604 | |||
605 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
606 | } | ||
607 | |||
608 | static inline int ext4_is_group_locked(struct super_block *sb, | ||
609 | ext4_group_t group) | ||
610 | { | ||
611 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
612 | |||
613 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, | ||
614 | &(grinfo->bb_state)); | ||
615 | } | ||
616 | |||
617 | static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | ||
618 | struct ext4_free_extent *fex) | ||
619 | { | ||
620 | ext4_fsblk_t block; | ||
621 | |||
622 | block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) | ||
623 | + fex->fe_start | ||
624 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | ||
625 | return block; | ||
626 | } | ||
627 | |||
628 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) | 334 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
629 | { | 335 | { |
630 | #if BITS_PER_LONG == 64 | 336 | #if BITS_PER_LONG == 64 |