diff options
author | Timofey Titovets <nefelim4ag@gmail.com> | 2017-09-28 10:33:37 -0400 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2017-11-01 15:45:36 -0400 |
commit | 17b5a6c17e265ca84fac9c3256ff86c691f04aab (patch) | |
tree | 2eb9c8dab73fa0f881a94c67ff7030adc4ec6630 | |
parent | 4e439a0b184f014a5833fa468af36cc9f59b8fb1 (diff) |
Btrfs: heuristic: add bucket and sample counters and other defines
Add basic defines and structures for data sampling.
Added macros:
- For future sampling algo
- For bucket size
Heuristic workspace:
- Add bucket for storing byte type counters
- Add sample array for storing partial copy of input data range
- Add counter for store current sample size to workspace
Signed-off-by: Timofey Titovets <nefelim4ag@gmail.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ minor coding style fixes, comments updated ]
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- | fs/btrfs/compression.c | 53 |
1 files changed, 52 insertions, 1 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b155542c5e6b..4f5d958c71ad 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -707,8 +707,47 @@ out: | |||
707 | return ret; | 707 | return ret; |
708 | } | 708 | } |
709 | 709 | ||
710 | /* | ||
711 | * Heuristic uses systematic sampling to collect data from the input data | ||
712 | * range, the logic can be tuned by the following constants: | ||
713 | * | ||
714 | * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample | ||
715 | * @SAMPLING_INTERVAL - range from which the sampled data can be collected | ||
716 | */ | ||
717 | #define SAMPLING_READ_SIZE (16) | ||
718 | #define SAMPLING_INTERVAL (256) | ||
719 | |||
720 | /* | ||
721 | * For statistical analysis of the input data we consider bytes that form a | ||
722 | * Galois Field of 256 objects. Each object has an attribute count, ie. how | ||
723 | * many times the object appeared in the sample. | ||
724 | */ | ||
725 | #define BUCKET_SIZE (256) | ||
726 | |||
727 | /* | ||
728 | * The size of the sample is based on a statistical sampling rule of thumb. | ||
729 | * The common way is to perform sampling tests as long as the number of | ||
730 | * elements in each cell is at least 5. | ||
731 | * | ||
732 | * Instead of 5, we choose 32 to obtain more accurate results. | ||
733 | * If the data contain the maximum number of symbols, which is 256, we obtain a | ||
734 | * sample size bound by 8192. | ||
735 | * | ||
736 | * For a sample of at most 8KB of data per data range: 16 consecutive bytes | ||
737 | * from up to 512 locations. | ||
738 | */ | ||
739 | #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \ | ||
740 | SAMPLING_READ_SIZE / SAMPLING_INTERVAL) | ||
741 | |||
742 | struct bucket_item { | ||
743 | u32 count; | ||
744 | }; | ||
710 | 745 | ||
711 | struct heuristic_ws { | 746 | struct heuristic_ws { |
747 | /* Partial copy of input data */ | ||
748 | u8 *sample; | ||
749 | /* Buckets store counters for each byte value */ | ||
750 | struct bucket_item *bucket; | ||
712 | struct list_head list; | 751 | struct list_head list; |
713 | }; | 752 | }; |
714 | 753 | ||
@@ -718,6 +757,8 @@ static void free_heuristic_ws(struct list_head *ws) | |||
718 | 757 | ||
719 | workspace = list_entry(ws, struct heuristic_ws, list); | 758 | workspace = list_entry(ws, struct heuristic_ws, list); |
720 | 759 | ||
760 | kvfree(workspace->sample); | ||
761 | kfree(workspace->bucket); | ||
721 | kfree(workspace); | 762 | kfree(workspace); |
722 | } | 763 | } |
723 | 764 | ||
@@ -729,9 +770,19 @@ static struct list_head *alloc_heuristic_ws(void) | |||
729 | if (!ws) | 770 | if (!ws) |
730 | return ERR_PTR(-ENOMEM); | 771 | return ERR_PTR(-ENOMEM); |
731 | 772 | ||
732 | INIT_LIST_HEAD(&ws->list); | 773 | ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL); |
774 | if (!ws->sample) | ||
775 | goto fail; | ||
776 | |||
777 | ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL); | ||
778 | if (!ws->bucket) | ||
779 | goto fail; | ||
733 | 780 | ||
781 | INIT_LIST_HEAD(&ws->list); | ||
734 | return &ws->list; | 782 | return &ws->list; |
783 | fail: | ||
784 | free_heuristic_ws(&ws->list); | ||
785 | return ERR_PTR(-ENOMEM); | ||
735 | } | 786 | } |
736 | 787 | ||
737 | struct workspaces_list { | 788 | struct workspaces_list { |