diff options
author | Mark Fasheh <mfasheh@suse.com> | 2010-04-05 21:17:14 -0400 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2010-05-05 21:18:07 -0400 |
commit | 6b82021b9e91cd689fdffadbcdb9a42597bbe764 (patch) | |
tree | ac4235e792e74a2e60a41e95d62965b7ed4b3232 | |
parent | 73c8a80003d13be54e2309865030404441075182 (diff) |
ocfs2: increase the default size of local alloc windows
I have observed that the current size of 8M results in fairly heavy
fragmentation on multi-threaded workloads which do lots of writes.
Generally, I can increase the size of local alloc windows and observe a
marked decrease in fragmentation, even up to and beyond window sizes of 512
megabytes. This makes sense for a couple of reasons - a larger local alloc means
more room for reservation windows. On multi-node workloads the larger local
alloc helps as well because we don't have to do window slides as often.
Also, I removed the OCFS2_DEFAULT_LOCAL_ALLOC_SIZE constant as it is no
longer used and the comment above it was out of date.
To test fragmentation, I used a workload which launched 4 threads that did
4k writes into a series of about 140 alternating files.
With resv_level=2, and a 4k/4k file system I observed the following average
fragmentation for various localalloc= parameters:
localalloc= avg. fragmentation
8 48
32 16
64 10
120 7
On larger cluster sizes, the difference is more dramatic.
The new default size tops out at 256M, which we'll only get for cluster
sizes of 32K and above.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
-rw-r--r-- | fs/ocfs2/localalloc.c | 114 | ||||
-rw-r--r-- | fs/ocfs2/localalloc.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 3 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 3 |
5 files changed, 118 insertions, 11 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index e39a3e7146c9..00022aac2e8c 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -75,10 +75,120 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
76 | struct inode *local_alloc_inode); | 76 | struct inode *local_alloc_inode); |
77 | 77 | ||
78 | /* | ||
79 | * ocfs2_la_default_mb() - determine a default size, in megabytes, of | ||
80 | * the local alloc. | ||
81 | * | ||
82 | * Generally, we'd like to pick as large a local alloc as | ||
83 | * possible. Performance on large workloads tends to scale | ||
84 | * proportionally to la size. In addition to that, the reservations | ||
85 | * code functions more efficiently as it can reserve more windows for | ||
86 | * write. | ||
87 | * | ||
88 | * Some things work against us when trying to choose a large local alloc: | ||
89 | * | ||
90 | * - We need to ensure our sizing is picked to leave enough space in | ||
91 | * group descriptors for other allocations (such as block groups, | ||
92 | * etc). Picking default sizes which are a multiple of 4 could help | ||
93 | * - block groups are allocated in 2mb and 4mb chunks. | ||
94 | * | ||
95 | * - Likewise, we don't want to starve other nodes of bits on small | ||
96 | * file systems. This can easily be taken care of by limiting our | ||
97 | * default to a reasonable size (256M) on larger cluster sizes. | ||
98 | * | ||
99 | * - Some file systems can't support very large sizes - 4k and 8k clusters in | ||
100 | * particular are limited to less than 128 and 256 megabytes respectively. | ||
101 | * | ||
102 | * The following reference table shows group descriptor and local | ||
103 | * alloc maximums at various cluster sizes (4k blocksize) | ||
104 | * | ||
105 | * csize: 4K group: 126M la: 121M | ||
106 | * csize: 8K group: 252M la: 243M | ||
107 | * csize: 16K group: 504M la: 486M | ||
108 | * csize: 32K group: 1008M la: 972M | ||
109 | * csize: 64K group: 2016M la: 1944M | ||
110 | * csize: 128K group: 4032M la: 3888M | ||
111 | * csize: 256K group: 8064M la: 7776M | ||
112 | * csize: 512K group: 16128M la: 15552M | ||
113 | * csize: 1024K group: 32256M la: 31104M | ||
114 | */ | ||
115 | #define OCFS2_LA_MAX_DEFAULT_MB 256 | ||
116 | #define OCFS2_LA_OLD_DEFAULT 8 | ||
117 | unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) | ||
118 | { | ||
119 | unsigned int la_mb; | ||
120 | unsigned int gd_mb; | ||
121 | unsigned int megs_per_slot; | ||
122 | struct super_block *sb = osb->sb; | ||
123 | |||
124 | gd_mb = ocfs2_clusters_to_megabytes(osb->sb, | ||
125 | 8 * ocfs2_group_bitmap_size(sb)); | ||
126 | |||
127 | /* | ||
128 | * This takes care of file systems with very small group | ||
129 | * descriptors - 512 byte blocksize at cluster sizes lower | ||
130 | * than 16K and also 1k blocksize with 4k cluster size. | ||
131 | */ | ||
132 | if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) | ||
133 | || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) | ||
134 | return OCFS2_LA_OLD_DEFAULT; | ||
135 | |||
136 | /* | ||
137 | * Leave enough room for some block groups and make the final | ||
138 | * value we work from a multiple of 4. | ||
139 | */ | ||
140 | gd_mb -= 16; | ||
141 | gd_mb &= 0xFFFFFFFB; | ||
142 | |||
143 | la_mb = gd_mb; | ||
144 | |||
145 | /* | ||
146 | * Keep window sizes down to a reasonable default | ||
147 | */ | ||
148 | if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { | ||
149 | /* | ||
150 | * Some clustersize / blocksize combinations will have | ||
151 | * given us a larger than OCFS2_LA_MAX_DEFAULT_MB | ||
152 | * default size, but get poor distribution when | ||
153 | * limited to exactly 256 megabytes. | ||
154 | * | ||
155 | * As an example, 16K clustersize at 4K blocksize | ||
156 | * gives us a cluster group size of 504M. Paring the | ||
157 | * local alloc size down to 256 however, would give us | ||
158 | * only one window and around 200MB left in the | ||
159 | * cluster group. Instead, find the first size below | ||
160 | * 256 which would give us an even distribution. | ||
161 | * | ||
162 | * Larger cluster group sizes actually work out pretty | ||
163 | * well when pared to 256, so we don't have to do this | ||
164 | * for any group that fits more than two | ||
165 | * OCFS2_LA_MAX_DEFAULT_MB windows. | ||
166 | */ | ||
167 | if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) | ||
168 | la_mb = 256; | ||
169 | else { | ||
170 | unsigned int gd_mult = gd_mb; | ||
171 | |||
172 | while (gd_mult > 256) | ||
173 | gd_mult = gd_mult >> 1; | ||
174 | |||
175 | la_mb = gd_mult; | ||
176 | } | ||
177 | } | ||
178 | |||
179 | megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; | ||
180 | megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); | ||
181 | /* Too many nodes, too few disk clusters. */ | ||
182 | if (megs_per_slot < la_mb) | ||
183 | la_mb = megs_per_slot; | ||
184 | |||
185 | return la_mb; | ||
186 | } | ||
187 | |||
78 | void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) | 188 | void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) |
79 | { | 189 | { |
80 | struct super_block *sb = osb->sb; | 190 | struct super_block *sb = osb->sb; |
81 | unsigned int la_default_mb = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 191 | unsigned int la_default_mb = ocfs2_la_default_mb(osb); |
82 | unsigned int la_max_mb; | 192 | unsigned int la_max_mb; |
83 | 193 | ||
84 | la_max_mb = ocfs2_clusters_to_megabytes(sb, | 194 | la_max_mb = ocfs2_clusters_to_megabytes(sb, |
@@ -185,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
185 | osb->local_alloc_bits, (osb->bitmap_cpg - 1)); | 295 | osb->local_alloc_bits, (osb->bitmap_cpg - 1)); |
186 | osb->local_alloc_bits = | 296 | osb->local_alloc_bits = |
187 | ocfs2_megabytes_to_clusters(osb->sb, | 297 | ocfs2_megabytes_to_clusters(osb->sb, |
188 | OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); | 298 | ocfs2_la_default_mb(osb)); |
189 | } | 299 | } |
190 | 300 | ||
191 | /* read the alloc off disk */ | 301 | /* read the alloc off disk */ |
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 04195c67f7c1..1be9b5864460 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h | |||
@@ -31,6 +31,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb); | |||
31 | void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb); | 31 | void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb); |
32 | 32 | ||
33 | void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb); | 33 | void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb); |
34 | unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb); | ||
34 | 35 | ||
35 | int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, | 36 | int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, |
36 | int node_num, | 37 | int node_num, |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index e98c954cf961..09d7aee3dabe 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -342,6 +342,9 @@ struct ocfs2_super | |||
342 | */ | 342 | */ |
343 | unsigned int local_alloc_bits; | 343 | unsigned int local_alloc_bits; |
344 | unsigned int local_alloc_default_bits; | 344 | unsigned int local_alloc_default_bits; |
345 | /* osb_clusters_at_boot can become stale! Do not trust it to | ||
346 | * be up to date. */ | ||
347 | unsigned int osb_clusters_at_boot; | ||
345 | 348 | ||
346 | enum ocfs2_local_alloc_state local_alloc_state; /* protected | 349 | enum ocfs2_local_alloc_state local_alloc_state; /* protected |
347 | * by osb_lock */ | 350 | * by osb_lock */ |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index bb37218a7978..d61a1521b10e 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -283,14 +283,6 @@ | |||
283 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 283 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
284 | 284 | ||
285 | /* | 285 | /* |
286 | * Default local alloc size (in megabytes) | ||
287 | * | ||
288 | * The value chosen should be such that most allocations, including new | ||
289 | * block groups, use local alloc. | ||
290 | */ | ||
291 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | ||
292 | |||
293 | /* | ||
294 | * Inline extended attribute size (in bytes) | 286 | * Inline extended attribute size (in bytes) |
295 | * The value chosen should be aligned to 16 byte boundaries. | 287 | * The value chosen should be aligned to 16 byte boundaries. |
296 | */ | 288 | */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fc839996d052..5745682eb1c0 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1503,7 +1503,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1503 | (unsigned) (osb->osb_commit_interval / HZ)); | 1503 | (unsigned) (osb->osb_commit_interval / HZ)); |
1504 | 1504 | ||
1505 | local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); | 1505 | local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); |
1506 | if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | 1506 | if (local_alloc_megs != ocfs2_la_default_mb(osb)) |
1507 | seq_printf(s, ",localalloc=%d", local_alloc_megs); | 1507 | seq_printf(s, ",localalloc=%d", local_alloc_megs); |
1508 | 1508 | ||
1509 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 1509 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
@@ -2251,6 +2251,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2251 | } | 2251 | } |
2252 | 2252 | ||
2253 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; | 2253 | osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; |
2254 | osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters; | ||
2254 | iput(inode); | 2255 | iput(inode); |
2255 | 2256 | ||
2256 | osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; | 2257 | osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; |