aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Fasheh <mfasheh@suse.com>2010-04-05 21:17:14 -0400
committerJoel Becker <joel.becker@oracle.com>2010-05-05 21:18:07 -0400
commit6b82021b9e91cd689fdffadbcdb9a42597bbe764 (patch)
treeac4235e792e74a2e60a41e95d62965b7ed4b3232
parent73c8a80003d13be54e2309865030404441075182 (diff)
ocfs2: increase the default size of local alloc windows
I have observed that the current size of 8M gives us pretty poor fragmentation on multi-threaded workloads which do lots of writes. Generally, I can increase the size of local alloc windows and observe a marked decrease in fragmentation, even up and beyond window sizes of 512 megabytes. This makes sense for a couple reasons - larger local alloc means more room for reservation windows. On multi-node workloads the larger local alloc helps as well because we don't have to do window slides as often. Also, I removed the OCFS2_DEFAULT_LOCAL_ALLOC_SIZE constant as it is no longer used and the comment above it was out of date. To test fragmentation, I used a workload which launched 4 threads that did 4k writes into a series of about 140 alternating files. With resv_level=2, and a 4k/4k file system I observed the following average fragmentation for various localalloc= parameters: localalloc= avg. fragmentation 8 48 32 16 64 10 120 7 On larger cluster sizes, the difference is more dramatic. The new default size top out at 256M, which we'll only get for cluster sizes of 32K and above. Signed-off-by: Mark Fasheh <mfasheh@suse.com> Signed-off-by: Joel Becker <joel.becker@oracle.com>
-rw-r--r--fs/ocfs2/localalloc.c114
-rw-r--r--fs/ocfs2/localalloc.h1
-rw-r--r--fs/ocfs2/ocfs2.h3
-rw-r--r--fs/ocfs2/ocfs2_fs.h8
-rw-r--r--fs/ocfs2/super.c3
5 files changed, 118 insertions, 11 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index e39a3e7146c9..00022aac2e8c 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -75,10 +75,120 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
76 struct inode *local_alloc_inode); 76 struct inode *local_alloc_inode);
77 77
78/*
79 * ocfs2_la_default_mb() - determine a default size, in megabytes of
80 * the local alloc.
81 *
82 * Generally, we'd like to pick as large a local alloc as
83 * possible. Performance on large workloads tends to scale
84 * proportionally to la size. In addition to that, the reservations
85 * code functions more efficiently as it can reserve more windows for
86 * write.
87 *
88 * Some things work against us when trying to choose a large local alloc:
89 *
90 * - We need to ensure our sizing is picked to leave enough space in
91 * group descriptors for other allocations (such as block groups,
92 * etc). Picking default sizes which are a multiple of 4 could help
93 * - block groups are allocated in 2mb and 4mb chunks.
94 *
95 * - Likewise, we don't want to starve other nodes of bits on small
96 * file systems. This can easily be taken care of by limiting our
97 * default to a reasonable size (256M) on larger cluster sizes.
98 *
99 * - Some file systems can't support very large sizes - 4k and 8k in
100 * particular are limited to less than 128 and 256 megabytes respectively.
101 *
102 * The following reference table shows group descriptor and local
103 * alloc maximums at various cluster sizes (4k blocksize)
104 *
105 * csize: 4K group: 126M la: 121M
106 * csize: 8K group: 252M la: 243M
107 * csize: 16K group: 504M la: 486M
108 * csize: 32K group: 1008M la: 972M
109 * csize: 64K group: 2016M la: 1944M
110 * csize: 128K group: 4032M la: 3888M
111 * csize: 256K group: 8064M la: 7776M
112 * csize: 512K group: 16128M la: 15552M
113 * csize: 1024K group: 32256M la: 31104M
114 */
115#define OCFS2_LA_MAX_DEFAULT_MB 256
116#define OCFS2_LA_OLD_DEFAULT 8
117unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{
119 unsigned int la_mb;
120 unsigned int gd_mb;
121 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb;
123
124 gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
125 8 * ocfs2_group_bitmap_size(sb));
126
127 /*
128 * This takes care of files systems with very small group
129 * descriptors - 512 byte blocksize at cluster sizes lower
130 * than 16K and also 1k blocksize with 4k cluster size.
131 */
132 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
133 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
134 return OCFS2_LA_OLD_DEFAULT;
135
136 /*
137 * Leave enough room for some block groups and make the final
138 * value we work from a multiple of 4.
139 */
140 gd_mb -= 16;
141 gd_mb &= 0xFFFFFFFB;
142
143 la_mb = gd_mb;
144
145 /*
146 * Keep window sizes down to a reasonable default
147 */
148 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
149 /*
150 * Some clustersize / blocksize combinations will have
151 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
152 * default size, but get poor distribution when
153 * limited to exactly 256 megabytes.
154 *
155 * As an example, 16K clustersize at 4K blocksize
156 * gives us a cluster group size of 504M. Paring the
157 * local alloc size down to 256 however, would give us
158 * only one window and around 200MB left in the
159 * cluster group. Instead, find the first size below
160 * 256 which would give us an even distribution.
161 *
162 * Larger cluster group sizes actually work out pretty
163 * well when pared to 256, so we don't have to do this
164 * for any group that fits more than two
165 * OCFS2_LA_MAX_DEFAULT_MB windows.
166 */
167 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
168 la_mb = 256;
169 else {
170 unsigned int gd_mult = gd_mb;
171
172 while (gd_mult > 256)
173 gd_mult = gd_mult >> 1;
174
175 la_mb = gd_mult;
176 }
177 }
178
179 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
180 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
181 /* Too many nodes, too few disk clusters. */
182 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot;
184
185 return la_mb;
186}
187
78void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 188void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
79{ 189{
80 struct super_block *sb = osb->sb; 190 struct super_block *sb = osb->sb;
81 unsigned int la_default_mb = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 191 unsigned int la_default_mb = ocfs2_la_default_mb(osb);
82 unsigned int la_max_mb; 192 unsigned int la_max_mb;
83 193
84 la_max_mb = ocfs2_clusters_to_megabytes(sb, 194 la_max_mb = ocfs2_clusters_to_megabytes(sb,
@@ -185,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
185 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 295 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
186 osb->local_alloc_bits = 296 osb->local_alloc_bits =
187 ocfs2_megabytes_to_clusters(osb->sb, 297 ocfs2_megabytes_to_clusters(osb->sb,
188 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 298 ocfs2_la_default_mb(osb));
189 } 299 }
190 300
191 /* read the alloc off disk */ 301 /* read the alloc off disk */
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 04195c67f7c1..1be9b5864460 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -31,6 +31,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb);
31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb); 31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
32 32
33void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb); 33void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb);
34unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb);
34 35
35int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 36int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
36 int node_num, 37 int node_num,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e98c954cf961..09d7aee3dabe 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -342,6 +342,9 @@ struct ocfs2_super
342 */ 342 */
343 unsigned int local_alloc_bits; 343 unsigned int local_alloc_bits;
344 unsigned int local_alloc_default_bits; 344 unsigned int local_alloc_default_bits;
345 /* osb_clusters_at_boot can become stale! Do not trust it to
346 * be up to date. */
347 unsigned int osb_clusters_at_boot;
345 348
346 enum ocfs2_local_alloc_state local_alloc_state; /* protected 349 enum ocfs2_local_alloc_state local_alloc_state; /* protected
347 * by osb_lock */ 350 * by osb_lock */
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bb37218a7978..d61a1521b10e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -283,14 +283,6 @@
283#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) 283#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
284 284
285/* 285/*
286 * Default local alloc size (in megabytes)
287 *
288 * The value chosen should be such that most allocations, including new
289 * block groups, use local alloc.
290 */
291#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
292
293/*
294 * Inline extended attribute size (in bytes) 286 * Inline extended attribute size (in bytes)
295 * The value chosen should be aligned to 16 byte boundaries. 287 * The value chosen should be aligned to 16 byte boundaries.
296 */ 288 */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index fc839996d052..5745682eb1c0 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1503,7 +1503,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1503 (unsigned) (osb->osb_commit_interval / HZ)); 1503 (unsigned) (osb->osb_commit_interval / HZ));
1504 1504
1505 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); 1505 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
1506 if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 1506 if (local_alloc_megs != ocfs2_la_default_mb(osb))
1507 seq_printf(s, ",localalloc=%d", local_alloc_megs); 1507 seq_printf(s, ",localalloc=%d", local_alloc_megs);
1508 1508
1509 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1509 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
@@ -2251,6 +2251,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
2251 } 2251 }
2252 2252
2253 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 2253 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
2254 osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters;
2254 iput(inode); 2255 iput(inode);
2255 2256
2256 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; 2257 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8;