aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2007-04-23 21:53:12 -0400
committerMark Fasheh <mark.fasheh@oracle.com>2007-04-26 18:10:40 -0400
commit83418978827324918a8cd25ce5227312de1d4468 (patch)
treef7baefb1fc8721d6d8d1f1f937bc55535b13e18f
parent7cdfc3a1c3971c9125c317cb8c2525745851798e (diff)
ocfs2: Cache extent records
The extent map code was ripped out earlier because of an inability to deal with holes. This patch adds back a simpler caching scheme requiring far less code. Our old extent map caching was designed back when meta data block caching in Ocfs2 didn't work very well, resulting in many disk reads. These days our metadata caching is much better, resulting in no un-necessary disk reads. As a result, extent caching doesn't have to be as fancy, nor does it have to cache as many extents. Keeping the last 3 extents seen should be sufficient to give us a small performance boost on some streaming workloads. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r--fs/ocfs2/alloc.c5
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/extent_map.c255
-rw-r--r--fs/ocfs2/extent_map.h20
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/inode.h4
-rw-r--r--fs/ocfs2/super.c1
7 files changed, 289 insertions, 0 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 412a2888a3ed..a0c8667caa72 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2417,6 +2417,8 @@ out_add:
2417 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); 2417 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
2418 if (status < 0) 2418 if (status < 0)
2419 mlog_errno(status); 2419 mlog_errno(status);
2420 else
2421 ocfs2_extent_map_insert_rec(inode, &rec);
2420 2422
2421bail: 2423bail:
2422 if (bh) 2424 if (bh)
@@ -3640,6 +3642,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
3640 mlog_errno(status); 3642 mlog_errno(status);
3641 goto bail; 3643 goto bail;
3642 } 3644 }
3645
3646 ocfs2_extent_map_trunc(inode, new_highest_cpos);
3647
3643start: 3648start:
3644 /* 3649 /*
3645 * Check that we still have allocation to delete. 3650 * Check that we still have allocation to delete.
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 43267eea3538..27e43b0c0eae 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1613,6 +1613,8 @@ static int ocfs2_meta_lock_update(struct inode *inode,
1613 * for the inode metadata. */ 1613 * for the inode metadata. */
1614 ocfs2_metadata_cache_purge(inode); 1614 ocfs2_metadata_cache_purge(inode);
1615 1615
1616 ocfs2_extent_map_trunc(inode, 0);
1617
1616 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) { 1618 if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
1617 mlog(0, "Trusting LVB on inode %llu\n", 1619 mlog(0, "Trusting LVB on inode %llu\n",
1618 (unsigned long long)oi->ip_blkno); 1620 (unsigned long long)oi->ip_blkno);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f35e04f27f32..ba2b2ab1c6e4 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -39,6 +39,254 @@
39#include "buffer_head_io.h" 39#include "buffer_head_io.h"
40 40
41/* 41/*
42 * The extent caching implementation is intentionally trivial.
43 *
44 * We only cache a small number of extents stored directly on the
45 * inode, so linear order operations are acceptable. If we ever want
46 * to increase the size of the extent map, then these algorithms must
47 * get smarter.
48 */
49
50void ocfs2_extent_map_init(struct inode *inode)
51{
52 struct ocfs2_inode_info *oi = OCFS2_I(inode);
53
54 oi->ip_extent_map.em_num_items = 0;
55 INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
56}
57
58static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
59 unsigned int cpos,
60 struct ocfs2_extent_map_item **ret_emi)
61{
62 unsigned int range;
63 struct ocfs2_extent_map_item *emi;
64
65 *ret_emi = NULL;
66
67 list_for_each_entry(emi, &em->em_list, ei_list) {
68 range = emi->ei_cpos + emi->ei_clusters;
69
70 if (cpos >= emi->ei_cpos && cpos < range) {
71 list_move(&emi->ei_list, &em->em_list);
72
73 *ret_emi = emi;
74 break;
75 }
76 }
77}
78
79static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
80 unsigned int *phys, unsigned int *len,
81 unsigned int *flags)
82{
83 unsigned int coff;
84 struct ocfs2_inode_info *oi = OCFS2_I(inode);
85 struct ocfs2_extent_map_item *emi;
86
87 spin_lock(&oi->ip_lock);
88
89 __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
90 if (emi) {
91 coff = cpos - emi->ei_cpos;
92 *phys = emi->ei_phys + coff;
93 if (len)
94 *len = emi->ei_clusters - coff;
95 if (flags)
96 *flags = emi->ei_flags;
97 }
98
99 spin_unlock(&oi->ip_lock);
100
101 if (emi == NULL)
102 return -ENOENT;
103
104 return 0;
105}
106
107/*
108 * Forget about all clusters equal to or greater than cpos.
109 */
110void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
111{
112 struct list_head *p, *n;
113 struct ocfs2_extent_map_item *emi;
114 struct ocfs2_inode_info *oi = OCFS2_I(inode);
115 struct ocfs2_extent_map *em = &oi->ip_extent_map;
116 LIST_HEAD(tmp_list);
117 unsigned int range;
118
119 spin_lock(&oi->ip_lock);
120 list_for_each_safe(p, n, &em->em_list) {
121 emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
122
123 if (emi->ei_cpos >= cpos) {
124 /* Full truncate of this record. */
125 list_move(&emi->ei_list, &tmp_list);
126 BUG_ON(em->em_num_items == 0);
127 em->em_num_items--;
128 continue;
129 }
130
131 range = emi->ei_cpos + emi->ei_clusters;
132 if (range > cpos) {
133 /* Partial truncate */
134 emi->ei_clusters = cpos - emi->ei_cpos;
135 }
136 }
137 spin_unlock(&oi->ip_lock);
138
139 list_for_each_safe(p, n, &tmp_list) {
140 emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
141 list_del(&emi->ei_list);
142 kfree(emi);
143 }
144}
145
146/*
147 * Is any part of emi2 contained within emi1
148 */
149static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
150 struct ocfs2_extent_map_item *emi2)
151{
152 unsigned int range1, range2;
153
154 /*
155 * Check if logical start of emi2 is inside emi1
156 */
157 range1 = emi1->ei_cpos + emi1->ei_clusters;
158 if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
159 return 1;
160
161 /*
162 * Check if logical end of emi2 is inside emi1
163 */
164 range2 = emi2->ei_cpos + emi2->ei_clusters;
165 if (range2 > emi1->ei_cpos && range2 <= range1)
166 return 1;
167
168 return 0;
169}
170
171static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
172 struct ocfs2_extent_map_item *src)
173{
174 dest->ei_cpos = src->ei_cpos;
175 dest->ei_phys = src->ei_phys;
176 dest->ei_clusters = src->ei_clusters;
177 dest->ei_flags = src->ei_flags;
178}
179
180/*
181 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
182 * otherwise.
183 */
184static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
185 struct ocfs2_extent_map_item *ins)
186{
187 /*
188 * Handle contiguousness
189 */
190 if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
191 ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
192 ins->ei_flags == emi->ei_flags) {
193 emi->ei_clusters += ins->ei_clusters;
194 return 1;
195 } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
196 (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys &&
197 ins->ei_flags == emi->ei_flags) {
198 emi->ei_phys = ins->ei_phys;
199 emi->ei_cpos = ins->ei_cpos;
200 emi->ei_clusters += ins->ei_clusters;
201 return 1;
202 }
203
204 /*
205 * Overlapping extents - this shouldn't happen unless we've
206 * split an extent to change it's flags. That is exceedingly
207 * rare, so there's no sense in trying to optimize it yet.
208 */
209 if (ocfs2_ei_is_contained(emi, ins) ||
210 ocfs2_ei_is_contained(ins, emi)) {
211 ocfs2_copy_emi_fields(emi, ins);
212 return 1;
213 }
214
215 /* No merge was possible. */
216 return 0;
217}
218
219/*
220 * In order to reduce complexity on the caller, this insert function
221 * is intentionally liberal in what it will accept.
222 *
223 * The only rule is that the truncate call *must* be used whenever
224 * records have been deleted. This avoids inserting overlapping
225 * records with different physical mappings.
226 */
227void ocfs2_extent_map_insert_rec(struct inode *inode,
228 struct ocfs2_extent_rec *rec)
229{
230 struct ocfs2_inode_info *oi = OCFS2_I(inode);
231 struct ocfs2_extent_map *em = &oi->ip_extent_map;
232 struct ocfs2_extent_map_item *emi, *new_emi = NULL;
233 struct ocfs2_extent_map_item ins;
234
235 ins.ei_cpos = le32_to_cpu(rec->e_cpos);
236 ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
237 le64_to_cpu(rec->e_blkno));
238 ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
239 ins.ei_flags = rec->e_flags;
240
241search:
242 spin_lock(&oi->ip_lock);
243
244 list_for_each_entry(emi, &em->em_list, ei_list) {
245 if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
246 list_move(&emi->ei_list, &em->em_list);
247 spin_unlock(&oi->ip_lock);
248 goto out;
249 }
250 }
251
252 /*
253 * No item could be merged.
254 *
255 * Either allocate and add a new item, or overwrite the last recently
256 * inserted.
257 */
258
259 if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
260 if (new_emi == NULL) {
261 spin_unlock(&oi->ip_lock);
262
263 new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
264 if (new_emi == NULL)
265 goto out;
266
267 goto search;
268 }
269
270 ocfs2_copy_emi_fields(new_emi, &ins);
271 list_add(&new_emi->ei_list, &em->em_list);
272 em->em_num_items++;
273 new_emi = NULL;
274 } else {
275 BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
276 emi = list_entry(em->em_list.prev,
277 struct ocfs2_extent_map_item, ei_list);
278 list_move(&emi->ei_list, &em->em_list);
279 ocfs2_copy_emi_fields(emi, &ins);
280 }
281
282 spin_unlock(&oi->ip_lock);
283
284out:
285 if (new_emi)
286 kfree(new_emi);
287}
288
289/*
42 * Return the 1st index within el which contains an extent start 290 * Return the 1st index within el which contains an extent start
43 * larger than v_cluster. 291 * larger than v_cluster.
44 */ 292 */
@@ -174,6 +422,11 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
174 struct ocfs2_extent_rec *rec; 422 struct ocfs2_extent_rec *rec;
175 u32 coff; 423 u32 coff;
176 424
425 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
426 num_clusters, extent_flags);
427 if (ret == 0)
428 goto out;
429
177 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 430 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
178 &di_bh, OCFS2_BH_CACHED, inode); 431 &di_bh, OCFS2_BH_CACHED, inode);
179 if (ret) { 432 if (ret) {
@@ -245,6 +498,8 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
245 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 498 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
246 499
247 flags = rec->e_flags; 500 flags = rec->e_flags;
501
502 ocfs2_extent_map_insert_rec(inode, rec);
248 } 503 }
249 504
250 if (extent_flags) 505 if (extent_flags)
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1d745e174afc..de91e3e41a22 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -25,6 +25,26 @@
25#ifndef _EXTENT_MAP_H 25#ifndef _EXTENT_MAP_H
26#define _EXTENT_MAP_H 26#define _EXTENT_MAP_H
27 27
28struct ocfs2_extent_map_item {
29 unsigned int ei_cpos;
30 unsigned int ei_phys;
31 unsigned int ei_clusters;
32 unsigned int ei_flags;
33
34 struct list_head ei_list;
35};
36
37#define OCFS2_MAX_EXTENT_MAP_ITEMS 3
38struct ocfs2_extent_map {
39 unsigned int em_num_items;
40 struct list_head em_list;
41};
42
43void ocfs2_extent_map_init(struct inode *inode);
44void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cluster);
45void ocfs2_extent_map_insert_rec(struct inode *inode,
46 struct ocfs2_extent_rec *rec);
47
28int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, 48int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster,
29 u32 *num_clusters, unsigned int *extent_flags); 49 u32 *num_clusters, unsigned int *extent_flags);
30int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, 50int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4bfc98c70137..21a605079c62 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1008,6 +1008,8 @@ void ocfs2_clear_inode(struct inode *inode)
1008 "Clear inode of %llu, inode has io markers\n", 1008 "Clear inode of %llu, inode has io markers\n",
1009 (unsigned long long)oi->ip_blkno); 1009 (unsigned long long)oi->ip_blkno);
1010 1010
1011 ocfs2_extent_map_trunc(inode, 0);
1012
1011 status = ocfs2_drop_inode_locks(inode); 1013 status = ocfs2_drop_inode_locks(inode);
1012 if (status < 0) 1014 if (status < 0)
1013 mlog_errno(status); 1015 mlog_errno(status);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index aa84353d0d19..03ae075869ee 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -26,6 +26,8 @@
26#ifndef OCFS2_INODE_H 26#ifndef OCFS2_INODE_H
27#define OCFS2_INODE_H 27#define OCFS2_INODE_H
28 28
29#include "extent_map.h"
30
29/* OCFS2 Inode Private Data */ 31/* OCFS2 Inode Private Data */
30struct ocfs2_inode_info 32struct ocfs2_inode_info
31{ 33{
@@ -63,6 +65,8 @@ struct ocfs2_inode_info
63 65
64 struct ocfs2_caching_info ip_metadata_cache; 66 struct ocfs2_caching_info ip_metadata_cache;
65 67
68 struct ocfs2_extent_map ip_extent_map;
69
66 struct inode vfs_inode; 70 struct inode vfs_inode;
67}; 71};
68 72
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 6ab52351943a..5c9e8243691f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -942,6 +942,7 @@ static void ocfs2_inode_init_once(void *data,
942 oi->ip_flags = 0; 942 oi->ip_flags = 0;
943 oi->ip_open_count = 0; 943 oi->ip_open_count = 0;
944 spin_lock_init(&oi->ip_lock); 944 spin_lock_init(&oi->ip_lock);
945 ocfs2_extent_map_init(&oi->vfs_inode);
945 INIT_LIST_HEAD(&oi->ip_io_markers); 946 INIT_LIST_HEAD(&oi->ip_io_markers);
946 oi->ip_created_trans = 0; 947 oi->ip_created_trans = 0;
947 oi->ip_last_trans = 0; 948 oi->ip_last_trans = 0;