diff options
author | Fred Isaman <iisaman@citi.umich.edu> | 2011-07-30 20:52:49 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2011-07-31 12:18:17 -0400 |
commit | c1c2a4cd352269f1fb585b4a5c63abe24dd946c6 (patch) | |
tree | 0fc7470702b0fc3e7d156ac49285ec58de797fa3 /fs/nfs/blocklayout | |
parent | 6d742ba538f98164f3c5e05cdcadb4ec6ddf504f (diff) |
pnfsblock: add extent manipulation functions
Adds working implementations of various support functions
to handle INVAL extents, needed by writes, such as
bl_mark_sectors_init and bl_is_sector_init.
[pnfsblock: fix 64-bit compiler warnings for extent manipulation]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
[Implement release_inval_marks]
Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs/blocklayout')
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 7 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.h | 30 | ||||
-rw-r--r-- | fs/nfs/blocklayout/extents.c | 253 |
3 files changed, 287 insertions, 3 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 6cd7f4f3acdb..8c29a189f09b 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -78,10 +78,15 @@ release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) | |||
78 | spin_unlock(&bl->bl_ext_lock); | 78 | spin_unlock(&bl->bl_ext_lock); |
79 | } | 79 | } |
80 | 80 | ||
81 | /* STUB */ | ||
82 | static void | 81 | static void |
83 | release_inval_marks(struct pnfs_inval_markings *marks) | 82 | release_inval_marks(struct pnfs_inval_markings *marks) |
84 | { | 83 | { |
84 | struct pnfs_inval_tracking *pos, *temp; | ||
85 | |||
86 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | ||
87 | list_del(&pos->it_link); | ||
88 | kfree(pos); | ||
89 | } | ||
85 | return; | 90 | return; |
86 | } | 91 | } |
87 | 92 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 3e1b5fc152d7..fcf47b55b5ce 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -38,6 +38,9 @@ | |||
38 | 38 | ||
39 | #include "../pnfs.h" | 39 | #include "../pnfs.h" |
40 | 40 | ||
41 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) | ||
42 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | ||
43 | |||
41 | struct block_mount_id { | 44 | struct block_mount_id { |
42 | spinlock_t bm_lock; /* protects list */ | 45 | spinlock_t bm_lock; /* protects list */ |
43 | struct list_head bm_devlist; /* holds pnfs_block_dev */ | 46 | struct list_head bm_devlist; /* holds pnfs_block_dev */ |
@@ -56,8 +59,23 @@ enum exstate4 { | |||
56 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | 59 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ |
57 | }; | 60 | }; |
58 | 61 | ||
62 | #define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */ | ||
63 | |||
64 | struct my_tree { | ||
65 | sector_t mtt_step_size; /* Internal sector alignment */ | ||
66 | struct list_head mtt_stub; /* Should be a radix tree */ | ||
67 | }; | ||
68 | |||
59 | struct pnfs_inval_markings { | 69 | struct pnfs_inval_markings { |
60 | /* STUB */ | 70 | spinlock_t im_lock; |
71 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ | ||
72 | sector_t im_block_size; /* Server blocksize in sectors */ | ||
73 | }; | ||
74 | |||
75 | struct pnfs_inval_tracking { | ||
76 | struct list_head it_link; | ||
77 | int it_sector; | ||
78 | int it_tags; | ||
61 | }; | 79 | }; |
62 | 80 | ||
63 | /* sector_t fields are all in 512-byte sectors */ | 81 | /* sector_t fields are all in 512-byte sectors */ |
@@ -76,7 +94,11 @@ struct pnfs_block_extent { | |||
76 | static inline void | 94 | static inline void |
77 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | 95 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) |
78 | { | 96 | { |
79 | /* STUB */ | 97 | spin_lock_init(&marks->im_lock); |
98 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | ||
99 | marks->im_block_size = blocksize; | ||
100 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | ||
101 | blocksize); | ||
80 | } | 102 | } |
81 | 103 | ||
82 | enum extentclass4 { | 104 | enum extentclass4 { |
@@ -156,8 +178,12 @@ void bl_free_block_dev(struct pnfs_block_dev *bdev); | |||
156 | struct pnfs_block_extent * | 178 | struct pnfs_block_extent * |
157 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | 179 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, |
158 | struct pnfs_block_extent **cow_read); | 180 | struct pnfs_block_extent **cow_read); |
181 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
182 | sector_t offset, sector_t length, | ||
183 | sector_t **pages); | ||
159 | void bl_put_extent(struct pnfs_block_extent *be); | 184 | void bl_put_extent(struct pnfs_block_extent *be); |
160 | struct pnfs_block_extent *bl_alloc_extent(void); | 185 | struct pnfs_block_extent *bl_alloc_extent(void); |
186 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | ||
161 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | 187 | int bl_add_merge_extent(struct pnfs_block_layout *bl, |
162 | struct pnfs_block_extent *new); | 188 | struct pnfs_block_extent *new); |
163 | 189 | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 8fa93e23cb24..473faee9cdef 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -33,6 +33,259 @@ | |||
33 | #include "blocklayout.h" | 33 | #include "blocklayout.h" |
34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
35 | 35 | ||
36 | /* Bit numbers */ | ||
37 | #define EXTENT_INITIALIZED 0 | ||
38 | #define EXTENT_WRITTEN 1 | ||
39 | #define EXTENT_IN_COMMIT 2 | ||
40 | #define INTERNAL_EXISTS MY_MAX_TAGS | ||
41 | #define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1) | ||
42 | |||
43 | /* Returns largest t<=s s.t. t%base==0 */ | ||
44 | static inline sector_t normalize(sector_t s, int base) | ||
45 | { | ||
46 | sector_t tmp = s; /* Since do_div modifies its argument */ | ||
47 | return s - do_div(tmp, base); | ||
48 | } | ||
49 | |||
50 | static inline sector_t normalize_up(sector_t s, int base) | ||
51 | { | ||
52 | return normalize(s + base - 1, base); | ||
53 | } | ||
54 | |||
55 | /* Complete stub using list while determining the API wanted */ | ||
56 | |||
57 | /* Returns tags, or negative */ | ||
58 | static int32_t _find_entry(struct my_tree *tree, u64 s) | ||
59 | { | ||
60 | struct pnfs_inval_tracking *pos; | ||
61 | |||
62 | dprintk("%s(%llu) enter\n", __func__, s); | ||
63 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
64 | if (pos->it_sector > s) | ||
65 | continue; | ||
66 | else if (pos->it_sector == s) | ||
67 | return pos->it_tags & INTERNAL_MASK; | ||
68 | else | ||
69 | break; | ||
70 | } | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | static inline | ||
75 | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | ||
76 | { | ||
77 | int32_t tags; | ||
78 | |||
79 | dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | ||
80 | s = normalize(s, tree->mtt_step_size); | ||
81 | tags = _find_entry(tree, s); | ||
82 | if ((tags < 0) || !(tags & (1 << tag))) | ||
83 | return 0; | ||
84 | else | ||
85 | return 1; | ||
86 | } | ||
87 | |||
88 | /* Creates entry with tag, or if entry already exists, unions tag to it. | ||
89 | * If storage is not NULL, newly created entry will use it. | ||
90 | * Returns number of entries added, or negative on error. | ||
91 | */ | ||
92 | static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | ||
93 | struct pnfs_inval_tracking *storage) | ||
94 | { | ||
95 | int found = 0; | ||
96 | struct pnfs_inval_tracking *pos; | ||
97 | |||
98 | dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); | ||
99 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
100 | if (pos->it_sector > s) | ||
101 | continue; | ||
102 | else if (pos->it_sector == s) { | ||
103 | found = 1; | ||
104 | break; | ||
105 | } else | ||
106 | break; | ||
107 | } | ||
108 | if (found) { | ||
109 | pos->it_tags |= (1 << tag); | ||
110 | return 0; | ||
111 | } else { | ||
112 | struct pnfs_inval_tracking *new; | ||
113 | if (storage) | ||
114 | new = storage; | ||
115 | else { | ||
116 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
117 | if (!new) | ||
118 | return -ENOMEM; | ||
119 | } | ||
120 | new->it_sector = s; | ||
121 | new->it_tags = (1 << tag); | ||
122 | list_add(&new->it_link, &pos->it_link); | ||
123 | return 1; | ||
124 | } | ||
125 | } | ||
126 | |||
127 | /* XXXX Really want option to not create */ | ||
128 | /* Over range, unions tag with existing entries, else creates entry with tag */ | ||
129 | static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) | ||
130 | { | ||
131 | u64 i; | ||
132 | |||
133 | dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length); | ||
134 | for (i = normalize(s, tree->mtt_step_size); i < s + length; | ||
135 | i += tree->mtt_step_size) | ||
136 | if (_add_entry(tree, i, tag, NULL)) | ||
137 | return -ENOMEM; | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* Ensure that future operations on given range of tree will not malloc */ | ||
142 | static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | ||
143 | { | ||
144 | u64 start, end, s; | ||
145 | int count, i, used = 0, status = -ENOMEM; | ||
146 | struct pnfs_inval_tracking **storage; | ||
147 | |||
148 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | ||
149 | start = normalize(offset, tree->mtt_step_size); | ||
150 | end = normalize_up(offset + length, tree->mtt_step_size); | ||
151 | count = (int)(end - start) / (int)tree->mtt_step_size; | ||
152 | |||
153 | /* Pre-malloc what memory we might need */ | ||
154 | storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); | ||
155 | if (!storage) | ||
156 | return -ENOMEM; | ||
157 | for (i = 0; i < count; i++) { | ||
158 | storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking), | ||
159 | GFP_NOFS); | ||
160 | if (!storage[i]) | ||
161 | goto out_cleanup; | ||
162 | } | ||
163 | |||
164 | /* Now need lock - HOW??? */ | ||
165 | |||
166 | for (s = start; s < end; s += tree->mtt_step_size) | ||
167 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | ||
168 | |||
169 | /* Unlock - HOW??? */ | ||
170 | status = 0; | ||
171 | |||
172 | out_cleanup: | ||
173 | for (i = used; i < count; i++) { | ||
174 | if (!storage[i]) | ||
175 | break; | ||
176 | kfree(storage[i]); | ||
177 | } | ||
178 | kfree(storage); | ||
179 | return status; | ||
180 | } | ||
181 | |||
182 | static void set_needs_init(sector_t *array, sector_t offset) | ||
183 | { | ||
184 | sector_t *p = array; | ||
185 | |||
186 | dprintk("%s enter\n", __func__); | ||
187 | if (!p) | ||
188 | return; | ||
189 | while (*p < offset) | ||
190 | p++; | ||
191 | if (*p == offset) | ||
192 | return; | ||
193 | else if (*p == ~0) { | ||
194 | *p++ = offset; | ||
195 | *p = ~0; | ||
196 | return; | ||
197 | } else { | ||
198 | sector_t *save = p; | ||
199 | dprintk("%s Adding %llu\n", __func__, (u64)offset); | ||
200 | while (*p != ~0) | ||
201 | p++; | ||
202 | p++; | ||
203 | memmove(save + 1, save, (char *)p - (char *)save); | ||
204 | *save = offset; | ||
205 | return; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | /* We are relying on page lock to serialize this */ | ||
210 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | ||
211 | { | ||
212 | int rv; | ||
213 | |||
214 | spin_lock(&marks->im_lock); | ||
215 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | ||
216 | spin_unlock(&marks->im_lock); | ||
217 | return rv; | ||
218 | } | ||
219 | |||
220 | /* Marks sectors in [offset, offset+length) as having been initialized. | ||
221 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | ||
222 | * Notes where partial block is initialized, and helps prepare it for | ||
223 | * complete initialization later. | ||
224 | */ | ||
225 | /* Currently assumes offset is page-aligned */ | ||
226 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
227 | sector_t offset, sector_t length, | ||
228 | sector_t **pages) | ||
229 | { | ||
230 | sector_t s, start, end; | ||
231 | sector_t *array = NULL; /* Pages to mark */ | ||
232 | |||
233 | dprintk("%s(offset=%llu,len=%llu) enter\n", | ||
234 | __func__, (u64)offset, (u64)length); | ||
235 | s = max((sector_t) 3, | ||
236 | 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS))); | ||
237 | dprintk("%s set max=%llu\n", __func__, (u64)s); | ||
238 | if (pages) { | ||
239 | array = kmalloc(s * sizeof(sector_t), GFP_NOFS); | ||
240 | if (!array) | ||
241 | goto outerr; | ||
242 | array[0] = ~0; | ||
243 | } | ||
244 | |||
245 | start = normalize(offset, marks->im_block_size); | ||
246 | end = normalize_up(offset + length, marks->im_block_size); | ||
247 | if (_preload_range(&marks->im_tree, start, end - start)) | ||
248 | goto outerr; | ||
249 | |||
250 | spin_lock(&marks->im_lock); | ||
251 | |||
252 | for (s = normalize_up(start, PAGE_CACHE_SECTORS); | ||
253 | s < offset; s += PAGE_CACHE_SECTORS) { | ||
254 | dprintk("%s pre-area pages\n", __func__); | ||
255 | /* Portion of used block is not initialized */ | ||
256 | if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | ||
257 | set_needs_init(array, s); | ||
258 | } | ||
259 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | ||
260 | goto out_unlock; | ||
261 | for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); | ||
262 | s < end; s += PAGE_CACHE_SECTORS) { | ||
263 | dprintk("%s post-area pages\n", __func__); | ||
264 | if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED)) | ||
265 | set_needs_init(array, s); | ||
266 | } | ||
267 | |||
268 | spin_unlock(&marks->im_lock); | ||
269 | |||
270 | if (pages) { | ||
271 | if (array[0] == ~0) { | ||
272 | kfree(array); | ||
273 | *pages = NULL; | ||
274 | } else | ||
275 | *pages = array; | ||
276 | } | ||
277 | return 0; | ||
278 | |||
279 | out_unlock: | ||
280 | spin_unlock(&marks->im_lock); | ||
281 | outerr: | ||
282 | if (pages) { | ||
283 | kfree(array); | ||
284 | *pages = NULL; | ||
285 | } | ||
286 | return -ENOMEM; | ||
287 | } | ||
288 | |||
36 | static void print_bl_extent(struct pnfs_block_extent *be) | 289 | static void print_bl_extent(struct pnfs_block_extent *be) |
37 | { | 290 | { |
38 | dprintk("PRINT EXTENT extent %p\n", be); | 291 | dprintk("PRINT EXTENT extent %p\n", be); |