aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorFred Isaman <iisaman@citi.umich.edu>2011-07-30 20:52:49 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2011-07-31 12:18:17 -0400
commitc1c2a4cd352269f1fb585b4a5c63abe24dd946c6 (patch)
tree0fc7470702b0fc3e7d156ac49285ec58de797fa3 /fs/nfs
parent6d742ba538f98164f3c5e05cdcadb4ec6ddf504f (diff)
pnfsblock: add extent manipulation functions
Adds working implementations of various support functions to handle INVAL
extents, needed by writes, such as bl_mark_sectors_init and bl_is_sector_init.

[pnfsblock: fix 64-bit compiler warnings for extent manipulation]
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
[Implement release_inval_marks]
Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c7
-rw-r--r--fs/nfs/blocklayout/blocklayout.h30
-rw-r--r--fs/nfs/blocklayout/extents.c253
3 files changed, 287 insertions, 3 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 6cd7f4f3acdb..8c29a189f09b 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -78,10 +78,15 @@ release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range)
78 spin_unlock(&bl->bl_ext_lock); 78 spin_unlock(&bl->bl_ext_lock);
79} 79}
80 80
81/* STUB */
82static void 81static void
83release_inval_marks(struct pnfs_inval_markings *marks) 82release_inval_marks(struct pnfs_inval_markings *marks)
84{ 83{
84 struct pnfs_inval_tracking *pos, *temp;
85
86 list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
87 list_del(&pos->it_link);
88 kfree(pos);
89 }
85 return; 90 return;
86} 91}
87 92
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 3e1b5fc152d7..fcf47b55b5ce 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -38,6 +38,9 @@
38 38
39#include "../pnfs.h" 39#include "../pnfs.h"
40 40
/* Shift that converts between a page-cache page count and a sector count */
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
/* Number of sectors covered by one page-cache page */
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
43
41struct block_mount_id { 44struct block_mount_id {
42 spinlock_t bm_lock; /* protects list */ 45 spinlock_t bm_lock; /* protects list */
43 struct list_head bm_devlist; /* holds pnfs_block_dev */ 46 struct list_head bm_devlist; /* holds pnfs_block_dev */
@@ -56,8 +59,23 @@ enum exstate4 {
56 PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ 59 PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */
57}; 60};
58 61
62#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */
63
64struct my_tree {
65 sector_t mtt_step_size; /* Internal sector alignment */
66 struct list_head mtt_stub; /* Should be a radix tree */
67};
68
59struct pnfs_inval_markings { 69struct pnfs_inval_markings {
60 /* STUB */ 70 spinlock_t im_lock;
71 struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */
72 sector_t im_block_size; /* Server blocksize in sectors */
73};
74
75struct pnfs_inval_tracking {
76 struct list_head it_link;
77 int it_sector;
78 int it_tags;
61}; 79};
62 80
63/* sector_t fields are all in 512-byte sectors */ 81/* sector_t fields are all in 512-byte sectors */
@@ -76,7 +94,11 @@ struct pnfs_block_extent {
76static inline void 94static inline void
77BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) 95BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
78{ 96{
79 /* STUB */ 97 spin_lock_init(&marks->im_lock);
98 INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
99 marks->im_block_size = blocksize;
100 marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
101 blocksize);
80} 102}
81 103
82enum extentclass4 { 104enum extentclass4 {
@@ -156,8 +178,12 @@ void bl_free_block_dev(struct pnfs_block_dev *bdev);
156struct pnfs_block_extent * 178struct pnfs_block_extent *
157bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, 179bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
158 struct pnfs_block_extent **cow_read); 180 struct pnfs_block_extent **cow_read);
181int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
182 sector_t offset, sector_t length,
183 sector_t **pages);
159void bl_put_extent(struct pnfs_block_extent *be); 184void bl_put_extent(struct pnfs_block_extent *be);
160struct pnfs_block_extent *bl_alloc_extent(void); 185struct pnfs_block_extent *bl_alloc_extent(void);
186int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
161int bl_add_merge_extent(struct pnfs_block_layout *bl, 187int bl_add_merge_extent(struct pnfs_block_layout *bl,
162 struct pnfs_block_extent *new); 188 struct pnfs_block_extent *new);
163 189
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 8fa93e23cb24..473faee9cdef 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -33,6 +33,259 @@
33#include "blocklayout.h" 33#include "blocklayout.h"
34#define NFSDBG_FACILITY NFSDBG_PNFS_LD 34#define NFSDBG_FACILITY NFSDBG_PNFS_LD
35 35
36/* Bit numbers */
37#define EXTENT_INITIALIZED 0
38#define EXTENT_WRITTEN 1
39#define EXTENT_IN_COMMIT 2
40#define INTERNAL_EXISTS MY_MAX_TAGS
41#define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1)
42
43/* Returns largest t<=s s.t. t%base==0 */
44static inline sector_t normalize(sector_t s, int base)
45{
46 sector_t tmp = s; /* Since do_div modifies its argument */
47 return s - do_div(tmp, base);
48}
49
50static inline sector_t normalize_up(sector_t s, int base)
51{
52 return normalize(s + base - 1, base);
53}
54
/* Complete stub: a plain list is used until the desired API is determined */
56
57/* Returns tags, or negative */
58static int32_t _find_entry(struct my_tree *tree, u64 s)
59{
60 struct pnfs_inval_tracking *pos;
61
62 dprintk("%s(%llu) enter\n", __func__, s);
63 list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
64 if (pos->it_sector > s)
65 continue;
66 else if (pos->it_sector == s)
67 return pos->it_tags & INTERNAL_MASK;
68 else
69 break;
70 }
71 return -ENOENT;
72}
73
74static inline
75int _has_tag(struct my_tree *tree, u64 s, int32_t tag)
76{
77 int32_t tags;
78
79 dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
80 s = normalize(s, tree->mtt_step_size);
81 tags = _find_entry(tree, s);
82 if ((tags < 0) || !(tags & (1 << tag)))
83 return 0;
84 else
85 return 1;
86}
87
88/* Creates entry with tag, or if entry already exists, unions tag to it.
89 * If storage is not NULL, newly created entry will use it.
90 * Returns number of entries added, or negative on error.
91 */
92static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
93 struct pnfs_inval_tracking *storage)
94{
95 int found = 0;
96 struct pnfs_inval_tracking *pos;
97
98 dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
99 list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
100 if (pos->it_sector > s)
101 continue;
102 else if (pos->it_sector == s) {
103 found = 1;
104 break;
105 } else
106 break;
107 }
108 if (found) {
109 pos->it_tags |= (1 << tag);
110 return 0;
111 } else {
112 struct pnfs_inval_tracking *new;
113 if (storage)
114 new = storage;
115 else {
116 new = kmalloc(sizeof(*new), GFP_NOFS);
117 if (!new)
118 return -ENOMEM;
119 }
120 new->it_sector = s;
121 new->it_tags = (1 << tag);
122 list_add(&new->it_link, &pos->it_link);
123 return 1;
124 }
125}
126
127/* XXXX Really want option to not create */
128/* Over range, unions tag with existing entries, else creates entry with tag */
129static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
130{
131 u64 i;
132
133 dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
134 for (i = normalize(s, tree->mtt_step_size); i < s + length;
135 i += tree->mtt_step_size)
136 if (_add_entry(tree, i, tag, NULL))
137 return -ENOMEM;
138 return 0;
139}
140
141/* Ensure that future operations on given range of tree will not malloc */
142static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
143{
144 u64 start, end, s;
145 int count, i, used = 0, status = -ENOMEM;
146 struct pnfs_inval_tracking **storage;
147
148 dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
149 start = normalize(offset, tree->mtt_step_size);
150 end = normalize_up(offset + length, tree->mtt_step_size);
151 count = (int)(end - start) / (int)tree->mtt_step_size;
152
153 /* Pre-malloc what memory we might need */
154 storage = kmalloc(sizeof(*storage) * count, GFP_NOFS);
155 if (!storage)
156 return -ENOMEM;
157 for (i = 0; i < count; i++) {
158 storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
159 GFP_NOFS);
160 if (!storage[i])
161 goto out_cleanup;
162 }
163
164 /* Now need lock - HOW??? */
165
166 for (s = start; s < end; s += tree->mtt_step_size)
167 used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
168
169 /* Unlock - HOW??? */
170 status = 0;
171
172 out_cleanup:
173 for (i = used; i < count; i++) {
174 if (!storage[i])
175 break;
176 kfree(storage[i]);
177 }
178 kfree(storage);
179 return status;
180}
181
182static void set_needs_init(sector_t *array, sector_t offset)
183{
184 sector_t *p = array;
185
186 dprintk("%s enter\n", __func__);
187 if (!p)
188 return;
189 while (*p < offset)
190 p++;
191 if (*p == offset)
192 return;
193 else if (*p == ~0) {
194 *p++ = offset;
195 *p = ~0;
196 return;
197 } else {
198 sector_t *save = p;
199 dprintk("%s Adding %llu\n", __func__, (u64)offset);
200 while (*p != ~0)
201 p++;
202 p++;
203 memmove(save + 1, save, (char *)p - (char *)save);
204 *save = offset;
205 return;
206 }
207}
208
209/* We are relying on page lock to serialize this */
210int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
211{
212 int rv;
213
214 spin_lock(&marks->im_lock);
215 rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
216 spin_unlock(&marks->im_lock);
217 return rv;
218}
219
220/* Marks sectors in [offest, offset_length) as having been initialized.
221 * All lengths are step-aligned, where step is min(pagesize, blocksize).
222 * Notes where partial block is initialized, and helps prepare it for
223 * complete initialization later.
224 */
225/* Currently assumes offset is page-aligned */
226int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
227 sector_t offset, sector_t length,
228 sector_t **pages)
229{
230 sector_t s, start, end;
231 sector_t *array = NULL; /* Pages to mark */
232
233 dprintk("%s(offset=%llu,len=%llu) enter\n",
234 __func__, (u64)offset, (u64)length);
235 s = max((sector_t) 3,
236 2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
237 dprintk("%s set max=%llu\n", __func__, (u64)s);
238 if (pages) {
239 array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
240 if (!array)
241 goto outerr;
242 array[0] = ~0;
243 }
244
245 start = normalize(offset, marks->im_block_size);
246 end = normalize_up(offset + length, marks->im_block_size);
247 if (_preload_range(&marks->im_tree, start, end - start))
248 goto outerr;
249
250 spin_lock(&marks->im_lock);
251
252 for (s = normalize_up(start, PAGE_CACHE_SECTORS);
253 s < offset; s += PAGE_CACHE_SECTORS) {
254 dprintk("%s pre-area pages\n", __func__);
255 /* Portion of used block is not initialized */
256 if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
257 set_needs_init(array, s);
258 }
259 if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
260 goto out_unlock;
261 for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
262 s < end; s += PAGE_CACHE_SECTORS) {
263 dprintk("%s post-area pages\n", __func__);
264 if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
265 set_needs_init(array, s);
266 }
267
268 spin_unlock(&marks->im_lock);
269
270 if (pages) {
271 if (array[0] == ~0) {
272 kfree(array);
273 *pages = NULL;
274 } else
275 *pages = array;
276 }
277 return 0;
278
279 out_unlock:
280 spin_unlock(&marks->im_lock);
281 outerr:
282 if (pages) {
283 kfree(array);
284 *pages = NULL;
285 }
286 return -ENOMEM;
287}
288
36static void print_bl_extent(struct pnfs_block_extent *be) 289static void print_bl_extent(struct pnfs_block_extent *be)
37{ 290{
38 dprintk("PRINT EXTENT extent %p\n", be); 291 dprintk("PRINT EXTENT extent %p\n", be);