aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs/pagelist.c
diff options
context:
space:
mode:
authorWeston Andros Adamson <dros@primarydata.com>2014-05-15 11:56:45 -0400
committerTrond Myklebust <trond.myklebust@primarydata.com>2014-05-29 11:11:44 -0400
commit2bfc6e566daa8386c9cffef2f7de17fc330d3835 (patch)
treea615bb7091787ad574c5b31bcd6a30a5bfb8c2f9 /fs/nfs/pagelist.c
parentab75e417192a486ffe63a314b6d2e7361f0e157f (diff)
nfs: add support for multiple nfs reqs per page
Add "page groups" - a circular list of nfs requests (struct nfs_page) that all reference the same page. This gives nfs read and write paths the ability to account for sub-page regions independently. This somewhat follows the design of struct buffer_head's sub-page accounting. Only "head" requests are ever added/removed from the inode list in the buffered write path. "head" and "sub" requests are treated the same through the read path and the rest of the write/commit path. Requests are given an extra reference across the life of the list. Page groups are never rejoined after being split. If the read/write request fails and the client falls back to another path (ie revert to MDS in PNFS case), the already split requests are pushed through the recoalescing code again, which may split them further and then coalesce them into properly sized requests on the wire. Fragmentation shouldn't be a problem with the current design, because we flush all requests in page group when a non-contiguous request is added, so the only time resplitting should occur is on a resend of a read or write. This patch lays the groundwork for sub-page splitting, but does not actually do any splitting. For now all page groups have one request as pg_test functions don't yet split pages. There are several related patches that are needed to support multiple requests per page group. Signed-off-by: Weston Andros Adamson <dros@primarydata.com> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Diffstat (limited to 'fs/nfs/pagelist.c')
-rw-r--r--fs/nfs/pagelist.c220
1 files changed, 206 insertions, 14 deletions
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f343f49ff596..015fb7b48dfe 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,6 +29,8 @@
29static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops; 30static const struct rpc_call_ops nfs_pgio_common_ops;
31 31
32static void nfs_free_request(struct nfs_page *);
33
32static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) 34static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
33{ 35{
34 p->npages = pagecount; 36 p->npages = pagecount;
@@ -136,10 +138,151 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
136 return __nfs_iocounter_wait(c); 138 return __nfs_iocounter_wait(c);
137} 139}
138 140
141/*
142 * nfs_page_group_lock - lock the head of the page group
143 * @req - request in group that is to be locked
144 *
145 * this lock must be held if modifying the page group list
146 */
147void
148nfs_page_group_lock(struct nfs_page *req)
149{
150 struct nfs_page *head = req->wb_head;
151 int err = -EAGAIN;
152
153 WARN_ON_ONCE(head != head->wb_head);
154
155 while (err)
156 err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
157 nfs_wait_bit_killable, TASK_KILLABLE);
158}
159
160/*
161 * nfs_page_group_unlock - unlock the head of the page group
162 * @req - request in group that is to be unlocked
163 */
164void
165nfs_page_group_unlock(struct nfs_page *req)
166{
167 struct nfs_page *head = req->wb_head;
168
169 WARN_ON_ONCE(head != head->wb_head);
170
171 smp_mb__before_clear_bit();
172 clear_bit(PG_HEADLOCK, &head->wb_flags);
173 smp_mb__after_clear_bit();
174 wake_up_bit(&head->wb_flags, PG_HEADLOCK);
175}
176
177/*
178 * nfs_page_group_sync_on_bit_locked
179 *
180 * must be called with page group lock held
181 */
182static bool
183nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
184{
185 struct nfs_page *head = req->wb_head;
186 struct nfs_page *tmp;
187
188 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
189 WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
190
191 tmp = req->wb_this_page;
192 while (tmp != req) {
193 if (!test_bit(bit, &tmp->wb_flags))
194 return false;
195 tmp = tmp->wb_this_page;
196 }
197
198 /* true! reset all bits */
199 tmp = req;
200 do {
201 clear_bit(bit, &tmp->wb_flags);
202 tmp = tmp->wb_this_page;
203 } while (tmp != req);
204
205 return true;
206}
207
208/*
209 * nfs_page_group_sync_on_bit - set bit on current request, but only
210 * return true if the bit is set for all requests in page group
211 * @req - request in page group
212 * @bit - PG_* bit that is used to sync page group
213 */
214bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
215{
216 bool ret;
217
218 nfs_page_group_lock(req);
219 ret = nfs_page_group_sync_on_bit_locked(req, bit);
220 nfs_page_group_unlock(req);
221
222 return ret;
223}
224
225/*
226 * nfs_page_group_init - Initialize the page group linkage for @req
227 * @req - a new nfs request
228 * @prev - the previous request in page group, or NULL if @req is the first
229 * or only request in the group (the head).
230 */
231static inline void
232nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
233{
234 WARN_ON_ONCE(prev == req);
235
236 if (!prev) {
237 req->wb_head = req;
238 req->wb_this_page = req;
239 } else {
240 WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
241 WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
242 req->wb_head = prev->wb_head;
243 req->wb_this_page = prev->wb_this_page;
244 prev->wb_this_page = req;
245
246 /* grab extra ref if head request has extra ref from
247 * the write/commit path to handle handoff between write
248 * and commit lists */
249 if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
250 kref_get(&req->wb_kref);
251 }
252}
253
254/*
255 * nfs_page_group_destroy - sync the destruction of page groups
256 * @req - request that no longer needs the page group
257 *
258 * releases the page group reference from each member once all
259 * members have called this function.
260 */
261static void
262nfs_page_group_destroy(struct kref *kref)
263{
264 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
265 struct nfs_page *tmp, *next;
266
267 if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
268 return;
269
270 tmp = req;
271 do {
272 next = tmp->wb_this_page;
273 /* unlink and free */
274 tmp->wb_this_page = tmp;
275 tmp->wb_head = tmp;
276 nfs_free_request(tmp);
277 tmp = next;
278 } while (tmp != req);
279}
280
139/** 281/**
140 * nfs_create_request - Create an NFS read/write request. 282 * nfs_create_request - Create an NFS read/write request.
141 * @ctx: open context to use 283 * @ctx: open context to use
142 * @page: page to write 284 * @page: page to write
285 * @last: last nfs request created for this page group or NULL if head
143 * @offset: starting offset within the page for the write 286 * @offset: starting offset within the page for the write
144 * @count: number of bytes to read/write 287 * @count: number of bytes to read/write
145 * 288 *
@@ -149,7 +292,8 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
149 */ 292 */
150struct nfs_page * 293struct nfs_page *
151nfs_create_request(struct nfs_open_context *ctx, struct page *page, 294nfs_create_request(struct nfs_open_context *ctx, struct page *page,
152 unsigned int offset, unsigned int count) 295 struct nfs_page *last, unsigned int offset,
296 unsigned int count)
153{ 297{
154 struct nfs_page *req; 298 struct nfs_page *req;
155 struct nfs_lock_context *l_ctx; 299 struct nfs_lock_context *l_ctx;
@@ -181,6 +325,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
181 req->wb_bytes = count; 325 req->wb_bytes = count;
182 req->wb_context = get_nfs_open_context(ctx); 326 req->wb_context = get_nfs_open_context(ctx);
183 kref_init(&req->wb_kref); 327 kref_init(&req->wb_kref);
328 nfs_page_group_init(req, last);
184 return req; 329 return req;
185} 330}
186 331
@@ -238,16 +383,18 @@ static void nfs_clear_request(struct nfs_page *req)
238 } 383 }
239} 384}
240 385
241
242/** 386/**
243 * nfs_release_request - Release the count on an NFS read/write request 387 * nfs_release_request - Release the count on an NFS read/write request
244 * @req: request to release 388 * @req: request to release
245 * 389 *
246 * Note: Should never be called with the spinlock held! 390 * Note: Should never be called with the spinlock held!
247 */ 391 */
248static void nfs_free_request(struct kref *kref) 392static void nfs_free_request(struct nfs_page *req)
249{ 393{
250 struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); 394 WARN_ON_ONCE(req->wb_this_page != req);
395
396 /* extra debug: make sure no sync bits are still set */
397 WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
251 398
252 /* Release struct file and open context */ 399 /* Release struct file and open context */
253 nfs_clear_request(req); 400 nfs_clear_request(req);
@@ -256,7 +403,7 @@ static void nfs_free_request(struct kref *kref)
256 403
257void nfs_release_request(struct nfs_page *req) 404void nfs_release_request(struct nfs_page *req)
258{ 405{
259 kref_put(&req->wb_kref, nfs_free_request); 406 kref_put(&req->wb_kref, nfs_page_group_destroy);
260} 407}
261 408
262static int nfs_wait_bit_uninterruptible(void *word) 409static int nfs_wait_bit_uninterruptible(void *word)
@@ -832,21 +979,66 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
832 * @desc: destination io descriptor 979 * @desc: destination io descriptor
833 * @req: request 980 * @req: request
834 * 981 *
982 * This may split a request into subrequests which are all part of the
983 * same page group.
984 *
835 * Returns true if the request 'req' was successfully coalesced into the 985 * Returns true if the request 'req' was successfully coalesced into the
836 * existing list of pages 'desc'. 986 * existing list of pages 'desc'.
837 */ 987 */
838static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 988static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
839 struct nfs_page *req) 989 struct nfs_page *req)
840{ 990{
841 while (!nfs_pageio_do_add_request(desc, req)) { 991 struct nfs_page *subreq;
842 desc->pg_moreio = 1; 992 unsigned int bytes_left = 0;
843 nfs_pageio_doio(desc); 993 unsigned int offset, pgbase;
844 if (desc->pg_error < 0) 994
845 return 0; 995 nfs_page_group_lock(req);
846 desc->pg_moreio = 0; 996
847 if (desc->pg_recoalesce) 997 subreq = req;
848 return 0; 998 bytes_left = subreq->wb_bytes;
849 } 999 offset = subreq->wb_offset;
1000 pgbase = subreq->wb_pgbase;
1001
1002 do {
1003 if (!nfs_pageio_do_add_request(desc, subreq)) {
1004 /* make sure pg_test call(s) did nothing */
1005 WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
1006 WARN_ON_ONCE(subreq->wb_offset != offset);
1007 WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
1008
1009 nfs_page_group_unlock(req);
1010 desc->pg_moreio = 1;
1011 nfs_pageio_doio(desc);
1012 if (desc->pg_error < 0)
1013 return 0;
1014 desc->pg_moreio = 0;
1015 if (desc->pg_recoalesce)
1016 return 0;
1017 /* retry add_request for this subreq */
1018 nfs_page_group_lock(req);
1019 continue;
1020 }
1021
1022 /* check for buggy pg_test call(s) */
1023 WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
1024 WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
1025 WARN_ON_ONCE(subreq->wb_bytes == 0);
1026
1027 bytes_left -= subreq->wb_bytes;
1028 offset += subreq->wb_bytes;
1029 pgbase += subreq->wb_bytes;
1030
1031 if (bytes_left) {
1032 subreq = nfs_create_request(req->wb_context,
1033 req->wb_page,
1034 subreq, pgbase, bytes_left);
1035 nfs_lock_request(subreq);
1036 subreq->wb_offset = offset;
1037 subreq->wb_index = req->wb_index;
1038 }
1039 } while (bytes_left > 0);
1040
1041 nfs_page_group_unlock(req);
850 return 1; 1042 return 1;
851} 1043}
852 1044