Diffstat (limited to 'fs/exofs/inode.c')
-rw-r--r--  fs/exofs/inode.c | 1303
1 file changed, 1303 insertions, 0 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
new file mode 100644
index 000000000000..ba8d9fab4693
--- /dev/null
+++ b/fs/exofs/inode.c
@@ -0,0 +1,1303 @@
1 | /* | ||
2 | * Copyright (C) 2005, 2006 | ||
3 | * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) | ||
4 | * Copyright (C) 2005, 2006 | ||
5 | * International Business Machines | ||
6 | * Copyright (C) 2008, 2009 | ||
7 | * Boaz Harrosh <bharrosh@panasas.com> | ||
8 | * | ||
9 | * Copyrights for code taken from ext2: | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * from | ||
15 | * linux/fs/minix/inode.c | ||
16 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
17 | * | ||
18 | * This file is part of exofs. | ||
19 | * | ||
20 | * exofs is free software; you can redistribute it and/or modify | ||
21 | * it under the terms of the GNU General Public License as published by | ||
22 | * the Free Software Foundation. Since it is based on ext2, and the only | ||
23 | * valid version of GPL for the Linux kernel is version 2, the only valid | ||
24 | * version of GPL for exofs is version 2. | ||
25 | * | ||
26 | * exofs is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with exofs; if not, write to the Free Software | ||
33 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/writeback.h> | ||
37 | #include <linux/buffer_head.h> | ||
38 | #include <scsi/scsi_device.h> | ||
39 | |||
40 | #include "exofs.h" | ||
41 | |||
42 | #ifdef CONFIG_EXOFS_DEBUG | ||
43 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | ||
44 | #endif | ||
45 | |||
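/* A page_collect gathers contiguous pages that belong to one inode into a
 * single bio, so that read_exec()/write_exec() below can submit them to the
 * OSD as one request.
 */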
46 | struct page_collect { | ||
47 | struct exofs_sb_info *sbi; | ||
48 | struct request_queue *req_q; | ||
49 | struct inode *inode; | ||
50 | unsigned expected_pages; | ||
51 | |||
52 | struct bio *bio; | ||
53 | unsigned nr_pages; | ||
54 | unsigned long length; | ||
55 | loff_t pg_first; /* keep 64bit also in 32-arches */ | ||
56 | }; | ||
57 | |||
58 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | ||
59 | struct inode *inode) | ||
60 | { | ||
61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
62 | struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; | ||
63 | |||
64 | pcol->sbi = sbi; | ||
65 | pcol->req_q = req_q; | ||
66 | pcol->inode = inode; | ||
67 | pcol->expected_pages = expected_pages; | ||
68 | |||
69 | pcol->bio = NULL; | ||
70 | pcol->nr_pages = 0; | ||
71 | pcol->length = 0; | ||
72 | pcol->pg_first = -1; | ||
73 | |||
74 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
75 | expected_pages); | ||
76 | } | ||
77 | |||
78 | static void _pcol_reset(struct page_collect *pcol) | ||
79 | { | ||
80 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); | ||
81 | |||
82 | pcol->bio = NULL; | ||
83 | pcol->nr_pages = 0; | ||
84 | pcol->length = 0; | ||
85 | pcol->pg_first = -1; | ||
86 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | ||
87 | pcol->inode->i_ino, pcol->expected_pages); | ||
88 | |||
89 | /* this is probably the end of the loop but in writes | ||
90 | * it might not end here. don't be left with nothing | ||
91 | */ | ||
92 | if (!pcol->expected_pages) | ||
93 | pcol->expected_pages = 128; | ||
94 | } | ||
95 | |||
96 | static int pcol_try_alloc(struct page_collect *pcol) | ||
97 | { | ||
98 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | ||
99 | |||
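/* Try to allocate a bio large enough for all expected pages; if that
 * fails, retry with progressively smaller sizes.
 */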
100 | for (; pages; pages >>= 1) { | ||
101 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | ||
102 | if (likely(pcol->bio)) | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | EXOFS_ERR("Failed to bio_alloc expected_pages=%u\n", | ||
107 | pcol->expected_pages); | ||
108 | return -ENOMEM; | ||
109 | } | ||
110 | |||
111 | static void pcol_free(struct page_collect *pcol) | ||
112 | { | ||
113 | bio_put(pcol->bio); | ||
114 | pcol->bio = NULL; | ||
115 | } | ||
116 | |||
117 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | ||
118 | unsigned len) | ||
119 | { | ||
120 | int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); | ||
121 | if (unlikely(len != added_len)) | ||
122 | return -ENOMEM; | ||
123 | |||
124 | ++pcol->nr_pages; | ||
125 | pcol->length += len; | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static int update_read_page(struct page *page, int ret) | ||
130 | { | ||
131 | if (ret == 0) { | ||
132 | /* Everything is OK */ | ||
133 | SetPageUptodate(page); | ||
134 | if (PageError(page)) | ||
135 | ClearPageError(page); | ||
136 | } else if (ret == -EFAULT) { | ||
137 | /* In this case we were trying to read something that wasn't on | ||
138 | * disk yet - return a page full of zeroes. This should be OK, | ||
139 | * because the object should be empty (if there was a write | ||
140 | * before this read, the read would be waiting with the page | ||
141 | * locked). */ | ||
142 | clear_highpage(page); | ||
143 | |||
144 | SetPageUptodate(page); | ||
145 | if (PageError(page)) | ||
146 | ClearPageError(page); | ||
147 | ret = 0; /* recovered error */ | ||
148 | EXOFS_DBGMSG("recovered read error\n"); | ||
149 | } else /* Error */ | ||
150 | SetPageError(page); | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | static void update_write_page(struct page *page, int ret) | ||
156 | { | ||
157 | if (ret) { | ||
158 | mapping_set_error(page->mapping, ret); | ||
159 | SetPageError(page); | ||
160 | } | ||
161 | end_page_writeback(page); | ||
162 | } | ||
163 | |||
164 | /* Called at the end of reads, to optionally unlock pages and update their | ||
165 | * status. | ||
166 | */ | ||
167 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | ||
168 | bool do_unlock) | ||
169 | { | ||
170 | struct bio_vec *bvec; | ||
171 | int i; | ||
172 | u64 resid; | ||
173 | u64 good_bytes; | ||
174 | u64 length = 0; | ||
175 | int ret = exofs_check_ok_resid(or, &resid, NULL); | ||
176 | |||
177 | osd_end_request(or); | ||
178 | |||
179 | if (likely(!ret)) | ||
180 | good_bytes = pcol->length; | ||
181 | else if (!resid) | ||
182 | good_bytes = 0; | ||
183 | else | ||
184 | good_bytes = pcol->length - resid; | ||
185 | |||
186 | EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" | ||
187 | " length=0x%lx nr_pages=%u\n", | ||
188 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
189 | pcol->nr_pages); | ||
190 | |||
191 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
192 | struct page *page = bvec->bv_page; | ||
193 | struct inode *inode = page->mapping->host; | ||
194 | int page_stat; | ||
195 | |||
196 | if (inode != pcol->inode) | ||
197 | continue; /* osd might add more pages at end */ | ||
198 | |||
199 | if (likely(length < good_bytes)) | ||
200 | page_stat = 0; | ||
201 | else | ||
202 | page_stat = ret; | ||
203 | |||
204 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | ||
205 | inode->i_ino, page->index, | ||
206 | page_stat ? "bad_bytes" : "good_bytes"); | ||
207 | |||
208 | ret = update_read_page(page, page_stat); | ||
209 | if (do_unlock) | ||
210 | unlock_page(page); | ||
211 | length += bvec->bv_len; | ||
212 | } | ||
213 | |||
214 | pcol_free(pcol); | ||
215 | EXOFS_DBGMSG("readpages_done END\n"); | ||
216 | return ret; | ||
217 | } | ||
218 | |||
219 | /* callback of async reads */ | ||
220 | static void readpages_done(struct osd_request *or, void *p) | ||
221 | { | ||
222 | struct page_collect *pcol = p; | ||
223 | |||
224 | __readpages_done(or, pcol, true); | ||
225 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
226 | kfree(p); | ||
227 | } | ||
228 | |||
229 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | ||
230 | { | ||
231 | struct bio_vec *bvec; | ||
232 | int i; | ||
233 | |||
234 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
235 | struct page *page = bvec->bv_page; | ||
236 | |||
237 | if (rw == READ) | ||
238 | update_read_page(page, ret); | ||
239 | else | ||
240 | update_write_page(page, ret); | ||
241 | |||
242 | unlock_page(page); | ||
243 | } | ||
244 | pcol_free(pcol); | ||
245 | } | ||
246 | |||
247 | static int read_exec(struct page_collect *pcol, bool is_sync) | ||
248 | { | ||
249 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
250 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
251 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
252 | struct osd_request *or = NULL; | ||
253 | struct page_collect *pcol_copy = NULL; | ||
254 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
255 | int ret; | ||
256 | |||
257 | if (!pcol->bio) | ||
258 | return 0; | ||
259 | |||
260 | /* see comment in _readpage() about sync reads */ | ||
261 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | ||
262 | |||
263 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
264 | if (unlikely(!or)) { | ||
265 | ret = -ENOMEM; | ||
266 | goto err; | ||
267 | } | ||
268 | |||
269 | osd_req_read(or, &obj, pcol->bio, i_start); | ||
270 | |||
271 | if (is_sync) { | ||
272 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | ||
273 | return __readpages_done(or, pcol, false); | ||
274 | } | ||
275 | |||
276 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
277 | if (!pcol_copy) { | ||
278 | ret = -ENOMEM; | ||
279 | goto err; | ||
280 | } | ||
281 | |||
282 | *pcol_copy = *pcol; | ||
283 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | ||
284 | if (unlikely(ret)) | ||
285 | goto err; | ||
286 | |||
287 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
288 | |||
289 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | ||
290 | obj.id, _LLU(i_start), pcol->length); | ||
291 | |||
292 | /* pages ownership was passed to pcol_copy */ | ||
293 | _pcol_reset(pcol); | ||
294 | return 0; | ||
295 | |||
296 | err: | ||
297 | if (!is_sync) | ||
298 | _unlock_pcol_pages(pcol, ret, READ); | ||
299 | kfree(pcol_copy); | ||
300 | if (or) | ||
301 | osd_end_request(or); | ||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | /* readpage_strip is called either directly from readpage() or by the VFS from | ||
306 | * within read_cache_pages(), to add one more page to be read. It will try to | ||
307 | * collect as many contiguous pages as possible. If a discontinuity is | ||
308 | * encountered, or it runs out of resources, it will submit the previous segment | ||
309 | * and will start a new collection. Eventually the caller must submit the last | ||
310 | * segment, if present. | ||
311 | */ | ||
312 | static int readpage_strip(void *data, struct page *page) | ||
313 | { | ||
314 | struct page_collect *pcol = data; | ||
315 | struct inode *inode = pcol->inode; | ||
316 | struct exofs_i_info *oi = exofs_i(inode); | ||
317 | loff_t i_size = i_size_read(inode); | ||
318 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
319 | size_t len; | ||
320 | int ret; | ||
321 | |||
322 | /* FIXME: Just for debugging, will be removed */ | ||
323 | if (PageUptodate(page)) | ||
324 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, | ||
325 | page->index); | ||
326 | |||
327 | if (page->index < end_index) | ||
328 | len = PAGE_CACHE_SIZE; | ||
329 | else if (page->index == end_index) | ||
330 | len = i_size & ~PAGE_CACHE_MASK; | ||
331 | else | ||
332 | len = 0; | ||
333 | |||
334 | if (!len || !obj_created(oi)) { | ||
335 | /* this will be out of bounds, or doesn't exist yet. | ||
336 | * Current page is cleared and the request is split | ||
337 | */ | ||
338 | clear_highpage(page); | ||
339 | |||
340 | SetPageUptodate(page); | ||
341 | if (PageError(page)) | ||
342 | ClearPageError(page); | ||
343 | |||
344 | unlock_page(page); | ||
345 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | ||
346 | " splitting\n", inode->i_ino, page->index); | ||
347 | |||
348 | return read_exec(pcol, false); | ||
349 | } | ||
350 | |||
351 | try_again: | ||
352 | |||
353 | if (unlikely(pcol->pg_first == -1)) { | ||
354 | pcol->pg_first = page->index; | ||
355 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
356 | page->index)) { | ||
357 | /* Discontinuity detected, split the request */ | ||
358 | ret = read_exec(pcol, false); | ||
359 | if (unlikely(ret)) | ||
360 | goto fail; | ||
361 | goto try_again; | ||
362 | } | ||
363 | |||
364 | if (!pcol->bio) { | ||
365 | ret = pcol_try_alloc(pcol); | ||
366 | if (unlikely(ret)) | ||
367 | goto fail; | ||
368 | } | ||
369 | |||
370 | if (len != PAGE_CACHE_SIZE) | ||
371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | ||
372 | |||
373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
374 | inode->i_ino, page->index, len); | ||
375 | |||
376 | ret = pcol_add_page(pcol, page, len); | ||
377 | if (ret) { | ||
378 | EXOFS_DBGMSG("Failed pcol_add_page page=%p " | ||
379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | ||
380 | page, len, pcol->nr_pages, pcol->length); | ||
381 | |||
382 | /* split the request, and start again with current page */ | ||
383 | ret = read_exec(pcol, false); | ||
384 | if (unlikely(ret)) | ||
385 | goto fail; | ||
386 | |||
387 | goto try_again; | ||
388 | } | ||
389 | |||
390 | return 0; | ||
391 | |||
392 | fail: | ||
393 | /* SetPageError(page); ??? */ | ||
394 | unlock_page(page); | ||
395 | return ret; | ||
396 | } | ||
397 | |||
398 | static int exofs_readpages(struct file *file, struct address_space *mapping, | ||
399 | struct list_head *pages, unsigned nr_pages) | ||
400 | { | ||
401 | struct page_collect pcol; | ||
402 | int ret; | ||
403 | |||
404 | _pcol_init(&pcol, nr_pages, mapping->host); | ||
405 | |||
406 | ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); | ||
407 | if (ret) { | ||
408 | EXOFS_ERR("read_cache_pages => %d\n", ret); | ||
409 | return ret; | ||
410 | } | ||
411 | |||
412 | return read_exec(&pcol, false); | ||
413 | } | ||
414 | |||
415 | static int _readpage(struct page *page, bool is_sync) | ||
416 | { | ||
417 | struct page_collect pcol; | ||
418 | int ret; | ||
419 | |||
420 | _pcol_init(&pcol, 1, page->mapping->host); | ||
421 | |||
422 | /* readpage_strip might call read_exec(pcol, false) at several places | ||
423 | * inside, but that is safe in the synchronous case since read_exec | ||
424 | * will not do anything when we have a single page. | ||
425 | */ | ||
426 | ret = readpage_strip(&pcol, page); | ||
427 | if (ret) { | ||
428 | EXOFS_ERR("_readpage => %d\n", ret); | ||
429 | return ret; | ||
430 | } | ||
431 | |||
432 | return read_exec(&pcol, is_sync); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * We don't need the file | ||
437 | */ | ||
438 | static int exofs_readpage(struct file *file, struct page *page) | ||
439 | { | ||
440 | return _readpage(page, false); | ||
441 | } | ||
442 | |||
443 | /* Callback for osd_write. All writes are asynchronous */ | ||
444 | static void writepages_done(struct osd_request *or, void *p) | ||
445 | { | ||
446 | struct page_collect *pcol = p; | ||
447 | struct bio_vec *bvec; | ||
448 | int i; | ||
449 | u64 resid; | ||
450 | u64 good_bytes; | ||
451 | u64 length = 0; | ||
452 | |||
453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
454 | |||
455 | osd_end_request(or); | ||
456 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
457 | |||
458 | if (likely(!ret)) | ||
459 | good_bytes = pcol->length; | ||
460 | else if (!resid) | ||
461 | good_bytes = 0; | ||
462 | else | ||
463 | good_bytes = pcol->length - resid; | ||
464 | |||
465 | EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" | ||
466 | " length=0x%lx nr_pages=%u\n", | ||
467 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
468 | pcol->nr_pages); | ||
469 | |||
470 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
471 | struct page *page = bvec->bv_page; | ||
472 | struct inode *inode = page->mapping->host; | ||
473 | int page_stat; | ||
474 | |||
475 | if (inode != pcol->inode) | ||
476 | continue; /* osd might add more pages to a bio */ | ||
477 | |||
478 | if (likely(length < good_bytes)) | ||
479 | page_stat = 0; | ||
480 | else | ||
481 | page_stat = ret; | ||
482 | |||
483 | update_write_page(page, page_stat); | ||
484 | unlock_page(page); | ||
485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | ||
486 | inode->i_ino, page->index, page_stat); | ||
487 | |||
488 | length += bvec->bv_len; | ||
489 | } | ||
490 | |||
491 | pcol_free(pcol); | ||
492 | kfree(pcol); | ||
493 | EXOFS_DBGMSG("writepages_done END\n"); | ||
494 | } | ||
495 | |||
496 | static int write_exec(struct page_collect *pcol) | ||
497 | { | ||
498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
501 | struct osd_request *or = NULL; | ||
502 | struct page_collect *pcol_copy = NULL; | ||
503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
504 | int ret; | ||
505 | |||
506 | if (!pcol->bio) | ||
507 | return 0; | ||
508 | |||
509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
510 | if (unlikely(!or)) { | ||
511 | EXOFS_ERR("write_exec: Failed to osd_start_request()\n"); | ||
512 | ret = -ENOMEM; | ||
513 | goto err; | ||
514 | } | ||
515 | |||
516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
517 | if (!pcol_copy) { | ||
518 | EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); | ||
519 | ret = -ENOMEM; | ||
520 | goto err; | ||
521 | } | ||
522 | |||
523 | *pcol_copy = *pcol; | ||
524 | |||
525 | osd_req_write(or, &obj, pcol_copy->bio, i_start); | ||
526 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | ||
527 | if (unlikely(ret)) { | ||
528 | EXOFS_ERR("write_exec: exofs_async_op() Failed\n"); | ||
529 | goto err; | ||
530 | } | ||
531 | |||
532 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
533 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | ||
534 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | ||
535 | pcol->length); | ||
536 | /* pages ownership was passed to pcol_copy */ | ||
537 | _pcol_reset(pcol); | ||
538 | return 0; | ||
539 | |||
540 | err: | ||
541 | _unlock_pcol_pages(pcol, ret, WRITE); | ||
542 | kfree(pcol_copy); | ||
543 | if (or) | ||
544 | osd_end_request(or); | ||
545 | return ret; | ||
546 | } | ||
547 | |||
548 | /* writepage_strip is called either directly from writepage() or by the VFS from | ||
549 | * within write_cache_pages(), to add one more page to be written to storage. | ||
550 | * It will try to collect as many contiguous pages as possible. If a | ||
551 | * discontinuity is encountered or it runs out of resources it will submit the | ||
552 | * previous segment and will start a new collection. | ||
553 | * Eventually caller must submit the last segment if present. | ||
554 | */ | ||
555 | static int writepage_strip(struct page *page, | ||
556 | struct writeback_control *wbc_unused, void *data) | ||
557 | { | ||
558 | struct page_collect *pcol = data; | ||
559 | struct inode *inode = pcol->inode; | ||
560 | struct exofs_i_info *oi = exofs_i(inode); | ||
561 | loff_t i_size = i_size_read(inode); | ||
562 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
563 | size_t len; | ||
564 | int ret; | ||
565 | |||
566 | BUG_ON(!PageLocked(page)); | ||
567 | |||
568 | ret = wait_obj_created(oi); | ||
569 | if (unlikely(ret)) | ||
570 | goto fail; | ||
571 | |||
572 | if (page->index < end_index) | ||
573 | /* in this case, the page is within the limits of the file */ | ||
574 | len = PAGE_CACHE_SIZE; | ||
575 | else { | ||
576 | len = i_size & ~PAGE_CACHE_MASK; | ||
577 | |||
578 | if (page->index > end_index || !len) { | ||
579 | /* in this case, the page is outside the limits | ||
580 | * (truncate in progress) | ||
581 | */ | ||
582 | ret = write_exec(pcol); | ||
583 | if (unlikely(ret)) | ||
584 | goto fail; | ||
585 | if (PageError(page)) | ||
586 | ClearPageError(page); | ||
587 | unlock_page(page); | ||
588 | return 0; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | try_again: | ||
593 | |||
594 | if (unlikely(pcol->pg_first == -1)) { | ||
595 | pcol->pg_first = page->index; | ||
596 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
597 | page->index)) { | ||
598 | /* Discontinuity detected, split the request */ | ||
599 | ret = write_exec(pcol); | ||
600 | if (unlikely(ret)) | ||
601 | goto fail; | ||
602 | goto try_again; | ||
603 | } | ||
604 | |||
605 | if (!pcol->bio) { | ||
606 | ret = pcol_try_alloc(pcol); | ||
607 | if (unlikely(ret)) | ||
608 | goto fail; | ||
609 | } | ||
610 | |||
611 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
612 | inode->i_ino, page->index, len); | ||
613 | |||
614 | ret = pcol_add_page(pcol, page, len); | ||
615 | if (unlikely(ret)) { | ||
616 | EXOFS_DBGMSG("Failed pcol_add_page " | ||
617 | "nr_pages=%u total_length=0x%lx\n", | ||
618 | pcol->nr_pages, pcol->length); | ||
619 | |||
620 | /* split the request, next loop will start again */ | ||
621 | ret = write_exec(pcol); | ||
622 | if (unlikely(ret)) { | ||
623 | EXOFS_DBGMSG("write_exec failed => %d\n", ret); | ||
624 | goto fail; | ||
625 | } | ||
626 | |||
627 | goto try_again; | ||
628 | } | ||
629 | |||
630 | BUG_ON(PageWriteback(page)); | ||
631 | set_page_writeback(page); | ||
632 | |||
633 | return 0; | ||
634 | |||
635 | fail: | ||
636 | set_bit(AS_EIO, &page->mapping->flags); | ||
637 | unlock_page(page); | ||
638 | return ret; | ||
639 | } | ||
640 | |||
641 | static int exofs_writepages(struct address_space *mapping, | ||
642 | struct writeback_control *wbc) | ||
643 | { | ||
644 | struct page_collect pcol; | ||
645 | long start, end, expected_pages; | ||
646 | int ret; | ||
647 | |||
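/* Estimate how many pages this writeback pass will cover, so that
 * _pcol_init()/pcol_try_alloc() can size the bio accordingly.
 */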
648 | start = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
649 | end = (wbc->range_end == LLONG_MAX) ? | ||
650 | start + mapping->nrpages : | ||
651 | wbc->range_end >> PAGE_CACHE_SHIFT; | ||
652 | |||
653 | if (start || end) | ||
654 | expected_pages = min(end - start + 1, 32L); | ||
655 | else | ||
656 | expected_pages = mapping->nrpages; | ||
657 | |||
658 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | ||
659 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | ||
660 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | ||
661 | mapping->nrpages, start, end); | ||
662 | |||
663 | _pcol_init(&pcol, expected_pages, mapping->host); | ||
664 | |||
665 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); | ||
666 | if (ret) { | ||
667 | EXOFS_ERR("write_cache_pages => %d\n", ret); | ||
668 | return ret; | ||
669 | } | ||
670 | |||
671 | return write_exec(&pcol); | ||
672 | } | ||
673 | |||
674 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | ||
675 | { | ||
676 | struct page_collect pcol; | ||
677 | int ret; | ||
678 | |||
679 | _pcol_init(&pcol, 1, page->mapping->host); | ||
680 | |||
681 | ret = writepage_strip(page, NULL, &pcol); | ||
682 | if (ret) { | ||
683 | EXOFS_ERR("exofs_writepage => %d\n", ret); | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | return write_exec(&pcol); | ||
688 | } | ||
689 | |||
690 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
691 | loff_t pos, unsigned len, unsigned flags, | ||
692 | struct page **pagep, void **fsdata) | ||
693 | { | ||
694 | int ret = 0; | ||
695 | struct page *page; | ||
696 | |||
697 | page = *pagep; | ||
698 | if (page == NULL) { | ||
699 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, | ||
700 | fsdata); | ||
701 | if (ret) { | ||
702 | EXOFS_DBGMSG("simple_write_begin failed\n"); | ||
703 | return ret; | ||
704 | } | ||
705 | |||
706 | page = *pagep; | ||
707 | } | ||
708 | |||
709 | /* read modify write */ | ||
710 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { | ||
711 | ret = _readpage(page, true); | ||
712 | if (ret) { | ||
713 | /*SetPageError was done by _readpage. Is it ok?*/ | ||
714 | unlock_page(page); | ||
715 | EXOFS_DBGMSG("_readpage failed\n"); | ||
716 | } | ||
717 | } | ||
718 | |||
719 | return ret; | ||
720 | } | ||
721 | |||
722 | static int exofs_write_begin_export(struct file *file, | ||
723 | struct address_space *mapping, | ||
724 | loff_t pos, unsigned len, unsigned flags, | ||
725 | struct page **pagep, void **fsdata) | ||
726 | { | ||
727 | *pagep = NULL; | ||
728 | |||
729 | return exofs_write_begin(file, mapping, pos, len, flags, pagep, | ||
730 | fsdata); | ||
731 | } | ||
732 | |||
733 | const struct address_space_operations exofs_aops = { | ||
734 | .readpage = exofs_readpage, | ||
735 | .readpages = exofs_readpages, | ||
736 | .writepage = exofs_writepage, | ||
737 | .writepages = exofs_writepages, | ||
738 | .write_begin = exofs_write_begin_export, | ||
739 | .write_end = simple_write_end, | ||
740 | }; | ||
741 | |||
742 | /****************************************************************************** | ||
743 | * INODE OPERATIONS | ||
744 | *****************************************************************************/ | ||
745 | |||
746 | /* | ||
747 | * Test whether an inode is a fast symlink. | ||
748 | */ | ||
749 | static inline int exofs_inode_is_fast_symlink(struct inode *inode) | ||
750 | { | ||
751 | struct exofs_i_info *oi = exofs_i(inode); | ||
752 | |||
753 | return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * get_block_t - Fill in a buffer_head | ||
758 | * An OSD takes care of block allocation so we just fake an allocation by | ||
759 | * putting the inode's sector_t in the buffer_head. | ||
760 | * TODO: What about the case of create==0 and @iblock does not exist in the | ||
761 | * object? | ||
762 | */ | ||
763 | static int exofs_get_block(struct inode *inode, sector_t iblock, | ||
764 | struct buffer_head *bh_result, int create) | ||
765 | { | ||
766 | map_bh(bh_result, inode->i_sb, iblock); | ||
767 | return 0; | ||
768 | } | ||
769 | |||
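/* OSD attribute that holds an object's logical length. It is used below to
 * set the new size on truncate and, when EXOFS_DEBUG_OBJ_ISIZE is set, to
 * sanity-check i_size when reading an inode.
 */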
770 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | ||
771 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | ||
772 | |||
773 | /* | ||
774 | * Truncate a file to the specified size - all we have to do is set the size | ||
775 | * attribute. We make sure the object exists first. | ||
776 | */ | ||
777 | void exofs_truncate(struct inode *inode) | ||
778 | { | ||
779 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
780 | struct exofs_i_info *oi = exofs_i(inode); | ||
781 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
782 | struct osd_request *or; | ||
783 | struct osd_attr attr; | ||
784 | loff_t isize = i_size_read(inode); | ||
785 | __be64 newsize; | ||
786 | int ret; | ||
787 | |||
788 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | ||
789 | || S_ISLNK(inode->i_mode))) | ||
790 | return; | ||
791 | if (exofs_inode_is_fast_symlink(inode)) | ||
792 | return; | ||
793 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
794 | return; | ||
795 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
796 | |||
797 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
798 | |||
799 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
800 | if (unlikely(!or)) { | ||
801 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
802 | goto fail; | ||
803 | } | ||
804 | |||
805 | osd_req_set_attributes(or, &obj); | ||
806 | |||
807 | newsize = cpu_to_be64((u64)isize); | ||
808 | attr = g_attr_logical_length; | ||
809 | attr.val_ptr = &newsize; | ||
810 | osd_req_add_set_attr_list(or, &attr, 1); | ||
811 | |||
812 | /* if we are about to truncate an object, and it hasn't been | ||
813 | * created yet, wait | ||
814 | */ | ||
815 | if (unlikely(wait_obj_created(oi))) | ||
816 | goto fail; | ||
817 | |||
818 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
819 | osd_end_request(or); | ||
820 | if (ret) | ||
821 | goto fail; | ||
822 | |||
823 | out: | ||
824 | mark_inode_dirty(inode); | ||
825 | return; | ||
826 | fail: | ||
827 | make_bad_inode(inode); | ||
828 | goto out; | ||
829 | } | ||
830 | |||
831 | /* | ||
832 | * Set inode attributes - just call generic functions. | ||
833 | */ | ||
834 | int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | ||
835 | { | ||
836 | struct inode *inode = dentry->d_inode; | ||
837 | int error; | ||
838 | |||
839 | error = inode_change_ok(inode, iattr); | ||
840 | if (error) | ||
841 | return error; | ||
842 | |||
843 | error = inode_setattr(inode, iattr); | ||
844 | return error; | ||
845 | } | ||
846 | |||
847 | /* | ||
848 | * Read an inode from the OSD, and return it as is. We also return the size | ||
849 | * attribute in the 'sanity' argument if the code was compiled with debugging | ||
850 | * turned on. | ||
851 | */ | ||
852 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | ||
853 | struct exofs_fcb *inode, uint64_t *sanity) | ||
854 | { | ||
855 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
856 | struct osd_request *or; | ||
857 | struct osd_attr attr; | ||
858 | struct osd_obj_id obj = {sbi->s_pid, | ||
859 | oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | ||
860 | int ret; | ||
861 | |||
862 | exofs_make_credential(oi->i_cred, &obj); | ||
863 | |||
864 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
865 | if (unlikely(!or)) { | ||
866 | EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | ||
867 | return -ENOMEM; | ||
868 | } | ||
869 | osd_req_get_attributes(or, &obj); | ||
870 | |||
871 | /* we need the inode attribute */ | ||
872 | osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | ||
873 | |||
874 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
875 | /* we get the size attributes to do a sanity check */ | ||
876 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | ||
877 | #endif | ||
878 | |||
879 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
880 | if (ret) | ||
881 | goto out; | ||
882 | |||
883 | attr = g_attr_inode_data; | ||
884 | ret = extract_attr_from_req(or, &attr); | ||
885 | if (ret) { | ||
886 | EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | ||
887 | goto out; | ||
888 | } | ||
889 | |||
890 | WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | ||
891 | memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | ||
892 | |||
893 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
894 | attr = g_attr_logical_length; | ||
895 | ret = extract_attr_from_req(or, &attr); | ||
896 | if (ret) { | ||
897 | EXOFS_ERR("ERROR: extract attr from or failed\n"); | ||
898 | goto out; | ||
899 | } | ||
900 | *sanity = get_unaligned_be64(attr.val_ptr); | ||
901 | #endif | ||
902 | |||
903 | out: | ||
904 | osd_end_request(or); | ||
905 | return ret; | ||
906 | } | ||
907 | |||
908 | /* | ||
909 | * Fill in an inode read from the OSD and set it up for use | ||
910 | */ | ||
911 | struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | ||
912 | { | ||
913 | struct exofs_i_info *oi; | ||
914 | struct exofs_fcb fcb; | ||
915 | struct inode *inode; | ||
916 | uint64_t uninitialized_var(sanity); | ||
917 | int ret; | ||
918 | |||
919 | inode = iget_locked(sb, ino); | ||
920 | if (!inode) | ||
921 | return ERR_PTR(-ENOMEM); | ||
922 | if (!(inode->i_state & I_NEW)) | ||
923 | return inode; | ||
924 | oi = exofs_i(inode); | ||
925 | |||
926 | /* read the inode from the osd */ | ||
927 | ret = exofs_get_inode(sb, oi, &fcb, &sanity); | ||
928 | if (ret) | ||
929 | goto bad_inode; | ||
930 | |||
931 | init_waitqueue_head(&oi->i_wq); | ||
932 | set_obj_created(oi); | ||
933 | |||
934 | /* copy stuff from on-disk struct to in-memory struct */ | ||
935 | inode->i_mode = le16_to_cpu(fcb.i_mode); | ||
936 | inode->i_uid = le32_to_cpu(fcb.i_uid); | ||
937 | inode->i_gid = le32_to_cpu(fcb.i_gid); | ||
938 | inode->i_nlink = le16_to_cpu(fcb.i_links_count); | ||
939 | inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); | ||
940 | inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); | ||
941 | inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); | ||
942 | inode->i_ctime.tv_nsec = | ||
943 | inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0; | ||
944 | oi->i_commit_size = le64_to_cpu(fcb.i_size); | ||
945 | i_size_write(inode, oi->i_commit_size); | ||
946 | inode->i_blkbits = EXOFS_BLKSHIFT; | ||
947 | inode->i_generation = le32_to_cpu(fcb.i_generation); | ||
948 | |||
949 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
950 | if ((inode->i_size != sanity) && | ||
951 | (!exofs_inode_is_fast_symlink(inode))) { | ||
952 | EXOFS_ERR("WARNING: Size of object from inode and " | ||
953 | "attributes differ (%lld != %llu)\n", | ||
954 | inode->i_size, _LLU(sanity)); | ||
955 | } | ||
956 | #endif | ||
957 | |||
958 | oi->i_dir_start_lookup = 0; | ||
959 | |||
960 | if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { | ||
961 | ret = -ESTALE; | ||
962 | goto bad_inode; | ||
963 | } | ||
964 | |||
965 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | ||
966 | if (fcb.i_data[0]) | ||
967 | inode->i_rdev = | ||
968 | old_decode_dev(le32_to_cpu(fcb.i_data[0])); | ||
969 | else | ||
970 | inode->i_rdev = | ||
971 | new_decode_dev(le32_to_cpu(fcb.i_data[1])); | ||
972 | } else { | ||
973 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); | ||
974 | } | ||
975 | |||
976 | if (S_ISREG(inode->i_mode)) { | ||
977 | inode->i_op = &exofs_file_inode_operations; | ||
978 | inode->i_fop = &exofs_file_operations; | ||
979 | inode->i_mapping->a_ops = &exofs_aops; | ||
980 | } else if (S_ISDIR(inode->i_mode)) { | ||
981 | inode->i_op = &exofs_dir_inode_operations; | ||
982 | inode->i_fop = &exofs_dir_operations; | ||
983 | inode->i_mapping->a_ops = &exofs_aops; | ||
984 | } else if (S_ISLNK(inode->i_mode)) { | ||
985 | if (exofs_inode_is_fast_symlink(inode)) | ||
986 | inode->i_op = &exofs_fast_symlink_inode_operations; | ||
987 | else { | ||
988 | inode->i_op = &exofs_symlink_inode_operations; | ||
989 | inode->i_mapping->a_ops = &exofs_aops; | ||
990 | } | ||
991 | } else { | ||
992 | inode->i_op = &exofs_special_inode_operations; | ||
993 | if (fcb.i_data[0]) | ||
994 | init_special_inode(inode, inode->i_mode, | ||
995 | old_decode_dev(le32_to_cpu(fcb.i_data[0]))); | ||
996 | else | ||
997 | init_special_inode(inode, inode->i_mode, | ||
998 | new_decode_dev(le32_to_cpu(fcb.i_data[1]))); | ||
999 | } | ||
1000 | |||
1001 | unlock_new_inode(inode); | ||
1002 | return inode; | ||
1003 | |||
1004 | bad_inode: | ||
1005 | iget_failed(inode); | ||
1006 | return ERR_PTR(ret); | ||
1007 | } | ||
1008 | |||
1009 | int __exofs_wait_obj_created(struct exofs_i_info *oi) | ||
1010 | { | ||
1011 | if (!obj_created(oi)) { | ||
1012 | BUG_ON(!obj_2bcreated(oi)); | ||
1013 | wait_event(oi->i_wq, obj_created(oi)); | ||
1014 | } | ||
1015 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; | ||
1016 | } | ||
1017 | /* | ||
1018 | * Callback function from exofs_new_inode(). The important thing is that we | ||
1019 | * set the obj_created flag so that other methods know that the object exists on | ||
1020 | * the OSD. | ||
1021 | */ | ||
1022 | static void create_done(struct osd_request *or, void *p) | ||
1023 | { | ||
1024 | struct inode *inode = p; | ||
1025 | struct exofs_i_info *oi = exofs_i(inode); | ||
1026 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
1027 | int ret; | ||
1028 | |||
1029 | ret = exofs_check_ok(or); | ||
1030 | osd_end_request(or); | ||
1031 | atomic_dec(&sbi->s_curr_pending); | ||
1032 | |||
1033 | if (unlikely(ret)) { | ||
1034 | EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx\n", | ||
1035 | _LLU(inode->i_ino + EXOFS_OBJ_OFF), _LLU(sbi->s_pid)); | ||
1036 | make_bad_inode(inode); | ||
1037 | } else | ||
1038 | set_obj_created(oi); | ||
1039 | |||
1040 | atomic_dec(&inode->i_count); | ||
1041 | wake_up(&oi->i_wq); | ||
1042 | } | ||
1043 | |||
1044 | /* | ||
1045 | * Set up a new inode and create an object for it on the OSD | ||
1046 | */ | ||
1047 | struct inode *exofs_new_inode(struct inode *dir, int mode) | ||
1048 | { | ||
1049 | struct super_block *sb; | ||
1050 | struct inode *inode; | ||
1051 | struct exofs_i_info *oi; | ||
1052 | struct exofs_sb_info *sbi; | ||
1053 | struct osd_request *or; | ||
1054 | struct osd_obj_id obj; | ||
1055 | int ret; | ||
1056 | |||
1057 | sb = dir->i_sb; | ||
1058 | inode = new_inode(sb); | ||
1059 | if (!inode) | ||
1060 | return ERR_PTR(-ENOMEM); | ||
1061 | |||
1062 | oi = exofs_i(inode); | ||
1063 | |||
1064 | init_waitqueue_head(&oi->i_wq); | ||
1065 | set_obj_2bcreated(oi); | ||
1066 | |||
1067 | sbi = sb->s_fs_info; | ||
1068 | |||
1069 | sb->s_dirt = 1; | ||
1070 | inode->i_uid = current->cred->fsuid; | ||
1071 | if (dir->i_mode & S_ISGID) { | ||
1072 | inode->i_gid = dir->i_gid; | ||
1073 | if (S_ISDIR(mode)) | ||
1074 | mode |= S_ISGID; | ||
1075 | } else { | ||
1076 | inode->i_gid = current->cred->fsgid; | ||
1077 | } | ||
1078 | inode->i_mode = mode; | ||
1079 | |||
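/* Each inode number maps directly to an OSD object id
 * (i_ino + EXOFS_OBJ_OFF); the object itself is created
 * asynchronously below and completed in create_done().
 */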
1080 | inode->i_ino = sbi->s_nextid++; | ||
1081 | inode->i_blkbits = EXOFS_BLKSHIFT; | ||
1082 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
1083 | oi->i_commit_size = inode->i_size = 0; | ||
1084 | spin_lock(&sbi->s_next_gen_lock); | ||
1085 | inode->i_generation = sbi->s_next_generation++; | ||
1086 | spin_unlock(&sbi->s_next_gen_lock); | ||
1087 | insert_inode_hash(inode); | ||
1088 | |||
1089 | mark_inode_dirty(inode); | ||
1090 | |||
1091 | obj.partition = sbi->s_pid; | ||
1092 | obj.id = inode->i_ino + EXOFS_OBJ_OFF; | ||
1093 | exofs_make_credential(oi->i_cred, &obj); | ||
1094 | |||
1095 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1096 | if (unlikely(!or)) { | ||
1097 | EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | ||
1098 | return ERR_PTR(-ENOMEM); | ||
1099 | } | ||
1100 | |||
1101 | osd_req_create_object(or, &obj); | ||
1102 | |||
1103 | /* increment the refcount so that the inode will still be around when we | ||
1104 | * reach the callback | ||
1105 | */ | ||
1106 | atomic_inc(&inode->i_count); | ||
1107 | |||
1108 | ret = exofs_async_op(or, create_done, inode, oi->i_cred); | ||
1109 | if (ret) { | ||
1110 | atomic_dec(&inode->i_count); | ||
1111 | osd_end_request(or); | ||
1112 | return ERR_PTR(-EIO); | ||
1113 | } | ||
1114 | atomic_inc(&sbi->s_curr_pending); | ||
1115 | |||
1116 | return inode; | ||
1117 | } | ||
1118 | |||
1119 | /* | ||
1120 | * struct to pass two arguments to update_inode's callback | ||
1121 | */ | ||
1122 | struct updatei_args { | ||
1123 | struct exofs_sb_info *sbi; | ||
1124 | struct exofs_fcb fcb; | ||
1125 | }; | ||
1126 | |||
1127 | /* | ||
1128 | * Callback function from exofs_update_inode(). | ||
1129 | */ | ||
1130 | static void updatei_done(struct osd_request *or, void *p) | ||
1131 | { | ||
1132 | struct updatei_args *args = p; | ||
1133 | |||
1134 | osd_end_request(or); | ||
1135 | |||
1136 | atomic_dec(&args->sbi->s_curr_pending); | ||
1137 | |||
1138 | kfree(args); | ||
1139 | } | ||
1140 | |||
1141 | /* | ||
1142 | * Write the inode to the OSD. Just fill up the struct, and set the attribute | ||
1143 | * synchronously or asynchronously depending on the do_sync flag. | ||
1144 | */ | ||
1145 | static int exofs_update_inode(struct inode *inode, int do_sync) | ||
1146 | { | ||
1147 | struct exofs_i_info *oi = exofs_i(inode); | ||
1148 | struct super_block *sb = inode->i_sb; | ||
1149 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
1150 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
1151 | struct osd_request *or; | ||
1152 | struct osd_attr attr; | ||
1153 | struct exofs_fcb *fcb; | ||
1154 | struct updatei_args *args; | ||
1155 | int ret; | ||
1156 | |||
1157 | args = kzalloc(sizeof(*args), GFP_KERNEL); | ||
1158 | if (!args) | ||
1159 | return -ENOMEM; | ||
1160 | |||
1161 | fcb = &args->fcb; | ||
1162 | |||
1163 | fcb->i_mode = cpu_to_le16(inode->i_mode); | ||
1164 | fcb->i_uid = cpu_to_le32(inode->i_uid); | ||
1165 | fcb->i_gid = cpu_to_le32(inode->i_gid); | ||
1166 | fcb->i_links_count = cpu_to_le16(inode->i_nlink); | ||
1167 | fcb->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); | ||
1168 | fcb->i_atime = cpu_to_le32(inode->i_atime.tv_sec); | ||
1169 | fcb->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); | ||
1170 | oi->i_commit_size = i_size_read(inode); | ||
1171 | fcb->i_size = cpu_to_le64(oi->i_commit_size); | ||
1172 | fcb->i_generation = cpu_to_le32(inode->i_generation); | ||
1173 | |||
1174 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { | ||
1175 | if (old_valid_dev(inode->i_rdev)) { | ||
1176 | fcb->i_data[0] = | ||
1177 | cpu_to_le32(old_encode_dev(inode->i_rdev)); | ||
1178 | fcb->i_data[1] = 0; | ||
1179 | } else { | ||
1180 | fcb->i_data[0] = 0; | ||
1181 | fcb->i_data[1] = | ||
1182 | cpu_to_le32(new_encode_dev(inode->i_rdev)); | ||
1183 | fcb->i_data[2] = 0; | ||
1184 | } | ||
1185 | } else | ||
1186 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | ||
1187 | |||
1188 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1189 | if (unlikely(!or)) { | ||
1190 | EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | ||
1191 | ret = -ENOMEM; | ||
1192 | goto free_args; | ||
1193 | } | ||
1194 | |||
1195 | osd_req_set_attributes(or, &obj); | ||
1196 | |||
1197 | attr = g_attr_inode_data; | ||
1198 | attr.val_ptr = fcb; | ||
1199 | osd_req_add_set_attr_list(or, &attr, 1); | ||
1200 | |||
1201 | if (!obj_created(oi)) { | ||
1202 | EXOFS_DBGMSG("!obj_created\n"); | ||
1203 | BUG_ON(!obj_2bcreated(oi)); | ||
1204 | wait_event(oi->i_wq, obj_created(oi)); | ||
1205 | EXOFS_DBGMSG("wait_event done\n"); | ||
1206 | } | ||
1207 | |||
1208 | if (do_sync) { | ||
1209 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
1210 | osd_end_request(or); | ||
1211 | goto free_args; | ||
1212 | } else { | ||
1213 | args->sbi = sbi; | ||
1214 | |||
1215 | ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | ||
1216 | if (ret) { | ||
1217 | osd_end_request(or); | ||
1218 | goto free_args; | ||
1219 | } | ||
1220 | atomic_inc(&sbi->s_curr_pending); | ||
1221 | goto out; /* deallocation in updatei_done */ | ||
1222 | } | ||
1223 | |||
1224 | free_args: | ||
1225 | kfree(args); | ||
1226 | out: | ||
1227 | EXOFS_DBGMSG("ret=>%d\n", ret); | ||
1228 | return ret; | ||
1229 | } | ||
1230 | |||
1231 | int exofs_write_inode(struct inode *inode, int wait) | ||
1232 | { | ||
1233 | return exofs_update_inode(inode, wait); | ||
1234 | } | ||
1235 | |||
1236 | /* | ||
1237 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | ||
1238 | * do. | ||
1239 | */ | ||
1240 | static void delete_done(struct osd_request *or, void *p) | ||
1241 | { | ||
1242 | struct exofs_sb_info *sbi; | ||
1243 | osd_end_request(or); | ||
1244 | sbi = p; | ||
1245 | atomic_dec(&sbi->s_curr_pending); | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * Called when the refcount of an inode reaches zero. We remove the object | ||
1250 | * from the OSD here. We make sure the object was created before we try and | ||
1251 | * delete it. | ||
1252 | */ | ||
1253 | void exofs_delete_inode(struct inode *inode) | ||
1254 | { | ||
1255 | struct exofs_i_info *oi = exofs_i(inode); | ||
1256 | struct super_block *sb = inode->i_sb; | ||
1257 | struct exofs_sb_info *sbi = sb->s_fs_info; | ||
1258 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
1259 | struct osd_request *or; | ||
1260 | int ret; | ||
1261 | |||
1262 | truncate_inode_pages(&inode->i_data, 0); | ||
1263 | |||
1264 | if (is_bad_inode(inode)) | ||
1265 | goto no_delete; | ||
1266 | |||
1267 | mark_inode_dirty(inode); | ||
1268 | exofs_update_inode(inode, inode_needs_sync(inode)); | ||
1269 | |||
1270 | inode->i_size = 0; | ||
1271 | if (inode->i_blocks) | ||
1272 | exofs_truncate(inode); | ||
1273 | |||
1274 | clear_inode(inode); | ||
1275 | |||
1276 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1277 | if (unlikely(!or)) { | ||
1278 | EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | ||
1279 | return; | ||
1280 | } | ||
1281 | |||
1282 | osd_req_remove_object(or, &obj); | ||
1283 | |||
1284 | /* if we are deleting an obj that hasn't been created yet, wait */ | ||
1285 | if (!obj_created(oi)) { | ||
1286 | BUG_ON(!obj_2bcreated(oi)); | ||
1287 | wait_event(oi->i_wq, obj_created(oi)); | ||
1288 | } | ||
1289 | |||
1290 | ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | ||
1291 | if (ret) { | ||
1292 | EXOFS_ERR( | ||
1293 | "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | ||
1294 | osd_end_request(or); | ||
1295 | return; | ||
1296 | } | ||
1297 | atomic_inc(&sbi->s_curr_pending); | ||
1298 | |||
1299 | return; | ||
1300 | |||
1301 | no_delete: | ||
1302 | clear_inode(inode); | ||
1303 | } | ||