diff options
Diffstat (limited to 'fs/jfs/jfs_metapage.c')
-rw-r--r-- | fs/jfs/jfs_metapage.c | 580 |
1 files changed, 580 insertions, 0 deletions
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c new file mode 100644 index 000000000000..4c0a3ac75c08 --- /dev/null +++ b/fs/jfs/jfs_metapage.c | |||
@@ -0,0 +1,580 @@ | |||
1 | /* | ||
2 | * Copyright (C) International Business Machines Corp., 2000-2003 | ||
3 | * Portions Copyright (C) Christoph Hellwig, 2001-2002 | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License as published by | ||
7 | * the Free Software Foundation; either version 2 of the License, or | ||
8 | * (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
13 | * the GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | */ | ||
19 | |||
20 | #include <linux/fs.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/buffer_head.h> | ||
23 | #include <linux/mempool.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include "jfs_incore.h" | ||
26 | #include "jfs_superblock.h" | ||
27 | #include "jfs_filsys.h" | ||
28 | #include "jfs_metapage.h" | ||
29 | #include "jfs_txnmgr.h" | ||
30 | #include "jfs_debug.h" | ||
31 | |||
/*
 * meta_lock is held in this file around hash chain lookups and updates,
 * changes to mp->count, and calls to lock_metapage().
 */
static DEFINE_SPINLOCK(meta_lock);
33 | |||
#ifdef CONFIG_JFS_STATISTICS
/* Event counters reported through jfs_mpstat_read(). */
static struct {
	uint pagealloc;	/* pages obtained for a metapage in __get_metapage() */
	uint pagefree;	/* pages dropped again in release_metapage() */
	uint lockwait;	/* number of times __lock_metapage() was entered */
} mpStat;
#endif
41 | |||
42 | |||
43 | #define HASH_BITS 10 /* This makes hash_table 1 4K page */ | ||
44 | #define HASH_SIZE (1 << HASH_BITS) | ||
45 | static struct metapage **hash_table = NULL; | ||
46 | static unsigned long hash_order; | ||
47 | |||
48 | |||
49 | static inline int metapage_locked(struct metapage *mp) | ||
50 | { | ||
51 | return test_bit(META_locked, &mp->flag); | ||
52 | } | ||
53 | |||
54 | static inline int trylock_metapage(struct metapage *mp) | ||
55 | { | ||
56 | return test_and_set_bit(META_locked, &mp->flag); | ||
57 | } | ||
58 | |||
59 | static inline void unlock_metapage(struct metapage *mp) | ||
60 | { | ||
61 | clear_bit(META_locked, &mp->flag); | ||
62 | wake_up(&mp->wait); | ||
63 | } | ||
64 | |||
65 | static void __lock_metapage(struct metapage *mp) | ||
66 | { | ||
67 | DECLARE_WAITQUEUE(wait, current); | ||
68 | |||
69 | INCREMENT(mpStat.lockwait); | ||
70 | |||
71 | add_wait_queue_exclusive(&mp->wait, &wait); | ||
72 | do { | ||
73 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
74 | if (metapage_locked(mp)) { | ||
75 | spin_unlock(&meta_lock); | ||
76 | schedule(); | ||
77 | spin_lock(&meta_lock); | ||
78 | } | ||
79 | } while (trylock_metapage(mp)); | ||
80 | __set_current_state(TASK_RUNNING); | ||
81 | remove_wait_queue(&mp->wait, &wait); | ||
82 | } | ||
83 | |||
/*
 * Take the metapage lock, sleeping if somebody else holds it.
 * The caller must hold meta_lock.
 */
static inline void lock_metapage(struct metapage *mp)
{
	/* Fast path: the bit was clear and is now ours. */
	if (!trylock_metapage(mp))
		return;

	__lock_metapage(mp);
}
90 | |||
91 | #define METAPOOL_MIN_PAGES 32 | ||
92 | static kmem_cache_t *metapage_cache; | ||
93 | static mempool_t *metapage_mempool; | ||
94 | |||
95 | static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) | ||
96 | { | ||
97 | struct metapage *mp = (struct metapage *)foo; | ||
98 | |||
99 | if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == | ||
100 | SLAB_CTOR_CONSTRUCTOR) { | ||
101 | mp->lid = 0; | ||
102 | mp->lsn = 0; | ||
103 | mp->flag = 0; | ||
104 | mp->data = NULL; | ||
105 | mp->clsn = 0; | ||
106 | mp->log = NULL; | ||
107 | set_bit(META_free, &mp->flag); | ||
108 | init_waitqueue_head(&mp->wait); | ||
109 | } | ||
110 | } | ||
111 | |||
112 | static inline struct metapage *alloc_metapage(int gfp_mask) | ||
113 | { | ||
114 | return mempool_alloc(metapage_mempool, gfp_mask); | ||
115 | } | ||
116 | |||
117 | static inline void free_metapage(struct metapage *mp) | ||
118 | { | ||
119 | mp->flag = 0; | ||
120 | set_bit(META_free, &mp->flag); | ||
121 | |||
122 | mempool_free(mp, metapage_mempool); | ||
123 | } | ||
124 | |||
125 | int __init metapage_init(void) | ||
126 | { | ||
127 | /* | ||
128 | * Allocate the metapage structures | ||
129 | */ | ||
130 | metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), | ||
131 | 0, 0, init_once, NULL); | ||
132 | if (metapage_cache == NULL) | ||
133 | return -ENOMEM; | ||
134 | |||
135 | metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab, | ||
136 | mempool_free_slab, metapage_cache); | ||
137 | |||
138 | if (metapage_mempool == NULL) { | ||
139 | kmem_cache_destroy(metapage_cache); | ||
140 | return -ENOMEM; | ||
141 | } | ||
142 | /* | ||
143 | * Now the hash list | ||
144 | */ | ||
145 | for (hash_order = 0; | ||
146 | ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; | ||
147 | hash_order++); | ||
148 | hash_table = | ||
149 | (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order); | ||
150 | assert(hash_table); | ||
151 | memset(hash_table, 0, PAGE_SIZE << hash_order); | ||
152 | |||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | void metapage_exit(void) | ||
157 | { | ||
158 | mempool_destroy(metapage_mempool); | ||
159 | kmem_cache_destroy(metapage_cache); | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * Basically same hash as in pagemap.h, but using our hash table | ||
164 | */ | ||
165 | static struct metapage **meta_hash(struct address_space *mapping, | ||
166 | unsigned long index) | ||
167 | { | ||
168 | #define i (((unsigned long)mapping)/ \ | ||
169 | (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) | ||
170 | #define s(x) ((x) + ((x) >> HASH_BITS)) | ||
171 | return hash_table + (s(i + index) & (HASH_SIZE - 1)); | ||
172 | #undef i | ||
173 | #undef s | ||
174 | } | ||
175 | |||
176 | static struct metapage *search_hash(struct metapage ** hash_ptr, | ||
177 | struct address_space *mapping, | ||
178 | unsigned long index) | ||
179 | { | ||
180 | struct metapage *ptr; | ||
181 | |||
182 | for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { | ||
183 | if ((ptr->mapping == mapping) && (ptr->index == index)) | ||
184 | return ptr; | ||
185 | } | ||
186 | |||
187 | return NULL; | ||
188 | } | ||
189 | |||
190 | static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr) | ||
191 | { | ||
192 | if (*hash_ptr) | ||
193 | (*hash_ptr)->hash_prev = mp; | ||
194 | |||
195 | mp->hash_prev = NULL; | ||
196 | mp->hash_next = *hash_ptr; | ||
197 | *hash_ptr = mp; | ||
198 | } | ||
199 | |||
200 | static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr) | ||
201 | { | ||
202 | if (mp->hash_prev) | ||
203 | mp->hash_prev->hash_next = mp->hash_next; | ||
204 | else { | ||
205 | assert(*hash_ptr == mp); | ||
206 | *hash_ptr = mp->hash_next; | ||
207 | } | ||
208 | |||
209 | if (mp->hash_next) | ||
210 | mp->hash_next->hash_prev = mp->hash_prev; | ||
211 | } | ||
212 | |||
213 | struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, | ||
214 | unsigned int size, int absolute, | ||
215 | unsigned long new) | ||
216 | { | ||
217 | struct metapage **hash_ptr; | ||
218 | int l2BlocksPerPage; | ||
219 | int l2bsize; | ||
220 | struct address_space *mapping; | ||
221 | struct metapage *mp; | ||
222 | unsigned long page_index; | ||
223 | unsigned long page_offset; | ||
224 | |||
225 | jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock); | ||
226 | |||
227 | if (absolute) | ||
228 | mapping = inode->i_sb->s_bdev->bd_inode->i_mapping; | ||
229 | else { | ||
230 | /* | ||
231 | * If an nfs client tries to read an inode that is larger | ||
232 | * than any existing inodes, we may try to read past the | ||
233 | * end of the inode map | ||
234 | */ | ||
235 | if ((lblock << inode->i_blkbits) >= inode->i_size) | ||
236 | return NULL; | ||
237 | mapping = inode->i_mapping; | ||
238 | } | ||
239 | |||
240 | hash_ptr = meta_hash(mapping, lblock); | ||
241 | again: | ||
242 | spin_lock(&meta_lock); | ||
243 | mp = search_hash(hash_ptr, mapping, lblock); | ||
244 | if (mp) { | ||
245 | page_found: | ||
246 | if (test_bit(META_stale, &mp->flag)) { | ||
247 | spin_unlock(&meta_lock); | ||
248 | msleep(1); | ||
249 | goto again; | ||
250 | } | ||
251 | mp->count++; | ||
252 | lock_metapage(mp); | ||
253 | spin_unlock(&meta_lock); | ||
254 | if (test_bit(META_discard, &mp->flag)) { | ||
255 | if (!new) { | ||
256 | jfs_error(inode->i_sb, | ||
257 | "__get_metapage: using a " | ||
258 | "discarded metapage"); | ||
259 | release_metapage(mp); | ||
260 | return NULL; | ||
261 | } | ||
262 | clear_bit(META_discard, &mp->flag); | ||
263 | } | ||
264 | jfs_info("__get_metapage: found 0x%p, in hash", mp); | ||
265 | if (mp->logical_size != size) { | ||
266 | jfs_error(inode->i_sb, | ||
267 | "__get_metapage: mp->logical_size != size"); | ||
268 | release_metapage(mp); | ||
269 | return NULL; | ||
270 | } | ||
271 | } else { | ||
272 | l2bsize = inode->i_blkbits; | ||
273 | l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; | ||
274 | page_index = lblock >> l2BlocksPerPage; | ||
275 | page_offset = (lblock - (page_index << l2BlocksPerPage)) << | ||
276 | l2bsize; | ||
277 | if ((page_offset + size) > PAGE_CACHE_SIZE) { | ||
278 | spin_unlock(&meta_lock); | ||
279 | jfs_err("MetaData crosses page boundary!!"); | ||
280 | return NULL; | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Locks held on aggregate inode pages are usually | ||
285 | * not held long, and they are taken in critical code | ||
286 | * paths (committing dirty inodes, txCommit thread) | ||
287 | * | ||
288 | * Attempt to get metapage without blocking, tapping into | ||
289 | * reserves if necessary. | ||
290 | */ | ||
291 | mp = NULL; | ||
292 | if (JFS_IP(inode)->fileset == AGGREGATE_I) { | ||
293 | mp = alloc_metapage(GFP_ATOMIC); | ||
294 | if (!mp) { | ||
295 | /* | ||
296 | * mempool is supposed to protect us from | ||
297 | * failing here. We will try a blocking | ||
298 | * call, but a deadlock is possible here | ||
299 | */ | ||
300 | printk(KERN_WARNING | ||
301 | "__get_metapage: atomic call to mempool_alloc failed.\n"); | ||
302 | printk(KERN_WARNING | ||
303 | "Will attempt blocking call\n"); | ||
304 | } | ||
305 | } | ||
306 | if (!mp) { | ||
307 | struct metapage *mp2; | ||
308 | |||
309 | spin_unlock(&meta_lock); | ||
310 | mp = alloc_metapage(GFP_NOFS); | ||
311 | spin_lock(&meta_lock); | ||
312 | |||
313 | /* we dropped the meta_lock, we need to search the | ||
314 | * hash again. | ||
315 | */ | ||
316 | mp2 = search_hash(hash_ptr, mapping, lblock); | ||
317 | if (mp2) { | ||
318 | free_metapage(mp); | ||
319 | mp = mp2; | ||
320 | goto page_found; | ||
321 | } | ||
322 | } | ||
323 | mp->flag = 0; | ||
324 | lock_metapage(mp); | ||
325 | if (absolute) | ||
326 | set_bit(META_absolute, &mp->flag); | ||
327 | mp->xflag = COMMIT_PAGE; | ||
328 | mp->count = 1; | ||
329 | atomic_set(&mp->nohomeok,0); | ||
330 | mp->mapping = mapping; | ||
331 | mp->index = lblock; | ||
332 | mp->page = NULL; | ||
333 | mp->logical_size = size; | ||
334 | add_to_hash(mp, hash_ptr); | ||
335 | spin_unlock(&meta_lock); | ||
336 | |||
337 | if (new) { | ||
338 | jfs_info("__get_metapage: Calling grab_cache_page"); | ||
339 | mp->page = grab_cache_page(mapping, page_index); | ||
340 | if (!mp->page) { | ||
341 | jfs_err("grab_cache_page failed!"); | ||
342 | goto freeit; | ||
343 | } else { | ||
344 | INCREMENT(mpStat.pagealloc); | ||
345 | unlock_page(mp->page); | ||
346 | } | ||
347 | } else { | ||
348 | jfs_info("__get_metapage: Calling read_cache_page"); | ||
349 | mp->page = read_cache_page(mapping, lblock, | ||
350 | (filler_t *)mapping->a_ops->readpage, NULL); | ||
351 | if (IS_ERR(mp->page)) { | ||
352 | jfs_err("read_cache_page failed!"); | ||
353 | goto freeit; | ||
354 | } else | ||
355 | INCREMENT(mpStat.pagealloc); | ||
356 | } | ||
357 | mp->data = kmap(mp->page) + page_offset; | ||
358 | } | ||
359 | |||
360 | if (new) | ||
361 | memset(mp->data, 0, PSIZE); | ||
362 | |||
363 | jfs_info("__get_metapage: returning = 0x%p", mp); | ||
364 | return mp; | ||
365 | |||
366 | freeit: | ||
367 | spin_lock(&meta_lock); | ||
368 | remove_from_hash(mp, hash_ptr); | ||
369 | free_metapage(mp); | ||
370 | spin_unlock(&meta_lock); | ||
371 | return NULL; | ||
372 | } | ||
373 | |||
374 | void hold_metapage(struct metapage * mp, int force) | ||
375 | { | ||
376 | spin_lock(&meta_lock); | ||
377 | |||
378 | mp->count++; | ||
379 | |||
380 | if (force) { | ||
381 | ASSERT (!(test_bit(META_forced, &mp->flag))); | ||
382 | if (trylock_metapage(mp)) | ||
383 | set_bit(META_forced, &mp->flag); | ||
384 | } else | ||
385 | lock_metapage(mp); | ||
386 | |||
387 | spin_unlock(&meta_lock); | ||
388 | } | ||
389 | |||
390 | static void __write_metapage(struct metapage * mp) | ||
391 | { | ||
392 | int l2bsize = mp->mapping->host->i_blkbits; | ||
393 | int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; | ||
394 | unsigned long page_index; | ||
395 | unsigned long page_offset; | ||
396 | int rc; | ||
397 | |||
398 | jfs_info("__write_metapage: mp = 0x%p", mp); | ||
399 | |||
400 | page_index = mp->page->index; | ||
401 | page_offset = | ||
402 | (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; | ||
403 | |||
404 | lock_page(mp->page); | ||
405 | rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, | ||
406 | page_offset + | ||
407 | mp->logical_size); | ||
408 | if (rc) { | ||
409 | jfs_err("prepare_write return %d!", rc); | ||
410 | ClearPageUptodate(mp->page); | ||
411 | unlock_page(mp->page); | ||
412 | clear_bit(META_dirty, &mp->flag); | ||
413 | return; | ||
414 | } | ||
415 | rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, | ||
416 | page_offset + | ||
417 | mp->logical_size); | ||
418 | if (rc) { | ||
419 | jfs_err("commit_write returned %d", rc); | ||
420 | } | ||
421 | |||
422 | unlock_page(mp->page); | ||
423 | clear_bit(META_dirty, &mp->flag); | ||
424 | |||
425 | jfs_info("__write_metapage done"); | ||
426 | } | ||
427 | |||
428 | static inline void sync_metapage(struct metapage *mp) | ||
429 | { | ||
430 | struct page *page = mp->page; | ||
431 | |||
432 | page_cache_get(page); | ||
433 | lock_page(page); | ||
434 | |||
435 | /* we're done with this page - no need to check for errors */ | ||
436 | if (page_has_buffers(page)) | ||
437 | write_one_page(page, 1); | ||
438 | else | ||
439 | unlock_page(page); | ||
440 | page_cache_release(page); | ||
441 | } | ||
442 | |||
443 | void release_metapage(struct metapage * mp) | ||
444 | { | ||
445 | struct jfs_log *log; | ||
446 | |||
447 | jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag); | ||
448 | |||
449 | spin_lock(&meta_lock); | ||
450 | if (test_bit(META_forced, &mp->flag)) { | ||
451 | clear_bit(META_forced, &mp->flag); | ||
452 | mp->count--; | ||
453 | spin_unlock(&meta_lock); | ||
454 | return; | ||
455 | } | ||
456 | |||
457 | assert(mp->count); | ||
458 | if (--mp->count || atomic_read(&mp->nohomeok)) { | ||
459 | unlock_metapage(mp); | ||
460 | spin_unlock(&meta_lock); | ||
461 | return; | ||
462 | } | ||
463 | |||
464 | if (mp->page) { | ||
465 | set_bit(META_stale, &mp->flag); | ||
466 | spin_unlock(&meta_lock); | ||
467 | kunmap(mp->page); | ||
468 | mp->data = NULL; | ||
469 | if (test_bit(META_dirty, &mp->flag)) | ||
470 | __write_metapage(mp); | ||
471 | if (test_bit(META_sync, &mp->flag)) { | ||
472 | sync_metapage(mp); | ||
473 | clear_bit(META_sync, &mp->flag); | ||
474 | } | ||
475 | |||
476 | if (test_bit(META_discard, &mp->flag)) { | ||
477 | lock_page(mp->page); | ||
478 | block_invalidatepage(mp->page, 0); | ||
479 | unlock_page(mp->page); | ||
480 | } | ||
481 | |||
482 | page_cache_release(mp->page); | ||
483 | mp->page = NULL; | ||
484 | INCREMENT(mpStat.pagefree); | ||
485 | spin_lock(&meta_lock); | ||
486 | } | ||
487 | |||
488 | if (mp->lsn) { | ||
489 | /* | ||
490 | * Remove metapage from logsynclist. | ||
491 | */ | ||
492 | log = mp->log; | ||
493 | LOGSYNC_LOCK(log); | ||
494 | mp->log = NULL; | ||
495 | mp->lsn = 0; | ||
496 | mp->clsn = 0; | ||
497 | log->count--; | ||
498 | list_del(&mp->synclist); | ||
499 | LOGSYNC_UNLOCK(log); | ||
500 | } | ||
501 | remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); | ||
502 | spin_unlock(&meta_lock); | ||
503 | |||
504 | free_metapage(mp); | ||
505 | } | ||
506 | |||
507 | void __invalidate_metapages(struct inode *ip, s64 addr, int len) | ||
508 | { | ||
509 | struct metapage **hash_ptr; | ||
510 | unsigned long lblock; | ||
511 | int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits; | ||
512 | /* All callers are interested in block device's mapping */ | ||
513 | struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping; | ||
514 | struct metapage *mp; | ||
515 | struct page *page; | ||
516 | |||
517 | /* | ||
518 | * First, mark metapages to discard. They will eventually be | ||
519 | * released, but should not be written. | ||
520 | */ | ||
521 | for (lblock = addr; lblock < addr + len; | ||
522 | lblock += 1 << l2BlocksPerPage) { | ||
523 | hash_ptr = meta_hash(mapping, lblock); | ||
524 | again: | ||
525 | spin_lock(&meta_lock); | ||
526 | mp = search_hash(hash_ptr, mapping, lblock); | ||
527 | if (mp) { | ||
528 | if (test_bit(META_stale, &mp->flag)) { | ||
529 | spin_unlock(&meta_lock); | ||
530 | msleep(1); | ||
531 | goto again; | ||
532 | } | ||
533 | |||
534 | clear_bit(META_dirty, &mp->flag); | ||
535 | set_bit(META_discard, &mp->flag); | ||
536 | spin_unlock(&meta_lock); | ||
537 | } else { | ||
538 | spin_unlock(&meta_lock); | ||
539 | page = find_lock_page(mapping, lblock>>l2BlocksPerPage); | ||
540 | if (page) { | ||
541 | block_invalidatepage(page, 0); | ||
542 | unlock_page(page); | ||
543 | page_cache_release(page); | ||
544 | } | ||
545 | } | ||
546 | } | ||
547 | } | ||
548 | |||
#ifdef CONFIG_JFS_STATISTICS
/*
 * jfs_mpstat_read - format the metapage statistics into a caller buffer
 *
 * @buffer: output buffer; the whole report is always formatted into it
 * @start:  set to buffer + offset
 * @offset: position within the report the caller wants to read from
 * @length: maximum number of bytes the caller accepts
 * @eof:    set to 1 when the remainder of the report fits in @length
 * @data:   unused
 *
 * Returns the number of bytes available at *start, clamped to the range
 * 0..@length.  The counters are unsigned, so they are printed with %u.
 */
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %u\n"
		       "page frees = %u\n"
		       "lock waits = %u\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	/* An offset past the end of the report leaves nothing to return. */
	if (len < 0)
		len = 0;

	return len;
}
#endif