author		Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit		1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree		0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/jfs/jfs_txnmgr.c
tag		v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/jfs/jfs_txnmgr.c')
-rw-r--r--	fs/jfs/jfs_txnmgr.c	3131
1 file changed, 3131 insertions(+), 0 deletions(-)
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
new file mode 100644
index 000000000000..f40301d93f74
--- /dev/null
+++ b/fs/jfs/jfs_txnmgr.c
@@ -0,0 +1,3131 @@
/*
 * Copyright (C) International Business Machines Corp., 2000-2005
 * Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * jfs_txnmgr.c: transaction manager
 *
 * notes:
 * transaction starts with txBegin() and ends with txCommit()
 * or txAbort().
 *
 * tlock is acquired at the time of update;
 * (obviate scan at commit time for xtree and dtree)
 * tlock and mp point to each other;
 * (no hashlist for mp -> tlock).
 *
 * special cases:
 * tlock on in-memory inode:
 * in-place tlock in the in-memory inode itself;
 * converted to page lock by iWrite() at commit time.
 *
 * tlock during write()/mmap() under anonymous transaction (tid = 0):
 * transferred (?) to transaction at commit time.
 *
 * use the page itself to update allocation maps
 * (obviate intermediate replication of allocation/deallocation data)
 * hold on to mp+lock thru update of maps
 */
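
/*
 * Editorial sketch (not part of the original source): a minimal
 * journalled update as a caller of this transaction manager might
 * perform it.  The flow uses only functions defined or declared in
 * this file (txBegin, txLock, txCommit, txEnd); the inode and
 * metapage variables are hypothetical placeholders.
 *
 *	tid_t tid;
 *	struct tlock *tlck;
 *
 *	tid = txBegin(ip->i_sb, 0);		// allocate tid/tblock
 *	tlck = txLock(tid, ip, mp, tlckXTREE);	// lock the page being updated
 *	... modify the metapage ...
 *	rc = txCommit(tid, 1, &ip, 0);		// write log records + COMMIT
 *	txEnd(tid);				// return tblock to the freelist
 */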


#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/smp_lock.h>
#include <linux/completion.h>
#include <linux/suspend.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dinode.h"
#include "jfs_imap.h"
#include "jfs_dmap.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * transaction management structures
 */
static struct {
	int freetid;		/* index of a free tid structure */
	int freelock;		/* index of first free lock word */
	wait_queue_head_t freewait;	/* eventlist of free tblock */
	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
	int tlocksInUse;	/* Number of tlocks in use */
	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
/*	struct tblock *sync_queue; * Transactions waiting for data sync */
	struct list_head unlock_queue;	/* Txns waiting to be released */
	struct list_head anon_list;	/* inodes having anonymous txns */
	struct list_head anon_list2;	/* inodes having anonymous txns
					   that couldn't be sync'ed */
} TxAnchor;
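
/*
 * Editorial note (not in the original source): TxBlock and TxLock are
 * index-linked freelists rather than pointer lists.  Index 0 is
 * reserved as the list terminator (tid 0 and lid 0 mean "none" or
 * "anonymous"), so TxAnchor.freetid/freelock hold the index of the
 * first free entry and each entry's .next holds the index of the one
 * after it.  For example, right after txInit():
 *
 *	freelock = 1, TxLock[1].next = 2, ..., TxLock[nTxLock-1].next = 0
 */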

int jfs_tlocks_low;		/* Indicates low number of available tlocks */

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint txBegin;
	uint txBegin_barrier;
	uint txBegin_lockslow;
	uint txBegin_freetid;
	uint txBeginAnon;
	uint txBeginAnon_barrier;
	uint txBeginAnon_lockslow;
	uint txLockAlloc;
	uint txLockAlloc_freelock;
} TxStat;
#endif

static int nTxBlock = -1;	/* number of transaction blocks */
module_param(nTxBlock, int, 0);
MODULE_PARM_DESC(nTxBlock,
		 "Number of transaction blocks (max:65536)");

static int nTxLock = -1;	/* number of transaction locks */
module_param(nTxLock, int, 0);
MODULE_PARM_DESC(nTxLock,
		 "Number of transaction locks (max:65536)");

struct tblock *TxBlock;	/* transaction block table */
static int TxLockLWM;	/* Low water mark for number of txLocks used */
static int TxLockHWM;	/* High water mark for number of txLocks used */
static int TxLockVHWM;	/* Very High water mark */
struct tlock *TxLock;	/* transaction lock table */


/*
 * transaction management lock
 */
static DEFINE_SPINLOCK(jfsTxnLock);

#define TXN_LOCK()	spin_lock(&jfsTxnLock)
#define TXN_UNLOCK()	spin_unlock(&jfsTxnLock)

#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
#define LAZY_UNLOCK(flags)	spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)

DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait);
DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
static int jfs_commit_thread_waking;

/*
 * Retry logic exists outside these macros to protect against spurious wakeups.
 */
static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(event, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	TXN_UNLOCK();
	schedule();
	current->state = TASK_RUNNING;
	remove_wait_queue(event, &wait);
}

#define TXN_SLEEP(event)\
{\
	TXN_SLEEP_DROP_LOCK(event);\
	TXN_LOCK();\
}

#define TXN_WAKEUP(event) wake_up_all(event)

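/*
 * Editorial usage sketch (not in the original source): because a
 * sleeper can be woken spuriously, callers re-test their condition in
 * a loop around TXN_SLEEP() while holding TXN_LOCK(), as
 * txLockAlloc() below does:
 *
 *	TXN_LOCK();
 *	while (!(lid = TxAnchor.freelock))
 *		TXN_SLEEP(&TxAnchor.freelockwait);
 *	...
 *	TXN_UNLOCK();
 */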

/*
 * statistics
 */
static struct {
	tid_t maxtid;		/* 4: biggest tid ever used */
	lid_t maxlid;		/* 4: biggest lid ever used */
	int ntid;		/* 4: # of transactions performed */
	int nlid;		/* 4: # of tlocks acquired */
	int waitlock;		/* 4: # of tlock wait */
} stattx;


/*
 * external references
 */
extern int lmGroupCommit(struct jfs_log *, struct tblock *);
extern int jfs_commit_inode(struct inode *, int);
extern int jfs_stop_threads;

extern struct completion jfsIOwait;

/*
 * forward references
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck, struct commit * cd);
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
		struct tblock * tblk);
static void txForce(struct tblock * tblk);
static int txLog(struct jfs_log * log, struct tblock * tblk,
		struct commit * cd);
static void txUpdateMap(struct tblock * tblk);
static void txRelease(struct tblock * tblk);
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		struct tlock * tlck);
static void LogSyncRelease(struct metapage * mp);

/*
 * transaction block/lock management
 * ---------------------------------
 */

/*
 * Get a transaction lock from the free list. If the number in use is
 * greater than the high water mark, wake up the sync daemon. This should
 * free some anonymous transaction locks. (TXN_LOCK must be held.)
 */
static lid_t txLockAlloc(void)
{
	lid_t lid;

	INCREMENT(TxStat.txLockAlloc);
	if (!TxAnchor.freelock) {
		INCREMENT(TxStat.txLockAlloc_freelock);
	}

	while (!(lid = TxAnchor.freelock))
		TXN_SLEEP(&TxAnchor.freelockwait);
	TxAnchor.freelock = TxLock[lid].next;
	HIGHWATERMARK(stattx.maxlid, lid);
	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
		jfs_info("txLockAlloc tlocks low");
		jfs_tlocks_low = 1;
		wake_up(&jfs_sync_thread_wait);
	}

	return lid;
}

static void txLockFree(lid_t lid)
{
	TxLock[lid].next = TxAnchor.freelock;
	TxAnchor.freelock = lid;
	TxAnchor.tlocksInUse--;
	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
		jfs_info("txLockFree jfs_tlocks_low no more");
		jfs_tlocks_low = 0;
		TXN_WAKEUP(&TxAnchor.lowlockwait);
	}
	TXN_WAKEUP(&TxAnchor.freelockwait);
}

/*
 * NAME:	txInit()
 *
 * FUNCTION:	initialize transaction management structures
 *
 * RETURN:
 *
 * serialization: single thread at jfs_init()
 */
int txInit(void)
{
	int k, size;
	struct sysinfo si;

	/* Set defaults for nTxLock and nTxBlock if unset */

	if (nTxLock == -1) {
		if (nTxBlock == -1) {
			/* Base default on memory size */
			si_meminfo(&si);
			if (si.totalram > (256 * 1024)) /* 1 GB */
				nTxLock = 64 * 1024;
			else
				nTxLock = si.totalram >> 2;
		} else if (nTxBlock > (8 * 1024))
			nTxLock = 64 * 1024;
		else
			nTxLock = nTxBlock << 3;
	}
	if (nTxBlock == -1)
		nTxBlock = nTxLock >> 3;

	/* Verify tunable parameters */
	if (nTxBlock < 16)
		nTxBlock = 16;	/* No one should set it this low */
	if (nTxBlock > 65536)
		nTxBlock = 65536;
	if (nTxLock < 256)
		nTxLock = 256;	/* No one should set it this low */
	if (nTxLock > 65536)
		nTxLock = 65536;

	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
	       nTxBlock, nTxLock);
	/*
	 * initialize transaction block (tblock) table
	 *
	 * transaction id (tid) = tblock index
	 * tid = 0 is reserved.
	 */
	TxLockLWM = (nTxLock * 4) / 10;
	TxLockHWM = (nTxLock * 7) / 10;
	TxLockVHWM = (nTxLock * 8) / 10;
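	/*
	 * Editorial example (not in the original source): with the
	 * default cap nTxLock = 65536, these integer divisions give
	 * LWM = 26214, HWM = 45875 and VHWM = 52428 tlocks in use.
	 */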

	size = sizeof(struct tblock) * nTxBlock;
	TxBlock = (struct tblock *) vmalloc(size);
	if (TxBlock == NULL)
		return -ENOMEM;

	for (k = 1; k < nTxBlock - 1; k++) {
		TxBlock[k].next = k + 1;
		init_waitqueue_head(&TxBlock[k].gcwait);
		init_waitqueue_head(&TxBlock[k].waitor);
	}
	TxBlock[k].next = 0;
	init_waitqueue_head(&TxBlock[k].gcwait);
	init_waitqueue_head(&TxBlock[k].waitor);

	TxAnchor.freetid = 1;
	init_waitqueue_head(&TxAnchor.freewait);

	stattx.maxtid = 1;	/* statistics */

	/*
	 * initialize transaction lock (tlock) table
	 *
	 * transaction lock id = tlock index
	 * tlock id = 0 is reserved.
	 */
	size = sizeof(struct tlock) * nTxLock;
	TxLock = (struct tlock *) vmalloc(size);
	if (TxLock == NULL) {
		vfree(TxBlock);
		return -ENOMEM;
	}

	/* initialize tlock table */
	for (k = 1; k < nTxLock - 1; k++)
		TxLock[k].next = k + 1;
	TxLock[k].next = 0;
	init_waitqueue_head(&TxAnchor.freelockwait);
	init_waitqueue_head(&TxAnchor.lowlockwait);

	TxAnchor.freelock = 1;
	TxAnchor.tlocksInUse = 0;
	INIT_LIST_HEAD(&TxAnchor.anon_list);
	INIT_LIST_HEAD(&TxAnchor.anon_list2);

	LAZY_LOCK_INIT();
	INIT_LIST_HEAD(&TxAnchor.unlock_queue);

	stattx.maxlid = 1;	/* statistics */

	return 0;
}

/*
 * NAME:	txExit()
 *
 * FUNCTION:	clean up when module is unloaded
 */
void txExit(void)
{
	vfree(TxLock);
	TxLock = NULL;
	vfree(TxBlock);
	TxBlock = NULL;
}


/*
 * NAME:	txBegin()
 *
 * FUNCTION:	start a transaction.
 *
 * PARAMETER:	sb	- superblock
 *		flag	- force for nested tx;
 *
 * RETURN:	tid	- transaction id
 *
 * note: the force flag allows starting a tx for a nested tx
 * to prevent deadlock on the logsync barrier;
 */
tid_t txBegin(struct super_block *sb, int flag)
{
	tid_t t;
	struct tblock *tblk;
	struct jfs_log *log;

	jfs_info("txBegin: flag = 0x%x", flag);
	log = JFS_SBI(sb)->log;

	TXN_LOCK();

	INCREMENT(TxStat.txBegin);

      retry:
	if (!(flag & COMMIT_FORCE)) {
		/*
		 * synchronize with logsync barrier
		 */
		if (test_bit(log_SYNCBARRIER, &log->flag) ||
		    test_bit(log_QUIESCE, &log->flag)) {
			INCREMENT(TxStat.txBegin_barrier);
			TXN_SLEEP(&log->syncwait);
			goto retry;
		}
	}
	if (flag == 0) {
		/*
		 * Don't begin transaction if we're getting starved for tlocks
		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
		 * free tlocks)
		 */
		if (TxAnchor.tlocksInUse > TxLockVHWM) {
			INCREMENT(TxStat.txBegin_lockslow);
			TXN_SLEEP(&TxAnchor.lowlockwait);
			goto retry;
		}
	}

	/*
	 * allocate transaction id/block
	 */
	if ((t = TxAnchor.freetid) == 0) {
		jfs_info("txBegin: waiting for free tid");
		INCREMENT(TxStat.txBegin_freetid);
		TXN_SLEEP(&TxAnchor.freewait);
		goto retry;
	}

	tblk = tid_to_tblock(t);

	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
		/* Don't let a non-forced transaction take the last tblk */
		jfs_info("txBegin: waiting for free tid");
		INCREMENT(TxStat.txBegin_freetid);
		TXN_SLEEP(&TxAnchor.freewait);
		goto retry;
	}

	TxAnchor.freetid = tblk->next;

	/*
	 * initialize transaction
	 */

	/*
	 * We can't zero the whole thing or we screw up another thread being
	 * awakened after sleeping on tblk->waitor
	 *
	 * memset(tblk, 0, sizeof(struct tblock));
	 */
	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;

	tblk->sb = sb;
	++log->logtid;
	tblk->logtid = log->logtid;

	++log->active;

	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
	INCREMENT(stattx.ntid);	/* statistics */

	TXN_UNLOCK();

	jfs_info("txBegin: returning tid = %d", t);

	return t;
}


/*
 * NAME:	txBeginAnon()
 *
 * FUNCTION:	start an anonymous transaction.
 *		Blocks if logsync or available tlocks are low to prevent
 *		anonymous tlocks from depleting supply.
 *
 * PARAMETER:	sb	- superblock
 *
 * RETURN:	none
 */
void txBeginAnon(struct super_block *sb)
{
	struct jfs_log *log;

	log = JFS_SBI(sb)->log;

	TXN_LOCK();
	INCREMENT(TxStat.txBeginAnon);

      retry:
	/*
	 * synchronize with logsync barrier
	 */
	if (test_bit(log_SYNCBARRIER, &log->flag) ||
	    test_bit(log_QUIESCE, &log->flag)) {
		INCREMENT(TxStat.txBeginAnon_barrier);
		TXN_SLEEP(&log->syncwait);
		goto retry;
	}

	/*
	 * Don't begin transaction if we're getting starved for tlocks
	 */
	if (TxAnchor.tlocksInUse > TxLockVHWM) {
		INCREMENT(TxStat.txBeginAnon_lockslow);
		TXN_SLEEP(&TxAnchor.lowlockwait);
		goto retry;
	}
	TXN_UNLOCK();
}


/*
 * txEnd()
 *
 * function: free specified transaction block.
 *
 * logsync barrier processing:
 *
 * serialization:
 */
void txEnd(tid_t tid)
{
	struct tblock *tblk = tid_to_tblock(tid);
	struct jfs_log *log;

	jfs_info("txEnd: tid = %d", tid);
	TXN_LOCK();

	/*
	 * wakeup transactions waiting on the page locked
	 * by the current transaction
	 */
	TXN_WAKEUP(&tblk->waitor);

	log = JFS_SBI(tblk->sb)->log;

	/*
	 * Lazy commit thread can't free this guy until we mark it UNLOCKED,
	 * otherwise, we would be left with a transaction that may have been
	 * reused.
	 *
	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
	 * routine.
	 */
	if (tblk->flag & tblkGC_LAZY) {
		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
		TXN_UNLOCK();

		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
		tblk->flag |= tblkGC_UNLOCKED;
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
		return;
	}

	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);

	assert(tblk->next == 0);

	/*
	 * insert tblock back on freelist
	 */
	tblk->next = TxAnchor.freetid;
	TxAnchor.freetid = tid;

	/*
	 * mark the tblock not active
	 */
	if (--log->active == 0) {
		clear_bit(log_FLUSH, &log->flag);

		/*
		 * synchronize with logsync barrier
		 */
		if (test_bit(log_SYNCBARRIER, &log->flag)) {
			/* forward log syncpt */
			/* lmSync(log); */

			jfs_info("log barrier off: 0x%x", log->lsn);

			/* enable new transactions start */
			clear_bit(log_SYNCBARRIER, &log->flag);

			/* wakeup all waitors for logsync barrier */
			TXN_WAKEUP(&log->syncwait);
		}
	}

	/*
	 * wakeup all waitors for a free tblock
	 */
	TXN_WAKEUP(&TxAnchor.freewait);

	TXN_UNLOCK();
}


/*
 * txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *
 * return:	transaction lock id
 *
 * serialization:
 */
struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
		     int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int dir_xtree = 0;
	lid_t lid;
	tid_t xtid;
	struct tlock *tlck;
	struct xtlock *xtlck;
	struct linelock *linelock;
	xtpage_t *p;
	struct tblock *tblk;

	TXN_LOCK();

	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
	    !(mp->xflag & COMMIT_PAGE)) {
		/*
		 * Directory inode is special. It can have both an xtree tlock
		 * and a dtree tlock associated with it.
		 */
		dir_xtree = 1;
		lid = jfs_ip->xtlid;
	} else
		lid = mp->lid;

	/* is page not locked by a transaction ? */
	if (lid == 0)
		goto allocateLock;

	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);

	/* is page locked by the requester transaction ? */
	tlck = lid_to_tlock(lid);
	if ((xtid = tlck->tid) == tid)
		goto grantLock;

	/*
	 * is page locked by anonymous transaction/lock ?
	 *
	 * (page update without transaction (i.e., file write) is
	 * locked under anonymous transaction tid = 0:
	 * anonymous tlocks maintained on anonymous tlock list of
	 * the inode of the page and available to all anonymous
	 * transactions until txCommit() time at which point
	 * they are transferred to the transaction tlock list of
	 * the committing transaction of the inode)
	 */
	if (xtid == 0) {
		tlck->tid = tid;
		tblk = tid_to_tblock(tid);
		/*
		 * The order of the tlocks in the transaction is important
		 * (during truncate, child xtree pages must be freed before
		 * parent's tlocks change the working map).
		 * Take tlock off anonymous list and add to tail of
		 * transaction list
		 *
		 * Note: We really need to get rid of the tid & lid and
		 * use list_head's. This code is getting UGLY!
		 */
		if (jfs_ip->atlhead == lid) {
			if (jfs_ip->atltail == lid) {
				/* only anonymous txn.
				 * Remove from anon_list
				 */
				list_del_init(&jfs_ip->anon_inode_list);
			}
			jfs_ip->atlhead = tlck->next;
		} else {
			lid_t last;
			for (last = jfs_ip->atlhead;
			     lid_to_tlock(last)->next != lid;
			     last = lid_to_tlock(last)->next) {
				assert(last);
			}
			lid_to_tlock(last)->next = tlck->next;
			if (jfs_ip->atltail == lid)
				jfs_ip->atltail = last;
		}

		/* insert the tlock at tail of transaction tlock list */

		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;

		goto grantLock;
	}

	goto waitLock;

	/*
	 * allocate a tlock
	 */
      allocateLock:
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	/* mark tlock for meta-data page */
	if (mp->xflag & COMMIT_PAGE) {

		tlck->flag = tlckPAGELOCK;

		/* mark the page dirty and nohomeok */
		mark_metapage_dirty(mp);
		atomic_inc(&mp->nohomeok);

		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
			 mp, atomic_read(&mp->nohomeok), tid, tlck);

		/* if anonymous transaction, and buffer is on the group
		 * commit synclist, mark inode to show this. This will
		 * prevent the buffer from being marked nohomeok for too
		 * long a time.
		 */
		if ((tid == 0) && mp->lsn)
			set_cflag(COMMIT_Synclist, ip);
	}
	/* mark tlock for in-memory inode */
	else
		tlck->flag = tlckINODELOCK;

	tlck->type = 0;

	/* bind the tlock and the page */
	tlck->ip = ip;
	tlck->mp = mp;
	if (dir_xtree)
		jfs_ip->xtlid = lid;
	else
		mp->lid = lid;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
		}
	}

	/* initialize type dependent area for linelock */
	linelock = (struct linelock *) & tlck->lock;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKSHORT;
	linelock->index = 0;

	switch (type & tlckTYPE) {
	case tlckDTREE:
		linelock->l2linesize = L2DTSLOTSIZE;
		break;

	case tlckXTREE:
		linelock->l2linesize = L2XTSLOTSIZE;

		xtlck = (struct xtlock *) linelock;
		xtlck->header.offset = 0;
		xtlck->header.length = 2;

		if (type & tlckNEW) {
			xtlck->lwm.offset = XTENTRYSTART;
		} else {
			if (mp->xflag & COMMIT_PAGE)
				p = (xtpage_t *) mp->data;
			else
				p = &jfs_ip->i_xtroot;
			xtlck->lwm.offset =
			    le16_to_cpu(p->header.nextindex);
		}
		xtlck->lwm.length = 0;	/* ! */
		xtlck->twm.offset = 0;
		xtlck->hwm.offset = 0;

		xtlck->index = 2;
		break;

	case tlckINODE:
		linelock->l2linesize = L2INODESLOTSIZE;
		break;

	case tlckDATA:
		linelock->l2linesize = L2DATASLOTSIZE;
		break;

	default:
		jfs_err("UFO tlock:0x%p", tlck);
	}

	/*
	 * update tlock vector
	 */
      grantLock:
	tlck->type |= type;

	TXN_UNLOCK();

	return tlck;

	/*
	 * page is being locked by another transaction:
	 */
      waitLock:
	/* Only locks on ipimap or ipaimap should reach here */
	/* assert(jfs_ip->fileset == AGGREGATE_I); */
	if (jfs_ip->fileset != AGGREGATE_I) {
		jfs_err("txLock: trying to lock locked page!");
		dump_mem("ip", ip, sizeof(struct inode));
		dump_mem("mp", mp, sizeof(struct metapage));
		dump_mem("Locker's tblk", tid_to_tblock(tid),
			 sizeof(struct tblock));
		dump_mem("Tlock", tlck, sizeof(struct tlock));
		BUG();
	}
	INCREMENT(stattx.waitlock);	/* statistics */
	release_metapage(mp);

	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
		 tid, xtid, lid);
	TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
	jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);

	return NULL;
}


/*
 * NAME:	txRelease()
 *
 * FUNCTION:	Release buffers associated with transaction locks, but don't
 *		mark homeok yet. This allows other transactions to modify
 *		buffers, but won't let them go to disk until commit record
 *		actually gets written.
 *
 * PARAMETER:
 *		tblk	-
 *
 * RETURN:	Errors from subroutines.
 */
static void txRelease(struct tblock * tblk)
{
	struct metapage *mp;
	lid_t lid;
	struct tlock *tlck;

	TXN_LOCK();

	for (lid = tblk->next; lid; lid = tlck->next) {
		tlck = lid_to_tlock(lid);
		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);
			mp->lid = 0;
		}
	}

	/*
	 * wakeup transactions waiting on a page locked
	 * by the current transaction
	 */
	TXN_WAKEUP(&tblk->waitor);

	TXN_UNLOCK();
}


/*
 * NAME:	txUnlock()
 *
 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 *		objects and frees their lockwords.
 */
static void txUnlock(struct tblock * tblk)
{
	struct tlock *tlck;
	struct linelock *linelock;
	lid_t lid, next, llid, k;
	struct metapage *mp;
	struct jfs_log *log;
	int difft, diffp;

	jfs_info("txUnlock: tblk = 0x%p", tblk);
	log = JFS_SBI(tblk->sb)->log;

	/*
	 * mark page under tlock homeok (its log has been written):
	 */
	for (lid = tblk->next; lid; lid = next) {
		tlck = lid_to_tlock(lid);
		next = tlck->next;

		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);

		/* unbind page from tlock */
		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);

			/* hold buffer
			 *
			 * It's possible that someone else has the metapage.
			 * The only things we're changing are nohomeok, which
			 * is handled atomically, and clsn which is protected
			 * by the LOGSYNC_LOCK.
			 */
			hold_metapage(mp, 1);

			assert(atomic_read(&mp->nohomeok) > 0);
			atomic_dec(&mp->nohomeok);

			/* inherit younger/larger clsn */
			LOGSYNC_LOCK(log);
			if (mp->clsn) {
				logdiff(difft, tblk->clsn, log);
				logdiff(diffp, mp->clsn, log);
				if (difft > diffp)
					mp->clsn = tblk->clsn;
			} else
				mp->clsn = tblk->clsn;
			LOGSYNC_UNLOCK(log);

			assert(!(tlck->flag & tlckFREEPAGE));

			if (tlck->flag & tlckWRITEPAGE) {
				write_metapage(mp);
			} else {
				/* release page which has been forced */
				release_metapage(mp);
			}
		}

		/* insert tlock, and linelock(s) of the tlock if any,
		 * at head of freelist
		 */
		TXN_LOCK();

		llid = ((struct linelock *) & tlck->lock)->next;
		while (llid) {
			linelock = (struct linelock *) lid_to_tlock(llid);
			k = linelock->next;
			txLockFree(llid);
			llid = k;
		}
		txLockFree(lid);

		TXN_UNLOCK();
	}
	tblk->next = tblk->last = 0;

	/*
	 * remove tblock from logsynclist
	 * (allocation map pages inherited lsn of tblk and
	 * have been inserted in logsync list at txUpdateMap())
	 */
	if (tblk->lsn) {
		LOGSYNC_LOCK(log);
		log->count--;
		list_del(&tblk->synclist);
		LOGSYNC_UNLOCK(log);
	}
}


/*
 * txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
 *	for freed page, maplock is used as xtlock/dtlock type;
 */
struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	lid_t lid;
	struct tblock *tblk;
	struct tlock *tlck;
	struct maplock *maplock;

	TXN_LOCK();

	/*
	 * allocate a tlock
	 */
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	/* bind the tlock and the object */
	tlck->flag = tlckINODELOCK;
	tlck->ip = ip;
	tlck->mp = NULL;

	tlck->type = type;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
		}
	}

	TXN_UNLOCK();

	/* initialize type dependent area for maplock */
	maplock = (struct maplock *) & tlck->lock;
	maplock->next = 0;
	maplock->maxcnt = 0;
	maplock->index = 0;

	return tlck;
}


/*
 * txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 */
struct linelock *txLinelock(struct linelock * tlock)
{
	lid_t lid;
	struct tlock *tlck;
	struct linelock *linelock;

	TXN_LOCK();

	/* allocate a TxLock structure */
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	TXN_UNLOCK();

	/* initialize linelock */
	linelock = (struct linelock *) tlck;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKLONG;
	linelock->index = 0;

	/* append linelock after tlock */
	linelock->next = tlock->next;
	tlock->next = lid;

	return linelock;
}



/*
 * transaction commit management
 * -----------------------------
 */

/*
 * NAME:	txCommit()
 *
 * FUNCTION:	commit the changes to the objects specified in
 *		clist. For journalled segments only the
 *		changes of the caller are committed, i.e., by tid.
 *		for non-journalled segments the data are flushed to
 *		disk and then the change to the disk inode and indirect
 *		blocks committed (so blocks newly allocated to the
 *		segment will be made a part of the segment atomically).
 *
 *		all of the segments specified in clist must be in
 *		one file system. no more than 6 segments are needed
 *		to handle all unix svcs.
 *
 *		if the i_nlink field (i.e. disk inode link count)
 *		is zero, and the type of inode is a regular file or
 *		directory, or symbolic link, the inode is truncated
 *		to zero length. the truncation is committed but the
 *		VM resources are unaffected until it is closed (see
 *		iput and iclose).
 *
 * PARAMETER:
 *
 * RETURN:
 *
 * serialization:
 *		on entry the inode lock on each segment is assumed
 *		to be held.
 *
 * i/o error:
 */
int txCommit(tid_t tid,		/* transaction identifier */
	     int nip,		/* number of inodes to commit */
	     struct inode **iplist,	/* list of inode to commit */
	     int flag)
{
	int rc = 0;
	struct commit cd;
	struct jfs_log *log;
	struct tblock *tblk;
	struct lrd *lrd;
	int lsn;
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	int k, n;
	ino_t top;
	struct super_block *sb;

	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
	/* is read-only file system ? */
	if (isReadOnly(iplist[0])) {
		rc = -EROFS;
		goto TheEnd;
	}

	sb = cd.sb = iplist[0]->i_sb;
	cd.tid = tid;

	if (tid == 0)
		tid = txBegin(sb, 0);
	tblk = tid_to_tblock(tid);

	/*
	 * initialize commit structure
	 */
	log = JFS_SBI(sb)->log;
	cd.log = log;

	/* initialize log record descriptor in commit */
	lrd = &cd.lrd;
	lrd->logtid = cpu_to_le32(tblk->logtid);
	lrd->backchain = 0;

	tblk->xflag |= flag;

	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
		tblk->xflag |= COMMIT_LAZY;
	/*
	 * prepare non-journaled objects for commit
	 *
	 * flush data pages of non-journaled file
	 * to prevent the file getting non-initialized disk blocks
	 * in case of crash.
	 * (new blocks - )
	 */
	cd.iplist = iplist;
	cd.nip = nip;

	/*
	 * acquire transaction lock on (on-disk) inodes
	 *
	 * update on-disk inode from in-memory inode
	 * acquiring transaction locks for AFTER records
	 * on the on-disk inode of file object
	 *
	 * sort the inodes array by inode number in descending order
	 * to prevent deadlock when acquiring transaction lock
	 * of on-disk inodes on multiple on-disk inode pages by
	 * multiple concurrent transactions
	 */
	for (k = 0; k < cd.nip; k++) {
		top = (cd.iplist[k])->i_ino;
		for (n = k + 1; n < cd.nip; n++) {
			ip = cd.iplist[n];
			if (ip->i_ino > top) {
				top = ip->i_ino;
				cd.iplist[n] = cd.iplist[k];
				cd.iplist[k] = ip;
			}
		}
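
		/*
		 * Editorial example (not in the original source): this
		 * selection sort leaves iplist in descending i_ino
		 * order, e.g. inode numbers {12, 99, 5} are committed
		 * as {99, 12, 5}, so concurrent transactions always
		 * acquire on-disk inode page tlocks in the same
		 * global order.
		 */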

		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * BUGBUG - This code has temporarily been removed. The
		 * intent is to ensure that any file data is written before
		 * the metadata is committed to the journal. This prevents
		 * uninitialized data from appearing in a file after the
		 * journal has been replayed. (The uninitialized data
		 * could be sensitive data removed by another user.)
		 *
		 * The problem now is that we are holding the IWRITELOCK
		 * on the inode, and calling filemap_fdatawrite on an
		 * unmapped page will cause a deadlock in jfs_get_block.
		 *
		 * The long term solution is to pare down the use of
		 * IWRITELOCK. We are currently holding it too long.
		 * We could also be smarter about which data pages need
		 * to be written before the transaction is committed and
		 * when we don't need to worry about it at all.
		 *
		 * if ((!S_ISDIR(ip->i_mode))
		 *    && (tblk->flag & COMMIT_DELETE) == 0) {
		 *	filemap_fdatawrite(ip->i_mapping);
		 *	filemap_fdatawait(ip->i_mapping);
		 * }
		 */

		/*
		 * Mark inode as not dirty. It will still be on the dirty
		 * inode list, but we'll know not to commit it again unless
		 * it gets marked dirty again
		 */
		clear_cflag(COMMIT_Dirty, ip);

		/* inherit anonymous tlock(s) of inode */
		if (jfs_ip->atlhead) {
			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
			tblk->next = jfs_ip->atlhead;
			if (!tblk->last)
				tblk->last = jfs_ip->atltail;
			jfs_ip->atlhead = jfs_ip->atltail = 0;
			TXN_LOCK();
			list_del_init(&jfs_ip->anon_inode_list);
			TXN_UNLOCK();
		}

		/*
		 * acquire transaction lock on on-disk inode page
		 * (become first tlock of the tblk's tlock list)
		 */
		if (((rc = diWrite(tid, ip))))
			goto out;
	}

	/*
	 * write log records from transaction locks
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	if ((rc = txLog(log, tblk, &cd)))
		goto TheEnd;

	/*
	 * Ensure that inode isn't reused before
	 * lazy commit thread finishes processing
	 */
	if (tblk->xflag & COMMIT_DELETE) {
		atomic_inc(&tblk->u.ip->i_count);
		/*
		 * Avoid a rare deadlock
		 *
		 * If the inode is locked, we may be blocked in
		 * jfs_commit_inode. If so, we don't want the
		 * lazy_commit thread doing the last iput() on the inode
		 * since that may block on the locked inode. Instead,
		 * commit the transaction synchronously, so the last iput
		 * will be done by the calling thread (or later)
		 */
		if (tblk->u.ip->i_state & I_LOCK)
			tblk->xflag &= ~COMMIT_LAZY;
	}

	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
	       ((tblk->u.ip->i_nlink == 0) &&
		!test_cflag(COMMIT_Nolink, tblk->u.ip)));

	/*
	 * write COMMIT log record
	 */
	lrd->type = cpu_to_le16(LOG_COMMIT);
	lrd->length = 0;
	lsn = lmLog(log, tblk, lrd, NULL);

	lmGroupCommit(log, tblk);

	/*
	 * - transaction is now committed -
	 */

	/*
	 * force pages in careful update
	 * (imap addressing structure update)
	 */
	if (flag & COMMIT_FORCE)
		txForce(tblk);

	/*
	 * update allocation map.
	 *
	 * update inode allocation map and inode:
	 * free pager lock on memory object of inode if any.
	 * update block allocation map.
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	if (tblk->xflag & COMMIT_FORCE)
		txUpdateMap(tblk);

	/*
	 * free transaction locks and pageout/free pages
	 */
	txRelease(tblk);

	if ((tblk->flag & tblkGC_LAZY) == 0)
		txUnlock(tblk);


	/*
	 * reset in-memory object state
	 */
	for (k = 0; k < cd.nip; k++) {
		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * reset in-memory inode state
		 */
		jfs_ip->bxflag = 0;
		jfs_ip->blid = 0;
	}

      out:
	if (rc != 0)
		txAbort(tid, 1);

      TheEnd:
	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
	return rc;
}
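
/*
 * Editorial usage sketch (not in the original source): a typical
 * caller commits one or two inodes per transaction; the directory
 * and file inode variables below are hypothetical placeholders:
 *
 *	struct inode *iplist[2];
 *
 *	iplist[0] = dip;	// parent directory inode
 *	iplist[1] = ip;		// file inode being created/unlinked
 *	rc = txCommit(tid, 2, iplist, 0);
 *	txEnd(tid);
 */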


/*
 * NAME:	txLog()
 *
 * FUNCTION:	Writes AFTER log records for all lines modified
 *		by tid for segments specified by inodes in comdata.
 *		Code assumes only WRITELOCKS are recorded in lockwords.
 *
 * PARAMETERS:
 *
 * RETURN :
 */
static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
{
	int rc = 0;
	struct inode *ip;
	lid_t lid;
	struct tlock *tlck;
	struct lrd *lrd = &cd->lrd;

	/*
	 * write log record(s) for each tlock of transaction,
	 */
	for (lid = tblk->next; lid; lid = tlck->next) {
		tlck = lid_to_tlock(lid);

		tlck->flag |= tlckLOG;

		/* initialize lrd common */
		ip = tlck->ip;
		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);

		/* write log record of page from the tlock */
		switch (tlck->type & tlckTYPE) {
		case tlckXTREE:
			xtLog(log, tblk, lrd, tlck);
			break;

		case tlckDTREE:
			dtLog(log, tblk, lrd, tlck);
			break;

		case tlckINODE:
			diLog(log, tblk, lrd, tlck, cd);
			break;

		case tlckMAP:
			mapLog(log, tblk, lrd, tlck);
			break;

		case tlckDATA:
			dataLog(log, tblk, lrd, tlck);
			break;

		default:
			jfs_err("UFO tlock:0x%p", tlck);
		}
	}

	return rc;
}


/*
 * diLog()
 *
 * function: log inode tlock and format maplock to update bmap;
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	  struct tlock * tlck, struct commit * cd)
{
	int rc = 0;
	struct metapage *mp;
	pxd_t *pxd;
	struct pxd_lock *pxdlock;

	mp = tlck->mp;

	/* initialize as REDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	/*
	 * inode after image
	 */
	if (tlck->type & tlckENTRY) {
		/* log after-image for logredo(): */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		// *pxd = mp->cm_pxd;
		PXDaddress(pxd, mp->index);
		PXDlength(pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	} else if (tlck->type & tlckFREE) {
		/*
		 * free inode extent
		 *
		 * (pages of the freed inode extent have been invalidated and
		 * a maplock for free of the extent has been formatted at
		 * txLock() time);
		 *
		 * the tlock had been acquired on the inode allocation map page
		 * (iag) that specifies the freed extent, even though the map
		 * page is not itself logged, to prevent pageout of the map
		 * page before the log;
		 */

		/* log LOG_NOREDOINOEXT of the freed inode extent for
		 * logredo() to start NoRedoPage filters, and to update
		 * imap and bmap for free of the extent;
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
		/*
		 * For the LOG_NOREDOINOEXT record, we need
		 * to pass the IAG number and inode extent
		 * index (within that IAG) from which the
		 * extent is being released. These have been
		 * passed to us in the iplist[1] and iplist[2].
		 */
		lrd->log.noredoinoext.iagnum =
		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
		lrd->log.noredoinoext.inoext_idx =
		    cpu_to_le32((u32) (size_t) cd->iplist[2]);

		pxdlock = (struct pxd_lock *) & tlck->lock;
		*pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

		/* update bmap */
		tlck->flag |= tlckUPDATEMAP;

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	} else
		jfs_err("diLog: UFO type tlck:0x%p", tlck);
#ifdef _JFS_WIP
	/*
	 * alloc/free external EA extent
	 *
	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
	 * of the extent has been formatted at txLock() time;
	 */
	else {
		assert(tlck->type & tlckEA);

		/* log LOG_UPDATEMAP for logredo() to update bmap for
		 * alloc of new (and free of old) external EA extent;
		 */
		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		nlock = pxdlock->index;
		for (i = 0; i < nlock; i++, pxdlock++) {
			if (pxdlock->flag & mlckALLOCPXD)
				lrd->log.updatemap.type =
				    cpu_to_le16(LOG_ALLOCPXD);
			else
				lrd->log.updatemap.type =
				    cpu_to_le16(LOG_FREEPXD);
			lrd->log.updatemap.nxd = cpu_to_le16(1);
			lrd->log.updatemap.pxd = pxdlock->pxd;
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		}

		/* update bmap */
		tlck->flag |= tlckUPDATEMAP;
	}
#endif				/* _JFS_WIP */

	return rc;
}


/*
 * dataLog()
 *
 * function: log data tlock
 */
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	    struct tlock * tlck)
{
	struct metapage *mp;
	pxd_t *pxd;

	mp = tlck->mp;

	/* initialize as REDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	/* log after-image for logredo(): */
	lrd->type = cpu_to_le16(LOG_REDOPAGE);

	if (jfs_dirtable_inline(tlck->ip)) {
		/*
		 * The table has been truncated, we must have deleted
		 * the last entry, so don't bother logging this
		 */
		mp->lid = 0;
		hold_metapage(mp, 0);
		atomic_dec(&mp->nohomeok);
		discard_metapage(mp);
		tlck->mp = NULL;
		return 0;
	}

	PXDaddress(pxd, mp->index);
	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);

	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

	/* mark page as homeward bound */
	tlck->flag |= tlckWRITEPAGE;

	return 0;
}
1593 | |||
1594 | |||
1595 | /* | ||
1596 | * dtLog() | ||
1597 | * | ||
1598 | * function: log dtree tlock and format maplock to update bmap; | ||
1599 | */ | ||
1600 | static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | ||
1601 | struct tlock * tlck) | ||
1602 | { | ||
1603 | struct metapage *mp; | ||
1604 | struct pxd_lock *pxdlock; | ||
1605 | pxd_t *pxd; | ||
1606 | |||
1607 | mp = tlck->mp; | ||
1608 | |||
1609 | /* initialize as REDOPAGE/NOREDOPAGE record format */ | ||
1610 | lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); | ||
1611 | lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); | ||
1612 | |||
1613 | pxd = &lrd->log.redopage.pxd; | ||
1614 | |||
1615 | if (tlck->type & tlckBTROOT) | ||
1616 | lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); | ||
1617 | |||
1618 | /* | ||
1619 | * page extension via relocation: entry insertion; | ||
1620 | * page extension in-place: entry insertion; | ||
1621 | * new right page from page split, reinitialized in-line | ||
1622 | * root from root page split: entry insertion; | ||
1623 | */ | ||
1624 | if (tlck->type & (tlckNEW | tlckEXTEND)) { | ||
1625 | /* log after-image of the new page for logredo(): | ||
1626 | * mark log (LOG_NEW) for logredo() to initialize | ||
1627 | * freelist and update bmap for alloc of the new page; | ||
1628 | */ | ||
1629 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | ||
1630 | if (tlck->type & tlckEXTEND) | ||
1631 | lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); | ||
1632 | else | ||
1633 | lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); | ||
1634 | // *pxd = mp->cm_pxd; | ||
1635 | PXDaddress(pxd, mp->index); | ||
1636 | PXDlength(pxd, | ||
1637 | mp->logical_size >> tblk->sb->s_blocksize_bits); | ||
1638 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1639 | |||
1640 | /* format a maplock for txUpdateMap() to update bPMAP for | ||
1641 | * alloc of the new page; | ||
1642 | */ | ||
1643 | if (tlck->type & tlckBTROOT) | ||
1644 | return; | ||
1645 | tlck->flag |= tlckUPDATEMAP; | ||
1646 | pxdlock = (struct pxd_lock *) & tlck->lock; | ||
1647 | pxdlock->flag = mlckALLOCPXD; | ||
1648 | pxdlock->pxd = *pxd; | ||
1649 | |||
1650 | pxdlock->index = 1; | ||
1651 | |||
1652 | /* mark page as homeward bound */ | ||
1653 | tlck->flag |= tlckWRITEPAGE; | ||
1654 | return; | ||
1655 | } | ||
1656 | |||
1657 | /* | ||
1658 | * entry insertion/deletion, | ||
1659 | * sibling page link update (old right page before split); | ||
1660 | */ | ||
1661 | if (tlck->type & (tlckENTRY | tlckRELINK)) { | ||
1662 | /* log after-image for logredo(): */ | ||
1663 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | ||
1664 | PXDaddress(pxd, mp->index); | ||
1665 | PXDlength(pxd, | ||
1666 | mp->logical_size >> tblk->sb->s_blocksize_bits); | ||
1667 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1668 | |||
1669 | /* mark page as homeward bound */ | ||
1670 | tlck->flag |= tlckWRITEPAGE; | ||
1671 | return; | ||
1672 | } | ||
1673 | |||
1674 | /* | ||
1675 | * page deletion: page has been invalidated | ||
1676 | * page relocation: source extent | ||
1677 | * | ||
1678 | * a maplock for free of the page has been formatted | ||
1679 | * at txLock() time; | ||
1680 | */ | ||
1681 | if (tlck->type & (tlckFREE | tlckRELOCATE)) { | ||
1682 | /* log LOG_NOREDOPAGE of the deleted page for logredo() | ||
1683 | * to start NoRedoPage filter and to update bmap for free | ||
1684 | * of the deleted page | ||
1685 | */ | ||
1686 | lrd->type = cpu_to_le16(LOG_NOREDOPAGE); | ||
1687 | pxdlock = (struct pxd_lock *) & tlck->lock; | ||
1688 | *pxd = pxdlock->pxd; | ||
1689 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | ||
1690 | |||
1691 | /* a maplock for txUpdateMap() for free of the page | ||
1692 | * has been formatted at txLock() time; | ||
1693 | */ | ||
1694 | tlck->flag |= tlckUPDATEMAP; | ||
1695 | } | ||
1696 | return; | ||
1697 | } | ||
1698 | |||
1699 | |||
1700 | /* | ||
1701 | * xtLog() | ||
1702 | * | ||
1703 | * function: log xtree tlock and format maplock to update bmap; | ||
1704 | */ | ||
1705 | static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | ||
1706 | struct tlock * tlck) | ||
1707 | { | ||
1708 | struct inode *ip; | ||
1709 | struct metapage *mp; | ||
1710 | xtpage_t *p; | ||
1711 | struct xtlock *xtlck; | ||
1712 | struct maplock *maplock; | ||
1713 | struct xdlistlock *xadlock; | ||
1714 | struct pxd_lock *pxdlock; | ||
1715 | pxd_t *pxd; | ||
1716 | int next, lwm, hwm; | ||
1717 | |||
1718 | ip = tlck->ip; | ||
1719 | mp = tlck->mp; | ||
1720 | |||
1721 | /* initialize as REDOPAGE/NOREDOPAGE record format */ | ||
1722 | lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); | ||
1723 | lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); | ||
1724 | |||
1725 | pxd = &lrd->log.redopage.pxd; | ||
1726 | |||
1727 | if (tlck->type & tlckBTROOT) { | ||
1728 | lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); | ||
1729 | p = &JFS_IP(ip)->i_xtroot; | ||
1730 | if (S_ISDIR(ip->i_mode)) | ||
1731 | lrd->log.redopage.type |= | ||
1732 | cpu_to_le16(LOG_DIR_XTREE); | ||
1733 | } else | ||
1734 | p = (xtpage_t *) mp->data; | ||
1735 | next = le16_to_cpu(p->header.nextindex); | ||
1736 | |||
1737 | xtlck = (struct xtlock *) & tlck->lock; | ||
1738 | |||
1739 | maplock = (struct maplock *) & tlck->lock; | ||
1740 | xadlock = (struct xdlistlock *) maplock; | ||
1741 | |||
1742 | /* | ||
1743 | * entry insertion/extension; | ||
1744 | * sibling page link update (old right page before split); | ||
1745 | */ | ||
1746 | if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { | ||
1747 | /* log after-image for logredo(): | ||
1748 | * logredo() will update bmap for alloc of new/extended | ||
1749 | * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from | ||
1750 | * after-image of XADlist; | ||
1751 | * logredo() resets (XAD_NEW|XAD_EXTEND) flag when | ||
1752 | * applying the after-image to the meta-data page. | ||
1753 | */ | ||
1754 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | ||
1755 | // *pxd = mp->cm_pxd; | ||
1756 | PXDaddress(pxd, mp->index); | ||
1757 | PXDlength(pxd, | ||
1758 | mp->logical_size >> tblk->sb->s_blocksize_bits); | ||
1759 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1760 | |||
1761 | /* format a maplock for txUpdateMap() to update bPMAP | ||
1762 | * for alloc of new/extended extents of XAD[lwm:next) | ||
1763 | * from the page itself; | ||
1764 | * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. | ||
1765 | */ | ||
1766 | lwm = xtlck->lwm.offset; | ||
1767 | if (lwm == 0) | ||
1768 | lwm = XTPAGEMAXSLOT; | ||
1769 | |||
1770 | if (lwm == next) | ||
1771 | goto out; | ||
1772 | if (lwm > next) { | ||
1773 | jfs_err("xtLog: lwm > next"); | ||
1774 | goto out; | ||
1775 | } | ||
1776 | tlck->flag |= tlckUPDATEMAP; | ||
1777 | xadlock->flag = mlckALLOCXADLIST; | ||
1778 | xadlock->count = next - lwm; | ||
1779 | if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { | ||
1780 | int i; | ||
1781 | /* | ||
1782 | * Lazy commit may allow xtree to be modified before | ||
1783 | * txUpdateMap runs. Copy xad into linelock to | ||
1784 | * preserve correct data. | ||
1785 | */ | ||
1786 | xadlock->xdlist = &xtlck->pxdlock; | ||
1787 | memcpy(xadlock->xdlist, &p->xad[lwm], | ||
1788 | sizeof(xad_t) * xadlock->count); | ||
1789 | |||
1790 | for (i = 0; i < xadlock->count; i++) | ||
1791 | p->xad[lwm + i].flag &= | ||
1792 | ~(XAD_NEW | XAD_EXTENDED); | ||
1793 | } else { | ||
1794 | /* | ||
1795 | * xdlist will point into the inode's xtree; ensure | ||
1796 | * that transaction is not committed lazily. | ||
1797 | */ | ||
1798 | xadlock->xdlist = &p->xad[lwm]; | ||
1799 | tblk->xflag &= ~COMMIT_LAZY; | ||
1800 | } | ||
1801 | jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d " | ||
1802 | "count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); | ||
1803 | |||
1804 | maplock->index = 1; | ||
1805 | |||
1806 | out: | ||
1807 | /* mark page as homeward bound */ | ||
1808 | tlck->flag |= tlckWRITEPAGE; | ||
1809 | |||
1810 | return; | ||
1811 | } | ||
1812 | |||
1813 | /* | ||
1814 | * page deletion: file deletion/truncation (ref. xtTruncate()) | ||
1815 | * | ||
1816 | * (page will be invalidated after log is written and bmap | ||
1817 | * is updated from the page); | ||
1818 | */ | ||
1819 | if (tlck->type & tlckFREE) { | ||
1820 | /* LOG_NOREDOPAGE log for NoRedoPage filter: | ||
1821 | * if page free from file delete, NoRedoFile filter from | ||
1822 | * inode image of zero link count will subsume NoRedoPage | ||
1823 | * filters for each page; | ||
1824 | * if page free from file truncation, write NoRedoPage | ||
1825 | * filter; | ||
1826 | * | ||
1827 | * update of block allocation map for the page itself: | ||
1828 | * if page free from deletion and truncation, LOG_UPDATEMAP | ||
1829 | * log for the page itself is generated from processing | ||
1830 | * its parent page xad entries; | ||
1831 | */ | ||
1832 | /* if page free from file truncation, log LOG_NOREDOPAGE | ||
1833 | * of the deleted page for logredo() to start NoRedoPage | ||
1834 | * filter for the page; | ||
1835 | */ | ||
1836 | if (tblk->xflag & COMMIT_TRUNCATE) { | ||
1837 | /* write NOREDOPAGE for the page */ | ||
1838 | lrd->type = cpu_to_le16(LOG_NOREDOPAGE); | ||
1839 | PXDaddress(pxd, mp->index); | ||
1840 | PXDlength(pxd, | ||
1841 | mp->logical_size >> tblk->sb-> | ||
1842 | s_blocksize_bits); | ||
1843 | lrd->backchain = | ||
1844 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | ||
1845 | |||
1846 | if (tlck->type & tlckBTROOT) { | ||
1847 | /* Empty xtree must be logged */ | ||
1848 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | ||
1849 | lrd->backchain = | ||
1850 | cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1851 | } | ||
1852 | } | ||
1853 | |||
1854 | /* init LOG_UPDATEMAP of the freed extents | ||
1855 | * XAD[XTENTRYSTART:hwm) from the deleted page itself | ||
1856 | * for logredo() to update bmap; | ||
1857 | */ | ||
1858 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | ||
1859 | lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); | ||
1860 | xtlck = (struct xtlock *) & tlck->lock; | ||
1861 | hwm = xtlck->hwm.offset; | ||
1862 | lrd->log.updatemap.nxd = | ||
1863 | cpu_to_le16(hwm - XTENTRYSTART + 1); | ||
1864 | /* reformat linelock for lmLog() */ | ||
1865 | xtlck->header.offset = XTENTRYSTART; | ||
1866 | xtlck->header.length = hwm - XTENTRYSTART + 1; | ||
1867 | xtlck->index = 1; | ||
1868 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1869 | |||
1870 | /* format a maplock for txUpdateMap() to update bmap | ||
1871 | * to free extents of XAD[XTENTRYSTART:hwm) from the | ||
1872 | * deleted page itself; | ||
1873 | */ | ||
1874 | tlck->flag |= tlckUPDATEMAP; | ||
1875 | xadlock->flag = mlckFREEXADLIST; | ||
1876 | xadlock->count = hwm - XTENTRYSTART + 1; | ||
1877 | if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) { | ||
1878 | /* | ||
1879 | * Lazy commit may allow xtree to be modified before | ||
1880 | * txUpdateMap runs. Copy xad into linelock to | ||
1881 | * preserve correct data. | ||
1882 | */ | ||
1883 | xadlock->xdlist = &xtlck->pxdlock; | ||
1884 | memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART], | ||
1885 | sizeof(xad_t) * xadlock->count); | ||
1886 | } else { | ||
1887 | /* | ||
1888 | * xdlist will point into the inode's xtree; ensure | ||
1889 | * that transaction is not committed lazily. | ||
1890 | */ | ||
1891 | xadlock->xdlist = &p->xad[XTENTRYSTART]; | ||
1892 | tblk->xflag &= ~COMMIT_LAZY; | ||
1893 | } | ||
1894 | jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", | ||
1895 | tlck->ip, mp, xadlock->count); | ||
1896 | |||
1897 | maplock->index = 1; | ||
1898 | |||
1899 | /* mark page as invalid */ | ||
1900 | if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) | ||
1901 | && !(tlck->type & tlckBTROOT)) | ||
1902 | tlck->flag |= tlckFREEPAGE; | ||
1903 | /* | ||
1904 | else (tblk->xflag & COMMIT_PMAP) | ||
1905 | ? release the page; | ||
1906 | */ | ||
1907 | return; | ||
1908 | } | ||
1909 | |||
1910 | /* | ||
1911 | * page/entry truncation: file truncation (ref. xtTruncate()) | ||
1912 | * | ||
1913 | * |----------+------+------+---------------| | ||
1914 | * | | | | ||
1915 | * | | hwm - hwm before truncation | ||
1916 | * | next - truncation point | ||
1917 | * lwm - lwm before truncation | ||
1918 | * header ? | ||
1919 | */ | ||
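| /* illustrative example, assuming lwm = 3, twm = 4, next = 5, | ||
| * hwm = 8: the branch below then touches three disjoint | ||
| * ranges of the page: | ||
| * XAD[3:5) - newly allocated/extended entries (alloc maplock); | ||
| * XAD[4] - entry truncated in place; its truncated delta | ||
| * extent (xtlck->pxdlock) is freed; | ||
| * XAD[5:8] - entries wholly beyond the truncation point, freed; | ||
| */ | ||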
1920 | if (tlck->type & tlckTRUNCATE) { | ||
1921 | pxd_t tpxd; /* truncated extent of xad */ | ||
1922 | int twm; | ||
1923 | |||
1924 | /* | ||
1925 | * For truncation the entire linelock may be used, so it would | ||
1926 | * be difficult to store xad list in linelock itself. | ||
1927 | * Therefore, we'll just force transaction to be committed | ||
1928 | * synchronously, so that xtree pages won't be changed before | ||
1929 | * txUpdateMap runs. | ||
1930 | */ | ||
1931 | tblk->xflag &= ~COMMIT_LAZY; | ||
1932 | lwm = xtlck->lwm.offset; | ||
1933 | if (lwm == 0) | ||
1934 | lwm = XTPAGEMAXSLOT; | ||
1935 | hwm = xtlck->hwm.offset; | ||
1936 | twm = xtlck->twm.offset; | ||
1937 | |||
1938 | /* | ||
1939 | * write log records | ||
1940 | */ | ||
1941 | /* log after-image for logredo(): | ||
1942 | * | ||
1943 | * logredo() will update bmap for alloc of new/extended | ||
1944 | * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from | ||
1945 | * after-image of XADlist; | ||
1946 | * logredo() resets (XAD_NEW|XAD_EXTEND) flag when | ||
1947 | * applying the after-image to the meta-data page. | ||
1948 | */ | ||
1949 | lrd->type = cpu_to_le16(LOG_REDOPAGE); | ||
1950 | PXDaddress(pxd, mp->index); | ||
1951 | PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); | ||
1952 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1953 | |||
1954 | /* | ||
1955 | * truncate entry XAD[twm == next - 1]: | ||
1956 | */ | ||
1957 | if (twm == next - 1) { | ||
1958 | /* init LOG_UPDATEMAP for logredo() to update bmap for | ||
1959 | * free of truncated delta extent of the truncated | ||
1960 | * entry XAD[next - 1]: | ||
1961 | * (xtlck->pxdlock = truncated delta extent); | ||
1962 | */ | ||
1963 | pxdlock = (struct pxd_lock *) & xtlck->pxdlock; | ||
1964 | /* assert(pxdlock->type & tlckTRUNCATE); */ | ||
1965 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | ||
1966 | lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); | ||
1967 | lrd->log.updatemap.nxd = cpu_to_le16(1); | ||
1968 | lrd->log.updatemap.pxd = pxdlock->pxd; | ||
1969 | tpxd = pxdlock->pxd; /* save to format maplock */ | ||
1970 | lrd->backchain = | ||
1971 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | ||
1972 | } | ||
1973 | |||
1974 | /* | ||
1975 | * free entries XAD[next:hwm]: | ||
1976 | */ | ||
1977 | if (hwm >= next) { | ||
1978 | /* init LOG_UPDATEMAP of the freed extents | ||
1979 | * XAD[next:hwm] from the deleted page itself | ||
1980 | * for logredo() to update bmap; | ||
1981 | */ | ||
1982 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | ||
1983 | lrd->log.updatemap.type = | ||
1984 | cpu_to_le16(LOG_FREEXADLIST); | ||
1985 | xtlck = (struct xtlock *) & tlck->lock; | ||
1986 | hwm = xtlck->hwm.offset; | ||
1987 | lrd->log.updatemap.nxd = | ||
1988 | cpu_to_le16(hwm - next + 1); | ||
1989 | /* reformat linelock for lmLog() */ | ||
1990 | xtlck->header.offset = next; | ||
1991 | xtlck->header.length = hwm - next + 1; | ||
1992 | xtlck->index = 1; | ||
1993 | lrd->backchain = | ||
1994 | cpu_to_le32(lmLog(log, tblk, lrd, tlck)); | ||
1995 | } | ||
1996 | |||
1997 | /* | ||
1998 | * format maplock(s) for txUpdateMap() to update bmap | ||
1999 | */ | ||
2000 | maplock->index = 0; | ||
2001 | |||
2002 | /* | ||
2003 | * allocate entries XAD[lwm:next): | ||
2004 | */ | ||
2005 | if (lwm < next) { | ||
2006 | /* format a maplock for txUpdateMap() to update bPMAP | ||
2007 | * for alloc of new/extended extents of XAD[lwm:next) | ||
2008 | * from the page itself; | ||
2009 | * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. | ||
2010 | */ | ||
2011 | tlck->flag |= tlckUPDATEMAP; | ||
2012 | xadlock->flag = mlckALLOCXADLIST; | ||
2013 | xadlock->count = next - lwm; | ||
2014 | xadlock->xdlist = &p->xad[lwm]; | ||
2015 | |||
2016 | jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d " | ||
2017 | "lwm:%d next:%d", | ||
2018 | tlck->ip, mp, xadlock->count, lwm, next); | ||
2019 | maplock->index++; | ||
2020 | xadlock++; | ||
2021 | } | ||
2022 | |||
2023 | /* | ||
2024 | * truncate entry XAD[twm == next - 1]: | ||
2025 | */ | ||
2026 | if (twm == next - 1) { | ||
2027 | struct pxd_lock *pxdlock; | ||
2028 | |||
2029 | /* format a maplock for txUpdateMap() to update bmap | ||
2030 | * to free truncated delta extent of the truncated | ||
2031 | * entry XAD[next - 1]; | ||
2032 | * (xtlck->pxdlock = truncated delta extent); | ||
2033 | */ | ||
2034 | tlck->flag |= tlckUPDATEMAP; | ||
2035 | pxdlock = (struct pxd_lock *) xadlock; | ||
2036 | pxdlock->flag = mlckFREEPXD; | ||
2037 | pxdlock->count = 1; | ||
2038 | pxdlock->pxd = tpxd; | ||
2039 | |||
2040 | jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d " | ||
2041 | "hwm:%d", ip, mp, pxdlock->count, hwm); | ||
2042 | maplock->index++; | ||
2043 | xadlock++; | ||
2044 | } | ||
2045 | |||
2046 | /* | ||
2047 | * free entries XAD[next:hwm]: | ||
2048 | */ | ||
2049 | if (hwm >= next) { | ||
2050 | /* format a maplock for txUpdateMap() to update bmap | ||
2051 | * to free extents of XAD[next:hwm] from the deleted | ||
2052 | * page itself; | ||
2053 | */ | ||
2054 | tlck->flag |= tlckUPDATEMAP; | ||
2055 | xadlock->flag = mlckFREEXADLIST; | ||
2056 | xadlock->count = hwm - next + 1; | ||
2057 | xadlock->xdlist = &p->xad[next]; | ||
2058 | |||
2059 | jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d " | ||
2060 | "next:%d hwm:%d", | ||
2061 | tlck->ip, mp, xadlock->count, next, hwm); | ||
2062 | maplock->index++; | ||
2063 | } | ||
2064 | |||
2065 | /* mark page as homeward bound */ | ||
2066 | tlck->flag |= tlckWRITEPAGE; | ||
2067 | } | ||
2068 | return; | ||
2069 | } | ||
2070 | |||
2071 | |||
2072 | /* | ||
2073 | * mapLog() | ||
2074 | * | ||
2075 | * function: log from maplock of freed data extents; | ||
2076 | */ | ||
2077 | void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, | ||
2078 | struct tlock * tlck) | ||
2079 | { | ||
2080 | struct pxd_lock *pxdlock; | ||
2081 | int i, nlock; | ||
2082 | pxd_t *pxd; | ||
2083 | |||
2084 | /* | ||
2085 | * page relocation: free the source page extent | ||
2086 | * | ||
2087 | * a maplock for txUpdateMap() for free of the page | ||
2088 | * has been formatted at txLock() time saving the src | ||
2089 | * relocated page address; | ||
2090 | */ | ||
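| /* (in short: a relocate costs two log records below, a | ||
| * LOG_NOREDOPAGE for the old page address followed by a | ||
| * LOG_UPDATEMAP/LOG_FREEPXD so logredo() also frees the old | ||
| * extent in the bmap) | ||
| */ | ||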
2091 | if (tlck->type & tlckRELOCATE) { | ||
2092 | /* log LOG_NOREDOPAGE of the old relocated page | ||
2093 | * for logredo() to start NoRedoPage filter; | ||
2094 | */ | ||
2095 | lrd->type = cpu_to_le16(LOG_NOREDOPAGE); | ||
2096 | pxdlock = (struct pxd_lock *) & tlck->lock; | ||
2097 | pxd = &lrd->log.redopage.pxd; | ||
2098 | *pxd = pxdlock->pxd; | ||
2099 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | ||
2100 | |||
2101 | /* (N.B. currently, logredo() does NOT update bmap | ||
2102 | * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); | ||
2103 | * if page free from relocation, LOG_UPDATEMAP log is | ||
2104 | * specifically generated now for logredo() | ||
2105 | * to update bmap for free of src relocated page; | ||
2106 | * (new flag LOG_RELOCATE may be introduced which will | ||
2107 | * inform logredo() to start NORedoPage filter and also | ||
2108 | * update block allocation map at the same time, thus | ||
2109 | * avoiding an extra log write); | ||
2110 | */ | ||
2111 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | ||
2112 | lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); | ||
2113 | lrd->log.updatemap.nxd = cpu_to_le16(1); | ||
2114 | lrd->log.updatemap.pxd = pxdlock->pxd; | ||
2115 | lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | ||
2116 | |||
2117 | /* a maplock for txUpdateMap() for free of the page | ||
2118 | * has been formatted at txLock() time; | ||
2119 | */ | ||
2120 | tlck->flag |= tlckUPDATEMAP; | ||
2121 | return; | ||
2122 | } | ||
2123 | /* | ||
2124 | * | ||
2125 | * Otherwise it's not a relocate request | ||
2126 | * | ||
2127 | */ | ||
2128 | else { | ||
2129 | /* log LOG_UPDATEMAP for logredo() to update bmap for | ||
2130 | * free of truncated/relocated delta extent of the data; | ||
2131 | * e.g.: external EA extent, relocated/truncated extent | ||
2132 | * from xtTailgate(); | ||
2133 | */ | ||
2134 | lrd->type = cpu_to_le16(LOG_UPDATEMAP); | ||
2135 | pxdlock = (struct pxd_lock *) & tlck->lock; | ||
2136 | nlock = pxdlock->index; | ||
2137 | for (i = 0; i < nlock; i++, pxdlock++) { | ||
2138 | if (pxdlock->flag & mlckALLOCPXD) | ||
2139 | lrd->log.updatemap.type = | ||
2140 | cpu_to_le16(LOG_ALLOCPXD); | ||
2141 | else | ||
2142 | lrd->log.updatemap.type = | ||
2143 | cpu_to_le16(LOG_FREEPXD); | ||
2144 | lrd->log.updatemap.nxd = cpu_to_le16(1); | ||
2145 | lrd->log.updatemap.pxd = pxdlock->pxd; | ||
2146 | lrd->backchain = | ||
2147 | cpu_to_le32(lmLog(log, tblk, lrd, NULL)); | ||
2148 | jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", | ||
2149 | (ulong) addressPXD(&pxdlock->pxd), | ||
2150 | lengthPXD(&pxdlock->pxd)); | ||
2151 | } | ||
2152 | |||
2153 | /* update bmap */ | ||
2154 | tlck->flag |= tlckUPDATEMAP; | ||
2155 | } | ||
2156 | } | ||
2157 | |||
2158 | |||
2159 | /* | ||
2160 | * txEA() | ||
2161 | * | ||
2162 | * function: acquire maplock for EA/ACL extents or | ||
2163 | * set COMMIT_INLINE flag; | ||
2164 | */ | ||
2165 | void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) | ||
2166 | { | ||
2167 | struct tlock *tlck = NULL; | ||
2168 | struct pxd_lock *maplock = NULL, *pxdlock = NULL; | ||
2169 | |||
2170 | /* | ||
2171 | * format maplock for alloc of new EA extent | ||
2172 | */ | ||
2173 | if (newea) { | ||
2174 | /* Since the newea could be a completely zeroed entry we need to | ||
2175 | * check for the two flags which indicate we should actually | ||
2176 | * commit new EA data | ||
2177 | */ | ||
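| /* (the two flags, per the branches below: DXD_EXTENT - the EA | ||
| * lives in its own extent and needs an alloc maplock; | ||
| * DXD_INLINE - the EA is stored in the dinode, so only the | ||
| * COMMIT_Inlineea flag is set) | ||
| */ | ||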
2178 | if (newea->flag & DXD_EXTENT) { | ||
2179 | tlck = txMaplock(tid, ip, tlckMAP); | ||
2180 | maplock = (struct pxd_lock *) & tlck->lock; | ||
2181 | pxdlock = (struct pxd_lock *) maplock; | ||
2182 | pxdlock->flag = mlckALLOCPXD; | ||
2183 | PXDaddress(&pxdlock->pxd, addressDXD(newea)); | ||
2184 | PXDlength(&pxdlock->pxd, lengthDXD(newea)); | ||
2185 | pxdlock++; | ||
2186 | maplock->index = 1; | ||
2187 | } else if (newea->flag & DXD_INLINE) { | ||
2188 | tlck = NULL; | ||
2189 | |||
2190 | set_cflag(COMMIT_Inlineea, ip); | ||
2191 | } | ||
2192 | } | ||
2193 | |||
2194 | /* | ||
2195 | * format maplock for free of old EA extent | ||
2196 | */ | ||
2197 | if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { | ||
2198 | if (tlck == NULL) { | ||
2199 | tlck = txMaplock(tid, ip, tlckMAP); | ||
2200 | maplock = (struct pxd_lock *) & tlck->lock; | ||
2201 | pxdlock = (struct pxd_lock *) maplock; | ||
2202 | maplock->index = 0; | ||
2203 | } | ||
2204 | pxdlock->flag = mlckFREEPXD; | ||
2205 | PXDaddress(&pxdlock->pxd, addressDXD(oldea)); | ||
2206 | PXDlength(&pxdlock->pxd, lengthDXD(oldea)); | ||
2207 | maplock->index++; | ||
2208 | } | ||
2209 | } | ||
2210 | |||
2211 | |||
2212 | /* | ||
2213 | * txForce() | ||
2214 | * | ||
2215 | * function: synchronously write pages locked by transaction | ||
2216 | * after txLog() but before txUpdateMap(); | ||
2217 | */ | ||
2218 | void txForce(struct tblock * tblk) | ||
2219 | { | ||
2220 | struct tlock *tlck; | ||
2221 | lid_t lid, next; | ||
2222 | struct metapage *mp; | ||
2223 | |||
2224 | /* | ||
2225 | * reverse the order of transaction tlocks in | ||
2226 | * careful update order of address index pages | ||
2227 | * (right to left, bottom up) | ||
2228 | */ | ||
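| /* e.g., assuming the lid list reads 5 -> 9 -> 2 on entry, the | ||
| * in-place reversal below leaves tblk->next as 2 -> 9 -> 5, | ||
| * so pages are forced in reverse order of tlock acquisition; | ||
| */ | ||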
2229 | tlck = lid_to_tlock(tblk->next); | ||
2230 | lid = tlck->next; | ||
2231 | tlck->next = 0; | ||
2232 | while (lid) { | ||
2233 | tlck = lid_to_tlock(lid); | ||
2234 | next = tlck->next; | ||
2235 | tlck->next = tblk->next; | ||
2236 | tblk->next = lid; | ||
2237 | lid = next; | ||
2238 | } | ||
2239 | |||
2240 | /* | ||
2241 | * synchronously write the page, and | ||
2242 | * hold the page for txUpdateMap(); | ||
2243 | */ | ||
2244 | for (lid = tblk->next; lid; lid = next) { | ||
2245 | tlck = lid_to_tlock(lid); | ||
2246 | next = tlck->next; | ||
2247 | |||
2248 | if ((mp = tlck->mp) != NULL && | ||
2249 | (tlck->type & tlckBTROOT) == 0) { | ||
2250 | assert(mp->xflag & COMMIT_PAGE); | ||
2251 | |||
2252 | if (tlck->flag & tlckWRITEPAGE) { | ||
2253 | tlck->flag &= ~tlckWRITEPAGE; | ||
2254 | |||
2255 | /* do not release page to freelist */ | ||
2256 | |||
2257 | /* | ||
2258 | * The "right" thing to do here is to | ||
2259 | * synchronously write the metadata. | ||
2260 | * With the current implementation this | ||
2261 | * is hard since write_metapage requires | ||
2262 | * us to kunmap & remap the page. If we | ||
2263 | * have tlocks pointing into the metadata | ||
2264 | * pages, we don't want to do this. I think | ||
2265 | * we can get by with synchronously writing | ||
2266 | * the pages when they are released. | ||
2267 | */ | ||
2268 | assert(atomic_read(&mp->nohomeok)); | ||
2269 | set_bit(META_dirty, &mp->flag); | ||
2270 | set_bit(META_sync, &mp->flag); | ||
2271 | } | ||
2272 | } | ||
2273 | } | ||
2274 | } | ||
2275 | |||
2276 | |||
2277 | /* | ||
2278 | * txUpdateMap() | ||
2279 | * | ||
2280 | * function: update persistent allocation map (and working map | ||
2281 | * if appropriate); | ||
2282 | * | ||
2283 | * parameter: tblk - transaction whose maps are to be updated; | ||
2284 | */ | ||
2285 | static void txUpdateMap(struct tblock * tblk) | ||
2286 | { | ||
2287 | struct inode *ip; | ||
2288 | struct inode *ipimap; | ||
2289 | lid_t lid; | ||
2290 | struct tlock *tlck; | ||
2291 | struct maplock *maplock; | ||
2292 | struct pxd_lock pxdlock; | ||
2293 | int maptype; | ||
2294 | int k, nlock; | ||
2295 | struct metapage *mp = NULL; | ||
2296 | |||
2297 | ipimap = JFS_SBI(tblk->sb)->ipimap; | ||
2298 | |||
2299 | maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; | ||
2300 | |||
2301 | |||
2302 | /* | ||
2303 | * update block allocation map | ||
2304 | * | ||
2305 | * update allocation state in pmap (and wmap) and | ||
2306 | * update lsn of the pmap page; | ||
2307 | */ | ||
2308 | /* | ||
2309 | * scan each tlock/page of transaction for block allocation/free: | ||
2310 | * | ||
2311 | * for each tlock/page of transaction, update map. | ||
2312 | * ? are there tlock for pmap and pwmap at the same time ? | ||
2313 | */ | ||
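| /* (shape of the walk below: for each tlock, apply each of its | ||
| * maplock entries to the maps, then discard any page marked | ||
| * tlckFREEPAGE) | ||
| */ | ||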
2314 | for (lid = tblk->next; lid; lid = tlck->next) { | ||
2315 | tlck = lid_to_tlock(lid); | ||
2316 | |||
2317 | if ((tlck->flag & tlckUPDATEMAP) == 0) | ||
2318 | continue; | ||
2319 | |||
2320 | if (tlck->flag & tlckFREEPAGE) { | ||
2321 | /* | ||
2322 | * Another thread may attempt to reuse freed space | ||
2323 | * immediately, so we want to get rid of the metapage | ||
2324 | * before anyone else has a chance to get it. | ||
2325 | * Lock metapage, update maps, then invalidate | ||
2326 | * the metapage. | ||
2327 | */ | ||
2328 | mp = tlck->mp; | ||
2329 | ASSERT(mp->xflag & COMMIT_PAGE); | ||
2330 | hold_metapage(mp, 0); | ||
2331 | } | ||
2332 | |||
2333 | /* | ||
2334 | * extent list: | ||
2335 | * . in-line PXD list: | ||
2336 | * . out-of-line XAD list: | ||
2337 | */ | ||
2338 | maplock = (struct maplock *) & tlck->lock; | ||
2339 | nlock = maplock->index; | ||
2340 | |||
2341 | for (k = 0; k < nlock; k++, maplock++) { | ||
2342 | /* | ||
2343 | * allocate blocks in persistent map: | ||
2344 | * | ||
2345 | * blocks have been allocated from wmap at alloc time; | ||
2346 | */ | ||
2347 | if (maplock->flag & mlckALLOC) { | ||
2348 | txAllocPMap(ipimap, maplock, tblk); | ||
2349 | } | ||
2350 | /* | ||
2351 | * free blocks in persistent and working map: | ||
2352 | * blocks will be freed in pmap and then in wmap; | ||
2353 | * | ||
2354 | * ? tblock specifies the PMAP/PWMAP based upon | ||
2355 | * transaction | ||
2356 | * | ||
2357 | * free blocks in persistent map: | ||
2358 | * blocks will be freed from wmap at last reference | ||
2359 | * release of the object for regular files; | ||
2360 | * | ||
2361 | * Always free blocks from both persistent & working | ||
2362 | * maps for directories | ||
2363 | */ | ||
2364 | else { /* (maplock->flag & mlckFREE) */ | ||
2365 | |||
2366 | if (S_ISDIR(tlck->ip->i_mode)) | ||
2367 | txFreeMap(ipimap, maplock, | ||
2368 | tblk, COMMIT_PWMAP); | ||
2369 | else | ||
2370 | txFreeMap(ipimap, maplock, | ||
2371 | tblk, maptype); | ||
2372 | } | ||
2373 | } | ||
2374 | if (tlck->flag & tlckFREEPAGE) { | ||
2375 | if (!(tblk->flag & tblkGC_LAZY)) { | ||
2376 | /* This is equivalent to txRelease */ | ||
2377 | ASSERT(mp->lid == lid); | ||
2378 | tlck->mp->lid = 0; | ||
2379 | } | ||
2380 | assert(atomic_read(&mp->nohomeok) == 1); | ||
2381 | atomic_dec(&mp->nohomeok); | ||
2382 | discard_metapage(mp); | ||
2383 | tlck->mp = NULL; | ||
2384 | } | ||
2385 | } | ||
2386 | /* | ||
2387 | * update inode allocation map | ||
2388 | * | ||
2389 | * update allocation state in pmap and | ||
2390 | * update lsn of the pmap page; | ||
2391 | * update in-memory inode flag/state | ||
2392 | * | ||
2393 | * unlock mapper/write lock | ||
2394 | */ | ||
2395 | if (tblk->xflag & COMMIT_CREATE) { | ||
2396 | diUpdatePMap(ipimap, tblk->ino, FALSE, tblk); | ||
2397 | ipimap->i_state |= I_DIRTY; | ||
2398 | /* update persistent block allocation map | ||
2399 | * for the allocation of inode extent; | ||
2400 | */ | ||
2401 | pxdlock.flag = mlckALLOCPXD; | ||
2402 | pxdlock.pxd = tblk->u.ixpxd; | ||
2403 | pxdlock.index = 1; | ||
2404 | txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); | ||
2405 | } else if (tblk->xflag & COMMIT_DELETE) { | ||
2406 | ip = tblk->u.ip; | ||
2407 | diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk); | ||
2408 | ipimap->i_state |= I_DIRTY; | ||
2409 | iput(ip); | ||
2410 | } | ||
2411 | } | ||
2412 | |||
2413 | |||
2414 | /* | ||
2415 | * txAllocPMap() | ||
2416 | * | ||
2417 | * function: allocate from persistent map; | ||
2418 | * | ||
2419 | * parameter: | ||
2420 | * ipbmap - | ||
2421 | * maplock - | ||
2422 | * xad list: | ||
2423 | * pxd: | ||
2424 | * | ||
2425 | * maptype - | ||
2426 | * allocate from persistent map; | ||
2427 | * free from persistent map; | ||
2428 | * (e.g., tmp file - free from working map at release | ||
2429 | * of last reference); | ||
2430 | * free from persistent and working map; | ||
2431 | * | ||
2432 | * lsn - log sequence number; | ||
2433 | */ | ||
2434 | static void txAllocPMap(struct inode *ip, struct maplock * maplock, | ||
2435 | struct tblock * tblk) | ||
2436 | { | ||
2437 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | ||
2438 | struct xdlistlock *xadlistlock; | ||
2439 | xad_t *xad; | ||
2440 | s64 xaddr; | ||
2441 | int xlen; | ||
2442 | struct pxd_lock *pxdlock; | ||
2443 | struct xdlistlock *pxdlistlock; | ||
2444 | pxd_t *pxd; | ||
2445 | int n; | ||
2446 | |||
2447 | /* | ||
2448 | * allocate from persistent map; | ||
2449 | */ | ||
2450 | if (maplock->flag & mlckALLOCXADLIST) { | ||
2451 | xadlistlock = (struct xdlistlock *) maplock; | ||
2452 | xad = xadlistlock->xdlist; | ||
2453 | for (n = 0; n < xadlistlock->count; n++, xad++) { | ||
2454 | if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { | ||
2455 | xaddr = addressXAD(xad); | ||
2456 | xlen = lengthXAD(xad); | ||
2457 | dbUpdatePMap(ipbmap, FALSE, xaddr, | ||
2458 | (s64) xlen, tblk); | ||
2459 | xad->flag &= ~(XAD_NEW | XAD_EXTENDED); | ||
2460 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", | ||
2461 | (ulong) xaddr, xlen); | ||
2462 | } | ||
2463 | } | ||
2464 | } else if (maplock->flag & mlckALLOCPXD) { | ||
2465 | pxdlock = (struct pxd_lock *) maplock; | ||
2466 | xaddr = addressPXD(&pxdlock->pxd); | ||
2467 | xlen = lengthPXD(&pxdlock->pxd); | ||
2468 | dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk); | ||
2469 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); | ||
2470 | } else { /* (maplock->flag & mlckALLOCPXDLIST) */ | ||
2471 | |||
2472 | pxdlistlock = (struct xdlistlock *) maplock; | ||
2473 | pxd = pxdlistlock->xdlist; | ||
2474 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | ||
2475 | xaddr = addressPXD(pxd); | ||
2476 | xlen = lengthPXD(pxd); | ||
2477 | dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, | ||
2478 | tblk); | ||
2479 | jfs_info("allocPMap: xaddr:0x%lx xlen:%d", | ||
2480 | (ulong) xaddr, xlen); | ||
2481 | } | ||
2482 | } | ||
2483 | } | ||
2484 | |||
2485 | |||
2486 | /* | ||
2487 | * txFreeMap() | ||
2488 | * | ||
2489 | * function: free from persistent and/or working map; | ||
2490 | * | ||
2491 | * todo: optimization | ||
2492 | */ | ||
2493 | void txFreeMap(struct inode *ip, | ||
2494 | struct maplock * maplock, struct tblock * tblk, int maptype) | ||
2495 | { | ||
2496 | struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; | ||
2497 | struct xdlistlock *xadlistlock; | ||
2498 | xad_t *xad; | ||
2499 | s64 xaddr; | ||
2500 | int xlen; | ||
2501 | struct pxd_lock *pxdlock; | ||
2502 | struct xdlistlock *pxdlistlock; | ||
2503 | pxd_t *pxd; | ||
2504 | int n; | ||
2505 | |||
2506 | jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", | ||
2507 | tblk, maplock, maptype); | ||
2508 | |||
2509 | /* | ||
2510 | * free from persistent map; | ||
2511 | */ | ||
2512 | if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { | ||
2513 | if (maplock->flag & mlckFREEXADLIST) { | ||
2514 | xadlistlock = (struct xdlistlock *) maplock; | ||
2515 | xad = xadlistlock->xdlist; | ||
2516 | for (n = 0; n < xadlistlock->count; n++, xad++) { | ||
2517 | if (!(xad->flag & XAD_NEW)) { | ||
2518 | xaddr = addressXAD(xad); | ||
2519 | xlen = lengthXAD(xad); | ||
2520 | dbUpdatePMap(ipbmap, TRUE, xaddr, | ||
2521 | (s64) xlen, tblk); | ||
2522 | jfs_info("freePMap: xaddr:0x%lx " | ||
2523 | "xlen:%d", | ||
2524 | (ulong) xaddr, xlen); | ||
2525 | } | ||
2526 | } | ||
2527 | } else if (maplock->flag & mlckFREEPXD) { | ||
2528 | pxdlock = (struct pxd_lock *) maplock; | ||
2529 | xaddr = addressPXD(&pxdlock->pxd); | ||
2530 | xlen = lengthPXD(&pxdlock->pxd); | ||
2531 | dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen, | ||
2532 | tblk); | ||
2533 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", | ||
2534 | (ulong) xaddr, xlen); | ||
2535 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ | ||
2536 | |||
2537 | pxdlistlock = (struct xdlistlock *) maplock; | ||
2538 | pxd = pxdlistlock->xdlist; | ||
2539 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | ||
2540 | xaddr = addressPXD(pxd); | ||
2541 | xlen = lengthPXD(pxd); | ||
2542 | dbUpdatePMap(ipbmap, TRUE, xaddr, | ||
2543 | (s64) xlen, tblk); | ||
2544 | jfs_info("freePMap: xaddr:0x%lx xlen:%d", | ||
2545 | (ulong) xaddr, xlen); | ||
2546 | } | ||
2547 | } | ||
2548 | } | ||
2549 | |||
2550 | /* | ||
2551 | * free from working map; | ||
2552 | */ | ||
2553 | if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { | ||
2554 | if (maplock->flag & mlckFREEXADLIST) { | ||
2555 | xadlistlock = (struct xdlistlock *) maplock; | ||
2556 | xad = xadlistlock->xdlist; | ||
2557 | for (n = 0; n < xadlistlock->count; n++, xad++) { | ||
2558 | xaddr = addressXAD(xad); | ||
2559 | xlen = lengthXAD(xad); | ||
2560 | dbFree(ip, xaddr, (s64) xlen); | ||
2561 | xad->flag = 0; | ||
2562 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | ||
2563 | (ulong) xaddr, xlen); | ||
2564 | } | ||
2565 | } else if (maplock->flag & mlckFREEPXD) { | ||
2566 | pxdlock = (struct pxd_lock *) maplock; | ||
2567 | xaddr = addressPXD(&pxdlock->pxd); | ||
2568 | xlen = lengthPXD(&pxdlock->pxd); | ||
2569 | dbFree(ip, xaddr, (s64) xlen); | ||
2570 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | ||
2571 | (ulong) xaddr, xlen); | ||
2572 | } else { /* (maplock->flag & mlckFREEPXDLIST) */ | ||
2573 | |||
2574 | pxdlistlock = (struct xdlistlock *) maplock; | ||
2575 | pxd = pxdlistlock->xdlist; | ||
2576 | for (n = 0; n < pxdlistlock->count; n++, pxd++) { | ||
2577 | xaddr = addressPXD(pxd); | ||
2578 | xlen = lengthPXD(pxd); | ||
2579 | dbFree(ip, xaddr, (s64) xlen); | ||
2580 | jfs_info("freeWMap: xaddr:0x%lx xlen:%d", | ||
2581 | (ulong) xaddr, xlen); | ||
2582 | } | ||
2583 | } | ||
2584 | } | ||
2585 | } | ||
2586 | |||
2587 | |||
2588 | /* | ||
2589 | * txFreelock() | ||
2590 | * | ||
2591 | * function: remove tlock from inode anonymous locklist | ||
2592 | */ | ||
2593 | void txFreelock(struct inode *ip) | ||
2594 | { | ||
2595 | struct jfs_inode_info *jfs_ip = JFS_IP(ip); | ||
2596 | struct tlock *xtlck, *tlck; | ||
2597 | lid_t xlid = 0, lid; | ||
2598 | |||
2599 | if (!jfs_ip->atlhead) | ||
2600 | return; | ||
2601 | |||
2602 | TXN_LOCK(); | ||
2603 | xtlck = (struct tlock *) &jfs_ip->atlhead; | ||
2604 | |||
2605 | while ((lid = xtlck->next) != 0) { | ||
2606 | tlck = lid_to_tlock(lid); | ||
2607 | if (tlck->flag & tlckFREELOCK) { | ||
2608 | xtlck->next = tlck->next; | ||
2609 | txLockFree(lid); | ||
2610 | } else { | ||
2611 | xtlck = tlck; | ||
2612 | xlid = lid; | ||
2613 | } | ||
2614 | } | ||
2615 | |||
2616 | if (jfs_ip->atlhead) | ||
2617 | jfs_ip->atltail = xlid; | ||
2618 | else { | ||
2619 | jfs_ip->atltail = 0; | ||
2620 | /* | ||
2621 | * If inode was on anon_list, remove it | ||
2622 | */ | ||
2623 | list_del_init(&jfs_ip->anon_inode_list); | ||
2624 | } | ||
2625 | TXN_UNLOCK(); | ||
2626 | } | ||
2627 | |||
2628 | |||
2629 | /* | ||
2630 | * txAbort() | ||
2631 | * | ||
2632 | * function: abort tx before commit; | ||
2633 | * | ||
2634 | * frees line-locks and segment locks for all | ||
2635 | * segments in comdata structure. | ||
2636 | * Optionally sets state of file-system to FM_DIRTY in super-block. | ||
2637 | * log ages of page-frames in memory touched by the | ||
2638 | * transaction are reset to 0 (to avoid log wrap). | ||
2639 | */ | ||
2640 | void txAbort(tid_t tid, int dirty) | ||
2641 | { | ||
2642 | lid_t lid, next; | ||
2643 | struct metapage *mp; | ||
2644 | struct tblock *tblk = tid_to_tblock(tid); | ||
2645 | struct tlock *tlck; | ||
2646 | |||
2647 | /* | ||
2648 | * free tlocks of the transaction | ||
2649 | */ | ||
2650 | for (lid = tblk->next; lid; lid = next) { | ||
2651 | tlck = lid_to_tlock(lid); | ||
2652 | next = tlck->next; | ||
2653 | mp = tlck->mp; | ||
2654 | JFS_IP(tlck->ip)->xtlid = 0; | ||
2655 | |||
2656 | if (mp) { | ||
2657 | mp->lid = 0; | ||
2658 | |||
2659 | /* | ||
2660 | * reset lsn of page to avoid logwrap: | ||
2661 | * | ||
2662 | * (page may have been previously committed by another | ||
2663 | * transaction(s) but has not been paged, i.e., | ||
2664 | * it may be on logsync list even though it has not | ||
2665 | * been logged for the current tx.) | ||
2666 | */ | ||
2667 | if (mp->xflag & COMMIT_PAGE && mp->lsn) | ||
2668 | LogSyncRelease(mp); | ||
2669 | } | ||
2670 | /* insert tlock at head of freelist */ | ||
2671 | TXN_LOCK(); | ||
2672 | txLockFree(lid); | ||
2673 | TXN_UNLOCK(); | ||
2674 | } | ||
2675 | |||
2676 | /* caller will free the transaction block */ | ||
2677 | |||
2678 | tblk->next = tblk->last = 0; | ||
2679 | |||
2680 | /* | ||
2681 | * mark filesystem dirty | ||
2682 | */ | ||
2683 | if (dirty) | ||
2684 | jfs_error(tblk->sb, "txAbort"); | ||
2685 | |||
2686 | return; | ||
2687 | } | ||
2688 | |||
2689 | /* | ||
2690 | * txLazyCommit(void) | ||
2691 | * | ||
2692 | * All transactions except those changing ipimap (COMMIT_FORCE) are | ||
2693 | * processed by this routine. This ensures that the inode and block | ||
2694 | * allocation maps are updated in order. For synchronous transactions, | ||
2695 | * let the user thread finish processing after txUpdateMap() is called. | ||
2696 | */ | ||
2697 | static void txLazyCommit(struct tblock * tblk) | ||
2698 | { | ||
2699 | struct jfs_log *log; | ||
2700 | |||
2701 | while (((tblk->flag & tblkGC_READY) == 0) && | ||
2702 | ((tblk->flag & tblkGC_UNLOCKED) == 0)) { | ||
2703 | /* We must have gotten ahead of the user thread | ||
2704 | */ | ||
2705 | jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); | ||
2706 | yield(); | ||
2707 | } | ||
2708 | |||
2709 | jfs_info("txLazyCommit: processing tblk 0x%p", tblk); | ||
2710 | |||
2711 | txUpdateMap(tblk); | ||
2712 | |||
2713 | log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; | ||
2714 | |||
2715 | spin_lock_irq(&log->gclock); // LOGGC_LOCK | ||
2716 | |||
2717 | tblk->flag |= tblkGC_COMMITTED; | ||
2718 | |||
2719 | if (tblk->flag & tblkGC_READY) | ||
2720 | log->gcrtc--; | ||
2721 | |||
2722 | wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP | ||
2723 | |||
2724 | /* | ||
2725 | * Can't release log->gclock until we've tested tblk->flag | ||
2726 | */ | ||
2727 | if (tblk->flag & tblkGC_LAZY) { | ||
2728 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | ||
2729 | txUnlock(tblk); | ||
2730 | tblk->flag &= ~tblkGC_LAZY; | ||
2731 | txEnd(tblk - TxBlock); /* Convert back to tid */ | ||
2732 | } else | ||
2733 | spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK | ||
2734 | |||
2735 | jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); | ||
2736 | } | ||
2737 | |||
2738 | /* | ||
2739 | * jfs_lazycommit(void) | ||
2740 | * | ||
2741 | * To be run as a kernel daemon. If lbmIODone is called in an interrupt | ||
2742 | * context, or where blocking is not wanted, this routine will process | ||
2743 | * committed transactions from the unlock queue. | ||
2744 | */ | ||
2745 | int jfs_lazycommit(void *arg) | ||
2746 | { | ||
2747 | int WorkDone; | ||
2748 | struct tblock *tblk; | ||
2749 | unsigned long flags; | ||
2750 | struct jfs_sb_info *sbi; | ||
2751 | |||
2752 | daemonize("jfsCommit"); | ||
2753 | |||
2754 | complete(&jfsIOwait); | ||
2755 | |||
2756 | do { | ||
2757 | LAZY_LOCK(flags); | ||
2758 | jfs_commit_thread_waking = 0; /* OK to wake another thread */ | ||
2759 | while (!list_empty(&TxAnchor.unlock_queue)) { | ||
2760 | WorkDone = 0; | ||
2761 | list_for_each_entry(tblk, &TxAnchor.unlock_queue, | ||
2762 | cqueue) { | ||
2763 | |||
2764 | sbi = JFS_SBI(tblk->sb); | ||
2765 | /* | ||
2766 | * For each volume, the transactions must be | ||
2767 | * handled in order. If another commit thread | ||
2768 | * is handling a tblk for this superblock, | ||
2769 | * skip it | ||
2770 | */ | ||
2771 | if (sbi->commit_state & IN_LAZYCOMMIT) | ||
2772 | continue; | ||
2773 | |||
2774 | sbi->commit_state |= IN_LAZYCOMMIT; | ||
2775 | WorkDone = 1; | ||
2776 | |||
2777 | /* | ||
2778 | * Remove transaction from queue | ||
2779 | */ | ||
2780 | list_del(&tblk->cqueue); | ||
2781 | |||
2782 | LAZY_UNLOCK(flags); | ||
2783 | txLazyCommit(tblk); | ||
2784 | LAZY_LOCK(flags); | ||
2785 | |||
2786 | sbi->commit_state &= ~IN_LAZYCOMMIT; | ||
2787 | /* | ||
2788 | * Don't continue in the for loop. (We can't | ||
2789 | * anyway, it's unsafe!) We want to go back to | ||
2790 | * the beginning of the list. | ||
2791 | */ | ||
2792 | break; | ||
2793 | } | ||
2794 | |||
2795 | /* If there was nothing to do, don't continue */ | ||
2796 | if (!WorkDone) | ||
2797 | break; | ||
2798 | } | ||
2799 | /* In case a wakeup came while all threads were active */ | ||
2800 | jfs_commit_thread_waking = 0; | ||
2801 | |||
2802 | if (current->flags & PF_FREEZE) { | ||
2803 | LAZY_UNLOCK(flags); | ||
2804 | refrigerator(PF_FREEZE); | ||
2805 | } else { | ||
2806 | DECLARE_WAITQUEUE(wq, current); | ||
2807 | |||
2808 | add_wait_queue(&jfs_commit_thread_wait, &wq); | ||
2809 | set_current_state(TASK_INTERRUPTIBLE); | ||
2810 | LAZY_UNLOCK(flags); | ||
2811 | schedule(); | ||
2812 | current->state = TASK_RUNNING; | ||
2813 | remove_wait_queue(&jfs_commit_thread_wait, &wq); | ||
2814 | } | ||
2815 | } while (!jfs_stop_threads); | ||
2816 | |||
2817 | if (!list_empty(&TxAnchor.unlock_queue)) | ||
2818 | jfs_err("jfs_lazycommit being killed w/pending transactions!"); | ||
2819 | else | ||
2820 | jfs_info("jfs_lazycommit being killed"); | ||
2821 | complete_and_exit(&jfsIOwait, 0); | ||
2822 | } | ||
2823 | |||
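| /* | ||
| * txLazyUnlock() | ||
| * | ||
| * function: queue a committed tblock for the lazy commit daemon | ||
| * and wake a commit thread unless one is already servicing this | ||
| * superblock or a previously woken thread has not started yet; | ||
| */ | ||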
2824 | void txLazyUnlock(struct tblock * tblk) | ||
2825 | { | ||
2826 | unsigned long flags; | ||
2827 | |||
2828 | LAZY_LOCK(flags); | ||
2829 | |||
2830 | list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); | ||
2831 | /* | ||
2832 | * Don't wake up a commit thread if there is already one servicing | ||
2833 | * this superblock, or if the last one we woke up hasn't started yet. | ||
2834 | */ | ||
2835 | if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && | ||
2836 | !jfs_commit_thread_waking) { | ||
2837 | jfs_commit_thread_waking = 1; | ||
2838 | wake_up(&jfs_commit_thread_wait); | ||
2839 | } | ||
2840 | LAZY_UNLOCK(flags); | ||
2841 | } | ||
2842 | |||
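| /* | ||
| * LogSyncRelease() | ||
| * | ||
| * function: drop one nohomeok reference on a metapage; on the | ||
| * last release, remove the page from the log synclist and | ||
| * release the metapage; | ||
| */ | ||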
2843 | static void LogSyncRelease(struct metapage * mp) | ||
2844 | { | ||
2845 | struct jfs_log *log = mp->log; | ||
2846 | |||
2847 | assert(atomic_read(&mp->nohomeok)); | ||
2848 | assert(log); | ||
2849 | atomic_dec(&mp->nohomeok); | ||
2850 | |||
2851 | if (atomic_read(&mp->nohomeok)) | ||
2852 | return; | ||
2853 | |||
2854 | hold_metapage(mp, 0); | ||
2855 | |||
2856 | LOGSYNC_LOCK(log); | ||
2857 | mp->log = NULL; | ||
2858 | mp->lsn = 0; | ||
2859 | mp->clsn = 0; | ||
2860 | log->count--; | ||
2861 | list_del_init(&mp->synclist); | ||
2862 | LOGSYNC_UNLOCK(log); | ||
2863 | |||
2864 | release_metapage(mp); | ||
2865 | } | ||
2866 | |||
2867 | /* | ||
2868 | * txQuiesce | ||
2869 | * | ||
2870 | * Block all new transactions and push anonymous transactions to | ||
2871 | * completion | ||
2872 | * | ||
2873 | * This does almost the same thing as jfs_sync below. We don't | ||
2874 | * worry about deadlocking when jfs_tlocks_low is set, since we would | ||
2875 | * expect jfs_sync to get us out of that jam. | ||
2876 | */ | ||
2877 | void txQuiesce(struct super_block *sb) | ||
2878 | { | ||
2879 | struct inode *ip; | ||
2880 | struct jfs_inode_info *jfs_ip; | ||
2881 | struct jfs_log *log = JFS_SBI(sb)->log; | ||
2882 | tid_t tid; | ||
2883 | |||
2884 | set_bit(log_QUIESCE, &log->flag); | ||
2885 | |||
2886 | TXN_LOCK(); | ||
2887 | restart: | ||
2888 | while (!list_empty(&TxAnchor.anon_list)) { | ||
2889 | jfs_ip = list_entry(TxAnchor.anon_list.next, | ||
2890 | struct jfs_inode_info, | ||
2891 | anon_inode_list); | ||
2892 | ip = &jfs_ip->vfs_inode; | ||
2893 | |||
2894 | /* | ||
2895 | * inode will be removed from anonymous list | ||
2896 | * when it is committed | ||
2897 | */ | ||
2898 | TXN_UNLOCK(); | ||
2899 | tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); | ||
2900 | down(&jfs_ip->commit_sem); | ||
2901 | txCommit(tid, 1, &ip, 0); | ||
2902 | txEnd(tid); | ||
2903 | up(&jfs_ip->commit_sem); | ||
2904 | /* | ||
2905 | * Just to be safe. I don't know how | ||
2906 | * long we can run without blocking | ||
2907 | */ | ||
2908 | cond_resched(); | ||
2909 | TXN_LOCK(); | ||
2910 | } | ||
2911 | |||
2912 | /* | ||
2913 | * If jfs_sync is running in parallel, there could be some inodes | ||
2914 | * on anon_list2. Let's check. | ||
2915 | */ | ||
2916 | if (!list_empty(&TxAnchor.anon_list2)) { | ||
2917 | list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list); | ||
2918 | INIT_LIST_HEAD(&TxAnchor.anon_list2); | ||
2919 | goto restart; | ||
2920 | } | ||
2921 | TXN_UNLOCK(); | ||
2922 | |||
2923 | /* | ||
2924 | * We may need to kick off the group commit | ||
2925 | */ | ||
2926 | jfs_flush_journal(log, 0); | ||
2927 | } | ||
2928 | |||
2929 | /* | ||
2930 | * txResume() | ||
2931 | * | ||
2932 | * Allows transactions to start again following txQuiesce | ||
2933 | */ | ||
2934 | void txResume(struct super_block *sb) | ||
2935 | { | ||
2936 | struct jfs_log *log = JFS_SBI(sb)->log; | ||
2937 | |||
2938 | clear_bit(log_QUIESCE, &log->flag); | ||
2939 | TXN_WAKEUP(&log->syncwait); | ||
2940 | } | ||
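| | ||
| /* illustrative pairing (a minimal sketch; the actual callers | ||
| * live elsewhere in jfs): | ||
| * | ||
| * txQuiesce(sb); | ||
| * ... operate on the quiesced volume ... | ||
| * txResume(sb); | ||
| */ | ||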
2941 | |||
2942 | /* | ||
2943 | * jfs_sync(void) | ||
2944 | * | ||
2945 | * To be run as a kernel daemon. This is awakened when tlocks run low. | ||
2946 | * We write any inodes that have anonymous tlocks so they will become | ||
2947 | * available. | ||
2948 | */ | ||
2949 | int jfs_sync(void *arg) | ||
2950 | { | ||
2951 | struct inode *ip; | ||
2952 | struct jfs_inode_info *jfs_ip; | ||
2953 | int rc; | ||
2954 | tid_t tid; | ||
2955 | |||
2956 | daemonize("jfsSync"); | ||
2957 | |||
2958 | complete(&jfsIOwait); | ||
2959 | |||
2960 | do { | ||
2961 | /* | ||
2962 | * write each inode on the anonymous inode list | ||
2963 | */ | ||
2964 | TXN_LOCK(); | ||
2965 | while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { | ||
2966 | jfs_ip = list_entry(TxAnchor.anon_list.next, | ||
2967 | struct jfs_inode_info, | ||
2968 | anon_inode_list); | ||
2969 | ip = &jfs_ip->vfs_inode; | ||
2970 | |||
2971 | if (! igrab(ip)) { | ||
2972 | /* | ||
2973 | * Inode is being freed | ||
2974 | */ | ||
2975 | list_del_init(&jfs_ip->anon_inode_list); | ||
2976 | } else if (! down_trylock(&jfs_ip->commit_sem)) { | ||
2977 | /* | ||
2978 | * inode will be removed from anonymous list | ||
2979 | * when it is committed | ||
2980 | */ | ||
2981 | TXN_UNLOCK(); | ||
2982 | tid = txBegin(ip->i_sb, COMMIT_INODE); | ||
2983 | rc = txCommit(tid, 1, &ip, 0); | ||
2984 | txEnd(tid); | ||
2985 | up(&jfs_ip->commit_sem); | ||
2986 | |||
2987 | iput(ip); | ||
2988 | /* | ||
2989 | * Just to be safe. I don't know how | ||
2990 | * long we can run without blocking | ||
2991 | */ | ||
2992 | cond_resched(); | ||
2993 | TXN_LOCK(); | ||
2994 | } else { | ||
2995 | /* We can't get the commit semaphore. It may | ||
2996 | * be held by a thread waiting for tlock's | ||
2997 | * so let's not block here. Save it to | ||
2998 | * put back on the anon_list. | ||
2999 | */ | ||
3000 | |||
3001 | /* Take off anon_list */ | ||
3002 | list_del(&jfs_ip->anon_inode_list); | ||
3003 | |||
3004 | /* Put on anon_list2 */ | ||
3005 | list_add(&jfs_ip->anon_inode_list, | ||
3006 | &TxAnchor.anon_list2); | ||
3007 | |||
3008 | TXN_UNLOCK(); | ||
3009 | iput(ip); | ||
3010 | TXN_LOCK(); | ||
3011 | } | ||
3012 | } | ||
3013 | /* Add anon_list2 back to anon_list */ | ||
3014 | list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); | ||
3015 | |||
3016 | if (current->flags & PF_FREEZE) { | ||
3017 | TXN_UNLOCK(); | ||
3018 | refrigerator(PF_FREEZE); | ||
3019 | } else { | ||
3020 | DECLARE_WAITQUEUE(wq, current); | ||
3021 | |||
3022 | add_wait_queue(&jfs_sync_thread_wait, &wq); | ||
3023 | set_current_state(TASK_INTERRUPTIBLE); | ||
3024 | TXN_UNLOCK(); | ||
3025 | schedule(); | ||
3026 | current->state = TASK_RUNNING; | ||
3027 | remove_wait_queue(&jfs_sync_thread_wait, &wq); | ||
3028 | } | ||
3029 | } while (!jfs_stop_threads); | ||
3030 | |||
3031 | jfs_info("jfs_sync being killed"); | ||
3032 | complete_and_exit(&jfsIOwait, 0); | ||
3033 | } | ||
3034 | |||
3035 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) | ||
3036 | int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, | ||
3037 | int *eof, void *data) | ||
3038 | { | ||
3039 | int len = 0; | ||
3040 | off_t begin; | ||
3041 | char *freewait; | ||
3042 | char *freelockwait; | ||
3043 | char *lowlockwait; | ||
3044 | |||
3045 | freewait = | ||
3046 | waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; | ||
3047 | freelockwait = | ||
3048 | waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; | ||
3049 | lowlockwait = | ||
3050 | waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; | ||
3051 | |||
3052 | len += sprintf(buffer, | ||
3053 | "JFS TxAnchor\n" | ||
3054 | "============\n" | ||
3055 | "freetid = %d\n" | ||
3056 | "freewait = %s\n" | ||
3057 | "freelock = %d\n" | ||
3058 | "freelockwait = %s\n" | ||
3059 | "lowlockwait = %s\n" | ||
3060 | "tlocksInUse = %d\n" | ||
3061 | "jfs_tlocks_low = %d\n" | ||
3062 | "unlock_queue is %sempty\n", | ||
3063 | TxAnchor.freetid, | ||
3064 | freewait, | ||
3065 | TxAnchor.freelock, | ||
3066 | freelockwait, | ||
3067 | lowlockwait, | ||
3068 | TxAnchor.tlocksInUse, | ||
3069 | jfs_tlocks_low, | ||
3070 | list_empty(&TxAnchor.unlock_queue) ? "" : "not "); | ||
3071 | |||
3072 | begin = offset; | ||
3073 | *start = buffer + begin; | ||
3074 | len -= begin; | ||
3075 | |||
3076 | if (len > length) | ||
3077 | len = length; | ||
3078 | else | ||
3079 | *eof = 1; | ||
3080 | |||
3081 | if (len < 0) | ||
3082 | len = 0; | ||
3083 | |||
3084 | return len; | ||
3085 | } | ||
3086 | #endif | ||
3087 | |||
3088 | #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) | ||
3089 | int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, | ||
3090 | int *eof, void *data) | ||
3091 | { | ||
3092 | int len = 0; | ||
3093 | off_t begin; | ||
3094 | |||
3095 | len += sprintf(buffer, | ||
3096 | "JFS TxStats\n" | ||
3097 | "===========\n" | ||
3098 | "calls to txBegin = %d\n" | ||
3099 | "txBegin blocked by sync barrier = %d\n" | ||
3100 | "txBegin blocked by tlocks low = %d\n" | ||
3101 | "txBegin blocked by no free tid = %d\n" | ||
3102 | "calls to txBeginAnon = %d\n" | ||
3103 | "txBeginAnon blocked by sync barrier = %d\n" | ||
3104 | "txBeginAnon blocked by tlocks low = %d\n" | ||
3105 | "calls to txLockAlloc = %d\n" | ||
3106 | "tLockAlloc blocked by no free lock = %d\n", | ||
3107 | TxStat.txBegin, | ||
3108 | TxStat.txBegin_barrier, | ||
3109 | TxStat.txBegin_lockslow, | ||
3110 | TxStat.txBegin_freetid, | ||
3111 | TxStat.txBeginAnon, | ||
3112 | TxStat.txBeginAnon_barrier, | ||
3113 | TxStat.txBeginAnon_lockslow, | ||
3114 | TxStat.txLockAlloc, | ||
3115 | TxStat.txLockAlloc_freelock); | ||
3116 | |||
3117 | begin = offset; | ||
3118 | *start = buffer + begin; | ||
3119 | len -= begin; | ||
3120 | |||
3121 | if (len > length) | ||
3122 | len = length; | ||
3123 | else | ||
3124 | *eof = 1; | ||
3125 | |||
3126 | if (len < 0) | ||
3127 | len = 0; | ||
3128 | |||
3129 | return len; | ||
3130 | } | ||
3131 | #endif | ||