aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ubifs/tnc_commit.c
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-07-14 12:08:37 -0400
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-07-15 10:35:15 -0400
commit1e51764a3c2ac05a23a22b2a95ddee4d9bffb16d (patch)
tree919debdd48aef9eee9ff0e8f465ef2649325b993 /fs/ubifs/tnc_commit.c
parente56a99d5a42dcb91e622ae7a0289d8fb2ddabffb (diff)
UBIFS: add new flash file system
This is a new flash file system. See http://www.linux-mtd.infradead.org/doc/ubifs.html Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Diffstat (limited to 'fs/ubifs/tnc_commit.c')
-rw-r--r--fs/ubifs/tnc_commit.c1103
1 files changed, 1103 insertions, 0 deletions
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
new file mode 100644
index 000000000000..8117e65ba2e9
--- /dev/null
+++ b/fs/ubifs/tnc_commit.c
@@ -0,0 +1,1103 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/* This file implements TNC functions for committing */
24
25#include "ubifs.h"
26
27/**
28 * make_idx_node - make an index node for fill-the-gaps method of TNC commit.
29 * @c: UBIFS file-system description object
30 * @idx: buffer in which to place new index node
31 * @znode: znode from which to make new index node
32 * @lnum: LEB number where new index node will be written
33 * @offs: offset where new index node will be written
34 * @len: length of new index node
35 */
36static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
37 struct ubifs_znode *znode, int lnum, int offs, int len)
38{
39 struct ubifs_znode *zp;
40 int i, err;
41
42 /* Make index node */
43 idx->ch.node_type = UBIFS_IDX_NODE;
44 idx->child_cnt = cpu_to_le16(znode->child_cnt);
45 idx->level = cpu_to_le16(znode->level);
46 for (i = 0; i < znode->child_cnt; i++) {
47 struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
48 struct ubifs_zbranch *zbr = &znode->zbranch[i];
49
50 key_write_idx(c, &zbr->key, &br->key);
51 br->lnum = cpu_to_le32(zbr->lnum);
52 br->offs = cpu_to_le32(zbr->offs);
53 br->len = cpu_to_le32(zbr->len);
54 if (!zbr->lnum || !zbr->len) {
55 ubifs_err("bad ref in znode");
56 dbg_dump_znode(c, znode);
57 if (zbr->znode)
58 dbg_dump_znode(c, zbr->znode);
59 }
60 }
61 ubifs_prepare_node(c, idx, len, 0);
62
63#ifdef CONFIG_UBIFS_FS_DEBUG
64 znode->lnum = lnum;
65 znode->offs = offs;
66 znode->len = len;
67#endif
68
69 err = insert_old_idx_znode(c, znode);
70
71 /* Update the parent */
72 zp = znode->parent;
73 if (zp) {
74 struct ubifs_zbranch *zbr;
75
76 zbr = &zp->zbranch[znode->iip];
77 zbr->lnum = lnum;
78 zbr->offs = offs;
79 zbr->len = len;
80 } else {
81 c->zroot.lnum = lnum;
82 c->zroot.offs = offs;
83 c->zroot.len = len;
84 }
85 c->calc_idx_sz += ALIGN(len, 8);
86
87 atomic_long_dec(&c->dirty_zn_cnt);
88
89 ubifs_assert(ubifs_zn_dirty(znode));
90 ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
91
92 __clear_bit(DIRTY_ZNODE, &znode->flags);
93 __clear_bit(COW_ZNODE, &znode->flags);
94
95 return err;
96}
97
98/**
99 * fill_gap - make index nodes in gaps in dirty index LEBs.
100 * @c: UBIFS file-system description object
101 * @lnum: LEB number that gap appears in
102 * @gap_start: offset of start of gap
103 * @gap_end: offset of end of gap
104 * @dirt: adds dirty space to this
105 *
106 * This function returns the number of index nodes written into the gap.
107 */
108static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
109 int *dirt)
110{
111 int len, gap_remains, gap_pos, written, pad_len;
112
113 ubifs_assert((gap_start & 7) == 0);
114 ubifs_assert((gap_end & 7) == 0);
115 ubifs_assert(gap_end >= gap_start);
116
117 gap_remains = gap_end - gap_start;
118 if (!gap_remains)
119 return 0;
120 gap_pos = gap_start;
121 written = 0;
122 while (c->enext) {
123 len = ubifs_idx_node_sz(c, c->enext->child_cnt);
124 if (len < gap_remains) {
125 struct ubifs_znode *znode = c->enext;
126 const int alen = ALIGN(len, 8);
127 int err;
128
129 ubifs_assert(alen <= gap_remains);
130 err = make_idx_node(c, c->ileb_buf + gap_pos, znode,
131 lnum, gap_pos, len);
132 if (err)
133 return err;
134 gap_remains -= alen;
135 gap_pos += alen;
136 c->enext = znode->cnext;
137 if (c->enext == c->cnext)
138 c->enext = NULL;
139 written += 1;
140 } else
141 break;
142 }
143 if (gap_end == c->leb_size) {
144 c->ileb_len = ALIGN(gap_pos, c->min_io_size);
145 /* Pad to end of min_io_size */
146 pad_len = c->ileb_len - gap_pos;
147 } else
148 /* Pad to end of gap */
149 pad_len = gap_remains;
150 dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d",
151 lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len);
152 ubifs_pad(c, c->ileb_buf + gap_pos, pad_len);
153 *dirt += pad_len;
154 return written;
155}
156
157/**
158 * find_old_idx - find an index node obsoleted since the last commit start.
159 * @c: UBIFS file-system description object
160 * @lnum: LEB number of obsoleted index node
161 * @offs: offset of obsoleted index node
162 *
163 * Returns %1 if found and %0 otherwise.
164 */
165static int find_old_idx(struct ubifs_info *c, int lnum, int offs)
166{
167 struct ubifs_old_idx *o;
168 struct rb_node *p;
169
170 p = c->old_idx.rb_node;
171 while (p) {
172 o = rb_entry(p, struct ubifs_old_idx, rb);
173 if (lnum < o->lnum)
174 p = p->rb_left;
175 else if (lnum > o->lnum)
176 p = p->rb_right;
177 else if (offs < o->offs)
178 p = p->rb_left;
179 else if (offs > o->offs)
180 p = p->rb_right;
181 else
182 return 1;
183 }
184 return 0;
185}
186
187/**
188 * is_idx_node_in_use - determine if an index node can be overwritten.
189 * @c: UBIFS file-system description object
190 * @key: key of index node
191 * @level: index node level
192 * @lnum: LEB number of index node
193 * @offs: offset of index node
194 *
195 * If @key / @lnum / @offs identify an index node that was not part of the old
196 * index, then this function returns %0 (obsolete). Else if the index node was
197 * part of the old index but is now dirty %1 is returned, else if it is clean %2
198 * is returned. A negative error code is returned on failure.
199 */
200static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
201 int level, int lnum, int offs)
202{
203 int ret;
204
205 ret = is_idx_node_in_tnc(c, key, level, lnum, offs);
206 if (ret < 0)
207 return ret; /* Error code */
208 if (ret == 0)
209 if (find_old_idx(c, lnum, offs))
210 return 1;
211 return ret;
212}
213
214/**
215 * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
216 * @c: UBIFS file-system description object
217 * @p: return LEB number here
218 *
219 * This function lays out new index nodes for dirty znodes using in-the-gaps
220 * method of TNC commit.
221 * This function merely puts the next znode into the next gap, making no attempt
222 * to try to maximise the number of znodes that fit.
223 * This function returns the number of index nodes written into the gaps, or a
224 * negative error code on failure.
225 */
226static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
227{
228 struct ubifs_scan_leb *sleb;
229 struct ubifs_scan_node *snod;
230 int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written;
231
232 tot_written = 0;
233 /* Get an index LEB with lots of obsolete index nodes */
234 lnum = ubifs_find_dirty_idx_leb(c);
235 if (lnum < 0)
236 /*
237 * There also may be dirt in the index head that could be
238 * filled, however we do not check there at present.
239 */
240 return lnum; /* Error code */
241 *p = lnum;
242 dbg_gc("LEB %d", lnum);
243 /*
244 * Scan the index LEB. We use the generic scan for this even though
245 * it is more comprehensive and less efficient than is needed for this
246 * purpose.
247 */
248 sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
249 c->ileb_len = 0;
250 if (IS_ERR(sleb))
251 return PTR_ERR(sleb);
252 gap_start = 0;
253 list_for_each_entry(snod, &sleb->nodes, list) {
254 struct ubifs_idx_node *idx;
255 int in_use, level;
256
257 ubifs_assert(snod->type == UBIFS_IDX_NODE);
258 idx = snod->node;
259 key_read(c, ubifs_idx_key(c, idx), &snod->key);
260 level = le16_to_cpu(idx->level);
261 /* Determine if the index node is in use (not obsolete) */
262 in_use = is_idx_node_in_use(c, &snod->key, level, lnum,
263 snod->offs);
264 if (in_use < 0) {
265 ubifs_scan_destroy(sleb);
266 return in_use; /* Error code */
267 }
268 if (in_use) {
269 if (in_use == 1)
270 dirt += ALIGN(snod->len, 8);
271 /*
272 * The obsolete index nodes form gaps that can be
273 * overwritten. This gap has ended because we have
274 * found an index node that is still in use
275 * i.e. not obsolete
276 */
277 gap_end = snod->offs;
278 /* Try to fill gap */
279 written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
280 if (written < 0) {
281 ubifs_scan_destroy(sleb);
282 return written; /* Error code */
283 }
284 tot_written += written;
285 gap_start = ALIGN(snod->offs + snod->len, 8);
286 }
287 }
288 ubifs_scan_destroy(sleb);
289 c->ileb_len = c->leb_size;
290 gap_end = c->leb_size;
291 /* Try to fill gap */
292 written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
293 if (written < 0)
294 return written; /* Error code */
295 tot_written += written;
296 if (tot_written == 0) {
297 struct ubifs_lprops lp;
298
299 dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
300 err = ubifs_read_one_lp(c, lnum, &lp);
301 if (err)
302 return err;
303 if (lp.free == c->leb_size) {
304 /*
305 * We must have snatched this LEB from the idx_gc list
306 * so we need to correct the free and dirty space.
307 */
308 err = ubifs_change_one_lp(c, lnum,
309 c->leb_size - c->ileb_len,
310 dirt, 0, 0, 0);
311 if (err)
312 return err;
313 }
314 return 0;
315 }
316 err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt,
317 0, 0, 0);
318 if (err)
319 return err;
320 err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len,
321 UBI_SHORTTERM);
322 if (err)
323 return err;
324 dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
325 return tot_written;
326}
327
328/**
329 * get_leb_cnt - calculate the number of empty LEBs needed to commit.
330 * @c: UBIFS file-system description object
331 * @cnt: number of znodes to commit
332 *
333 * This function returns the number of empty LEBs needed to commit @cnt znodes
334 * to the current index head. The number is not exact and may be more than
335 * needed.
336 */
337static int get_leb_cnt(struct ubifs_info *c, int cnt)
338{
339 int d;
340
341 /* Assume maximum index node size (i.e. overestimate space needed) */
342 cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz;
343 if (cnt < 0)
344 cnt = 0;
345 d = c->leb_size / c->max_idx_node_sz;
346 return DIV_ROUND_UP(cnt, d);
347}
348
349/**
350 * layout_in_gaps - in-the-gaps method of committing TNC.
351 * @c: UBIFS file-system description object
352 * @cnt: number of dirty znodes to commit.
353 *
354 * This function lays out new index nodes for dirty znodes using in-the-gaps
355 * method of TNC commit.
356 *
357 * This function returns %0 on success and a negative error code on failure.
358 */
359static int layout_in_gaps(struct ubifs_info *c, int cnt)
360{
361 int err, leb_needed_cnt, written, *p;
362
363 dbg_gc("%d znodes to write", cnt);
364
365 c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS);
366 if (!c->gap_lebs)
367 return -ENOMEM;
368
369 p = c->gap_lebs;
370 do {
371 ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
372 written = layout_leb_in_gaps(c, p);
373 if (written < 0) {
374 err = written;
375 if (err == -ENOSPC) {
376 if (!dbg_force_in_the_gaps_enabled) {
377 /*
378 * Do not print scary warnings if the
379 * debugging option which forces
380 * in-the-gaps is enabled.
381 */
382 ubifs_err("out of space");
383 spin_lock(&c->space_lock);
384 dbg_dump_budg(c);
385 spin_unlock(&c->space_lock);
386 dbg_dump_lprops(c);
387 }
388 /* Try to commit anyway */
389 err = 0;
390 break;
391 }
392 kfree(c->gap_lebs);
393 c->gap_lebs = NULL;
394 return err;
395 }
396 p++;
397 cnt -= written;
398 leb_needed_cnt = get_leb_cnt(c, cnt);
399 dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
400 leb_needed_cnt, c->ileb_cnt);
401 } while (leb_needed_cnt > c->ileb_cnt);
402
403 *p = -1;
404 return 0;
405}
406
407/**
408 * layout_in_empty_space - layout index nodes in empty space.
409 * @c: UBIFS file-system description object
410 *
411 * This function lays out new index nodes for dirty znodes using empty LEBs.
412 *
413 * This function returns %0 on success and a negative error code on failure.
414 */
415static int layout_in_empty_space(struct ubifs_info *c)
416{
417 struct ubifs_znode *znode, *cnext, *zp;
418 int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
419 int wlen, blen, err;
420
421 cnext = c->enext;
422 if (!cnext)
423 return 0;
424
425 lnum = c->ihead_lnum;
426 buf_offs = c->ihead_offs;
427
428 buf_len = ubifs_idx_node_sz(c, c->fanout);
429 buf_len = ALIGN(buf_len, c->min_io_size);
430 used = 0;
431 avail = buf_len;
432
433 /* Ensure there is enough room for first write */
434 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
435 if (buf_offs + next_len > c->leb_size)
436 lnum = -1;
437
438 while (1) {
439 znode = cnext;
440
441 len = ubifs_idx_node_sz(c, znode->child_cnt);
442
443 /* Determine the index node position */
444 if (lnum == -1) {
445 if (c->ileb_nxt >= c->ileb_cnt) {
446 ubifs_err("out of space");
447 return -ENOSPC;
448 }
449 lnum = c->ilebs[c->ileb_nxt++];
450 buf_offs = 0;
451 used = 0;
452 avail = buf_len;
453 }
454
455 offs = buf_offs + used;
456
457#ifdef CONFIG_UBIFS_FS_DEBUG
458 znode->lnum = lnum;
459 znode->offs = offs;
460 znode->len = len;
461#endif
462
463 /* Update the parent */
464 zp = znode->parent;
465 if (zp) {
466 struct ubifs_zbranch *zbr;
467 int i;
468
469 i = znode->iip;
470 zbr = &zp->zbranch[i];
471 zbr->lnum = lnum;
472 zbr->offs = offs;
473 zbr->len = len;
474 } else {
475 c->zroot.lnum = lnum;
476 c->zroot.offs = offs;
477 c->zroot.len = len;
478 }
479 c->calc_idx_sz += ALIGN(len, 8);
480
481 /*
482 * Once lprops is updated, we can decrease the dirty znode count
483 * but it is easier to just do it here.
484 */
485 atomic_long_dec(&c->dirty_zn_cnt);
486
487 /*
488 * Calculate the next index node length to see if there is
489 * enough room for it
490 */
491 cnext = znode->cnext;
492 if (cnext == c->cnext)
493 next_len = 0;
494 else
495 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
496
497 if (c->min_io_size == 1) {
498 buf_offs += ALIGN(len, 8);
499 if (next_len) {
500 if (buf_offs + next_len <= c->leb_size)
501 continue;
502 err = ubifs_update_one_lp(c, lnum, 0,
503 c->leb_size - buf_offs, 0, 0);
504 if (err)
505 return err;
506 lnum = -1;
507 continue;
508 }
509 err = ubifs_update_one_lp(c, lnum,
510 c->leb_size - buf_offs, 0, 0, 0);
511 if (err)
512 return err;
513 break;
514 }
515
516 /* Update buffer positions */
517 wlen = used + len;
518 used += ALIGN(len, 8);
519 avail -= ALIGN(len, 8);
520
521 if (next_len != 0 &&
522 buf_offs + used + next_len <= c->leb_size &&
523 avail > 0)
524 continue;
525
526 if (avail <= 0 && next_len &&
527 buf_offs + used + next_len <= c->leb_size)
528 blen = buf_len;
529 else
530 blen = ALIGN(wlen, c->min_io_size);
531
532 /* The buffer is full or there are no more znodes to do */
533 buf_offs += blen;
534 if (next_len) {
535 if (buf_offs + next_len > c->leb_size) {
536 err = ubifs_update_one_lp(c, lnum,
537 c->leb_size - buf_offs, blen - used,
538 0, 0);
539 if (err)
540 return err;
541 lnum = -1;
542 }
543 used -= blen;
544 if (used < 0)
545 used = 0;
546 avail = buf_len - used;
547 continue;
548 }
549 err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
550 blen - used, 0, 0);
551 if (err)
552 return err;
553 break;
554 }
555
556#ifdef CONFIG_UBIFS_FS_DEBUG
557 c->new_ihead_lnum = lnum;
558 c->new_ihead_offs = buf_offs;
559#endif
560
561 return 0;
562}
563
564/**
565 * layout_commit - determine positions of index nodes to commit.
566 * @c: UBIFS file-system description object
567 * @no_space: indicates that insufficient empty LEBs were allocated
568 * @cnt: number of znodes to commit
569 *
570 * Calculate and update the positions of index nodes to commit. If there were
571 * an insufficient number of empty LEBs allocated, then index nodes are placed
572 * into the gaps created by obsolete index nodes in non-empty index LEBs. For
573 * this purpose, an obsolete index node is one that was not in the index as at
574 * the end of the last commit. To write "in-the-gaps" requires that those index
575 * LEBs are updated atomically in-place.
576 */
577static int layout_commit(struct ubifs_info *c, int no_space, int cnt)
578{
579 int err;
580
581 if (no_space) {
582 err = layout_in_gaps(c, cnt);
583 if (err)
584 return err;
585 }
586 err = layout_in_empty_space(c);
587 return err;
588}
589
590/**
591 * find_first_dirty - find first dirty znode.
592 * @znode: znode to begin searching from
593 */
594static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode)
595{
596 int i, cont;
597
598 if (!znode)
599 return NULL;
600
601 while (1) {
602 if (znode->level == 0) {
603 if (ubifs_zn_dirty(znode))
604 return znode;
605 return NULL;
606 }
607 cont = 0;
608 for (i = 0; i < znode->child_cnt; i++) {
609 struct ubifs_zbranch *zbr = &znode->zbranch[i];
610
611 if (zbr->znode && ubifs_zn_dirty(zbr->znode)) {
612 znode = zbr->znode;
613 cont = 1;
614 break;
615 }
616 }
617 if (!cont) {
618 if (ubifs_zn_dirty(znode))
619 return znode;
620 return NULL;
621 }
622 }
623}
624
625/**
626 * find_next_dirty - find next dirty znode.
627 * @znode: znode to begin searching from
628 */
629static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode)
630{
631 int n = znode->iip + 1;
632
633 znode = znode->parent;
634 if (!znode)
635 return NULL;
636 for (; n < znode->child_cnt; n++) {
637 struct ubifs_zbranch *zbr = &znode->zbranch[n];
638
639 if (zbr->znode && ubifs_zn_dirty(zbr->znode))
640 return find_first_dirty(zbr->znode);
641 }
642 return znode;
643}
644
645/**
646 * get_znodes_to_commit - create list of dirty znodes to commit.
647 * @c: UBIFS file-system description object
648 *
649 * This function returns the number of znodes to commit.
650 */
651static int get_znodes_to_commit(struct ubifs_info *c)
652{
653 struct ubifs_znode *znode, *cnext;
654 int cnt = 0;
655
656 c->cnext = find_first_dirty(c->zroot.znode);
657 znode = c->enext = c->cnext;
658 if (!znode) {
659 dbg_cmt("no znodes to commit");
660 return 0;
661 }
662 cnt += 1;
663 while (1) {
664 ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
665 __set_bit(COW_ZNODE, &znode->flags);
666 znode->alt = 0;
667 cnext = find_next_dirty(znode);
668 if (!cnext) {
669 znode->cnext = c->cnext;
670 break;
671 }
672 znode->cnext = cnext;
673 znode = cnext;
674 cnt += 1;
675 }
676 dbg_cmt("committing %d znodes", cnt);
677 ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt));
678 return cnt;
679}
680
681/**
682 * alloc_idx_lebs - allocate empty LEBs to be used to commit.
683 * @c: UBIFS file-system description object
684 * @cnt: number of znodes to commit
685 *
686 * This function returns %-ENOSPC if it cannot allocate a sufficient number of
687 * empty LEBs. %0 is returned on success, otherwise a negative error code
688 * is returned.
689 */
690static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
691{
692 int i, leb_cnt, lnum;
693
694 c->ileb_cnt = 0;
695 c->ileb_nxt = 0;
696 leb_cnt = get_leb_cnt(c, cnt);
697 dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt);
698 if (!leb_cnt)
699 return 0;
700 c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS);
701 if (!c->ilebs)
702 return -ENOMEM;
703 for (i = 0; i < leb_cnt; i++) {
704 lnum = ubifs_find_free_leb_for_idx(c);
705 if (lnum < 0)
706 return lnum;
707 c->ilebs[c->ileb_cnt++] = lnum;
708 dbg_cmt("LEB %d", lnum);
709 }
710 if (dbg_force_in_the_gaps())
711 return -ENOSPC;
712 return 0;
713}
714
715/**
716 * free_unused_idx_lebs - free unused LEBs that were allocated for the commit.
717 * @c: UBIFS file-system description object
718 *
719 * It is possible that we allocate more empty LEBs for the commit than we need.
720 * This functions frees the surplus.
721 *
722 * This function returns %0 on success and a negative error code on failure.
723 */
724static int free_unused_idx_lebs(struct ubifs_info *c)
725{
726 int i, err = 0, lnum, er;
727
728 for (i = c->ileb_nxt; i < c->ileb_cnt; i++) {
729 lnum = c->ilebs[i];
730 dbg_cmt("LEB %d", lnum);
731 er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
732 LPROPS_INDEX | LPROPS_TAKEN, 0);
733 if (!err)
734 err = er;
735 }
736 return err;
737}
738
739/**
740 * free_idx_lebs - free unused LEBs after commit end.
741 * @c: UBIFS file-system description object
742 *
743 * This function returns %0 on success and a negative error code on failure.
744 */
745static int free_idx_lebs(struct ubifs_info *c)
746{
747 int err;
748
749 err = free_unused_idx_lebs(c);
750 kfree(c->ilebs);
751 c->ilebs = NULL;
752 return err;
753}
754
755/**
756 * ubifs_tnc_start_commit - start TNC commit.
757 * @c: UBIFS file-system description object
758 * @zroot: new index root position is returned here
759 *
760 * This function prepares the list of indexing nodes to commit and lays out
761 * their positions on flash. If there is not enough free space it uses the
762 * in-gap commit method. Returns zero in case of success and a negative error
763 * code in case of failure.
764 */
765int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
766{
767 int err = 0, cnt;
768
769 mutex_lock(&c->tnc_mutex);
770 err = dbg_check_tnc(c, 1);
771 if (err)
772 goto out;
773 cnt = get_znodes_to_commit(c);
774 if (cnt != 0) {
775 int no_space = 0;
776
777 err = alloc_idx_lebs(c, cnt);
778 if (err == -ENOSPC)
779 no_space = 1;
780 else if (err)
781 goto out_free;
782 err = layout_commit(c, no_space, cnt);
783 if (err)
784 goto out_free;
785 ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
786 err = free_unused_idx_lebs(c);
787 if (err)
788 goto out;
789 }
790 destroy_old_idx(c);
791 memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch));
792
793 err = ubifs_save_dirty_idx_lnums(c);
794 if (err)
795 goto out;
796
797 spin_lock(&c->space_lock);
798 /*
799 * Although we have not finished committing yet, update size of the
800 * committed index ('c->old_idx_sz') and zero out the index growth
801 * budget. It is OK to do this now, because we've reserved all the
802 * space which is needed to commit the index, and it is save for the
803 * budgeting subsystem to assume the index is already committed,
804 * even though it is not.
805 */
806 c->old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0;
808 spin_unlock(&c->space_lock);
809 mutex_unlock(&c->tnc_mutex);
810
811 dbg_cmt("number of index LEBs %d", c->lst.idx_lebs);
812 dbg_cmt("size of index %llu", c->calc_idx_sz);
813 return err;
814
815out_free:
816 free_idx_lebs(c);
817out:
818 mutex_unlock(&c->tnc_mutex);
819 return err;
820}
821
822/**
823 * write_index - write index nodes.
824 * @c: UBIFS file-system description object
825 *
826 * This function writes the index nodes whose positions were laid out in the
827 * layout_in_empty_space function.
828 */
829static int write_index(struct ubifs_info *c)
830{
831 struct ubifs_idx_node *idx;
832 struct ubifs_znode *znode, *cnext;
833 int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
834 int avail, wlen, err, lnum_pos = 0;
835
836 cnext = c->enext;
837 if (!cnext)
838 return 0;
839
840 /*
841 * Always write index nodes to the index head so that index nodes and
842 * other types of nodes are never mixed in the same erase block.
843 */
844 lnum = c->ihead_lnum;
845 buf_offs = c->ihead_offs;
846
847 /* Allocate commit buffer */
848 buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size);
849 used = 0;
850 avail = buf_len;
851
852 /* Ensure there is enough room for first write */
853 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
854 if (buf_offs + next_len > c->leb_size) {
855 err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0,
856 LPROPS_TAKEN);
857 if (err)
858 return err;
859 lnum = -1;
860 }
861
862 while (1) {
863 cond_resched();
864
865 znode = cnext;
866 idx = c->cbuf + used;
867
868 /* Make index node */
869 idx->ch.node_type = UBIFS_IDX_NODE;
870 idx->child_cnt = cpu_to_le16(znode->child_cnt);
871 idx->level = cpu_to_le16(znode->level);
872 for (i = 0; i < znode->child_cnt; i++) {
873 struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
874 struct ubifs_zbranch *zbr = &znode->zbranch[i];
875
876 key_write_idx(c, &zbr->key, &br->key);
877 br->lnum = cpu_to_le32(zbr->lnum);
878 br->offs = cpu_to_le32(zbr->offs);
879 br->len = cpu_to_le32(zbr->len);
880 if (!zbr->lnum || !zbr->len) {
881 ubifs_err("bad ref in znode");
882 dbg_dump_znode(c, znode);
883 if (zbr->znode)
884 dbg_dump_znode(c, zbr->znode);
885 }
886 }
887 len = ubifs_idx_node_sz(c, znode->child_cnt);
888 ubifs_prepare_node(c, idx, len, 0);
889
890 /* Determine the index node position */
891 if (lnum == -1) {
892 lnum = c->ilebs[lnum_pos++];
893 buf_offs = 0;
894 used = 0;
895 avail = buf_len;
896 }
897 offs = buf_offs + used;
898
899#ifdef CONFIG_UBIFS_FS_DEBUG
900 if (lnum != znode->lnum || offs != znode->offs ||
901 len != znode->len) {
902 ubifs_err("inconsistent znode posn");
903 return -EINVAL;
904 }
905#endif
906
907 /* Grab some stuff from znode while we still can */
908 cnext = znode->cnext;
909
910 ubifs_assert(ubifs_zn_dirty(znode));
911 ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
912
913 /*
914 * It is important that other threads should see %DIRTY_ZNODE
915 * flag cleared before %COW_ZNODE. Specifically, it matters in
916 * the 'dirty_cow_znode()' function. This is the reason for the
917 * first barrier. Also, we want the bit changes to be seen to
918 * other threads ASAP, to avoid unnecesarry copying, which is
919 * the reason for the second barrier.
920 */
921 clear_bit(DIRTY_ZNODE, &znode->flags);
922 smp_mb__before_clear_bit();
923 clear_bit(COW_ZNODE, &znode->flags);
924 smp_mb__after_clear_bit();
925
926 /* Do not access znode from this point on */
927
928 /* Update buffer positions */
929 wlen = used + len;
930 used += ALIGN(len, 8);
931 avail -= ALIGN(len, 8);
932
933 /*
934 * Calculate the next index node length to see if there is
935 * enough room for it
936 */
937 if (cnext == c->cnext)
938 next_len = 0;
939 else
940 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
941
942 if (c->min_io_size == 1) {
943 /*
944 * Write the prepared index node immediately if there is
945 * no minimum IO size
946 */
947 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
948 wlen, UBI_SHORTTERM);
949 if (err)
950 return err;
951 buf_offs += ALIGN(wlen, 8);
952 if (next_len) {
953 used = 0;
954 avail = buf_len;
955 if (buf_offs + next_len > c->leb_size) {
956 err = ubifs_update_one_lp(c, lnum,
957 LPROPS_NC, 0, 0, LPROPS_TAKEN);
958 if (err)
959 return err;
960 lnum = -1;
961 }
962 continue;
963 }
964 } else {
965 int blen, nxt_offs = buf_offs + used + next_len;
966
967 if (next_len && nxt_offs <= c->leb_size) {
968 if (avail > 0)
969 continue;
970 else
971 blen = buf_len;
972 } else {
973 wlen = ALIGN(wlen, 8);
974 blen = ALIGN(wlen, c->min_io_size);
975 ubifs_pad(c, c->cbuf + wlen, blen - wlen);
976 }
977 /*
978 * The buffer is full or there are no more znodes
979 * to do
980 */
981 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
982 blen, UBI_SHORTTERM);
983 if (err)
984 return err;
985 buf_offs += blen;
986 if (next_len) {
987 if (nxt_offs > c->leb_size) {
988 err = ubifs_update_one_lp(c, lnum,
989 LPROPS_NC, 0, 0, LPROPS_TAKEN);
990 if (err)
991 return err;
992 lnum = -1;
993 }
994 used -= blen;
995 if (used < 0)
996 used = 0;
997 avail = buf_len - used;
998 memmove(c->cbuf, c->cbuf + blen, used);
999 continue;
1000 }
1001 }
1002 break;
1003 }
1004
1005#ifdef CONFIG_UBIFS_FS_DEBUG
1006 if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) {
1007 ubifs_err("inconsistent ihead");
1008 return -EINVAL;
1009 }
1010#endif
1011
1012 c->ihead_lnum = lnum;
1013 c->ihead_offs = buf_offs;
1014
1015 return 0;
1016}
1017
1018/**
1019 * free_obsolete_znodes - free obsolete znodes.
1020 * @c: UBIFS file-system description object
1021 *
1022 * At the end of commit end, obsolete znodes are freed.
1023 */
1024static void free_obsolete_znodes(struct ubifs_info *c)
1025{
1026 struct ubifs_znode *znode, *cnext;
1027
1028 cnext = c->cnext;
1029 do {
1030 znode = cnext;
1031 cnext = znode->cnext;
1032 if (test_bit(OBSOLETE_ZNODE, &znode->flags))
1033 kfree(znode);
1034 else {
1035 znode->cnext = NULL;
1036 atomic_long_inc(&c->clean_zn_cnt);
1037 atomic_long_inc(&ubifs_clean_zn_cnt);
1038 }
1039 } while (cnext != c->cnext);
1040}
1041
1042/**
1043 * return_gap_lebs - return LEBs used by the in-gap commit method.
1044 * @c: UBIFS file-system description object
1045 *
1046 * This function clears the "taken" flag for the LEBs which were used by the
1047 * "commit in-the-gaps" method.
1048 */
1049static int return_gap_lebs(struct ubifs_info *c)
1050{
1051 int *p, err;
1052
1053 if (!c->gap_lebs)
1054 return 0;
1055
1056 dbg_cmt("");
1057 for (p = c->gap_lebs; *p != -1; p++) {
1058 err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0,
1059 LPROPS_TAKEN, 0);
1060 if (err)
1061 return err;
1062 }
1063
1064 kfree(c->gap_lebs);
1065 c->gap_lebs = NULL;
1066 return 0;
1067}
1068
1069/**
1070 * ubifs_tnc_end_commit - update the TNC for commit end.
1071 * @c: UBIFS file-system description object
1072 *
1073 * Write the dirty znodes.
1074 */
1075int ubifs_tnc_end_commit(struct ubifs_info *c)
1076{
1077 int err;
1078
1079 if (!c->cnext)
1080 return 0;
1081
1082 err = return_gap_lebs(c);
1083 if (err)
1084 return err;
1085
1086 err = write_index(c);
1087 if (err)
1088 return err;
1089
1090 mutex_lock(&c->tnc_mutex);
1091
1092 dbg_cmt("TNC height is %d", c->zroot.znode->level + 1);
1093
1094 free_obsolete_znodes(c);
1095
1096 c->cnext = NULL;
1097 kfree(c->ilebs);
1098 c->ilebs = NULL;
1099
1100 mutex_unlock(&c->tnc_mutex);
1101
1102 return 0;
1103}
                                                             


                               
                                     



                                                 
                                        
 
                                                        



                                                                     



                  
     
                                                


                     
                                                         








                                                          
                                        


                                                                          






                                                                         














                                                                    
                                          
 
                         
                                     
                                                              
                                                                            













                                                         

                                                     



                                              





                                                               







                                                 
                                

                                  
                                          











                                                          
      









                                                      
                                  


                 
     
                            
                                  






                                                 
                                

                                            
                                  





                                                 
                                




                                                                            
                                  




                                                 
                                                         
                                   

                               
                                                                     
 
                                         

                                
                                                   



                                                   
                                                    
 
                                                   
                                                                

                          
                                                                          










                                                               
                                           


                 
            
                   
     
                          
                                           






                                                 
                                         





                                          
                                           












                                                                      
                                









                                                                  

                                                                  










                                                         
                                  























                                                             
                                


                                                   
                                          





                                                          

                                                          



                                                 
                                  













                                                     

                                                              





                                             
                                                           



                                                 
                                           












                               
          
                       
     










                                                
                                                            
 
                                                     

                                                

                  
                                                    





                                                                          
                                                                 

                         
                        

             
                                                                  

                         
                        

             
                                                                 
                                                           

                                 
                                


                     
                                                                 
                                                            

                                 
                                


                     
    

                                      




                                             
                                                     
                                              



                                                                      


                                             
         
                          



                                                

                                                                                 



                                                        
                                  
                    
                                 
         



                                      
 
                 

 
                                                         

                                                 
                                                         
                                                    
              
 
                                         
                                                  








                                                                  
                                                

                                       
                                
                          
                                  











                                                                           
 
                                           

 
   


                                                           
                                                
                      
                                     


                                                                            

                             
   
 
                                                               

                                                                    
 
                                           
                                              

                                                         
                                                      
                                        
                               
                          


                                 
                 
 




                                                    













                                                                        
                                                                                                


                                                        
                                                                                                 

                                                          



                                                                                          





                                                         


















                                                                             
                                                     
                                       

                                                                             

                                                                     





                                        
                                                     

                                         
                                        




                                            


                                                                 

                                       




                                                                              
                                                                     





                                                            
 
                                               
                                 
                                          
                                                       
                                
                   
 
           




                                 


                                                                     
                                                            



                                             
                      
                                        




                                                             
                                                                    



                                                                           
                                                               
                                         
                                                                           









                                                                        
                                           

                                                                         


                                 





                                                                      



                                                                       
 

                                                                            

                               
 
                   
                                                                                



                                                 

                                      

                                       
                                                                                    
                          
                                           

                                               
                                                    



                  
              
                            
          
                                 
          
                                    
    

                                               
                                           
                   
                                                   


                     
























                                                                          



                                                                  
                                                            
                                

                  
        



                                                                     
                                                                
 
                                                                             
                                           

                                                                      


                                     
                                                                             


                                  

                                           
                                          
 
                                            


                                           



                 
             
                                    
     
                                   


                     
                                                            
 
                                                     
                                              
                                   


                       


                                              




                                                   
                                                                        

                            
                                             
                                     
                                  

                                                                   
                                                               







                                                     
                                                                









                                                 
                  





                                
                                
                              
                                  







                                     

                                                        


                                                     
                                            
                                                              
                                                                 







                                             
                                              






                                                             
                                             


                                           
                                  

                                     
                                                        










                                           
    







                                                                
 
                                                               


                                                                      



                 
                                                             
 
                                                     
                                              
                                   
                  








                                                   

                                             




                                                                                
                                                             
                                        
                                       
                                          
 
                                                                                                      
                                                      



                                                                                   

                                        
                                                            
                                                                         
                                                                    

                                                                              


                                                                                   







                                                             

                                                         
 
                                              

                                   
 
                                                            
                       
                                              

                       

                                             



                                                                               



                 
                                                               
 
                                             








































                                                                            




                                                                  
                                                              




                                   
                                           







                                            


                     








                                                                                    

                                         
                                                         

                                                                

                                 

                       

                  
                                                                         
                            
 
                                                            
                                                                                



                                                            
                                                                          



                                                    
                                                                       









                                             
                                                                                         


                                          
                             


                                                            




                                                                     
                                                                                   
                                   
                                                         


















                                                                                
 
                                                


                                                                             
                                                  













                                                                
     



                                





                                                     
                                


                                                                             


                                                             

                                                      
                                   
                                                



                                       




                                                                       
                                                 
                                          
 
                                                  





                                                                      
                                          
                                                        
 
                                        
         
                                  









                                                                  








                                                                          
                                                                            



                                                                     
                                                






                                                                        


















                                                                         







                                                













                                                               
                  



                                                   








                                                                                  

                                       
                                                                         

                                                                           


                                                                    

                                

                                                    
                                                                                


                                                        
                                                        


                                                 





                                                        






                                                           


                                        
                                                                         

                                
                                                                         




                            









                                                                                

                                                                     



























                                                          
                                                                             
                       


                                                                                   






                                    
                                                   
                                                             
 

                                                                     







                                                            








                                                         
                                                  



                                       
                                                   





























                                                                         
                                                                                               
                                                
 
                                               
                                                                                                
                                                




                                                                                         



                                  
                                                                                          

                                   
                             

                                                                     

                                                         
                          
                                   
                                             

         


                                                                            







                                                          
                           
                                         









                                           

                                               
                                                
                                         
                                         
  
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

/*
 * Quota change tags are associated with each transaction that allocates or
 * deallocates space.  Those changes are accumulated locally to each node (in a
 * per-node file) and then are periodically synced to the quota file.  This
 * avoids the bottleneck of constantly touching the quota file, but introduces
 * fuzziness in the current usage value of IDs that are being used on different
 * nodes in the cluster simultaneously.  So, it is possible for a user on
 * multiple nodes to overrun their quota, but that overrun is controlable.
 * Since quota tags are part of transactions, there is no need for a quota check
 * program to be run on node crashes or anything like that.
 *
 * There are couple of knobs that let the administrator manage the quota
 * fuzziness.  "quota_quantum" sets the maximum time a quota change can be
 * sitting on one node before being synced to the quota file.  (The default is
 * 60 seconds.)  Another knob, "quota_scale" controls how quickly the frequency
 * of quota file syncs increases as the user moves closer to their limit.  The
 * more frequent the syncs, the more accurate the quota enforcement, but that
 * means that there is more contention between the nodes for the quota file.
 * The default value is one.  This sets the maximum theoretical quota overrun
 * (with infinite node with infinite bandwidth) to twice the user's limit.  (In
 * practice, the maximum overrun you see should be much less.)  A "quota_scale"
 * number greater than one makes quota syncs more frequent and reduces the
 * maximum overrun.  Numbers less than one (but greater than zero) make quota
 * syncs less frequent.
 *
 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
 * the quota file, so it is not being constantly read.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/sort.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/quota.h>
#include <linux/dqblk_xfs.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "inode.h"
#include "util.h"

#define QUOTA_USER 1
#define QUOTA_GROUP 0

struct gfs2_quota_change_host {
	u64 qc_change;
	u32 qc_flags; /* GFS2_QCF_... */
	u32 qc_id;
};

static LIST_HEAD(qd_lru_list);
static atomic_t qd_lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(qd_lru_lock);

int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
{
	struct gfs2_quota_data *qd;
	struct gfs2_sbd *sdp;
	int nr_to_scan = sc->nr_to_scan;

	if (nr_to_scan == 0)
		goto out;

	if (!(sc->gfp_mask & __GFP_FS))
		return -1;

	spin_lock(&qd_lru_lock);
	while (nr_to_scan && !list_empty(&qd_lru_list)) {
		qd = list_entry(qd_lru_list.next,
				struct gfs2_quota_data, qd_reclaim);
		sdp = qd->qd_gl->gl_sbd;

		/* Free from the filesystem-specific list */
		list_del(&qd->qd_list);

		gfs2_assert_warn(sdp, !qd->qd_change);
		gfs2_assert_warn(sdp, !qd->qd_slot_count);
		gfs2_assert_warn(sdp, !qd->qd_bh_count);

		gfs2_glock_put(qd->qd_gl);
		atomic_dec(&sdp->sd_quota_count);

		/* Delete it from the common reclaim list */
		list_del_init(&qd->qd_reclaim);
		atomic_dec(&qd_lru_count);
		spin_unlock(&qd_lru_lock);
		kmem_cache_free(gfs2_quotad_cachep, qd);
		spin_lock(&qd_lru_lock);
		nr_to_scan--;
	}
	spin_unlock(&qd_lru_lock);

out:
	return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
}

static u64 qd2offset(struct gfs2_quota_data *qd)
{
	u64 offset;

	offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
	offset *= sizeof(struct gfs2_quota);

	return offset;
}

static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id,
		    struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd;
	int error;

	qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
	if (!qd)
		return -ENOMEM;

	atomic_set(&qd->qd_count, 1);
	qd->qd_id = id;
	if (user)
		set_bit(QDF_USER, &qd->qd_flags);
	qd->qd_slot = -1;
	INIT_LIST_HEAD(&qd->qd_reclaim);

	error = gfs2_glock_get(sdp, 2 * (u64)id + !user,
			      &gfs2_quota_glops, CREATE, &qd->qd_gl);
	if (error)
		goto fail;

	*qdp = qd;

	return 0;

fail:
	kmem_cache_free(gfs2_quotad_cachep, qd);
	return error;
}

static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
		  struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
	int error, found;

	*qdp = NULL;

	for (;;) {
		found = 0;
		spin_lock(&qd_lru_lock);
		list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
			if (qd->qd_id == id &&
			    !test_bit(QDF_USER, &qd->qd_flags) == !user) {
				if (!atomic_read(&qd->qd_count) &&
				    !list_empty(&qd->qd_reclaim)) {
					/* Remove it from reclaim list */
					list_del_init(&qd->qd_reclaim);
					atomic_dec(&qd_lru_count);
				}
				atomic_inc(&qd->qd_count);
				found = 1;
				break;
			}
		}

		if (!found)
			qd = NULL;

		if (!qd && new_qd) {
			qd = new_qd;
			list_add(&qd->qd_list, &sdp->sd_quota_list);
			atomic_inc(&sdp->sd_quota_count);
			new_qd = NULL;
		}

		spin_unlock(&qd_lru_lock);

		if (qd) {
			if (new_qd) {
				gfs2_glock_put(new_qd->qd_gl);
				kmem_cache_free(gfs2_quotad_cachep, new_qd);
			}
			*qdp = qd;
			return 0;
		}

		error = qd_alloc(sdp, user, id, &new_qd);
		if (error)
			return error;
	}
}

static void qd_hold(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	gfs2_assert(sdp, atomic_read(&qd->qd_count));
	atomic_inc(&qd->qd_count);
}

static void qd_put(struct gfs2_quota_data *qd)
{
	if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) {
		/* Add to the reclaim list */
		list_add_tail(&qd->qd_reclaim, &qd_lru_list);
		atomic_inc(&qd_lru_count);
		spin_unlock(&qd_lru_lock);
	}
}

static int slot_get(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	unsigned int c, o = 0, b;
	unsigned char byte = 0;

	spin_lock(&qd_lru_lock);

	if (qd->qd_slot_count++) {
		spin_unlock(&qd_lru_lock);
		return 0;
	}

	for (c = 0; c < sdp->sd_quota_chunks; c++)
		for (o = 0; o < PAGE_SIZE; o++) {
			byte = sdp->sd_quota_bitmap[c][o];
			if (byte != 0xFF)
				goto found;
		}

	goto fail;

found:
	for (b = 0; b < 8; b++)
		if (!(byte & (1 << b)))
			break;
	qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;

	if (qd->qd_slot >= sdp->sd_quota_slots)
		goto fail;

	sdp->sd_quota_bitmap[c][o] |= 1 << b;

	spin_unlock(&qd_lru_lock);

	return 0;

fail:
	qd->qd_slot_count--;
	spin_unlock(&qd_lru_lock);
	return -ENOSPC;
}

static void slot_hold(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	spin_lock(&qd_lru_lock);
	gfs2_assert(sdp, qd->qd_slot_count);
	qd->qd_slot_count++;
	spin_unlock(&qd_lru_lock);
}

static void slot_put(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	spin_lock(&qd_lru_lock);
	gfs2_assert(sdp, qd->qd_slot_count);
	if (!--qd->qd_slot_count) {
		gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
		qd->qd_slot = -1;
	}
	spin_unlock(&qd_lru_lock);
}

static int bh_get(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	unsigned int block, offset;
	struct buffer_head *bh;
	int error;
	struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };

	mutex_lock(&sdp->sd_quota_mutex);

	if (qd->qd_bh_count++) {
		mutex_unlock(&sdp->sd_quota_mutex);
		return 0;
	}

	block = qd->qd_slot / sdp->sd_qc_per_block;
	offset = qd->qd_slot % sdp->sd_qc_per_block;

	bh_map.b_size = 1 << ip->i_inode.i_blkbits;
	error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
	if (error)
		goto fail;
	error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh);
	if (error)
		goto fail;
	error = -EIO;
	if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
		goto fail_brelse;

	qd->qd_bh = bh;
	qd->qd_bh_qc = (struct gfs2_quota_change *)
		(bh->b_data + sizeof(struct gfs2_meta_header) +
		 offset * sizeof(struct gfs2_quota_change));

	mutex_unlock(&sdp->sd_quota_mutex);

	return 0;

fail_brelse:
	brelse(bh);
fail:
	qd->qd_bh_count--;
	mutex_unlock(&sdp->sd_quota_mutex);
	return error;
}

static void bh_put(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_assert(sdp, qd->qd_bh_count);
	if (!--qd->qd_bh_count) {
		brelse(qd->qd_bh);
		qd->qd_bh = NULL;
		qd->qd_bh_qc = NULL;
	}
	mutex_unlock(&sdp->sd_quota_mutex);
}

static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd = NULL;
	int error;
	int found = 0;

	*qdp = NULL;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return 0;

	spin_lock(&qd_lru_lock);

	list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
		if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
		    !test_bit(QDF_CHANGE, &qd->qd_flags) ||
		    qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
			continue;

		list_move_tail(&qd->qd_list, &sdp->sd_quota_list);

		set_bit(QDF_LOCKED, &qd->qd_flags);
		gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
		atomic_inc(&qd->qd_count);
		qd->qd_change_sync = qd->qd_change;
		gfs2_assert_warn(sdp, qd->qd_slot_count);
		qd->qd_slot_count++;
		found = 1;

		break;
	}

	if (!found)
		qd = NULL;

	spin_unlock(&qd_lru_lock);

	if (qd) {
		gfs2_assert_warn(sdp, qd->qd_change_sync);
		error = bh_get(qd);
		if (error) {
			clear_bit(QDF_LOCKED, &qd->qd_flags);
			slot_put(qd);
			qd_put(qd);
			return error;
		}
	}

	*qdp = qd;

	return 0;
}

static int qd_trylock(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return 0;

	spin_lock(&qd_lru_lock);

	if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
	    !test_bit(QDF_CHANGE, &qd->qd_flags)) {
		spin_unlock(&qd_lru_lock);
		return 0;
	}

	list_move_tail(&qd->qd_list, &sdp->sd_quota_list);

	set_bit(QDF_LOCKED, &qd->qd_flags);
	gfs2_assert_warn(sdp, atomic_read(&qd->qd_count));
	atomic_inc(&qd->qd_count);
	qd->qd_change_sync = qd->qd_change;
	gfs2_assert_warn(sdp, qd->qd_slot_count);
	qd->qd_slot_count++;

	spin_unlock(&qd_lru_lock);

	gfs2_assert_warn(sdp, qd->qd_change_sync);
	if (bh_get(qd)) {
		clear_bit(QDF_LOCKED, &qd->qd_flags);
		slot_put(qd);
		qd_put(qd);
		return 0;
	}

	return 1;
}

static void qd_unlock(struct gfs2_quota_data *qd)
{
	gfs2_assert_warn(qd->qd_gl->gl_sbd,
			 test_bit(QDF_LOCKED, &qd->qd_flags));
	clear_bit(QDF_LOCKED, &qd->qd_flags);
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}

static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id,
		    struct gfs2_quota_data **qdp)
{
	int error;

	error = qd_get(sdp, user, id, qdp);
	if (error)
		return error;

	error = slot_get(*qdp);
	if (error)
		goto fail;

	error = bh_get(*qdp);
	if (error)
		goto fail_slot;

	return 0;

fail_slot:
	slot_put(*qdp);
fail:
	qd_put(*qdp);
	return error;
}

static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}

int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_qadata *qa = ip->i_qadata;
	struct gfs2_quota_data **qd = qa->qa_qd;
	int error;

	if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
		return -EIO;

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return 0;

	error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
	if (error)
		goto out;
	qa->qa_qd_num++;
	qd++;

	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
	if (error)
		goto out;
	qa->qa_qd_num++;
	qd++;

	if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
		error = qdsb_get(sdp, QUOTA_USER, uid, qd);
		if (error)
			goto out;
		qa->qa_qd_num++;
		qd++;
	}

	if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
		error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
		if (error)
			goto out;
		qa->qa_qd_num++;
		qd++;
	}

out:
	if (error)
		gfs2_quota_unhold(ip);
	return error;
}

void gfs2_quota_unhold(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_qadata *qa = ip->i_qadata;
	unsigned int x;

	gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));

	for (x = 0; x < qa->qa_qd_num; x++) {
		qdsb_put(qa->qa_qd[x]);
		qa->qa_qd[x] = NULL;
	}
	qa->qa_qd_num = 0;
}

static int sort_qd(const void *a, const void *b)
{
	const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a;
	const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b;

	if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
	    !test_bit(QDF_USER, &qd_b->qd_flags)) {
		if (test_bit(QDF_USER, &qd_a->qd_flags))
			return -1;
		else
			return 1;
	}
	if (qd_a->qd_id < qd_b->qd_id)
		return -1;
	if (qd_a->qd_id > qd_b->qd_id)
		return 1;

	return 0;
}

static void do_qc(struct gfs2_quota_data *qd, s64 change)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	struct gfs2_quota_change *qc = qd->qd_bh_qc;
	s64 x;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);

	if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
		qc->qc_change = 0;
		qc->qc_flags = 0;
		if (test_bit(QDF_USER, &qd->qd_flags))
			qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
		qc->qc_id = cpu_to_be32(qd->qd_id);
	}

	x = be64_to_cpu(qc->qc_change) + change;
	qc->qc_change = cpu_to_be64(x);

	spin_lock(&qd_lru_lock);
	qd->qd_change = x;
	spin_unlock(&qd_lru_lock);

	if (!x) {
		gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
		clear_bit(QDF_CHANGE, &qd->qd_flags);
		qc->qc_flags = 0;
		qc->qc_id = 0;
		slot_put(qd);
		qd_put(qd);
	} else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
		qd_hold(qd);
		slot_hold(qd);
	}

	mutex_unlock(&sdp->sd_quota_mutex);
}

/**
 * gfs2_adjust_quota - adjust record of current block usage
 * @ip: The quota inode
 * @loc: Offset of the entry in the quota file
 * @change: The amount of usage change to record
 * @qd: The quota data
 * @fdq: The updated limits to record
 *
 * This function was mostly borrowed from gfs2_block_truncate_page which was
 * in turn mostly borrowed from ext3
 *
 * Returns: 0 or -ve on error
 */

static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
			     s64 change, struct gfs2_quota_data *qd,
			     struct fs_disk_quota *fdq)
{
	struct inode *inode = &ip->i_inode;
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct address_space *mapping = inode->i_mapping;
	unsigned long index = loc >> PAGE_CACHE_SHIFT;
	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize, iblock, pos;
	struct buffer_head *bh;
	struct page *page;
	void *kaddr, *ptr;
	struct gfs2_quota q, *qp;
	int err, nbytes;
	u64 size;

	if (gfs2_is_stuffed(ip)) {
		err = gfs2_unstuff_dinode(ip, NULL);
		if (err)
			return err;
	}

	memset(&q, 0, sizeof(struct gfs2_quota));
	err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q));
	if (err < 0)
		return err;

	err = -EIO;
	qp = &q;
	qp->qu_value = be64_to_cpu(qp->qu_value);
	qp->qu_value += change;
	qp->qu_value = cpu_to_be64(qp->qu_value);
	qd->qd_qb.qb_value = qp->qu_value;
	if (fdq) {
		if (fdq->d_fieldmask & FS_DQ_BSOFT) {
			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
			qd->qd_qb.qb_warn = qp->qu_warn;
		}
		if (fdq->d_fieldmask & FS_DQ_BHARD) {
			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
			qd->qd_qb.qb_limit = qp->qu_limit;
		}
		if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
			qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
			qd->qd_qb.qb_value = qp->qu_value;
		}
	}

	/* Write the quota into the quota file on disk */
	ptr = qp;
	nbytes = sizeof(struct gfs2_quota);
get_a_page:
	page = grab_cache_page(mapping, index);
	if (!page)
		return -ENOMEM;

	blocksize = inode->i_sb->s_blocksize;
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	if (!buffer_mapped(bh)) {
		gfs2_block_map(inode, iblock, bh, 1);
		if (!buffer_mapped(bh))
			goto unlock_out;
		/* If it's a newly allocated disk block for quota, zero it */
		if (buffer_new(bh))
			zero_user(page, pos - blocksize, bh->b_size);
	}

	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		ll_rw_block(READ | REQ_META, 1, &bh);
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			goto unlock_out;
	}

	gfs2_trans_add_bh(ip->i_gl, bh, 0);

	kaddr = kmap_atomic(page, KM_USER0);
	if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
		nbytes = PAGE_CACHE_SIZE - offset;
	memcpy(kaddr + offset, ptr, nbytes);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);
	unlock_page(page);
	page_cache_release(page);

	/* If quota straddles page boundary, we need to update the rest of the
	 * quota at the beginning of the next page */
	if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
		ptr = ptr + nbytes;
		nbytes = sizeof(struct gfs2_quota) - nbytes;
		offset = 0;
		index++;
		goto get_a_page;
	}

	size = loc + sizeof(struct gfs2_quota);
	if (size > inode->i_size)
		i_size_write(inode, size);
	inode->i_mtime = inode->i_atime = CURRENT_TIME;
	mark_inode_dirty(inode);
	return err;

unlock_out:
	unlock_page(page);
	page_cache_release(page);
	return err;
}

static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
	struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	unsigned int data_blocks, ind_blocks;
	struct gfs2_holder *ghs, i_gh;
	unsigned int qx, x;
	struct gfs2_quota_data *qd;
	loff_t offset;
	unsigned int nalloc = 0, blocks;
	int error;

	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
			      &data_blocks, &ind_blocks);

	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
	if (!ghs)
		return -ENOMEM;

	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
	mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA);
	for (qx = 0; qx < num_qd; qx++) {
		error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &ghs[qx]);
		if (error)
			goto out;
	}

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		goto out;

	for (x = 0; x < num_qd; x++) {
		offset = qd2offset(qda[x]);
		if (gfs2_write_alloc_required(ip, offset,
					      sizeof(struct gfs2_quota)))
			nalloc++;
	}

	/* 
	 * 1 blk for unstuffing inode if stuffed. We add this extra
	 * block to the reservation unconditionally. If the inode
	 * doesn't need unstuffing, the block will be released to the 
	 * rgrp since it won't be allocated during the transaction
	 */
	/* +3 in the end for unstuffing block, inode size update block
	 * and another block in case quota straddles page boundary and 
	 * two blocks need to be updated instead of 1 */
	blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;

	error = gfs2_inplace_reserve(ip, 1 +
				     (nalloc * (data_blocks + ind_blocks)));
	if (error)
		goto out_alloc;

	if (nalloc)
		blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS;

	error = gfs2_trans_begin(sdp, blocks, 0);
	if (error)
		goto out_ipres;

	for (x = 0; x < num_qd; x++) {
		qd = qda[x];
		offset = qd2offset(qd);
		error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
		if (error)
			goto out_end_trans;

		do_qc(qd, -qd->qd_change_sync);
		set_bit(QDF_REFRESH, &qd->qd_flags);
	}

	error = 0;

out_end_trans:
	gfs2_trans_end(sdp);
out_ipres:
	gfs2_inplace_release(ip);
out_alloc:
	gfs2_glock_dq_uninit(&i_gh);
out:
	while (qx--)
		gfs2_glock_dq_uninit(&ghs[qx]);
	mutex_unlock(&ip->i_inode.i_mutex);
	kfree(ghs);
	gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
	return error;
}

static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	struct gfs2_quota q;
	struct gfs2_quota_lvb *qlvb;
	loff_t pos;
	int error;

	memset(&q, 0, sizeof(struct gfs2_quota));
	pos = qd2offset(qd);
	error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q));
	if (error < 0)
		return error;

	qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
	qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
	qlvb->__pad = 0;
	qlvb->qb_limit = q.qu_limit;
	qlvb->qb_warn = q.qu_warn;
	qlvb->qb_value = q.qu_value;
	qd->qd_qb = *qlvb;

	return 0;
}

static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
		    struct gfs2_holder *q_gh)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
	struct gfs2_holder i_gh;
	int error;

restart:
	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
	if (error)
		return error;

	qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;

	if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
		gfs2_glock_dq_uninit(q_gh);
		error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE,
					   GL_NOCACHE, q_gh);
		if (error)
			return error;

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
		if (error)
			goto fail;

		error = update_qd(sdp, qd);
		if (error)
			goto fail_gunlock;

		gfs2_glock_dq_uninit(&i_gh);
		gfs2_glock_dq_uninit(q_gh);
		force_refresh = 0;
		goto restart;
	}

	return 0;

fail_gunlock:
	gfs2_glock_dq_uninit(&i_gh);
fail:
	gfs2_glock_dq_uninit(q_gh);
	return error;
}

int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_qadata *qa = ip->i_qadata;
	struct gfs2_quota_data *qd;
	unsigned int x;
	int error = 0;

	error = gfs2_quota_hold(ip, uid, gid);
	if (error)
		return error;

	if (capable(CAP_SYS_RESOURCE) ||
	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
	     sort_qd, NULL);

	for (x = 0; x < qa->qa_qd_num; x++) {
		int force = NO_FORCE;
		qd = qa->qa_qd[x];
		if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
			force = FORCE;
		error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
		if (error)
			break;
	}

	if (!error)
		set_bit(GIF_QD_LOCKED, &ip->i_flags);
	else {
		while (x--)
			gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
		gfs2_quota_unhold(ip);
	}

	return error;
}

static int need_sync(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_tune *gt = &sdp->sd_tune;
	s64 value;
	unsigned int num, den;
	int do_sync = 1;

	if (!qd->qd_qb.qb_limit)
		return 0;

	spin_lock(&qd_lru_lock);
	value = qd->qd_change;
	spin_unlock(&qd_lru_lock);

	spin_lock(&gt->gt_spin);
	num = gt->gt_quota_scale_num;
	den = gt->gt_quota_scale_den;
	spin_unlock(&gt->gt_spin);

	if (value < 0)
		do_sync = 0;
	else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
		 (s64)be64_to_cpu(qd->qd_qb.qb_limit))
		do_sync = 0;
	else {
		value *= gfs2_jindex_size(sdp) * num;
		value = div_s64(value, den);
		value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
		if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
			do_sync = 0;
	}

	return do_sync;
}

void gfs2_quota_unlock(struct gfs2_inode *ip)
{
	struct gfs2_qadata *qa = ip->i_qadata;
	struct gfs2_quota_data *qda[4];
	unsigned int count = 0;
	unsigned int x;

	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
		goto out;

	for (x = 0; x < qa->qa_qd_num; x++) {
		struct gfs2_quota_data *qd;
		int sync;

		qd = qa->qa_qd[x];
		sync = need_sync(qd);

		gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);

		if (sync && qd_trylock(qd))
			qda[count++] = qd;
	}

	if (count) {
		do_sync(count, qda);
		for (x = 0; x < count; x++)
			qd_unlock(qda[x]);
	}

out:
	gfs2_quota_unhold(ip);
}

#define MAX_LINE 256

static int print_message(struct gfs2_quota_data *qd, char *type)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
	       sdp->sd_fsname, type,
	       (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
	       qd->qd_id);

	return 0;
}

int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_qadata *qa = ip->i_qadata;
	struct gfs2_quota_data *qd;
	s64 value;
	unsigned int x;
	int error = 0;

	if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
		return 0;

        if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;

	for (x = 0; x < qa->qa_qd_num; x++) {
		qd = qa->qa_qd[x];

		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
			continue;

		value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
		spin_lock(&qd_lru_lock);
		value += qd->qd_change;
		spin_unlock(&qd_lru_lock);

		if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
			print_message(qd, "exceeded");
			quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
					   USRQUOTA : GRPQUOTA, qd->qd_id,
					   sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);

			error = -EDQUOT;
			break;
		} else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
			   (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
			   time_after_eq(jiffies, qd->qd_last_warn +
					 gfs2_tune_get(sdp,
						gt_quota_warn_period) * HZ)) {
			quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
					   USRQUOTA : GRPQUOTA, qd->qd_id,
					   sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
			error = print_message(qd, "warning");
			qd->qd_last_warn = jiffies;
		}
	}

	return error;
}

void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
		       u32 uid, u32 gid)
{
	struct gfs2_qadata *qa = ip->i_qadata;
	struct gfs2_quota_data *qd;
	unsigned int x;

	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change))
		return;
	if (ip->i_diskflags & GFS2_DIF_SYSTEM)
		return;

	for (x = 0; x < qa->qa_qd_num; x++) {
		qd = qa->qa_qd[x];

		if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
		    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
			do_qc(qd, change);
		}
	}
}

int gfs2_quota_sync(struct super_block *sb, int type, int wait)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct gfs2_quota_data **qda;
	unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
	unsigned int num_qd;
	unsigned int x;
	int error = 0;

	sdp->sd_quota_sync_gen++;

	qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
	if (!qda)
		return -ENOMEM;

	do {
		num_qd = 0;

		for (;;) {
			error = qd_fish(sdp, qda + num_qd);
			if (error || !qda[num_qd])
				break;
			if (++num_qd == max_qd)
				break;
		}

		if (num_qd) {
			if (!error)
				error = do_sync(num_qd, qda);
			if (!error)
				for (x = 0; x < num_qd; x++)
					qda[x]->qd_sync_gen =
						sdp->sd_quota_sync_gen;

			for (x = 0; x < num_qd; x++)
				qd_unlock(qda[x]);
		}
	} while (!error && num_qd == max_qd);

	kfree(qda);

	return error;
}

static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
{
	return gfs2_quota_sync(sb, type, 0);
}

int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
{
	struct gfs2_quota_data *qd;
	struct gfs2_holder q_gh;
	int error;

	error = qd_get(sdp, user, id, &qd);
	if (error)
		return error;

	error = do_glock(qd, FORCE, &q_gh);
	if (!error)
		gfs2_glock_dq_uninit(&q_gh);

	qd_put(qd);
	return error;
}

static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
{
	const struct gfs2_quota_change *str = buf;

	qc->qc_change = be64_to_cpu(str->qc_change);
	qc->qc_flags = be32_to_cpu(str->qc_flags);
	qc->qc_id = be32_to_cpu(str->qc_id);
}

int gfs2_quota_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
	u64 size = i_size_read(sdp->sd_qc_inode);
	unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift;
	unsigned int x, slot = 0;
	unsigned int found = 0;
	u64 dblock;
	u32 extlen = 0;
	int error;

	if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20))
		return -EIO;

	sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
	sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);

	error = -ENOMEM;

	sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
				       sizeof(unsigned char *), GFP_NOFS);
	if (!sdp->sd_quota_bitmap)
		return error;

	for (x = 0; x < sdp->sd_quota_chunks; x++) {
		sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS);
		if (!sdp->sd_quota_bitmap[x])
			goto fail;
	}

	for (x = 0; x < blocks; x++) {
		struct buffer_head *bh;
		unsigned int y;

		if (!extlen) {
			int new = 0;
			error = gfs2_extent_map(&ip->i_inode, x, &new, &dblock, &extlen);
			if (error)
				goto fail;
		}
		error = -EIO;
		bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
		if (!bh)
			goto fail;
		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
			brelse(bh);
			goto fail;
		}

		for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
		     y++, slot++) {
			struct gfs2_quota_change_host qc;
			struct gfs2_quota_data *qd;

			gfs2_quota_change_in(&qc, bh->b_data +
					  sizeof(struct gfs2_meta_header) +
					  y * sizeof(struct gfs2_quota_change));
			if (!qc.qc_change)
				continue;

			error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),