diff options
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r-- | fs/gfs2/rgrp.c | 833 |
1 files changed, 697 insertions, 136 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9bd1973..4d34887a601d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -35,6 +35,9 @@ | |||
35 | #define BFITNOENT ((u32)~0) | 35 | #define BFITNOENT ((u32)~0) |
36 | #define NO_BLOCK ((u64)~0) | 36 | #define NO_BLOCK ((u64)~0) |
37 | 37 | ||
38 | #define RSRV_CONTENTION_FACTOR 4 | ||
39 | #define RGRP_RSRV_MAX_CONTENDERS 2 | ||
40 | |||
38 | #if BITS_PER_LONG == 32 | 41 | #if BITS_PER_LONG == 32 |
39 | #define LBITMASK (0x55555555UL) | 42 | #define LBITMASK (0x55555555UL) |
40 | #define LBITSKIP55 (0x55555555UL) | 43 | #define LBITSKIP55 (0x55555555UL) |
@@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) | |||
178 | } | 181 | } |
179 | 182 | ||
180 | /** | 183 | /** |
184 | * rs_cmp - multi-block reservation range compare | ||
185 | * @blk: absolute file system block number of the new reservation | ||
186 | * @len: number of blocks in the new reservation | ||
187 | * @rs: existing reservation to compare against | ||
188 | * | ||
189 | * returns: 1 if the block range is beyond the reach of the reservation | ||
190 | * -1 if the block range is before the start of the reservation | ||
191 | * 0 if the block range overlaps with the reservation | ||
192 | */ | ||
193 | static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) | ||
194 | { | ||
195 | u64 startblk = gfs2_rs_startblk(rs); | ||
196 | |||
197 | if (blk >= startblk + rs->rs_free) | ||
198 | return 1; | ||
199 | if (blk + len - 1 < startblk) | ||
200 | return -1; | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * rs_find - Find a rgrp multi-block reservation that contains a given block | ||
206 | * @rgd: The rgrp | ||
207 | * @rgblk: The block we're looking for, relative to the rgrp | ||
208 | */ | ||
209 | static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) | ||
210 | { | ||
211 | struct rb_node **newn; | ||
212 | int rc; | ||
213 | u64 fsblk = rgblk + rgd->rd_data0; | ||
214 | |||
215 | spin_lock(&rgd->rd_rsspin); | ||
216 | newn = &rgd->rd_rstree.rb_node; | ||
217 | while (*newn) { | ||
218 | struct gfs2_blkreserv *cur = | ||
219 | rb_entry(*newn, struct gfs2_blkreserv, rs_node); | ||
220 | rc = rs_cmp(fsblk, 1, cur); | ||
221 | if (rc < 0) | ||
222 | newn = &((*newn)->rb_left); | ||
223 | else if (rc > 0) | ||
224 | newn = &((*newn)->rb_right); | ||
225 | else { | ||
226 | spin_unlock(&rgd->rd_rsspin); | ||
227 | return cur; | ||
228 | } | ||
229 | } | ||
230 | spin_unlock(&rgd->rd_rsspin); | ||
231 | return NULL; | ||
232 | } | ||
233 | |||
234 | /** | ||
181 | * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing | 235 | * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing |
182 | * a block in a given allocation state. | 236 | * a block in a given allocation state. |
183 | * @buf: the buffer that holds the bitmaps | 237 | * @buf: the buffer that holds the bitmaps |
@@ -417,6 +471,137 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) | |||
417 | } | 471 | } |
418 | } | 472 | } |
419 | 473 | ||
474 | /** | ||
475 | * gfs2_rs_alloc - make sure we have a reservation assigned to the inode | ||
476 | * @ip: the inode for this reservation | ||
477 | */ | ||
478 | int gfs2_rs_alloc(struct gfs2_inode *ip) | ||
479 | { | ||
480 | int error = 0; | ||
481 | struct gfs2_blkreserv *res; | ||
482 | |||
483 | if (ip->i_res) | ||
484 | return 0; | ||
485 | |||
486 | res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); | ||
487 | if (!res) | ||
488 | error = -ENOMEM; | ||
489 | |||
490 | down_write(&ip->i_rw_mutex); | ||
491 | if (ip->i_res) | ||
492 | kmem_cache_free(gfs2_rsrv_cachep, res); | ||
493 | else | ||
494 | ip->i_res = res; | ||
495 | up_write(&ip->i_rw_mutex); | ||
496 | return error; | ||
497 | } | ||
498 | |||
499 | static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) | ||
500 | { | ||
501 | gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", | ||
502 | rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, | ||
503 | rs->rs_free); | ||
504 | } | ||
505 | |||
506 | /** | ||
507 | * __rs_deltree - remove a multi-block reservation from the rgd tree | ||
508 | * @rs: The reservation to remove | ||
509 | * | ||
510 | */ | ||
511 | static void __rs_deltree(struct gfs2_blkreserv *rs) | ||
512 | { | ||
513 | struct gfs2_rgrpd *rgd; | ||
514 | |||
515 | if (!gfs2_rs_active(rs)) | ||
516 | return; | ||
517 | |||
518 | rgd = rs->rs_rgd; | ||
519 | /* We can't do this: The reason is that when the rgrp is invalidated, | ||
520 | it's in the "middle" of acquiring the glock, but the HOLDER bit | ||
521 | isn't set yet: | ||
522 | BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ | ||
523 | trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); | ||
524 | |||
525 | if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) | ||
526 | rb_erase(&rs->rs_node, &rgd->rd_rstree); | ||
527 | BUG_ON(!rgd->rd_rs_cnt); | ||
528 | rgd->rd_rs_cnt--; | ||
529 | |||
530 | if (rs->rs_free) { | ||
531 | /* return reserved blocks to the rgrp and the ip */ | ||
532 | BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); | ||
533 | rs->rs_rgd->rd_reserved -= rs->rs_free; | ||
534 | rs->rs_free = 0; | ||
535 | clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); | ||
536 | smp_mb__after_clear_bit(); | ||
537 | } | ||
538 | /* We can't change any of the step 1 or step 2 components of the rs. | ||
539 | E.g. We can't set rs_rgd to NULL because the rgd glock is held and | ||
540 | dequeued through this pointer. | ||
541 | Can't: atomic_set(&rs->rs_sizehint, 0); | ||
542 | Can't: rs->rs_requested = 0; | ||
543 | Can't: rs->rs_rgd = NULL;*/ | ||
544 | rs->rs_bi = NULL; | ||
545 | rs->rs_biblk = 0; | ||
546 | } | ||
547 | |||
548 | /** | ||
549 | * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree | ||
550 | * @rs: The reservation to remove | ||
551 | * | ||
552 | */ | ||
553 | void gfs2_rs_deltree(struct gfs2_blkreserv *rs) | ||
554 | { | ||
555 | struct gfs2_rgrpd *rgd; | ||
556 | |||
557 | if (!gfs2_rs_active(rs)) | ||
558 | return; | ||
559 | |||
560 | rgd = rs->rs_rgd; | ||
561 | spin_lock(&rgd->rd_rsspin); | ||
562 | __rs_deltree(rs); | ||
563 | spin_unlock(&rgd->rd_rsspin); | ||
564 | } | ||
565 | |||
566 | /** | ||
567 | * gfs2_rs_delete - delete a multi-block reservation | ||
568 | * @ip: The inode for this reservation | ||
569 | * | ||
570 | */ | ||
571 | void gfs2_rs_delete(struct gfs2_inode *ip) | ||
572 | { | ||
573 | down_write(&ip->i_rw_mutex); | ||
574 | if (ip->i_res) { | ||
575 | gfs2_rs_deltree(ip->i_res); | ||
576 | trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); | ||
577 | BUG_ON(ip->i_res->rs_free); | ||
578 | kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); | ||
579 | ip->i_res = NULL; | ||
580 | } | ||
581 | up_write(&ip->i_rw_mutex); | ||
582 | } | ||
583 | |||
584 | /** | ||
585 | * return_all_reservations - return all reserved blocks back to the rgrp. | ||
586 | * @rgd: the rgrp that needs its space back | ||
587 | * | ||
588 | * We previously reserved a bunch of blocks for allocation. Now we need to | ||
589 | * give them back. This leaves the reservation structures intact, but removes | ||
590 | * all of their corresponding "no-fly zones". | ||
591 | */ | ||
592 | static void return_all_reservations(struct gfs2_rgrpd *rgd) | ||
593 | { | ||
594 | struct rb_node *n; | ||
595 | struct gfs2_blkreserv *rs; | ||
596 | |||
597 | spin_lock(&rgd->rd_rsspin); | ||
598 | while ((n = rb_first(&rgd->rd_rstree))) { | ||
599 | rs = rb_entry(n, struct gfs2_blkreserv, rs_node); | ||
600 | __rs_deltree(rs); | ||
601 | } | ||
602 | spin_unlock(&rgd->rd_rsspin); | ||
603 | } | ||
604 | |||
420 | void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) | 605 | void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) |
421 | { | 606 | { |
422 | struct rb_node *n; | 607 | struct rb_node *n; |
@@ -439,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) | |||
439 | 624 | ||
440 | gfs2_free_clones(rgd); | 625 | gfs2_free_clones(rgd); |
441 | kfree(rgd->rd_bits); | 626 | kfree(rgd->rd_bits); |
627 | return_all_reservations(rgd); | ||
442 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); | 628 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); |
443 | } | 629 | } |
444 | } | 630 | } |
@@ -616,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) | |||
616 | rgd->rd_data0 = be64_to_cpu(buf.ri_data0); | 802 | rgd->rd_data0 = be64_to_cpu(buf.ri_data0); |
617 | rgd->rd_data = be32_to_cpu(buf.ri_data); | 803 | rgd->rd_data = be32_to_cpu(buf.ri_data); |
618 | rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); | 804 | rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); |
805 | spin_lock_init(&rgd->rd_rsspin); | ||
619 | 806 | ||
620 | error = compute_bitstructs(rgd); | 807 | error = compute_bitstructs(rgd); |
621 | if (error) | 808 | if (error) |
@@ -627,6 +814,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) | |||
627 | goto fail; | 814 | goto fail; |
628 | 815 | ||
629 | rgd->rd_gl->gl_object = rgd; | 816 | rgd->rd_gl->gl_object = rgd; |
817 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; | ||
630 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 818 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
631 | if (rgd->rd_data > sdp->sd_max_rg_data) | 819 | if (rgd->rd_data > sdp->sd_max_rg_data) |
632 | sdp->sd_max_rg_data = rgd->rd_data; | 820 | sdp->sd_max_rg_data = rgd->rd_data; |
@@ -736,9 +924,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) | |||
736 | memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); | 924 | memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); |
737 | } | 925 | } |
738 | 926 | ||
927 | static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) | ||
928 | { | ||
929 | struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; | ||
930 | struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; | ||
931 | |||
932 | if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || | ||
933 | rgl->rl_dinodes != str->rg_dinodes || | ||
934 | rgl->rl_igeneration != str->rg_igeneration) | ||
935 | return 0; | ||
936 | return 1; | ||
937 | } | ||
938 | |||
939 | static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) | ||
940 | { | ||
941 | const struct gfs2_rgrp *str = buf; | ||
942 | |||
943 | rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); | ||
944 | rgl->rl_flags = str->rg_flags; | ||
945 | rgl->rl_free = str->rg_free; | ||
946 | rgl->rl_dinodes = str->rg_dinodes; | ||
947 | rgl->rl_igeneration = str->rg_igeneration; | ||
948 | rgl->__pad = 0UL; | ||
949 | } | ||
950 | |||
951 | static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) | ||
952 | { | ||
953 | struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; | ||
954 | u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; | ||
955 | rgl->rl_unlinked = cpu_to_be32(unlinked); | ||
956 | } | ||
957 | |||
958 | static u32 count_unlinked(struct gfs2_rgrpd *rgd) | ||
959 | { | ||
960 | struct gfs2_bitmap *bi; | ||
961 | const u32 length = rgd->rd_length; | ||
962 | const u8 *buffer = NULL; | ||
963 | u32 i, goal, count = 0; | ||
964 | |||
965 | for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { | ||
966 | goal = 0; | ||
967 | buffer = bi->bi_bh->b_data + bi->bi_offset; | ||
968 | WARN_ON(!buffer_uptodate(bi->bi_bh)); | ||
969 | while (goal < bi->bi_len * GFS2_NBBY) { | ||
970 | goal = gfs2_bitfit(buffer, bi->bi_len, goal, | ||
971 | GFS2_BLKST_UNLINKED); | ||
972 | if (goal == BFITNOENT) | ||
973 | break; | ||
974 | count++; | ||
975 | goal++; | ||
976 | } | ||
977 | } | ||
978 | |||
979 | return count; | ||
980 | } | ||
981 | |||
982 | |||
739 | /** | 983 | /** |
740 | * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps | 984 | * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps |
741 | * @gh: The glock holder for the resource group | 985 | * @rgd: the struct gfs2_rgrpd describing the RG to read in |
742 | * | 986 | * |
743 | * Read in all of a Resource Group's header and bitmap blocks. | 987 | * Read in all of a Resource Group's header and bitmap blocks. |
744 | * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. | 988 | * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. |
@@ -746,9 +990,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) | |||
746 | * Returns: errno | 990 | * Returns: errno |
747 | */ | 991 | */ |
748 | 992 | ||
749 | int gfs2_rgrp_go_lock(struct gfs2_holder *gh) | 993 | int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) |
750 | { | 994 | { |
751 | struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; | ||
752 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 995 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
753 | struct gfs2_glock *gl = rgd->rd_gl; | 996 | struct gfs2_glock *gl = rgd->rd_gl; |
754 | unsigned int length = rgd->rd_length; | 997 | unsigned int length = rgd->rd_length; |
@@ -756,6 +999,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) | |||
756 | unsigned int x, y; | 999 | unsigned int x, y; |
757 | int error; | 1000 | int error; |
758 | 1001 | ||
1002 | if (rgd->rd_bits[0].bi_bh != NULL) | ||
1003 | return 0; | ||
1004 | |||
759 | for (x = 0; x < length; x++) { | 1005 | for (x = 0; x < length; x++) { |
760 | bi = rgd->rd_bits + x; | 1006 | bi = rgd->rd_bits + x; |
761 | error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); | 1007 | error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); |
@@ -782,7 +1028,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) | |||
782 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | 1028 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
783 | rgd->rd_free_clone = rgd->rd_free; | 1029 | rgd->rd_free_clone = rgd->rd_free; |
784 | } | 1030 | } |
785 | 1031 | if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { | |
1032 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); | ||
1033 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, | ||
1034 | rgd->rd_bits[0].bi_bh->b_data); | ||
1035 | } | ||
1036 | else if (sdp->sd_args.ar_rgrplvb) { | ||
1037 | if (!gfs2_rgrp_lvb_valid(rgd)){ | ||
1038 | gfs2_consist_rgrpd(rgd); | ||
1039 | error = -EIO; | ||
1040 | goto fail; | ||
1041 | } | ||
1042 | if (rgd->rd_rgl->rl_unlinked == 0) | ||
1043 | rgd->rd_flags &= ~GFS2_RDF_CHECK; | ||
1044 | } | ||
786 | return 0; | 1045 | return 0; |
787 | 1046 | ||
788 | fail: | 1047 | fail: |
@@ -796,6 +1055,39 @@ fail: | |||
796 | return error; | 1055 | return error; |
797 | } | 1056 | } |
798 | 1057 | ||
1058 | int update_rgrp_lvb(struct gfs2_rgrpd *rgd) | ||
1059 | { | ||
1060 | u32 rl_flags; | ||
1061 | |||
1062 | if (rgd->rd_flags & GFS2_RDF_UPTODATE) | ||
1063 | return 0; | ||
1064 | |||
1065 | if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) | ||
1066 | return gfs2_rgrp_bh_get(rgd); | ||
1067 | |||
1068 | rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); | ||
1069 | rl_flags &= ~GFS2_RDF_MASK; | ||
1070 | rgd->rd_flags &= GFS2_RDF_MASK; | ||
1071 | rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | ||
1072 | if (rgd->rd_rgl->rl_unlinked == 0) | ||
1073 | rgd->rd_flags &= ~GFS2_RDF_CHECK; | ||
1074 | rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); | ||
1075 | rgd->rd_free_clone = rgd->rd_free; | ||
1076 | rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); | ||
1077 | rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); | ||
1078 | return 0; | ||
1079 | } | ||
1080 | |||
1081 | int gfs2_rgrp_go_lock(struct gfs2_holder *gh) | ||
1082 | { | ||
1083 | struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; | ||
1084 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
1085 | |||
1086 | if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) | ||
1087 | return 0; | ||
1088 | return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); | ||
1089 | } | ||
1090 | |||
799 | /** | 1091 | /** |
800 | * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() | 1092 | * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() |
801 | * @gh: The glock holder for the resource group | 1093 | * @gh: The glock holder for the resource group |
@@ -809,8 +1101,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) | |||
809 | 1101 | ||
810 | for (x = 0; x < length; x++) { | 1102 | for (x = 0; x < length; x++) { |
811 | struct gfs2_bitmap *bi = rgd->rd_bits + x; | 1103 | struct gfs2_bitmap *bi = rgd->rd_bits + x; |
812 | brelse(bi->bi_bh); | 1104 | if (bi->bi_bh) { |
813 | bi->bi_bh = NULL; | 1105 | brelse(bi->bi_bh); |
1106 | bi->bi_bh = NULL; | ||
1107 | } | ||
814 | } | 1108 | } |
815 | 1109 | ||
816 | } | 1110 | } |
@@ -954,6 +1248,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) | |||
954 | rgd->rd_flags |= GFS2_RGF_TRIMMED; | 1248 | rgd->rd_flags |= GFS2_RGF_TRIMMED; |
955 | gfs2_trans_add_bh(rgd->rd_gl, bh, 1); | 1249 | gfs2_trans_add_bh(rgd->rd_gl, bh, 1); |
956 | gfs2_rgrp_out(rgd, bh->b_data); | 1250 | gfs2_rgrp_out(rgd, bh->b_data); |
1251 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); | ||
957 | gfs2_trans_end(sdp); | 1252 | gfs2_trans_end(sdp); |
958 | } | 1253 | } |
959 | } | 1254 | } |
@@ -974,38 +1269,184 @@ out: | |||
974 | } | 1269 | } |
975 | 1270 | ||
976 | /** | 1271 | /** |
977 | * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode | 1272 | * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree |
978 | * @ip: the incore GFS2 inode structure | 1273 | * @bi: the bitmap with the blocks |
1274 | * @ip: the inode structure | ||
1275 | * @biblk: the 32-bit block number relative to the start of the bitmap | ||
1276 | * @amount: the number of blocks to reserve | ||
979 | * | 1277 | * |
980 | * Returns: the struct gfs2_qadata | 1278 | * Returns: NULL - reservation was already taken, so not inserted |
1279 | * pointer to the inserted reservation | ||
981 | */ | 1280 | */ |
1281 | static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, | ||
1282 | struct gfs2_inode *ip, u32 biblk, | ||
1283 | int amount) | ||
1284 | { | ||
1285 | struct rb_node **newn, *parent = NULL; | ||
1286 | int rc; | ||
1287 | struct gfs2_blkreserv *rs = ip->i_res; | ||
1288 | struct gfs2_rgrpd *rgd = rs->rs_rgd; | ||
1289 | u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; | ||
982 | 1290 | ||
983 | struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) | 1291 | spin_lock(&rgd->rd_rsspin); |
1292 | newn = &rgd->rd_rstree.rb_node; | ||
1293 | BUG_ON(!ip->i_res); | ||
1294 | BUG_ON(gfs2_rs_active(rs)); | ||
1295 | /* Figure out where to put new node */ | ||
1296 | /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ | ||
1297 | while (*newn) { | ||
1298 | struct gfs2_blkreserv *cur = | ||
1299 | rb_entry(*newn, struct gfs2_blkreserv, rs_node); | ||
1300 | |||
1301 | parent = *newn; | ||
1302 | rc = rs_cmp(fsblock, amount, cur); | ||
1303 | if (rc > 0) | ||
1304 | newn = &((*newn)->rb_right); | ||
1305 | else if (rc < 0) | ||
1306 | newn = &((*newn)->rb_left); | ||
1307 | else { | ||
1308 | spin_unlock(&rgd->rd_rsspin); | ||
1309 | return NULL; /* reservation already in use */ | ||
1310 | } | ||
1311 | } | ||
1312 | |||
1313 | /* Do our reservation work */ | ||
1314 | rs = ip->i_res; | ||
1315 | rs->rs_free = amount; | ||
1316 | rs->rs_biblk = biblk; | ||
1317 | rs->rs_bi = bi; | ||
1318 | rb_link_node(&rs->rs_node, parent, newn); | ||
1319 | rb_insert_color(&rs->rs_node, &rgd->rd_rstree); | ||
1320 | |||
1321 | /* Do our inode accounting for the reservation */ | ||
1322 | /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ | ||
1323 | |||
1324 | /* Do our rgrp accounting for the reservation */ | ||
1325 | rgd->rd_reserved += amount; /* blocks reserved */ | ||
1326 | rgd->rd_rs_cnt++; /* number of in-tree reservations */ | ||
1327 | spin_unlock(&rgd->rd_rsspin); | ||
1328 | trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); | ||
1329 | return rs; | ||
1330 | } | ||
1331 | |||
1332 | /** | ||
1333 | * unclaimed_blocks - return number of blocks that aren't spoken for | ||
1334 | */ | ||
1335 | static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) | ||
984 | { | 1336 | { |
985 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1337 | return rgd->rd_free_clone - rgd->rd_reserved; |
986 | int error; | ||
987 | BUG_ON(ip->i_qadata != NULL); | ||
988 | ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); | ||
989 | error = gfs2_rindex_update(sdp); | ||
990 | if (error) | ||
991 | fs_warn(sdp, "rindex update returns %d\n", error); | ||
992 | return ip->i_qadata; | ||
993 | } | 1338 | } |
994 | 1339 | ||
995 | /** | 1340 | /** |
996 | * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode | 1341 | * rg_mblk_search - find a group of multiple free blocks |
997 | * @ip: the incore GFS2 inode structure | 1342 | * @rgd: the resource group descriptor |
1343 | * @rs: the block reservation | ||
1344 | * @ip: pointer to the inode for which we're reserving blocks | ||
998 | * | 1345 | * |
999 | * Returns: the struct gfs2_qadata | 1346 | * This is very similar to rgblk_search, except we're looking for whole |
1347 | * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing | ||
1348 | * on aligned dwords for speed's sake. | ||
1349 | * | ||
1350 | * Returns: 0 if successful or BFITNOENT if there isn't enough free space | ||
1000 | */ | 1351 | */ |
1001 | 1352 | ||
1002 | static int gfs2_blkrsv_get(struct gfs2_inode *ip) | 1353 | static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) |
1003 | { | 1354 | { |
1004 | BUG_ON(ip->i_res != NULL); | 1355 | struct gfs2_bitmap *bi = rgd->rd_bits; |
1005 | ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); | 1356 | const u32 length = rgd->rd_length; |
1006 | if (!ip->i_res) | 1357 | u32 blk; |
1007 | return -ENOMEM; | 1358 | unsigned int buf, x, search_bytes; |
1008 | return 0; | 1359 | u8 *buffer = NULL; |
1360 | u8 *ptr, *end, *nonzero; | ||
1361 | u32 goal, rsv_bytes; | ||
1362 | struct gfs2_blkreserv *rs; | ||
1363 | u32 best_rs_bytes, unclaimed; | ||
1364 | int best_rs_blocks; | ||
1365 | |||
1366 | /* Find bitmap block that contains bits for goal block */ | ||
1367 | if (rgrp_contains_block(rgd, ip->i_goal)) | ||
1368 | goal = ip->i_goal - rgd->rd_data0; | ||
1369 | else | ||
1370 | goal = rgd->rd_last_alloc; | ||
1371 | for (buf = 0; buf < length; buf++) { | ||
1372 | bi = rgd->rd_bits + buf; | ||
1373 | /* Convert scope of "goal" from rgrp-wide to within | ||
1374 | found bit block */ | ||
1375 | if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { | ||
1376 | goal -= bi->bi_start * GFS2_NBBY; | ||
1377 | goto do_search; | ||
1378 | } | ||
1379 | } | ||
1380 | buf = 0; | ||
1381 | goal = 0; | ||
1382 | |||
1383 | do_search: | ||
1384 | best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), | ||
1385 | (RGRP_RSRV_MINBLKS * rgd->rd_length)); | ||
1386 | best_rs_bytes = (best_rs_blocks * | ||
1387 | (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / | ||
1388 | GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ | ||
1389 | best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); | ||
1390 | unclaimed = unclaimed_blocks(rgd); | ||
1391 | if (best_rs_bytes * GFS2_NBBY > unclaimed) | ||
1392 | best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; | ||
1393 | |||
1394 | for (x = 0; x <= length; x++) { | ||
1395 | bi = rgd->rd_bits + buf; | ||
1396 | |||
1397 | if (test_bit(GBF_FULL, &bi->bi_flags)) | ||
1398 | goto skip; | ||
1399 | |||
1400 | WARN_ON(!buffer_uptodate(bi->bi_bh)); | ||
1401 | if (bi->bi_clone) | ||
1402 | buffer = bi->bi_clone + bi->bi_offset; | ||
1403 | else | ||
1404 | buffer = bi->bi_bh->b_data + bi->bi_offset; | ||
1405 | |||
1406 | /* We have to keep the reservations aligned on u64 boundaries | ||
1407 | otherwise we could get situations where a byte can't be | ||
1408 | used because it's after a reservation, but a free bit still | ||
1409 | is within the reservation's area. */ | ||
1410 | ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); | ||
1411 | end = (buffer + bi->bi_len); | ||
1412 | while (ptr < end) { | ||
1413 | rsv_bytes = 0; | ||
1414 | if ((ptr + best_rs_bytes) <= end) | ||
1415 | search_bytes = best_rs_bytes; | ||
1416 | else | ||
1417 | search_bytes = end - ptr; | ||
1418 | BUG_ON(!search_bytes); | ||
1419 | nonzero = memchr_inv(ptr, 0, search_bytes); | ||
1420 | /* If the lot is all zeroes, reserve the whole size. If | ||
1421 | there are enough zeroes to satisfy the request, use | ||
1422 | what we can. If there's not enough, keep looking. */ | ||
1423 | if (nonzero == NULL) | ||
1424 | rsv_bytes = search_bytes; | ||
1425 | else if ((nonzero - ptr) * GFS2_NBBY >= | ||
1426 | ip->i_res->rs_requested) | ||
1427 | rsv_bytes = (nonzero - ptr); | ||
1428 | |||
1429 | if (rsv_bytes) { | ||
1430 | blk = ((ptr - buffer) * GFS2_NBBY); | ||
1431 | BUG_ON(blk >= bi->bi_len * GFS2_NBBY); | ||
1432 | rs = rs_insert(bi, ip, blk, | ||
1433 | rsv_bytes * GFS2_NBBY); | ||
1434 | if (IS_ERR(rs)) | ||
1435 | return PTR_ERR(rs); | ||
1436 | if (rs) | ||
1437 | return 0; | ||
1438 | } | ||
1439 | ptr += ALIGN(search_bytes, sizeof(u64)); | ||
1440 | } | ||
1441 | skip: | ||
1442 | /* Try next bitmap block (wrap back to rgrp header | ||
1443 | if at end) */ | ||
1444 | buf++; | ||
1445 | buf %= length; | ||
1446 | goal = 0; | ||
1447 | } | ||
1448 | |||
1449 | return BFITNOENT; | ||
1009 | } | 1450 | } |
1010 | 1451 | ||
1011 | /** | 1452 | /** |
@@ -1014,24 +1455,26 @@ static int gfs2_blkrsv_get(struct gfs2_inode *ip) | |||
1014 | * @ip: the inode | 1455 | * @ip: the inode |
1015 | * | 1456 | * |
1016 | * If there's room for the requested blocks to be allocated from the RG: | 1457 | * If there's room for the requested blocks to be allocated from the RG: |
1458 | * This will try to get a multi-block reservation first, and if that doesn't | ||
1459 | * fit, it will take what it can. | ||
1017 | * | 1460 | * |
1018 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) | 1461 | * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) |
1019 | */ | 1462 | */ |
1020 | 1463 | ||
1021 | static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) | 1464 | static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) |
1022 | { | 1465 | { |
1023 | const struct gfs2_blkreserv *rs = ip->i_res; | 1466 | struct gfs2_blkreserv *rs = ip->i_res; |
1024 | 1467 | ||
1025 | if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) | 1468 | if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) |
1026 | return 0; | 1469 | return 0; |
1027 | if (rgd->rd_free_clone >= rs->rs_requested) | 1470 | /* Look for a multi-block reservation. */ |
1471 | if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && | ||
1472 | rg_mblk_search(rgd, ip) != BFITNOENT) | ||
1473 | return 1; | ||
1474 | if (unclaimed_blocks(rgd) >= rs->rs_requested) | ||
1028 | return 1; | 1475 | return 1; |
1029 | return 0; | ||
1030 | } | ||
1031 | 1476 | ||
1032 | static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) | 1477 | return 0; |
1033 | { | ||
1034 | return (bi->bi_start * GFS2_NBBY) + blk; | ||
1035 | } | 1478 | } |
1036 | 1479 | ||
1037 | /** | 1480 | /** |
@@ -1101,119 +1544,120 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1101 | } | 1544 | } |
1102 | 1545 | ||
1103 | /** | 1546 | /** |
1104 | * get_local_rgrp - Choose and lock a rgrp for allocation | 1547 | * gfs2_inplace_reserve - Reserve space in the filesystem |
1105 | * @ip: the inode to reserve space for | 1548 | * @ip: the inode to reserve space for |
1106 | * @last_unlinked: the last unlinked block | 1549 | * @requested: the number of blocks to be reserved |
1107 | * | ||
1108 | * Try to acquire rgrp in way which avoids contending with others. | ||
1109 | * | 1550 | * |
1110 | * Returns: errno | 1551 | * Returns: errno |
1111 | */ | 1552 | */ |
1112 | 1553 | ||
1113 | static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | 1554 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) |
1114 | { | 1555 | { |
1115 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1556 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1116 | struct gfs2_rgrpd *rgd, *begin = NULL; | 1557 | struct gfs2_rgrpd *begin = NULL; |
1117 | struct gfs2_blkreserv *rs = ip->i_res; | 1558 | struct gfs2_blkreserv *rs = ip->i_res; |
1118 | int error, rg_locked, flags = LM_FLAG_TRY; | 1559 | int error = 0, rg_locked, flags = LM_FLAG_TRY; |
1560 | u64 last_unlinked = NO_BLOCK; | ||
1119 | int loops = 0; | 1561 | int loops = 0; |
1120 | 1562 | ||
1121 | if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) | 1563 | if (sdp->sd_args.ar_rgrplvb) |
1122 | rgd = begin = ip->i_rgd; | 1564 | flags |= GL_SKIP; |
1123 | else | 1565 | rs->rs_requested = requested; |
1124 | rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | 1566 | if (gfs2_assert_warn(sdp, requested)) { |
1125 | 1567 | error = -EINVAL; | |
1126 | if (rgd == NULL) | 1568 | goto out; |
1569 | } | ||
1570 | if (gfs2_rs_active(rs)) { | ||
1571 | begin = rs->rs_rgd; | ||
1572 | flags = 0; /* Yoda: Do or do not. There is no try */ | ||
1573 | } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { | ||
1574 | rs->rs_rgd = begin = ip->i_rgd; | ||
1575 | } else { | ||
1576 | rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | ||
1577 | } | ||
1578 | if (rs->rs_rgd == NULL) | ||
1127 | return -EBADSLT; | 1579 | return -EBADSLT; |
1128 | 1580 | ||
1129 | while (loops < 3) { | 1581 | while (loops < 3) { |
1130 | rg_locked = 0; | 1582 | rg_locked = 0; |
1131 | 1583 | ||
1132 | if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { | 1584 | if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { |
1133 | rg_locked = 1; | 1585 | rg_locked = 1; |
1134 | error = 0; | 1586 | error = 0; |
1587 | } else if (!loops && !gfs2_rs_active(rs) && | ||
1588 | rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { | ||
1589 | /* If the rgrp already is maxed out for contenders, | ||
1590 | we can eliminate it as a "first pass" without even | ||
1591 | requesting the rgrp glock. */ | ||
1592 | error = GLR_TRYFAILED; | ||
1135 | } else { | 1593 | } else { |
1136 | error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, | 1594 | error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, |
1137 | flags, &rs->rs_rgd_gh); | 1595 | LM_ST_EXCLUSIVE, flags, |
1596 | &rs->rs_rgd_gh); | ||
1597 | if (!error && sdp->sd_args.ar_rgrplvb) { | ||
1598 | error = update_rgrp_lvb(rs->rs_rgd); | ||
1599 | if (error) { | ||
1600 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | ||
1601 | return error; | ||
1602 | } | ||
1603 | } | ||
1138 | } | 1604 | } |
1139 | switch (error) { | 1605 | switch (error) { |
1140 | case 0: | 1606 | case 0: |
1141 | if (try_rgrp_fit(rgd, ip)) { | 1607 | if (gfs2_rs_active(rs)) { |
1142 | ip->i_rgd = rgd; | 1608 | if (unclaimed_blocks(rs->rs_rgd) + |
1609 | rs->rs_free >= rs->rs_requested) { | ||
1610 | ip->i_rgd = rs->rs_rgd; | ||
1611 | return 0; | ||
1612 | } | ||
1613 | /* We have a multi-block reservation, but the | ||
1614 | rgrp doesn't have enough free blocks to | ||
1615 | satisfy the request. Free the reservation | ||
1616 | and look for a suitable rgrp. */ | ||
1617 | gfs2_rs_deltree(rs); | ||
1618 | } | ||
1619 | if (try_rgrp_fit(rs->rs_rgd, ip)) { | ||
1620 | if (sdp->sd_args.ar_rgrplvb) | ||
1621 | gfs2_rgrp_bh_get(rs->rs_rgd); | ||
1622 | ip->i_rgd = rs->rs_rgd; | ||
1143 | return 0; | 1623 | return 0; |
1144 | } | 1624 | } |
1145 | if (rgd->rd_flags & GFS2_RDF_CHECK) | 1625 | if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { |
1146 | try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); | 1626 | if (sdp->sd_args.ar_rgrplvb) |
1627 | gfs2_rgrp_bh_get(rs->rs_rgd); | ||
1628 | try_rgrp_unlink(rs->rs_rgd, &last_unlinked, | ||
1629 | ip->i_no_addr); | ||
1630 | } | ||
1147 | if (!rg_locked) | 1631 | if (!rg_locked) |
1148 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1632 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
1149 | /* fall through */ | 1633 | /* fall through */ |
1150 | case GLR_TRYFAILED: | 1634 | case GLR_TRYFAILED: |
1151 | rgd = gfs2_rgrpd_get_next(rgd); | 1635 | rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); |
1152 | if (rgd == begin) { | 1636 | rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ |
1153 | flags = 0; | 1637 | if (rs->rs_rgd != begin) /* If we didn't wrap */ |
1154 | loops++; | 1638 | break; |
1155 | } | 1639 | |
1640 | flags &= ~LM_FLAG_TRY; | ||
1641 | loops++; | ||
1642 | /* Check that fs hasn't grown if writing to rindex */ | ||
1643 | if (ip == GFS2_I(sdp->sd_rindex) && | ||
1644 | !sdp->sd_rindex_uptodate) { | ||
1645 | error = gfs2_ri_update(ip); | ||
1646 | if (error) | ||
1647 | goto out; | ||
1648 | } else if (loops == 2) | ||
1649 | /* Flushing the log may release space */ | ||
1650 | gfs2_log_flush(sdp, NULL); | ||
1156 | break; | 1651 | break; |
1157 | default: | 1652 | default: |
1158 | return error; | 1653 | goto out; |
1159 | } | 1654 | } |
1160 | } | 1655 | } |
1161 | 1656 | error = -ENOSPC; | |
1162 | return -ENOSPC; | ||
1163 | } | ||
1164 | |||
1165 | static void gfs2_blkrsv_put(struct gfs2_inode *ip) | ||
1166 | { | ||
1167 | BUG_ON(ip->i_res == NULL); | ||
1168 | kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); | ||
1169 | ip->i_res = NULL; | ||
1170 | } | ||
1171 | |||
1172 | /** | ||
1173 | * gfs2_inplace_reserve - Reserve space in the filesystem | ||
1174 | * @ip: the inode to reserve space for | ||
1175 | * @requested: the number of blocks to be reserved | ||
1176 | * | ||
1177 | * Returns: errno | ||
1178 | */ | ||
1179 | |||
1180 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | ||
1181 | { | ||
1182 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1183 | struct gfs2_blkreserv *rs; | ||
1184 | int error; | ||
1185 | u64 last_unlinked = NO_BLOCK; | ||
1186 | int tries = 0; | ||
1187 | |||
1188 | error = gfs2_blkrsv_get(ip); | ||
1189 | if (error) | ||
1190 | return error; | ||
1191 | |||
1192 | rs = ip->i_res; | ||
1193 | rs->rs_requested = requested; | ||
1194 | if (gfs2_assert_warn(sdp, requested)) { | ||
1195 | error = -EINVAL; | ||
1196 | goto out; | ||
1197 | } | ||
1198 | |||
1199 | do { | ||
1200 | error = get_local_rgrp(ip, &last_unlinked); | ||
1201 | if (error != -ENOSPC) | ||
1202 | break; | ||
1203 | /* Check that fs hasn't grown if writing to rindex */ | ||
1204 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { | ||
1205 | error = gfs2_ri_update(ip); | ||
1206 | if (error) | ||
1207 | break; | ||
1208 | continue; | ||
1209 | } | ||
1210 | /* Flushing the log may release space */ | ||
1211 | gfs2_log_flush(sdp, NULL); | ||
1212 | } while (tries++ < 3); | ||
1213 | 1657 | ||
1214 | out: | 1658 | out: |
1215 | if (error) | 1659 | if (error) |
1216 | gfs2_blkrsv_put(ip); | 1660 | rs->rs_requested = 0; |
1217 | return error; | 1661 | return error; |
1218 | } | 1662 | } |
1219 | 1663 | ||
@@ -1228,9 +1672,15 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1228 | { | 1672 | { |
1229 | struct gfs2_blkreserv *rs = ip->i_res; | 1673 | struct gfs2_blkreserv *rs = ip->i_res; |
1230 | 1674 | ||
1675 | if (!rs) | ||
1676 | return; | ||
1677 | |||
1678 | if (!rs->rs_free) | ||
1679 | gfs2_rs_deltree(rs); | ||
1680 | |||
1231 | if (rs->rs_rgd_gh.gh_gl) | 1681 | if (rs->rs_rgd_gh.gh_gl) |
1232 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); | 1682 | gfs2_glock_dq_uninit(&rs->rs_rgd_gh); |
1233 | gfs2_blkrsv_put(ip); | 1683 | rs->rs_requested = 0; |
1234 | } | 1684 | } |
1235 | 1685 | ||
1236 | /** | 1686 | /** |
@@ -1326,7 +1776,27 @@ do_search: | |||
1326 | if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) | 1776 | if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) |
1327 | buffer = bi->bi_clone + bi->bi_offset; | 1777 | buffer = bi->bi_clone + bi->bi_offset; |
1328 | 1778 | ||
1329 | biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); | 1779 | while (1) { |
1780 | struct gfs2_blkreserv *rs; | ||
1781 | u32 rgblk; | ||
1782 | |||
1783 | biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); | ||
1784 | if (biblk == BFITNOENT) | ||
1785 | break; | ||
1786 | /* Check if this block is reserved() */ | ||
1787 | rgblk = gfs2_bi2rgd_blk(bi, biblk); | ||
1788 | rs = rs_find(rgd, rgblk); | ||
1789 | if (rs == NULL) | ||
1790 | break; | ||
1791 | |||
1792 | BUG_ON(rs->rs_bi != bi); | ||
1793 | biblk = BFITNOENT; | ||
1794 | /* This should jump to the first block after the | ||
1795 | reservation. */ | ||
1796 | goal = rs->rs_biblk + rs->rs_free; | ||
1797 | if (goal >= bi->bi_len * GFS2_NBBY) | ||
1798 | break; | ||
1799 | } | ||
1330 | if (biblk != BFITNOENT) | 1800 | if (biblk != BFITNOENT) |
1331 | break; | 1801 | break; |
1332 | 1802 | ||
@@ -1362,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, | |||
1362 | u32 blk, bool dinode, unsigned int *n) | 1832 | u32 blk, bool dinode, unsigned int *n) |
1363 | { | 1833 | { |
1364 | const unsigned int elen = *n; | 1834 | const unsigned int elen = *n; |
1365 | u32 goal; | 1835 | u32 goal, rgblk; |
1366 | const u8 *buffer = NULL; | 1836 | const u8 *buffer = NULL; |
1837 | struct gfs2_blkreserv *rs; | ||
1367 | 1838 | ||
1368 | *n = 0; | 1839 | *n = 0; |
1369 | buffer = bi->bi_bh->b_data + bi->bi_offset; | 1840 | buffer = bi->bi_bh->b_data + bi->bi_offset; |
@@ -1376,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, | |||
1376 | goal++; | 1847 | goal++; |
1377 | if (goal >= (bi->bi_len * GFS2_NBBY)) | 1848 | if (goal >= (bi->bi_len * GFS2_NBBY)) |
1378 | break; | 1849 | break; |
1850 | rgblk = gfs2_bi2rgd_blk(bi, goal); | ||
1851 | rs = rs_find(rgd, rgblk); | ||
1852 | if (rs) /* Oops, we bumped into someone's reservation */ | ||
1853 | break; | ||
1379 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != | 1854 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != |
1380 | GFS2_BLKST_FREE) | 1855 | GFS2_BLKST_FREE) |
1381 | break; | 1856 | break; |
@@ -1451,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1451 | 1926 | ||
1452 | int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) | 1927 | int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) |
1453 | { | 1928 | { |
1454 | const struct gfs2_rgrpd *rgd = gl->gl_object; | 1929 | struct gfs2_rgrpd *rgd = gl->gl_object; |
1930 | struct gfs2_blkreserv *trs; | ||
1931 | const struct rb_node *n; | ||
1932 | |||
1455 | if (rgd == NULL) | 1933 | if (rgd == NULL) |
1456 | return 0; | 1934 | return 0; |
1457 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", | 1935 | gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", |
1458 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, | 1936 | (unsigned long long)rgd->rd_addr, rgd->rd_flags, |
1459 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); | 1937 | rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, |
1938 | rgd->rd_reserved); | ||
1939 | spin_lock(&rgd->rd_rsspin); | ||
1940 | for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { | ||
1941 | trs = rb_entry(n, struct gfs2_blkreserv, rs_node); | ||
1942 | dump_rs(seq, trs); | ||
1943 | } | ||
1944 | spin_unlock(&rgd->rd_rsspin); | ||
1460 | return 0; | 1945 | return 0; |
1461 | } | 1946 | } |
1462 | 1947 | ||
@@ -1471,10 +1956,63 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) | |||
1471 | } | 1956 | } |
1472 | 1957 | ||
1473 | /** | 1958 | /** |
1959 | * claim_reserved_blks - Claim previously reserved blocks | ||
1960 | * @ip: the inode that's claiming the reservation | ||
1961 | * @dinode: 1 if this block is a dinode block, otherwise data block | ||
1962 | * @nblocks: desired extent length | ||
1963 | * | ||
1964 | * Lay claim to previously allocated block reservation blocks. | ||
1965 | * Returns: Starting block number of the blocks claimed. | ||
1966 | * Sets *nblocks to the actual extent length allocated. | ||
1967 | */ | ||
1968 | static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, | ||
1969 | unsigned int *nblocks) | ||
1970 | { | ||
1971 | struct gfs2_blkreserv *rs = ip->i_res; | ||
1972 | struct gfs2_rgrpd *rgd = rs->rs_rgd; | ||
1973 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1974 | struct gfs2_bitmap *bi; | ||
1975 | u64 start_block = gfs2_rs_startblk(rs); | ||
1976 | const unsigned int elen = *nblocks; | ||
1977 | |||
1978 | /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ | ||
1979 | gfs2_assert_withdraw(sdp, rgd); | ||
1980 | /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ | ||
1981 | bi = rs->rs_bi; | ||
1982 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | ||
1983 | |||
1984 | for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { | ||
1985 | /* Make sure the bitmap hasn't changed */ | ||
1986 | gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, | ||
1987 | dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); | ||
1988 | rs->rs_biblk++; | ||
1989 | rs->rs_free--; | ||
1990 | |||
1991 | BUG_ON(!rgd->rd_reserved); | ||
1992 | rgd->rd_reserved--; | ||
1993 | dinode = false; | ||
1994 | trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); | ||
1995 | } | ||
1996 | |||
1997 | if (!rs->rs_free) { | ||
1998 | struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; | ||
1999 | |||
2000 | gfs2_rs_deltree(rs); | ||
2001 | /* -nblocks because we haven't returned to do the math yet. | ||
2002 | I'm doing the math backwards to prevent negative numbers, | ||
2003 | but think of it as: | ||
2004 | if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ | ||
2005 | if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) | ||
2006 | rg_mblk_search(rgd, ip); | ||
2007 | } | ||
2008 | return start_block; | ||
2009 | } | ||
2010 | |||
2011 | /** | ||
1474 | * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode | 2012 | * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode |
1475 | * @ip: the inode to allocate the block for | 2013 | * @ip: the inode to allocate the block for |
1476 | * @bn: Used to return the starting block number | 2014 | * @bn: Used to return the starting block number |
1477 | * @ndata: requested number of blocks/extent length (value/result) | 2015 | * @nblocks: requested number of blocks/extent length (value/result) |
1478 | * @dinode: 1 if we're allocating a dinode block, else 0 | 2016 | * @dinode: 1 if we're allocating a dinode block, else 0 |
1479 | * @generation: the generation number of the inode | 2017 | * @generation: the generation number of the inode |
1480 | * | 2018 | * |
@@ -1496,23 +2034,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
1496 | /* Only happens if there is a bug in gfs2, return something distinctive | 2034 | /* Only happens if there is a bug in gfs2, return something distinctive |
1497 | * to ensure that it is noticed. | 2035 | * to ensure that it is noticed. |
1498 | */ | 2036 | */ |
1499 | if (ip->i_res == NULL) | 2037 | if (ip->i_res->rs_requested == 0) |
1500 | return -ECANCELED; | 2038 | return -ECANCELED; |
1501 | 2039 | ||
1502 | rgd = ip->i_rgd; | 2040 | /* Check if we have a multi-block reservation, and if so, claim the |
1503 | 2041 | next free block from it. */ | |
1504 | if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) | 2042 | if (gfs2_rs_active(ip->i_res)) { |
1505 | goal = ip->i_goal - rgd->rd_data0; | 2043 | BUG_ON(!ip->i_res->rs_free); |
1506 | else | 2044 | rgd = ip->i_res->rs_rgd; |
1507 | goal = rgd->rd_last_alloc; | 2045 | block = claim_reserved_blks(ip, dinode, nblocks); |
1508 | 2046 | } else { | |
1509 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); | 2047 | rgd = ip->i_rgd; |
1510 | 2048 | ||
1511 | /* Since all blocks are reserved in advance, this shouldn't happen */ | 2049 | if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) |
1512 | if (blk == BFITNOENT) | 2050 | goal = ip->i_goal - rgd->rd_data0; |
1513 | goto rgrp_error; | 2051 | else |
2052 | goal = rgd->rd_last_alloc; | ||
2053 | |||
2054 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); | ||
2055 | |||
2056 | /* Since all blocks are reserved in advance, this shouldn't | ||
2057 | happen */ | ||
2058 | if (blk == BFITNOENT) { | ||
2059 | printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", | ||
2060 | *nblocks); | ||
2061 | printk(KERN_WARNING "FULL=%d\n", | ||
2062 | test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); | ||
2063 | goto rgrp_error; | ||
2064 | } | ||
1514 | 2065 | ||
1515 | block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); | 2066 | block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); |
2067 | } | ||
1516 | ndata = *nblocks; | 2068 | ndata = *nblocks; |
1517 | if (dinode) | 2069 | if (dinode) |
1518 | ndata--; | 2070 | ndata--; |
@@ -1529,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
1529 | brelse(dibh); | 2081 | brelse(dibh); |
1530 | } | 2082 | } |
1531 | } | 2083 | } |
1532 | if (rgd->rd_free < *nblocks) | 2084 | if (rgd->rd_free < *nblocks) { |
2085 | printk(KERN_WARNING "nblocks=%u\n", *nblocks); | ||
1533 | goto rgrp_error; | 2086 | goto rgrp_error; |
2087 | } | ||
1534 | 2088 | ||
1535 | rgd->rd_free -= *nblocks; | 2089 | rgd->rd_free -= *nblocks; |
1536 | if (dinode) { | 2090 | if (dinode) { |
@@ -1542,6 +2096,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
1542 | 2096 | ||
1543 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 2097 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1544 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 2098 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
2099 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); | ||
1545 | 2100 | ||
1546 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); | 2101 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); |
1547 | if (dinode) | 2102 | if (dinode) |
@@ -1588,6 +2143,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) | |||
1588 | rgd->rd_flags &= ~GFS2_RGF_TRIMMED; | 2143 | rgd->rd_flags &= ~GFS2_RGF_TRIMMED; |
1589 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 2144 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1590 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 2145 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
2146 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); | ||
1591 | 2147 | ||
1592 | /* Directories keep their data in the metadata address space */ | 2148 | /* Directories keep their data in the metadata address space */ |
1593 | if (meta || ip->i_depth) | 2149 | if (meta || ip->i_depth) |
@@ -1624,6 +2180,8 @@ void gfs2_unlink_di(struct inode *inode) | |||
1624 | trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); | 2180 | trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); |
1625 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 2181 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1626 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 2182 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
2183 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); | ||
2184 | update_rgrp_lvb_unlinked(rgd, 1); | ||
1627 | } | 2185 | } |
1628 | 2186 | ||
1629 | static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | 2187 | static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) |
@@ -1643,6 +2201,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | |||
1643 | 2201 | ||
1644 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 2202 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
1645 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 2203 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
2204 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); | ||
2205 | update_rgrp_lvb_unlinked(rgd, -1); | ||
1646 | 2206 | ||
1647 | gfs2_statfs_change(sdp, 0, +1, -1); | 2207 | gfs2_statfs_change(sdp, 0, +1, -1); |
1648 | } | 2208 | } |
@@ -1784,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) | |||
1784 | for (x = 0; x < rlist->rl_rgrps; x++) | 2344 | for (x = 0; x < rlist->rl_rgrps; x++) |
1785 | gfs2_holder_uninit(&rlist->rl_ghs[x]); | 2345 | gfs2_holder_uninit(&rlist->rl_ghs[x]); |
1786 | kfree(rlist->rl_ghs); | 2346 | kfree(rlist->rl_ghs); |
2347 | rlist->rl_ghs = NULL; | ||
1787 | } | 2348 | } |
1788 | } | 2349 | } |
1789 | 2350 | ||