diff options
-rw-r--r-- | Documentation/filesystems/ocfs2.txt | 3 | ||||
-rw-r--r-- | fs/ocfs2/Makefile | 1 | ||||
-rw-r--r-- | fs/ocfs2/cluster/masklog.c | 1 | ||||
-rw-r--r-- | fs/ocfs2/cluster/masklog.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/localalloc.c | 64 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/reservations.c | 849 | ||||
-rw-r--r-- | fs/ocfs2/reservations.h | 154 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 25 |
10 files changed, 1094 insertions, 11 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index c58b9f5ba002..412df9095937 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -80,3 +80,6 @@ user_xattr (*) Enables Extended User Attributes. | |||
80 | nouser_xattr Disables Extended User Attributes. | 80 | nouser_xattr Disables Extended User Attributes. |
81 | acl Enables POSIX Access Control Lists support. | 81 | acl Enables POSIX Access Control Lists support. |
82 | noacl (*) Disables POSIX Access Control Lists support. | 82 | noacl (*) Disables POSIX Access Control Lists support. |
83 | resv_level=4 (*) Set how aggressive allocation reservations will be. | ||
84 | Valid values range from 0 (reservations off) to 8 | ||
85 | (maximum space for reservations). | ||
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 791c0886c060..07d9fd854350 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -29,6 +29,7 @@ ocfs2-objs := \ | |||
29 | mmap.o \ | 29 | mmap.o \ |
30 | namei.o \ | 30 | namei.o \ |
31 | refcounttree.o \ | 31 | refcounttree.o \ |
32 | reservations.o \ | ||
32 | resize.o \ | 33 | resize.o \ |
33 | slot_map.o \ | 34 | slot_map.o \ |
34 | suballoc.o \ | 35 | suballoc.o \ |
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 3bb928a2bf7d..c7fba396392d 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
116 | define_mask(ERROR), | 116 | define_mask(ERROR), |
117 | define_mask(NOTICE), | 117 | define_mask(NOTICE), |
118 | define_mask(KTHREAD), | 118 | define_mask(KTHREAD), |
119 | define_mask(RESERVATIONS), | ||
119 | }; | 120 | }; |
120 | 121 | ||
121 | static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; | 122 | static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 3dfddbec32f2..fd96e2a2fa56 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -119,6 +119,7 @@ | |||
119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ | 121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ |
122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ | ||
122 | 123 | ||
123 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) | 124 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) |
124 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) | 125 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 7e7dd65d97ef..7fe8149a0002 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); | |||
52 | 52 | ||
53 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | 53 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, |
54 | struct ocfs2_dinode *alloc, | 54 | struct ocfs2_dinode *alloc, |
55 | u32 numbits); | 55 | u32 *numbits, |
56 | struct ocfs2_alloc_reservation *resv); | ||
56 | 57 | ||
57 | static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); | 58 | static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); |
58 | 59 | ||
@@ -262,6 +263,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
262 | 263 | ||
263 | osb->local_alloc_state = OCFS2_LA_DISABLED; | 264 | osb->local_alloc_state = OCFS2_LA_DISABLED; |
264 | 265 | ||
266 | ocfs2_resmap_uninit(&osb->osb_la_resmap); | ||
267 | |||
265 | main_bm_inode = ocfs2_get_system_file_inode(osb, | 268 | main_bm_inode = ocfs2_get_system_file_inode(osb, |
266 | GLOBAL_BITMAP_SYSTEM_INODE, | 269 | GLOBAL_BITMAP_SYSTEM_INODE, |
267 | OCFS2_INVALID_SLOT); | 270 | OCFS2_INVALID_SLOT); |
@@ -493,7 +496,7 @@ static int ocfs2_local_alloc_in_range(struct inode *inode, | |||
493 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | 496 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; |
494 | la = OCFS2_LOCAL_ALLOC(alloc); | 497 | la = OCFS2_LOCAL_ALLOC(alloc); |
495 | 498 | ||
496 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | 499 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, NULL); |
497 | if (start == -1) { | 500 | if (start == -1) { |
498 | mlog_errno(-ENOSPC); | 501 | mlog_errno(-ENOSPC); |
499 | return 0; | 502 | return 0; |
@@ -659,7 +662,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
659 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | 662 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; |
660 | la = OCFS2_LOCAL_ALLOC(alloc); | 663 | la = OCFS2_LOCAL_ALLOC(alloc); |
661 | 664 | ||
662 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | 665 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, |
666 | ac->ac_resv); | ||
663 | if (start == -1) { | 667 | if (start == -1) { |
664 | /* TODO: Shouldn't we just BUG here? */ | 668 | /* TODO: Shouldn't we just BUG here? */ |
665 | status = -ENOSPC; | 669 | status = -ENOSPC; |
@@ -669,8 +673,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
669 | 673 | ||
670 | bitmap = la->la_bitmap; | 674 | bitmap = la->la_bitmap; |
671 | *bit_off = le32_to_cpu(la->la_bm_off) + start; | 675 | *bit_off = le32_to_cpu(la->la_bm_off) + start; |
672 | /* local alloc is always contiguous by nature -- we never | ||
673 | * delete bits from it! */ | ||
674 | *num_bits = bits_wanted; | 676 | *num_bits = bits_wanted; |
675 | 677 | ||
676 | status = ocfs2_journal_access_di(handle, | 678 | status = ocfs2_journal_access_di(handle, |
@@ -682,6 +684,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
682 | goto bail; | 684 | goto bail; |
683 | } | 685 | } |
684 | 686 | ||
687 | ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start, | ||
688 | bits_wanted); | ||
689 | |||
685 | while(bits_wanted--) | 690 | while(bits_wanted--) |
686 | ocfs2_set_bit(start++, bitmap); | 691 | ocfs2_set_bit(start++, bitmap); |
687 | 692 | ||
@@ -711,13 +716,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) | |||
711 | } | 716 | } |
712 | 717 | ||
713 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | 718 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, |
714 | struct ocfs2_dinode *alloc, | 719 | struct ocfs2_dinode *alloc, |
715 | u32 numbits) | 720 | u32 *numbits, |
721 | struct ocfs2_alloc_reservation *resv) | ||
716 | { | 722 | { |
717 | int numfound, bitoff, left, startoff, lastzero; | 723 | int numfound, bitoff, left, startoff, lastzero; |
724 | int local_resv = 0; | ||
725 | struct ocfs2_alloc_reservation r; | ||
718 | void *bitmap = NULL; | 726 | void *bitmap = NULL; |
727 | struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap; | ||
719 | 728 | ||
720 | mlog_entry("(numbits wanted = %u)\n", numbits); | 729 | mlog_entry("(numbits wanted = %u)\n", *numbits); |
721 | 730 | ||
722 | if (!alloc->id1.bitmap1.i_total) { | 731 | if (!alloc->id1.bitmap1.i_total) { |
723 | mlog(0, "No bits in my window!\n"); | 732 | mlog(0, "No bits in my window!\n"); |
@@ -725,6 +734,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | |||
725 | goto bail; | 734 | goto bail; |
726 | } | 735 | } |
727 | 736 | ||
737 | if (!resv) { | ||
738 | local_resv = 1; | ||
739 | ocfs2_resv_init_once(&r); | ||
740 | ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP); | ||
741 | resv = &r; | ||
742 | } | ||
743 | |||
744 | numfound = *numbits; | ||
745 | if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) { | ||
746 | if (numfound < *numbits) | ||
747 | *numbits = numfound; | ||
748 | goto bail; | ||
749 | } | ||
750 | |||
751 | /* | ||
752 | * Code error. While reservations are enabled, local | ||
753 | * allocation should _always_ go through them. | ||
754 | */ | ||
755 | BUG_ON(osb->osb_resv_level != 0); | ||
756 | |||
757 | /* | ||
758 | * Reservations are disabled. Handle this the old way. | ||
759 | */ | ||
760 | |||
728 | bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; | 761 | bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; |
729 | 762 | ||
730 | numfound = bitoff = startoff = 0; | 763 | numfound = bitoff = startoff = 0; |
@@ -750,7 +783,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | |||
750 | startoff = bitoff+1; | 783 | startoff = bitoff+1; |
751 | } | 784 | } |
752 | /* we got everything we needed */ | 785 | /* we got everything we needed */ |
753 | if (numfound == numbits) { | 786 | if (numfound == *numbits) { |
754 | /* mlog(0, "Found it all!\n"); */ | 787 | /* mlog(0, "Found it all!\n"); */ |
755 | break; | 788 | break; |
756 | } | 789 | } |
@@ -759,12 +792,18 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | |||
759 | mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, | 792 | mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, |
760 | numfound); | 793 | numfound); |
761 | 794 | ||
762 | if (numfound == numbits) | 795 | if (numfound == *numbits) { |
763 | bitoff = startoff - numfound; | 796 | bitoff = startoff - numfound; |
764 | else | 797 | *numbits = numfound; |
798 | } else { | ||
799 | numfound = 0; | ||
765 | bitoff = -1; | 800 | bitoff = -1; |
801 | } | ||
766 | 802 | ||
767 | bail: | 803 | bail: |
804 | if (local_resv) | ||
805 | ocfs2_resv_discard(resmap, resv); | ||
806 | |||
768 | mlog_exit(bitoff); | 807 | mlog_exit(bitoff); |
769 | return bitoff; | 808 | return bitoff; |
770 | } | 809 | } |
@@ -1087,6 +1126,9 @@ retry_enospc: | |||
1087 | memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, | 1126 | memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, |
1088 | le16_to_cpu(la->la_size)); | 1127 | le16_to_cpu(la->la_size)); |
1089 | 1128 | ||
1129 | ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, | ||
1130 | OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); | ||
1131 | |||
1090 | mlog(0, "New window allocated:\n"); | 1132 | mlog(0, "New window allocated:\n"); |
1091 | mlog(0, "window la_bm_off = %u\n", | 1133 | mlog(0, "window la_bm_off = %u\n", |
1092 | OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); | 1134 | OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index adf5e2ebc2c4..9552560df6cd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -47,6 +47,7 @@ | |||
47 | /* For struct ocfs2_blockcheck_stats */ | 47 | /* For struct ocfs2_blockcheck_stats */ |
48 | #include "blockcheck.h" | 48 | #include "blockcheck.h" |
49 | 49 | ||
50 | #include "reservations.h" | ||
50 | 51 | ||
51 | /* Caching of metadata buffers */ | 52 | /* Caching of metadata buffers */ |
52 | 53 | ||
@@ -349,6 +350,10 @@ struct ocfs2_super | |||
349 | 350 | ||
350 | u64 la_last_gd; | 351 | u64 la_last_gd; |
351 | 352 | ||
353 | struct ocfs2_reservation_map osb_la_resmap; | ||
354 | |||
355 | unsigned int osb_resv_level; | ||
356 | |||
352 | /* Next three fields are for local node slot recovery during | 357 | /* Next three fields are for local node slot recovery during |
353 | * mount. */ | 358 | * mount. */ |
354 | int dirty; | 359 | int dirty; |
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c new file mode 100644 index 000000000000..79642d608210 --- /dev/null +++ b/fs/ocfs2/reservations.c | |||
@@ -0,0 +1,849 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * reservations.c | ||
5 | * | ||
6 | * Allocation reservations implementation | ||
7 | * | ||
8 | * Some code borrowed from fs/ext3/balloc.c and is: | ||
9 | * | ||
10 | * Copyright (C) 1992, 1993, 1994, 1995 | ||
11 | * Remy Card (card@masi.ibp.fr) | ||
12 | * Laboratoire MASI - Institut Blaise Pascal | ||
13 | * Universite Pierre et Marie Curie (Paris VI) | ||
14 | * | ||
15 | * The rest is copyright (C) 2010 Novell. All rights reserved. | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public | ||
19 | * License version 2 as published by the Free Software Foundation. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
24 | * General Public License for more details. | ||
25 | */ | ||
26 | |||
27 | #include <linux/fs.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/slab.h> | ||
30 | #include <linux/highmem.h> | ||
31 | #include <linux/bitops.h> | ||
32 | #include <linux/list.h> | ||
33 | |||
34 | #define MLOG_MASK_PREFIX ML_RESERVATIONS | ||
35 | #include <cluster/masklog.h> | ||
36 | |||
37 | #include "ocfs2.h" | ||
38 | |||
39 | #ifdef CONFIG_OCFS2_DEBUG_FS | ||
40 | #define OCFS2_CHECK_RESERVATIONS | ||
41 | #endif | ||
42 | |||
43 | DEFINE_SPINLOCK(resv_lock); | ||
44 | |||
45 | #define OCFS2_MIN_RESV_WINDOW_BITS 8 | ||
46 | #define OCFS2_MAX_RESV_WINDOW_BITS 1024 | ||
47 | |||
48 | static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap, | ||
49 | struct ocfs2_alloc_reservation *resv) | ||
50 | { | ||
51 | struct ocfs2_super *osb = resmap->m_osb; | ||
52 | unsigned int bits; | ||
53 | |||
54 | /* 8, 16, 32, 64, 128, 256, 512, 1024 */ | ||
55 | bits = 4 << osb->osb_resv_level; | ||
56 | |||
57 | return bits; | ||
58 | } | ||
59 | |||
60 | static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv) | ||
61 | { | ||
62 | if (resv->r_len) | ||
63 | return resv->r_start + resv->r_len - 1; | ||
64 | return resv->r_start; | ||
65 | } | ||
66 | |||
67 | static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv) | ||
68 | { | ||
69 | return !!(resv->r_len == 0); | ||
70 | } | ||
71 | |||
72 | static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap) | ||
73 | { | ||
74 | if (resmap->m_osb->osb_resv_level == 0) | ||
75 | return 1; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap) | ||
80 | { | ||
81 | struct ocfs2_super *osb = resmap->m_osb; | ||
82 | struct rb_node *node; | ||
83 | struct ocfs2_alloc_reservation *resv; | ||
84 | int i = 0; | ||
85 | |||
86 | mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n", | ||
87 | osb->dev_str, resmap->m_bitmap_len); | ||
88 | |||
89 | node = rb_first(&resmap->m_reservations); | ||
90 | while (node) { | ||
91 | resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node); | ||
92 | |||
93 | mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u" | ||
94 | "\tlast_len: %u\n", resv->r_start, | ||
95 | ocfs2_resv_end(resv), resv->r_len, resv->r_last_start, | ||
96 | resv->r_last_len); | ||
97 | |||
98 | node = rb_next(node); | ||
99 | i++; | ||
100 | } | ||
101 | |||
102 | mlog(ML_NOTICE, "%d reservations found. LRU follows\n", i); | ||
103 | |||
104 | i = 0; | ||
105 | list_for_each_entry(resv, &resmap->m_lru, r_lru) { | ||
106 | mlog(ML_NOTICE, "LRU(%d) start: %u\tend: %u\tlen: %u\t" | ||
107 | "last_start: %u\tlast_len: %u\n", i, resv->r_start, | ||
108 | ocfs2_resv_end(resv), resv->r_len, resv->r_last_start, | ||
109 | resv->r_last_len); | ||
110 | |||
111 | i++; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | #ifdef OCFS2_CHECK_RESERVATIONS | ||
116 | static int ocfs2_validate_resmap_bits(struct ocfs2_reservation_map *resmap, | ||
117 | int i, | ||
118 | struct ocfs2_alloc_reservation *resv) | ||
119 | { | ||
120 | char *disk_bitmap = resmap->m_disk_bitmap; | ||
121 | unsigned int start = resv->r_start; | ||
122 | unsigned int end = ocfs2_resv_end(resv); | ||
123 | |||
124 | while (start <= end) { | ||
125 | if (ocfs2_test_bit(start, disk_bitmap)) { | ||
126 | mlog(ML_ERROR, | ||
127 | "reservation %d covers an allocated area " | ||
128 | "starting at bit %u!\n", i, start); | ||
129 | return 1; | ||
130 | } | ||
131 | |||
132 | start++; | ||
133 | } | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap) | ||
138 | { | ||
139 | unsigned int off = 0; | ||
140 | int i = 0; | ||
141 | struct rb_node *node; | ||
142 | struct ocfs2_alloc_reservation *resv; | ||
143 | |||
144 | node = rb_first(&resmap->m_reservations); | ||
145 | while (node) { | ||
146 | resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node); | ||
147 | |||
148 | if (i > 0 && resv->r_start <= off) { | ||
149 | mlog(ML_ERROR, "reservation %d has bad start off!\n", | ||
150 | i); | ||
151 | goto bad; | ||
152 | } | ||
153 | |||
154 | if (resv->r_len == 0) { | ||
155 | mlog(ML_ERROR, "reservation %d has no length!\n", | ||
156 | i); | ||
157 | goto bad; | ||
158 | } | ||
159 | |||
160 | if (resv->r_start > ocfs2_resv_end(resv)) { | ||
161 | mlog(ML_ERROR, "reservation %d has invalid range!\n", | ||
162 | i); | ||
163 | goto bad; | ||
164 | } | ||
165 | |||
166 | if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len) { | ||
167 | mlog(ML_ERROR, "reservation %d extends past bitmap!\n", | ||
168 | i); | ||
169 | goto bad; | ||
170 | } | ||
171 | |||
172 | if (ocfs2_validate_resmap_bits(resmap, i, resv)) | ||
173 | goto bad; | ||
174 | |||
175 | off = ocfs2_resv_end(resv); | ||
176 | node = rb_next(node); | ||
177 | |||
178 | i++; | ||
179 | } | ||
180 | return; | ||
181 | |||
182 | bad: | ||
183 | ocfs2_dump_resv(resmap); | ||
184 | BUG(); | ||
185 | } | ||
186 | #else | ||
187 | static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap) | ||
188 | { | ||
189 | |||
190 | } | ||
191 | #endif | ||
192 | |||
193 | void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv) | ||
194 | { | ||
195 | memset(resv, 0, sizeof(*resv)); | ||
196 | INIT_LIST_HEAD(&resv->r_lru); | ||
197 | } | ||
198 | |||
199 | void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv, | ||
200 | unsigned int flags) | ||
201 | { | ||
202 | BUG_ON(flags & ~OCFS2_RESV_TYPES); | ||
203 | |||
204 | resv->r_flags |= flags; | ||
205 | } | ||
206 | |||
207 | int ocfs2_resmap_init(struct ocfs2_super *osb, | ||
208 | struct ocfs2_reservation_map *resmap) | ||
209 | { | ||
210 | memset(resmap, 0, sizeof(*resmap)); | ||
211 | |||
212 | resmap->m_osb = osb; | ||
213 | resmap->m_reservations = RB_ROOT; | ||
214 | /* m_bitmap_len is initialized to zero by the above memset. */ | ||
215 | INIT_LIST_HEAD(&resmap->m_lru); | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap, | ||
221 | struct ocfs2_alloc_reservation *resv) | ||
222 | { | ||
223 | assert_spin_locked(&resv_lock); | ||
224 | |||
225 | if (!list_empty(&resv->r_lru)) | ||
226 | list_del_init(&resv->r_lru); | ||
227 | |||
228 | list_add_tail(&resv->r_lru, &resmap->m_lru); | ||
229 | } | ||
230 | |||
231 | static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv) | ||
232 | { | ||
233 | resv->r_len = 0; | ||
234 | resv->r_start = 0; | ||
235 | } | ||
236 | |||
237 | static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap, | ||
238 | struct ocfs2_alloc_reservation *resv) | ||
239 | { | ||
240 | if (resv->r_flags & OCFS2_RESV_FLAG_INUSE) { | ||
241 | list_del_init(&resv->r_lru); | ||
242 | rb_erase(&resv->r_node, &resmap->m_reservations); | ||
243 | resv->r_flags &= ~OCFS2_RESV_FLAG_INUSE; | ||
244 | } | ||
245 | } | ||
246 | |||
247 | static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap, | ||
248 | struct ocfs2_alloc_reservation *resv) | ||
249 | { | ||
250 | assert_spin_locked(&resv_lock); | ||
251 | |||
252 | __ocfs2_resv_trunc(resv); | ||
253 | /* | ||
254 | * last_len and last_start no longer make sense if | ||
255 | * we're changing the range of our allocations. | ||
256 | */ | ||
257 | resv->r_last_len = resv->r_last_start = 0; | ||
258 | |||
259 | ocfs2_resv_remove(resmap, resv); | ||
260 | } | ||
261 | |||
262 | /* does nothing if 'resv' is null */ | ||
263 | void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap, | ||
264 | struct ocfs2_alloc_reservation *resv) | ||
265 | { | ||
266 | if (resv) { | ||
267 | spin_lock(&resv_lock); | ||
268 | __ocfs2_resv_discard(resmap, resv); | ||
269 | spin_unlock(&resv_lock); | ||
270 | } | ||
271 | } | ||
272 | |||
273 | static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap) | ||
274 | { | ||
275 | struct rb_node *node; | ||
276 | struct ocfs2_alloc_reservation *resv; | ||
277 | |||
278 | assert_spin_locked(&resv_lock); | ||
279 | |||
280 | while ((node = rb_last(&resmap->m_reservations)) != NULL) { | ||
281 | resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node); | ||
282 | |||
283 | __ocfs2_resv_discard(resmap, resv); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap, | ||
288 | unsigned int clen, char *disk_bitmap) | ||
289 | { | ||
290 | if (ocfs2_resmap_disabled(resmap)) | ||
291 | return; | ||
292 | |||
293 | spin_lock(&resv_lock); | ||
294 | |||
295 | ocfs2_resmap_clear_all_resv(resmap); | ||
296 | resmap->m_bitmap_len = clen; | ||
297 | resmap->m_disk_bitmap = disk_bitmap; | ||
298 | |||
299 | spin_unlock(&resv_lock); | ||
300 | } | ||
301 | |||
302 | void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap) | ||
303 | { | ||
304 | /* Does nothing for now. Keep this around for API symmetry */ | ||
305 | } | ||
306 | |||
307 | static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap, | ||
308 | struct ocfs2_alloc_reservation *new) | ||
309 | { | ||
310 | struct rb_root *root = &resmap->m_reservations; | ||
311 | struct rb_node *parent = NULL; | ||
312 | struct rb_node **p = &root->rb_node; | ||
313 | struct ocfs2_alloc_reservation *tmp; | ||
314 | |||
315 | assert_spin_locked(&resv_lock); | ||
316 | |||
317 | mlog(0, "Insert reservation start: %u len: %u\n", new->r_start, | ||
318 | new->r_len); | ||
319 | |||
320 | while (*p) { | ||
321 | parent = *p; | ||
322 | |||
323 | tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node); | ||
324 | |||
325 | if (new->r_start < tmp->r_start) { | ||
326 | p = &(*p)->rb_left; | ||
327 | |||
328 | /* | ||
329 | * This is a good place to check for | ||
330 | * overlapping reservations. | ||
331 | */ | ||
332 | BUG_ON(ocfs2_resv_end(new) >= tmp->r_start); | ||
333 | } else if (new->r_start > ocfs2_resv_end(tmp)) { | ||
334 | p = &(*p)->rb_right; | ||
335 | } else { | ||
336 | /* This should never happen! */ | ||
337 | mlog(ML_ERROR, "Duplicate reservation window!\n"); | ||
338 | BUG(); | ||
339 | } | ||
340 | } | ||
341 | |||
342 | rb_link_node(&new->r_node, parent, p); | ||
343 | rb_insert_color(&new->r_node, root); | ||
344 | new->r_flags |= OCFS2_RESV_FLAG_INUSE; | ||
345 | |||
346 | ocfs2_resv_mark_lru(resmap, new); | ||
347 | |||
348 | ocfs2_check_resmap(resmap); | ||
349 | } | ||
350 | |||
351 | /** | ||
352 | * ocfs2_find_resv_lhs() - find the window which contains goal | ||
353 | * @resmap: reservation map to search | ||
354 | * @goal: which bit to search for | ||
355 | * | ||
356 | * If a window containing that goal is not found, we return the window | ||
357 | * which comes before goal. Returns NULL on empty rbtree or no window | ||
358 | * before goal. | ||
359 | */ | ||
360 | static struct ocfs2_alloc_reservation * | ||
361 | ocfs2_find_resv_lhs(struct ocfs2_reservation_map *resmap, unsigned int goal) | ||
362 | { | ||
363 | struct ocfs2_alloc_reservation *resv = NULL; | ||
364 | struct ocfs2_alloc_reservation *prev_resv = NULL; | ||
365 | struct rb_node *node = resmap->m_reservations.rb_node; | ||
366 | struct rb_node *prev = NULL; | ||
367 | |||
368 | assert_spin_locked(&resv_lock); | ||
369 | |||
370 | if (!node) | ||
371 | return NULL; | ||
372 | |||
373 | node = rb_first(&resmap->m_reservations); | ||
374 | while (node) { | ||
375 | resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node); | ||
376 | |||
377 | if (resv->r_start <= goal && ocfs2_resv_end(resv) >= goal) | ||
378 | break; | ||
379 | |||
380 | /* Check if we overshot the reservation just before goal? */ | ||
381 | if (resv->r_start > goal) { | ||
382 | resv = prev_resv; | ||
383 | break; | ||
384 | } | ||
385 | |||
386 | prev_resv = resv; | ||
387 | prev = node; | ||
388 | node = rb_next(node); | ||
389 | } | ||
390 | |||
391 | return resv; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * We are given a range within the bitmap, which corresponds to a gap | ||
396 | * inside the reservations tree (search_start, search_len). The range | ||
397 | * can be anything from the whole bitmap, to a gap between | ||
398 | * reservations. | ||
399 | * | ||
400 | * The start value of *rstart is insignificant. | ||
401 | * | ||
402 | * This function searches the bitmap range starting at search_start | ||
403 | * with length search_len for a set of contiguous free bits. We try | ||
404 | * to find up to 'wanted' bits, but can sometimes return less. | ||
405 | * | ||
406 | * Returns the length of allocation, 0 if no free bits are found. | ||
407 | * | ||
408 | * *rstart and *rlen are populated with the result when bits are found. | ||
409 | */ | ||
410 | static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap, | ||
411 | unsigned int wanted, | ||
412 | unsigned int search_start, | ||
413 | unsigned int search_len, | ||
414 | unsigned int *rstart, | ||
415 | unsigned int *rlen) | ||
416 | { | ||
417 | void *bitmap = resmap->m_disk_bitmap; | ||
418 | unsigned int best_start, best_len = 0; | ||
419 | int offset, start, found; | ||
420 | |||
421 | mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n", | ||
422 | wanted, search_start, search_len, resmap->m_bitmap_len); | ||
423 | |||
424 | found = best_start = best_len = 0; | ||
425 | |||
426 | start = search_start; | ||
427 | while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len, | ||
428 | start)) != -1) { | ||
429 | /* Search reached end of the region */ | ||
430 | if (offset >= (search_start + search_len)) | ||
431 | break; | ||
432 | |||
433 | if (offset == start) { | ||
434 | /* we found a zero */ | ||
435 | found++; | ||
436 | /* move start to the next bit to test */ | ||
437 | start++; | ||
438 | } else { | ||
439 | /* got a zero after some ones */ | ||
440 | found = 1; | ||
441 | start = offset + 1; | ||
442 | } | ||
443 | if (found > best_len) { | ||
444 | best_len = found; | ||
445 | best_start = start - found; | ||
446 | } | ||
447 | |||
448 | if (found >= wanted) | ||
449 | break; | ||
450 | } | ||
451 | |||
452 | if (best_len == 0) | ||
453 | return 0; | ||
454 | |||
455 | if (best_len >= wanted) | ||
456 | best_len = wanted; | ||
457 | |||
458 | *rlen = best_len; | ||
459 | *rstart = best_start; | ||
460 | |||
461 | mlog(0, "Found start: %u len: %u\n", best_start, best_len); | ||
462 | |||
463 | return *rlen; | ||
464 | } | ||
465 | |||
466 | static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap, | ||
467 | struct ocfs2_alloc_reservation *resv, | ||
468 | unsigned int goal, unsigned int wanted) | ||
469 | { | ||
470 | struct rb_root *root = &resmap->m_reservations; | ||
471 | unsigned int gap_start, gap_end, gap_len; | ||
472 | struct ocfs2_alloc_reservation *prev_resv, *next_resv; | ||
473 | struct rb_node *prev, *next; | ||
474 | unsigned int cstart, clen; | ||
475 | unsigned int best_start = 0, best_len = 0; | ||
476 | |||
477 | /* | ||
478 | * Nasty cases to consider: | ||
479 | * | ||
480 | * - rbtree is empty | ||
481 | * - our window should be first in all reservations | ||
482 | * - our window should be last in all reservations | ||
483 | * - need to make sure we don't go past end of bitmap | ||
484 | */ | ||
485 | |||
486 | mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n", | ||
487 | resv->r_start, ocfs2_resv_end(resv), goal, wanted); | ||
488 | |||
489 | assert_spin_locked(&resv_lock); | ||
490 | |||
491 | if (RB_EMPTY_ROOT(root)) { | ||
492 | /* | ||
493 | * Easiest case - empty tree. We can just take | ||
494 | * whatever window of free bits we want. | ||
495 | */ | ||
496 | |||
497 | mlog(0, "Empty root\n"); | ||
498 | |||
499 | clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal, | ||
500 | resmap->m_bitmap_len - goal, | ||
501 | &cstart, &clen); | ||
502 | |||
503 | /* | ||
504 | * This should never happen - the local alloc window | ||
505 | * will always have free bits when we're called. | ||
506 | */ | ||
507 | BUG_ON(goal == 0 && clen == 0); | ||
508 | |||
509 | if (clen == 0) | ||
510 | return; | ||
511 | |||
512 | resv->r_start = cstart; | ||
513 | resv->r_len = clen; | ||
514 | |||
515 | ocfs2_resv_insert(resmap, resv); | ||
516 | return; | ||
517 | } | ||
518 | |||
519 | prev_resv = ocfs2_find_resv_lhs(resmap, goal); | ||
520 | |||
521 | if (prev_resv == NULL) { | ||
522 | mlog(0, "Goal on LHS of leftmost window\n"); | ||
523 | |||
524 | /* | ||
525 | * A NULL here means that the search code couldn't | ||
526 | * find a window that starts before goal. | ||
527 | * | ||
528 | * However, we can take the first window after goal, | ||
529 | * which is also by definition, the leftmost window in | ||
530 | * the entire tree. If we can find free bits in the | ||
531 | * gap between goal and the LHS window, then the | ||
532 | * reservation can safely be placed there. | ||
533 | * | ||
534 | * Otherwise we fall back to a linear search, checking | ||
535 | * the gaps in between windows for a place to | ||
536 | * allocate. | ||
537 | */ | ||
538 | |||
539 | next = rb_first(root); | ||
540 | next_resv = rb_entry(next, struct ocfs2_alloc_reservation, | ||
541 | r_node); | ||
542 | |||
543 | /* | ||
544 | * The search should never return such a window. (see | ||
545 | * comment above) | ||
546 | */ | ||
547 | if (next_resv->r_start <= goal) { | ||
548 | mlog(ML_ERROR, "goal: %u next_resv: start %u len %u\n", | ||
549 | goal, next_resv->r_start, next_resv->r_len); | ||
550 | ocfs2_dump_resv(resmap); | ||
551 | BUG(); | ||
552 | } | ||
553 | |||
554 | clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal, | ||
555 | next_resv->r_start - goal, | ||
556 | &cstart, &clen); | ||
557 | if (clen) { | ||
558 | best_len = clen; | ||
559 | best_start = cstart; | ||
560 | if (best_len == wanted) | ||
561 | goto out_insert; | ||
562 | } | ||
563 | |||
564 | prev_resv = next_resv; | ||
565 | next_resv = NULL; | ||
566 | } | ||
567 | |||
568 | prev = &prev_resv->r_node; | ||
569 | |||
570 | /* Now we do a linear search for a window, starting at 'prev_resv' */ | ||
571 | while (1) { | ||
572 | next = rb_next(prev); | ||
573 | if (next) { | ||
574 | mlog(0, "One more resv found in linear search\n"); | ||
575 | next_resv = rb_entry(next, | ||
576 | struct ocfs2_alloc_reservation, | ||
577 | r_node); | ||
578 | |||
579 | gap_start = ocfs2_resv_end(prev_resv) + 1; | ||
580 | gap_end = next_resv->r_start - 1; | ||
581 | gap_len = gap_end - gap_start + 1; | ||
582 | } else { | ||
583 | mlog(0, "No next node\n"); | ||
584 | /* | ||
585 | * We're at the rightmost edge of the | ||
586 | * tree. See if a reservation between this | ||
587 | * window and the end of the bitmap will work. | ||
588 | */ | ||
589 | gap_start = ocfs2_resv_end(prev_resv) + 1; | ||
590 | gap_len = resmap->m_bitmap_len - gap_start; | ||
591 | gap_end = resmap->m_bitmap_len - 1; | ||
592 | } | ||
593 | |||
594 | /* | ||
595 | * No need to check this gap if we have already found | ||
596 | * a larger region of free bits. | ||
597 | */ | ||
598 | if (gap_len <= best_len) | ||
599 | goto next_resv; | ||
600 | |||
601 | clen = ocfs2_resmap_find_free_bits(resmap, wanted, gap_start, | ||
602 | gap_len, &cstart, &clen); | ||
603 | if (clen == wanted) { | ||
604 | best_len = clen; | ||
605 | best_start = cstart; | ||
606 | goto out_insert; | ||
607 | } else if (clen > best_len) { | ||
608 | best_len = clen; | ||
609 | best_start = cstart; | ||
610 | } | ||
611 | |||
612 | next_resv: | ||
613 | if (!next) | ||
614 | break; | ||
615 | |||
616 | prev = next; | ||
617 | prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation, | ||
618 | r_node); | ||
619 | } | ||
620 | |||
621 | out_insert: | ||
622 | if (best_len) { | ||
623 | resv->r_start = best_start; | ||
624 | resv->r_len = best_len; | ||
625 | ocfs2_resv_insert(resmap, resv); | ||
626 | } | ||
627 | } | ||
628 | |||
629 | static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap, | ||
630 | struct ocfs2_alloc_reservation *resv, | ||
631 | unsigned int wanted) | ||
632 | { | ||
633 | struct ocfs2_alloc_reservation *lru_resv; | ||
634 | int tmpwindow = !!(resv->r_flags & OCFS2_RESV_FLAG_TMP); | ||
635 | unsigned int min_bits; | ||
636 | |||
637 | if (!tmpwindow) | ||
638 | min_bits = ocfs2_resv_window_bits(resmap, resv) >> 1; | ||
639 | else | ||
640 | min_bits = wanted; /* We at know the temp window will use all | ||
641 | * of these bits */ | ||
642 | |||
643 | /* | ||
644 | * Take the first reservation off the LRU as our 'target'. We | ||
645 | * don't try to be smart about it. There might be a case for | ||
646 | * searching based on size but I don't have enough data to be | ||
647 | * sure. --Mark (3/16/2010) | ||
648 | */ | ||
649 | lru_resv = list_first_entry(&resmap->m_lru, | ||
650 | struct ocfs2_alloc_reservation, r_lru); | ||
651 | |||
652 | mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start, | ||
653 | lru_resv->r_len, ocfs2_resv_end(lru_resv)); | ||
654 | |||
655 | /* | ||
656 | * Cannibalize (some or all) of the target reservation and | ||
657 | * feed it to the current window. | ||
658 | */ | ||
659 | if (lru_resv->r_len <= min_bits) { | ||
660 | /* | ||
661 | * Discard completely if size is less than or equal to a | ||
662 | * reasonable threshold - 50% of window bits for non temporary | ||
663 | * windows. | ||
664 | */ | ||
665 | resv->r_start = lru_resv->r_start; | ||
666 | resv->r_len = lru_resv->r_len; | ||
667 | |||
668 | __ocfs2_resv_discard(resmap, lru_resv); | ||
669 | } else { | ||
670 | unsigned int shrink; | ||
671 | if (tmpwindow) | ||
672 | shrink = min_bits; | ||
673 | else | ||
674 | shrink = lru_resv->r_len / 2; | ||
675 | |||
676 | lru_resv->r_len -= shrink; | ||
677 | |||
678 | resv->r_start = ocfs2_resv_end(lru_resv) + 1; | ||
679 | resv->r_len = shrink; | ||
680 | } | ||
681 | |||
682 | mlog(0, "Reservation now looks like: r_start: %u r_end: %u " | ||
683 | "r_len: %u r_last_start: %u r_last_len: %u\n", | ||
684 | resv->r_start, ocfs2_resv_end(resv), resv->r_len, | ||
685 | resv->r_last_start, resv->r_last_len); | ||
686 | |||
687 | ocfs2_resv_insert(resmap, resv); | ||
688 | } | ||
689 | |||
690 | static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap, | ||
691 | struct ocfs2_alloc_reservation *resv, | ||
692 | unsigned int wanted) | ||
693 | { | ||
694 | unsigned int goal = 0; | ||
695 | |||
696 | BUG_ON(!ocfs2_resv_empty(resv)); | ||
697 | |||
698 | /* | ||
699 | * Begin by trying to get a window as close to the previous | ||
700 | * one as possible. Using the most recent allocation as a | ||
701 | * start goal makes sense. | ||
702 | */ | ||
703 | if (resv->r_last_len) { | ||
704 | goal = resv->r_last_start + resv->r_last_len; | ||
705 | if (goal >= resmap->m_bitmap_len) | ||
706 | goal = 0; | ||
707 | } | ||
708 | |||
709 | __ocfs2_resv_find_window(resmap, resv, goal, wanted); | ||
710 | |||
711 | /* Search from last alloc didn't work, try once more from beginning. */ | ||
712 | if (ocfs2_resv_empty(resv) && goal != 0) | ||
713 | __ocfs2_resv_find_window(resmap, resv, 0, wanted); | ||
714 | |||
715 | if (ocfs2_resv_empty(resv)) { | ||
716 | /* | ||
717 | * Still empty? Pull oldest one off the LRU, remove it from | ||
718 | * tree, put this one in it's place. | ||
719 | */ | ||
720 | ocfs2_cannibalize_resv(resmap, resv, wanted); | ||
721 | } | ||
722 | |||
723 | BUG_ON(ocfs2_resv_empty(resv)); | ||
724 | } | ||
725 | |||
726 | int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, | ||
727 | struct ocfs2_alloc_reservation *resv, | ||
728 | int *cstart, int *clen) | ||
729 | { | ||
730 | unsigned int wanted = *clen; | ||
731 | |||
732 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) | ||
733 | return -ENOSPC; | ||
734 | |||
735 | spin_lock(&resv_lock); | ||
736 | |||
737 | /* | ||
738 | * We don't want to over-allocate for temporary | ||
739 | * windows. Otherwise, we run the risk of fragmenting the | ||
740 | * allocation space. | ||
741 | */ | ||
742 | wanted = ocfs2_resv_window_bits(resmap, resv); | ||
743 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
744 | wanted = *clen; | ||
745 | |||
746 | if (ocfs2_resv_empty(resv)) { | ||
747 | mlog(0, "empty reservation, find new window\n"); | ||
748 | |||
749 | /* | ||
750 | * Try to get a window here. If it works, we must fall | ||
751 | * through and test the bitmap . This avoids some | ||
752 | * ping-ponging of windows due to non-reserved space | ||
753 | * being allocation before we initialize a window for | ||
754 | * that inode. | ||
755 | */ | ||
756 | ocfs2_resv_find_window(resmap, resv, wanted); | ||
757 | } | ||
758 | |||
759 | BUG_ON(ocfs2_resv_empty(resv)); | ||
760 | |||
761 | *cstart = resv->r_start; | ||
762 | *clen = resv->r_len; | ||
763 | |||
764 | spin_unlock(&resv_lock); | ||
765 | return 0; | ||
766 | } | ||
767 | |||
768 | static void | ||
769 | ocfs2_adjust_resv_from_alloc(struct ocfs2_reservation_map *resmap, | ||
770 | struct ocfs2_alloc_reservation *resv, | ||
771 | unsigned int start, unsigned int end) | ||
772 | { | ||
773 | unsigned int lhs = 0, rhs = 0; | ||
774 | |||
775 | BUG_ON(start < resv->r_start); | ||
776 | |||
777 | /* | ||
778 | * Completely used? We can remove it then. | ||
779 | */ | ||
780 | if (ocfs2_resv_end(resv) <= end && resv->r_start >= start) { | ||
781 | __ocfs2_resv_discard(resmap, resv); | ||
782 | return; | ||
783 | } | ||
784 | |||
785 | if (end < ocfs2_resv_end(resv)) | ||
786 | rhs = end - ocfs2_resv_end(resv); | ||
787 | |||
788 | if (start > resv->r_start) | ||
789 | lhs = start - resv->r_start; | ||
790 | |||
791 | /* | ||
792 | * This should have been trapped above. At the very least, rhs | ||
793 | * should be non zero. | ||
794 | */ | ||
795 | BUG_ON(rhs == 0 && lhs == 0); | ||
796 | |||
797 | if (rhs >= lhs) { | ||
798 | unsigned int old_end = ocfs2_resv_end(resv); | ||
799 | |||
800 | resv->r_start = end + 1; | ||
801 | resv->r_len = old_end - resv->r_start + 1; | ||
802 | } else { | ||
803 | resv->r_len = start - resv->r_start; | ||
804 | } | ||
805 | } | ||
806 | |||
807 | void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap, | ||
808 | struct ocfs2_alloc_reservation *resv, | ||
809 | u32 cstart, u32 clen) | ||
810 | { | ||
811 | unsigned int cend = cstart + clen - 1; | ||
812 | |||
813 | if (resmap == NULL || ocfs2_resmap_disabled(resmap)) | ||
814 | return; | ||
815 | |||
816 | if (resv == NULL) | ||
817 | return; | ||
818 | |||
819 | spin_lock(&resv_lock); | ||
820 | |||
821 | mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u " | ||
822 | "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n", | ||
823 | cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv), | ||
824 | resv->r_len, resv->r_last_start, resv->r_last_len); | ||
825 | |||
826 | BUG_ON(cstart < resv->r_start); | ||
827 | BUG_ON(cstart > ocfs2_resv_end(resv)); | ||
828 | BUG_ON(cend > ocfs2_resv_end(resv)); | ||
829 | |||
830 | ocfs2_adjust_resv_from_alloc(resmap, resv, cstart, cend); | ||
831 | resv->r_last_start = cstart; | ||
832 | resv->r_last_len = clen; | ||
833 | |||
834 | /* | ||
835 | * May have been discarded above from | ||
836 | * ocfs2_adjust_resv_from_alloc(). | ||
837 | */ | ||
838 | if (!ocfs2_resv_empty(resv)) | ||
839 | ocfs2_resv_mark_lru(resmap, resv); | ||
840 | |||
841 | mlog(0, "Reservation now looks like: r_start: %u r_end: %u " | ||
842 | "r_len: %u r_last_start: %u r_last_len: %u\n", | ||
843 | resv->r_start, ocfs2_resv_end(resv), resv->r_len, | ||
844 | resv->r_last_start, resv->r_last_len); | ||
845 | |||
846 | ocfs2_check_resmap(resmap); | ||
847 | |||
848 | spin_unlock(&resv_lock); | ||
849 | } | ||
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h new file mode 100644 index 000000000000..8341cd0ef855 --- /dev/null +++ b/fs/ocfs2/reservations.h | |||
@@ -0,0 +1,154 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * reservations.h | ||
5 | * | ||
6 | * Allocation reservations function prototypes and structures. | ||
7 | * | ||
8 | * Copyright (C) 2010 Novell. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License version 2 as published by the Free Software Foundation. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * General Public License for more details. | ||
18 | */ | ||
19 | |||
20 | #ifndef OCFS2_RESERVATIONS_H | ||
21 | #define OCFS2_RESERVATIONS_H | ||
22 | |||
23 | #include <linux/rbtree.h> | ||
24 | |||
25 | #define OCFS2_DEFAULT_RESV_LEVEL 4 | ||
26 | #define OCFS2_MAX_RESV_LEVEL 9 | ||
27 | #define OCFS2_MIN_RESV_LEVEL 0 | ||
28 | |||
29 | struct ocfs2_alloc_reservation { /* One allocation window plus a record of the last allocation made from it */ | ||
30 | struct rb_node r_node; /* rbtree linkage; see OCFS2_RESV_FLAG_INUSE */ | ||
31 | |||
32 | unsigned int r_start; /* Beginning of current window */ | ||
33 | unsigned int r_len; /* Length of the window */ | ||
34 | |||
35 | unsigned int r_last_len; /* Length of most recent alloc */ | ||
36 | unsigned int r_last_start; /* Start of most recent alloc */ | ||
37 | struct list_head r_lru; /* Entry on the reservation map's m_lru list */ | ||
38 | |||
39 | unsigned int r_flags; /* OCFS2_RESV_FLAG_* bits */ | ||
40 | }; | ||
41 | |||
42 | #define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of an rbtree */ | ||
43 | #define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be | ||
44 | * destroyed immediately after use */ | ||
45 | |||
46 | struct ocfs2_reservation_map { /* Per-bitmap bookkeeping for allocation reservations */ | ||
47 | struct rb_root m_reservations; /* Root of the rbtree of reservations */ | ||
48 | char *m_disk_bitmap; /* NOTE(review): presumably the disk_bitmap handed to ocfs2_resmap_restart() - confirm */ | ||
49 | |||
50 | struct ocfs2_super *m_osb; | ||
51 | |||
52 | /* The following are not initialized to meaningful values until a disk | ||
53 | * bitmap is provided. */ | ||
54 | u32 m_bitmap_len; /* Number of valid | ||
55 | * bits available */ | ||
56 | |||
57 | struct list_head m_lru; /* LRU list of reservation | ||
58 | * structures, linked via r_lru. */ | ||
59 | |||
60 | }; | ||
61 | |||
62 | void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv); | ||
63 | |||
64 | #define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP) | ||
65 | void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv, | ||
66 | unsigned int flags); | ||
67 | |||
68 | /** | ||
69 | * ocfs2_resv_discard() - truncate a reservation | ||
70 | * @resmap: reservation map that @resv belongs to | ||
71 | * @resv: the reservation to truncate. | ||
72 | * | ||
73 | * After this function is called, the reservation will be empty, and | ||
74 | * unlinked from the rbtree. | ||
75 | */ | ||
76 | void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap, | ||
77 | struct ocfs2_alloc_reservation *resv); | ||
78 | |||
79 | |||
80 | /** | ||
81 | * ocfs2_resmap_init() - Initialize fields of a reservations bitmap | ||
82 | * @osb: struct ocfs2_super the reservation map is set up for | ||
83 | * @resmap: struct ocfs2_reservation_map to initialize | ||
86 | * | ||
87 | * Only possible return value other than '0' is -ENOMEM for failure to | ||
88 | * allocate the mirror bitmap. | ||
89 | */ | ||
90 | int ocfs2_resmap_init(struct ocfs2_super *osb, | ||
91 | struct ocfs2_reservation_map *resmap); | ||
92 | |||
93 | /** | ||
94 | * ocfs2_resmap_restart() - "restart" a reservation bitmap | ||
95 | * @resmap: reservations bitmap | ||
96 | * @clen: Number of valid bits in the bitmap | ||
97 | * @disk_bitmap: the disk bitmap this resmap should refer to. | ||
98 | * | ||
99 | * Re-initialize the parameters of a reservation bitmap. This is | ||
100 | * useful for local alloc window slides. | ||
101 | * | ||
102 | * This function will call ocfs2_trunc_resv against all existing | ||
103 | * reservations. A future version will recalculate existing | ||
104 | * reservations based on the new bitmap. | ||
105 | */ | ||
106 | void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap, | ||
107 | unsigned int clen, char *disk_bitmap); | ||
108 | |||
109 | /** | ||
110 | * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure | ||
111 | * @resmap: the struct ocfs2_reservation_map to uninitialize | ||
112 | */ | ||
113 | void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap); | ||
114 | |||
115 | /** | ||
116 | * ocfs2_resmap_resv_bits() - Return still-valid reservation bits | ||
117 | * @resmap: reservations bitmap | ||
118 | * @resv: reservation to base search from | ||
119 | * @cstart: start of proposed allocation | ||
120 | * @clen: length (in clusters) of proposed allocation | ||
121 | * | ||
122 | * Using the reservation data from resv, this function will compare | ||
123 | * resmap and resmap->m_disk_bitmap to determine what part (if any) of | ||
124 | * the reservation window is still clear to use. If resv is empty, | ||
125 | * this function will try to allocate a window for it. | ||
126 | * | ||
127 | * On success, zero is returned and the valid allocation area is set in cstart | ||
128 | * and clen. | ||
129 | * | ||
130 | * Returns -ENOSPC if reservations are disabled. | ||
131 | */ | ||
132 | int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, | ||
133 | struct ocfs2_alloc_reservation *resv, | ||
134 | int *cstart, int *clen); | ||
135 | |||
136 | /** | ||
137 | * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used. | ||
138 | * @resmap: reservations bitmap | ||
139 | * @resv: optional reservation to recalculate based on new bitmap | ||
140 | * @cstart: start of allocation in clusters | ||
141 | * @clen: length of allocation in clusters. | ||
142 | * | ||
143 | * Tell the reservation code that bits were used to fulfill allocation in | ||
144 | * resmap. The bits don't have to have been part of any existing | ||
145 | * reservation. But we must always call this function when bits are claimed. | ||
146 | * Internally, the reservations code will use this information to mark the | ||
147 | * reservations bitmap. If resv is passed, its next allocation window will be | ||
148 | * calculated. | ||
149 | */ | ||
150 | void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap, | ||
151 | struct ocfs2_alloc_reservation *resv, | ||
152 | u32 cstart, u32 clen); | ||
153 | |||
154 | #endif /* OCFS2_RESERVATIONS_H */ | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index e0f46df357e6..da2f29a55ec3 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -54,6 +54,8 @@ struct ocfs2_alloc_context { | |||
54 | u64 ac_last_group; | 54 | u64 ac_last_group; |
55 | u64 ac_max_block; /* Highest block number to allocate. 0 is | 55 | u64 ac_max_block; /* Highest block number to allocate. 0 is |
56 | is the same as ~0 - unlimited */ | 56 | is the same as ~0 - unlimited */ |
57 | |||
58 | struct ocfs2_alloc_reservation *ac_resv; | ||
57 | }; | 59 | }; |
58 | 60 | ||
59 | void ocfs2_init_steal_slots(struct ocfs2_super *osb); | 61 | void ocfs2_init_steal_slots(struct ocfs2_super *osb); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index dee03197a494..cfe672e72b27 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -95,6 +95,7 @@ struct mount_options | |||
95 | unsigned int atime_quantum; | 95 | unsigned int atime_quantum; |
96 | signed short slot; | 96 | signed short slot; |
97 | unsigned int localalloc_opt; | 97 | unsigned int localalloc_opt; |
98 | unsigned int resv_level; | ||
98 | char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 99 | char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
99 | }; | 100 | }; |
100 | 101 | ||
@@ -176,6 +177,7 @@ enum { | |||
176 | Opt_noacl, | 177 | Opt_noacl, |
177 | Opt_usrquota, | 178 | Opt_usrquota, |
178 | Opt_grpquota, | 179 | Opt_grpquota, |
180 | Opt_resv_level, | ||
179 | Opt_err, | 181 | Opt_err, |
180 | }; | 182 | }; |
181 | 183 | ||
@@ -202,6 +204,7 @@ static const match_table_t tokens = { | |||
202 | {Opt_noacl, "noacl"}, | 204 | {Opt_noacl, "noacl"}, |
203 | {Opt_usrquota, "usrquota"}, | 205 | {Opt_usrquota, "usrquota"}, |
204 | {Opt_grpquota, "grpquota"}, | 206 | {Opt_grpquota, "grpquota"}, |
207 | {Opt_resv_level, "resv_level=%u"}, | ||
205 | {Opt_err, NULL} | 208 | {Opt_err, NULL} |
206 | }; | 209 | }; |
207 | 210 | ||
@@ -1030,6 +1033,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
1030 | osb->osb_commit_interval = parsed_options.commit_interval; | 1033 | osb->osb_commit_interval = parsed_options.commit_interval; |
1031 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); | 1034 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); |
1032 | osb->local_alloc_bits = osb->local_alloc_default_bits; | 1035 | osb->local_alloc_bits = osb->local_alloc_default_bits; |
1036 | osb->osb_resv_level = parsed_options.resv_level; | ||
1033 | 1037 | ||
1034 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | 1038 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); |
1035 | if (status) | 1039 | if (status) |
@@ -1290,6 +1294,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1290 | mopt->slot = OCFS2_INVALID_SLOT; | 1294 | mopt->slot = OCFS2_INVALID_SLOT; |
1291 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 1295 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; |
1292 | mopt->cluster_stack[0] = '\0'; | 1296 | mopt->cluster_stack[0] = '\0'; |
1297 | mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL; | ||
1293 | 1298 | ||
1294 | if (!options) { | 1299 | if (!options) { |
1295 | status = 1; | 1300 | status = 1; |
@@ -1433,6 +1438,17 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1433 | mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; | 1438 | mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; |
1434 | mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; | 1439 | mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; |
1435 | break; | 1440 | break; |
1441 | case Opt_resv_level: | ||
1442 | if (is_remount) | ||
1443 | break; | ||
1444 | if (match_int(&args[0], &option)) { | ||
1445 | status = 0; | ||
1446 | goto bail; | ||
1447 | } | ||
1448 | if (option >= OCFS2_MIN_RESV_LEVEL && | ||
1449 | option < OCFS2_MAX_RESV_LEVEL) | ||
1450 | mopt->resv_level = option; | ||
1451 | break; | ||
1436 | default: | 1452 | default: |
1437 | mlog(ML_ERROR, | 1453 | mlog(ML_ERROR, |
1438 | "Unrecognized mount option \"%s\" " | 1454 | "Unrecognized mount option \"%s\" " |
@@ -1514,6 +1530,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1514 | else | 1530 | else |
1515 | seq_printf(s, ",noacl"); | 1531 | seq_printf(s, ",noacl"); |
1516 | 1532 | ||
1533 | if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL) | ||
1534 | seq_printf(s, ",resv_level=%d", osb->osb_resv_level); | ||
1535 | |||
1517 | return 0; | 1536 | return 0; |
1518 | } | 1537 | } |
1519 | 1538 | ||
@@ -2042,6 +2061,12 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2042 | 2061 | ||
2043 | init_waitqueue_head(&osb->osb_mount_event); | 2062 | init_waitqueue_head(&osb->osb_mount_event); |
2044 | 2063 | ||
2064 | status = ocfs2_resmap_init(osb, &osb->osb_la_resmap); | ||
2065 | if (status) { | ||
2066 | mlog_errno(status); | ||
2067 | goto bail; | ||
2068 | } | ||
2069 | |||
2045 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); | 2070 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); |
2046 | if (!osb->vol_label) { | 2071 | if (!osb->vol_label) { |
2047 | mlog(ML_ERROR, "unable to alloc vol label\n"); | 2072 | mlog(ML_ERROR, "unable to alloc vol label\n"); |