aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorMark Fasheh <mfasheh@suse.com>2009-12-07 16:10:48 -0500
committerJoel Becker <joel.becker@oracle.com>2010-05-05 21:17:30 -0400
commitd02f00cc057809d96c044cc72d5b9809d59f7d49 (patch)
tree44a6d81ecf9fb4b5aa91c0501a8da2ee36890a38 /fs/ocfs2
parentec20cec7a351584ca6c70ead012e73d61f9a8e04 (diff)
ocfs2: allocation reservations
This patch improves Ocfs2 allocation policy by allowing an inode to reserve a portion of the local alloc bitmap for itself. The reserved portion (allocation window) is advisory in that other allocation windows might steal it if the local alloc bitmap becomes full. Otherwise, the reservations are honored and guaranteed to be free. When the local alloc window is moved to a different portion of the bitmap, existing reservations are discarded. Reservation windows are represented internally by a red-black tree. Within that tree, each node represents the reservation window of one inode. An LRU of active reservations is also maintained. When new data is written, we allocate it from the inode's window. When all bits in a window are exhausted, we allocate a new one as close to the previous one as possible. Should we not find free space, an existing reservation is pulled off the LRU and cannibalized. Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/localalloc.c64
-rw-r--r--fs/ocfs2/ocfs2.h5
-rw-r--r--fs/ocfs2/reservations.c849
-rw-r--r--fs/ocfs2/reservations.h154
-rw-r--r--fs/ocfs2/suballoc.h2
-rw-r--r--fs/ocfs2/super.c25
9 files changed, 1091 insertions, 11 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 791c0886c060..07d9fd854350 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -29,6 +29,7 @@ ocfs2-objs := \
29 mmap.o \ 29 mmap.o \
30 namei.o \ 30 namei.o \
31 refcounttree.o \ 31 refcounttree.o \
32 reservations.o \
32 resize.o \ 33 resize.o \
33 slot_map.o \ 34 slot_map.o \
34 suballoc.o \ 35 suballoc.o \
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 3bb928a2bf7d..c7fba396392d 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
116 define_mask(ERROR), 116 define_mask(ERROR),
117 define_mask(NOTICE), 117 define_mask(NOTICE),
118 define_mask(KTHREAD), 118 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
119}; 120};
120 121
121static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 3dfddbec32f2..fd96e2a2fa56 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,6 +119,7 @@
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
122 123
123#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 124#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
124#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 125#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 7e7dd65d97ef..7fe8149a0002 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 struct ocfs2_dinode *alloc, 54 struct ocfs2_dinode *alloc,
55 u32 numbits); 55 u32 *numbits,
56 struct ocfs2_alloc_reservation *resv);
56 57
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 59
@@ -262,6 +263,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
262 263
263 osb->local_alloc_state = OCFS2_LA_DISABLED; 264 osb->local_alloc_state = OCFS2_LA_DISABLED;
264 265
266 ocfs2_resmap_uninit(&osb->osb_la_resmap);
267
265 main_bm_inode = ocfs2_get_system_file_inode(osb, 268 main_bm_inode = ocfs2_get_system_file_inode(osb,
266 GLOBAL_BITMAP_SYSTEM_INODE, 269 GLOBAL_BITMAP_SYSTEM_INODE,
267 OCFS2_INVALID_SLOT); 270 OCFS2_INVALID_SLOT);
@@ -493,7 +496,7 @@ static int ocfs2_local_alloc_in_range(struct inode *inode,
493 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 496 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
494 la = OCFS2_LOCAL_ALLOC(alloc); 497 la = OCFS2_LOCAL_ALLOC(alloc);
495 498
496 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 499 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted, NULL);
497 if (start == -1) { 500 if (start == -1) {
498 mlog_errno(-ENOSPC); 501 mlog_errno(-ENOSPC);
499 return 0; 502 return 0;
@@ -659,7 +662,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
659 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 662 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
660 la = OCFS2_LOCAL_ALLOC(alloc); 663 la = OCFS2_LOCAL_ALLOC(alloc);
661 664
662 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 665 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
666 ac->ac_resv);
663 if (start == -1) { 667 if (start == -1) {
664 /* TODO: Shouldn't we just BUG here? */ 668 /* TODO: Shouldn't we just BUG here? */
665 status = -ENOSPC; 669 status = -ENOSPC;
@@ -669,8 +673,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
669 673
670 bitmap = la->la_bitmap; 674 bitmap = la->la_bitmap;
671 *bit_off = le32_to_cpu(la->la_bm_off) + start; 675 *bit_off = le32_to_cpu(la->la_bm_off) + start;
672 /* local alloc is always contiguous by nature -- we never
673 * delete bits from it! */
674 *num_bits = bits_wanted; 676 *num_bits = bits_wanted;
675 677
676 status = ocfs2_journal_access_di(handle, 678 status = ocfs2_journal_access_di(handle,
@@ -682,6 +684,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
682 goto bail; 684 goto bail;
683 } 685 }
684 686
687 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
688 bits_wanted);
689
685 while(bits_wanted--) 690 while(bits_wanted--)
686 ocfs2_set_bit(start++, bitmap); 691 ocfs2_set_bit(start++, bitmap);
687 692
@@ -711,13 +716,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
711} 716}
712 717
713static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 718static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
714 struct ocfs2_dinode *alloc, 719 struct ocfs2_dinode *alloc,
715 u32 numbits) 720 u32 *numbits,
721 struct ocfs2_alloc_reservation *resv)
716{ 722{
717 int numfound, bitoff, left, startoff, lastzero; 723 int numfound, bitoff, left, startoff, lastzero;
724 int local_resv = 0;
725 struct ocfs2_alloc_reservation r;
718 void *bitmap = NULL; 726 void *bitmap = NULL;
727 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
719 728
720 mlog_entry("(numbits wanted = %u)\n", numbits); 729 mlog_entry("(numbits wanted = %u)\n", *numbits);
721 730
722 if (!alloc->id1.bitmap1.i_total) { 731 if (!alloc->id1.bitmap1.i_total) {
723 mlog(0, "No bits in my window!\n"); 732 mlog(0, "No bits in my window!\n");
@@ -725,6 +734,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725 goto bail; 734 goto bail;
726 } 735 }
727 736
737 if (!resv) {
738 local_resv = 1;
739 ocfs2_resv_init_once(&r);
740 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
741 resv = &r;
742 }
743
744 numfound = *numbits;
745 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
746 if (numfound < *numbits)
747 *numbits = numfound;
748 goto bail;
749 }
750
751 /*
752 * Code error. While reservations are enabled, local
753 * allocation should _always_ go through them.
754 */
755 BUG_ON(osb->osb_resv_level != 0);
756
757 /*
758 * Reservations are disabled. Handle this the old way.
759 */
760
728 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 761 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
729 762
730 numfound = bitoff = startoff = 0; 763 numfound = bitoff = startoff = 0;
@@ -750,7 +783,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
750 startoff = bitoff+1; 783 startoff = bitoff+1;
751 } 784 }
752 /* we got everything we needed */ 785 /* we got everything we needed */
753 if (numfound == numbits) { 786 if (numfound == *numbits) {
754 /* mlog(0, "Found it all!\n"); */ 787 /* mlog(0, "Found it all!\n"); */
755 break; 788 break;
756 } 789 }
@@ -759,12 +792,18 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
759 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 792 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
760 numfound); 793 numfound);
761 794
762 if (numfound == numbits) 795 if (numfound == *numbits) {
763 bitoff = startoff - numfound; 796 bitoff = startoff - numfound;
764 else 797 *numbits = numfound;
798 } else {
799 numfound = 0;
765 bitoff = -1; 800 bitoff = -1;
801 }
766 802
767bail: 803bail:
804 if (local_resv)
805 ocfs2_resv_discard(resmap, resv);
806
768 mlog_exit(bitoff); 807 mlog_exit(bitoff);
769 return bitoff; 808 return bitoff;
770} 809}
@@ -1087,6 +1126,9 @@ retry_enospc:
1087 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1126 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1088 le16_to_cpu(la->la_size)); 1127 le16_to_cpu(la->la_size));
1089 1128
1129 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1130 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1131
1090 mlog(0, "New window allocated:\n"); 1132 mlog(0, "New window allocated:\n");
1091 mlog(0, "window la_bm_off = %u\n", 1133 mlog(0, "window la_bm_off = %u\n",
1092 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1134 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index adf5e2ebc2c4..9552560df6cd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -47,6 +47,7 @@
47/* For struct ocfs2_blockcheck_stats */ 47/* For struct ocfs2_blockcheck_stats */
48#include "blockcheck.h" 48#include "blockcheck.h"
49 49
50#include "reservations.h"
50 51
51/* Caching of metadata buffers */ 52/* Caching of metadata buffers */
52 53
@@ -349,6 +350,10 @@ struct ocfs2_super
349 350
350 u64 la_last_gd; 351 u64 la_last_gd;
351 352
353 struct ocfs2_reservation_map osb_la_resmap;
354
355 unsigned int osb_resv_level;
356
352 /* Next three fields are for local node slot recovery during 357 /* Next three fields are for local node slot recovery during
353 * mount. */ 358 * mount. */
354 int dirty; 359 int dirty;
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
new file mode 100644
index 000000000000..79642d608210
--- /dev/null
+++ b/fs/ocfs2/reservations.c
@@ -0,0 +1,849 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.c
5 *
6 * Allocation reservations implementation
7 *
8 * Some code borrowed from fs/ext3/balloc.c and is:
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * The rest is copyright (C) 2010 Novell. All rights reserved.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public
19 * License version 2 as published by the Free Software Foundation.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31#include <linux/bitops.h>
32#include <linux/list.h>
33
34#define MLOG_MASK_PREFIX ML_RESERVATIONS
35#include <cluster/masklog.h>
36
37#include "ocfs2.h"
38
39#ifdef CONFIG_OCFS2_DEBUG_FS
40#define OCFS2_CHECK_RESERVATIONS
41#endif
42
43DEFINE_SPINLOCK(resv_lock);
44
45#define OCFS2_MIN_RESV_WINDOW_BITS 8
46#define OCFS2_MAX_RESV_WINDOW_BITS 1024
47
48static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap,
49 struct ocfs2_alloc_reservation *resv)
50{
51 struct ocfs2_super *osb = resmap->m_osb;
52 unsigned int bits;
53
54 /* 8, 16, 32, 64, 128, 256, 512, 1024 */
55 bits = 4 << osb->osb_resv_level;
56
57 return bits;
58}
59
60static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
61{
62 if (resv->r_len)
63 return resv->r_start + resv->r_len - 1;
64 return resv->r_start;
65}
66
67static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
68{
69 return !!(resv->r_len == 0);
70}
71
72static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
73{
74 if (resmap->m_osb->osb_resv_level == 0)
75 return 1;
76 return 0;
77}
78
79static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
80{
81 struct ocfs2_super *osb = resmap->m_osb;
82 struct rb_node *node;
83 struct ocfs2_alloc_reservation *resv;
84 int i = 0;
85
86 mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
87 osb->dev_str, resmap->m_bitmap_len);
88
89 node = rb_first(&resmap->m_reservations);
90 while (node) {
91 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
92
93 mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
94 "\tlast_len: %u\n", resv->r_start,
95 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
96 resv->r_last_len);
97
98 node = rb_next(node);
99 i++;
100 }
101
102 mlog(ML_NOTICE, "%d reservations found. LRU follows\n", i);
103
104 i = 0;
105 list_for_each_entry(resv, &resmap->m_lru, r_lru) {
106 mlog(ML_NOTICE, "LRU(%d) start: %u\tend: %u\tlen: %u\t"
107 "last_start: %u\tlast_len: %u\n", i, resv->r_start,
108 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
109 resv->r_last_len);
110
111 i++;
112 }
113}
114
115#ifdef OCFS2_CHECK_RESERVATIONS
116static int ocfs2_validate_resmap_bits(struct ocfs2_reservation_map *resmap,
117 int i,
118 struct ocfs2_alloc_reservation *resv)
119{
120 char *disk_bitmap = resmap->m_disk_bitmap;
121 unsigned int start = resv->r_start;
122 unsigned int end = ocfs2_resv_end(resv);
123
124 while (start <= end) {
125 if (ocfs2_test_bit(start, disk_bitmap)) {
126 mlog(ML_ERROR,
127 "reservation %d covers an allocated area "
128 "starting at bit %u!\n", i, start);
129 return 1;
130 }
131
132 start++;
133 }
134 return 0;
135}
136
137static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
138{
139 unsigned int off = 0;
140 int i = 0;
141 struct rb_node *node;
142 struct ocfs2_alloc_reservation *resv;
143
144 node = rb_first(&resmap->m_reservations);
145 while (node) {
146 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
147
148 if (i > 0 && resv->r_start <= off) {
149 mlog(ML_ERROR, "reservation %d has bad start off!\n",
150 i);
151 goto bad;
152 }
153
154 if (resv->r_len == 0) {
155 mlog(ML_ERROR, "reservation %d has no length!\n",
156 i);
157 goto bad;
158 }
159
160 if (resv->r_start > ocfs2_resv_end(resv)) {
161 mlog(ML_ERROR, "reservation %d has invalid range!\n",
162 i);
163 goto bad;
164 }
165
166 if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len) {
167 mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
168 i);
169 goto bad;
170 }
171
172 if (ocfs2_validate_resmap_bits(resmap, i, resv))
173 goto bad;
174
175 off = ocfs2_resv_end(resv);
176 node = rb_next(node);
177
178 i++;
179 }
180 return;
181
182bad:
183 ocfs2_dump_resv(resmap);
184 BUG();
185}
186#else
/* No-op variant used when OCFS2_CHECK_RESERVATIONS is not defined. */
static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
{
}
191#endif
192
193void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
194{
195 memset(resv, 0, sizeof(*resv));
196 INIT_LIST_HEAD(&resv->r_lru);
197}
198
199void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
200 unsigned int flags)
201{
202 BUG_ON(flags & ~OCFS2_RESV_TYPES);
203
204 resv->r_flags |= flags;
205}
206
207int ocfs2_resmap_init(struct ocfs2_super *osb,
208 struct ocfs2_reservation_map *resmap)
209{
210 memset(resmap, 0, sizeof(*resmap));
211
212 resmap->m_osb = osb;
213 resmap->m_reservations = RB_ROOT;
214 /* m_bitmap_len is initialized to zero by the above memset. */
215 INIT_LIST_HEAD(&resmap->m_lru);
216
217 return 0;
218}
219
220static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
221 struct ocfs2_alloc_reservation *resv)
222{
223 assert_spin_locked(&resv_lock);
224
225 if (!list_empty(&resv->r_lru))
226 list_del_init(&resv->r_lru);
227
228 list_add_tail(&resv->r_lru, &resmap->m_lru);
229}
230
231static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
232{
233 resv->r_len = 0;
234 resv->r_start = 0;
235}
236
237static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap,
238 struct ocfs2_alloc_reservation *resv)
239{
240 if (resv->r_flags & OCFS2_RESV_FLAG_INUSE) {
241 list_del_init(&resv->r_lru);
242 rb_erase(&resv->r_node, &resmap->m_reservations);
243 resv->r_flags &= ~OCFS2_RESV_FLAG_INUSE;
244 }
245}
246
247static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
248 struct ocfs2_alloc_reservation *resv)
249{
250 assert_spin_locked(&resv_lock);
251
252 __ocfs2_resv_trunc(resv);
253 /*
254 * last_len and last_start no longer make sense if
255 * we're changing the range of our allocations.
256 */
257 resv->r_last_len = resv->r_last_start = 0;
258
259 ocfs2_resv_remove(resmap, resv);
260}
261
262/* does nothing if 'resv' is null */
263void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
264 struct ocfs2_alloc_reservation *resv)
265{
266 if (resv) {
267 spin_lock(&resv_lock);
268 __ocfs2_resv_discard(resmap, resv);
269 spin_unlock(&resv_lock);
270 }
271}
272
273static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
274{
275 struct rb_node *node;
276 struct ocfs2_alloc_reservation *resv;
277
278 assert_spin_locked(&resv_lock);
279
280 while ((node = rb_last(&resmap->m_reservations)) != NULL) {
281 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
282
283 __ocfs2_resv_discard(resmap, resv);
284 }
285}
286
287void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
288 unsigned int clen, char *disk_bitmap)
289{
290 if (ocfs2_resmap_disabled(resmap))
291 return;
292
293 spin_lock(&resv_lock);
294
295 ocfs2_resmap_clear_all_resv(resmap);
296 resmap->m_bitmap_len = clen;
297 resmap->m_disk_bitmap = disk_bitmap;
298
299 spin_unlock(&resv_lock);
300}
301
/*
 * Counterpart of ocfs2_resmap_init(). There is currently nothing to
 * tear down; the function exists to keep the API symmetric.
 */
void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
{
}
306
307static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
308 struct ocfs2_alloc_reservation *new)
309{
310 struct rb_root *root = &resmap->m_reservations;
311 struct rb_node *parent = NULL;
312 struct rb_node **p = &root->rb_node;
313 struct ocfs2_alloc_reservation *tmp;
314
315 assert_spin_locked(&resv_lock);
316
317 mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
318 new->r_len);
319
320 while (*p) {
321 parent = *p;
322
323 tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
324
325 if (new->r_start < tmp->r_start) {
326 p = &(*p)->rb_left;
327
328 /*
329 * This is a good place to check for
330 * overlapping reservations.
331 */
332 BUG_ON(ocfs2_resv_end(new) >= tmp->r_start);
333 } else if (new->r_start > ocfs2_resv_end(tmp)) {
334 p = &(*p)->rb_right;
335 } else {
336 /* This should never happen! */
337 mlog(ML_ERROR, "Duplicate reservation window!\n");
338 BUG();
339 }
340 }
341
342 rb_link_node(&new->r_node, parent, p);
343 rb_insert_color(&new->r_node, root);
344 new->r_flags |= OCFS2_RESV_FLAG_INUSE;
345
346 ocfs2_resv_mark_lru(resmap, new);
347
348 ocfs2_check_resmap(resmap);
349}
350
351/**
352 * ocfs2_find_resv_lhs() - find the window which contains goal
353 * @resmap: reservation map to search
354 * @goal: which bit to search for
355 *
356 * If a window containing that goal is not found, we return the window
357 * which comes before goal. Returns NULL on empty rbtree or no window
358 * before goal.
359 */
360static struct ocfs2_alloc_reservation *
361ocfs2_find_resv_lhs(struct ocfs2_reservation_map *resmap, unsigned int goal)
362{
363 struct ocfs2_alloc_reservation *resv = NULL;
364 struct ocfs2_alloc_reservation *prev_resv = NULL;
365 struct rb_node *node = resmap->m_reservations.rb_node;
366 struct rb_node *prev = NULL;
367
368 assert_spin_locked(&resv_lock);
369
370 if (!node)
371 return NULL;
372
373 node = rb_first(&resmap->m_reservations);
374 while (node) {
375 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
376
377 if (resv->r_start <= goal && ocfs2_resv_end(resv) >= goal)
378 break;
379
380 /* Check if we overshot the reservation just before goal? */
381 if (resv->r_start > goal) {
382 resv = prev_resv;
383 break;
384 }
385
386 prev_resv = resv;
387 prev = node;
388 node = rb_next(node);
389 }
390
391 return resv;
392}
393
394/*
395 * We are given a range within the bitmap, which corresponds to a gap
396 * inside the reservations tree (search_start, search_len). The range
397 * can be anything from the whole bitmap, to a gap between
398 * reservations.
399 *
400 * The start value of *rstart is insignificant.
401 *
402 * This function searches the bitmap range starting at search_start
403 * with length csearch_len for a set of contiguous free bits. We try
404 * to find up to 'wanted' bits, but can sometimes return less.
405 *
406 * Returns the length of allocation, 0 if no free bits are found.
407 *
408 * *cstart and *clen will also be populated with the result.
409 */
410static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
411 unsigned int wanted,
412 unsigned int search_start,
413 unsigned int search_len,
414 unsigned int *rstart,
415 unsigned int *rlen)
416{
417 void *bitmap = resmap->m_disk_bitmap;
418 unsigned int best_start, best_len = 0;
419 int offset, start, found;
420
421 mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n",
422 wanted, search_start, search_len, resmap->m_bitmap_len);
423
424 found = best_start = best_len = 0;
425
426 start = search_start;
427 while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
428 start)) != -1) {
429 /* Search reached end of the region */
430 if (offset >= (search_start + search_len))
431 break;
432
433 if (offset == start) {
434 /* we found a zero */
435 found++;
436 /* move start to the next bit to test */
437 start++;
438 } else {
439 /* got a zero after some ones */
440 found = 1;
441 start = offset + 1;
442 }
443 if (found > best_len) {
444 best_len = found;
445 best_start = start - found;
446 }
447
448 if (found >= wanted)
449 break;
450 }
451
452 if (best_len == 0)
453 return 0;
454
455 if (best_len >= wanted)
456 best_len = wanted;
457
458 *rlen = best_len;
459 *rstart = best_start;
460
461 mlog(0, "Found start: %u len: %u\n", best_start, best_len);
462
463 return *rlen;
464}
465
466static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
467 struct ocfs2_alloc_reservation *resv,
468 unsigned int goal, unsigned int wanted)
469{
470 struct rb_root *root = &resmap->m_reservations;
471 unsigned int gap_start, gap_end, gap_len;
472 struct ocfs2_alloc_reservation *prev_resv, *next_resv;
473 struct rb_node *prev, *next;
474 unsigned int cstart, clen;
475 unsigned int best_start = 0, best_len = 0;
476
477 /*
478 * Nasty cases to consider:
479 *
480 * - rbtree is empty
481 * - our window should be first in all reservations
482 * - our window should be last in all reservations
483 * - need to make sure we don't go past end of bitmap
484 */
485
486 mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n",
487 resv->r_start, ocfs2_resv_end(resv), goal, wanted);
488
489 assert_spin_locked(&resv_lock);
490
491 if (RB_EMPTY_ROOT(root)) {
492 /*
493 * Easiest case - empty tree. We can just take
494 * whatever window of free bits we want.
495 */
496
497 mlog(0, "Empty root\n");
498
499 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
500 resmap->m_bitmap_len - goal,
501 &cstart, &clen);
502
503 /*
504 * This should never happen - the local alloc window
505 * will always have free bits when we're called.
506 */
507 BUG_ON(goal == 0 && clen == 0);
508
509 if (clen == 0)
510 return;
511
512 resv->r_start = cstart;
513 resv->r_len = clen;
514
515 ocfs2_resv_insert(resmap, resv);
516 return;
517 }
518
519 prev_resv = ocfs2_find_resv_lhs(resmap, goal);
520
521 if (prev_resv == NULL) {
522 mlog(0, "Goal on LHS of leftmost window\n");
523
524 /*
525 * A NULL here means that the search code couldn't
526 * find a window that starts before goal.
527 *
528 * However, we can take the first window after goal,
529 * which is also by definition, the leftmost window in
530 * the entire tree. If we can find free bits in the
531 * gap between goal and the LHS window, then the
532 * reservation can safely be placed there.
533 *
534 * Otherwise we fall back to a linear search, checking
535 * the gaps in between windows for a place to
536 * allocate.
537 */
538
539 next = rb_first(root);
540 next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
541 r_node);
542
543 /*
544 * The search should never return such a window. (see
545 * comment above
546 */
547 if (next_resv->r_start <= goal) {
548 mlog(ML_ERROR, "goal: %u next_resv: start %u len %u\n",
549 goal, next_resv->r_start, next_resv->r_len);
550 ocfs2_dump_resv(resmap);
551 BUG();
552 }
553
554 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
555 next_resv->r_start - goal,
556 &cstart, &clen);
557 if (clen) {
558 best_len = clen;
559 best_start = cstart;
560 if (best_len == wanted)
561 goto out_insert;
562 }
563
564 prev_resv = next_resv;
565 next_resv = NULL;
566 }
567
568 prev = &prev_resv->r_node;
569
570 /* Now we do a linear search for a window, starting at 'prev_rsv' */
571 while (1) {
572 next = rb_next(prev);
573 if (next) {
574 mlog(0, "One more resv found in linear search\n");
575 next_resv = rb_entry(next,
576 struct ocfs2_alloc_reservation,
577 r_node);
578
579 gap_start = ocfs2_resv_end(prev_resv) + 1;
580 gap_end = next_resv->r_start - 1;
581 gap_len = gap_end - gap_start + 1;
582 } else {
583 mlog(0, "No next node\n");
584 /*
585 * We're at the rightmost edge of the
586 * tree. See if a reservation between this
587 * window and the end of the bitmap will work.
588 */
589 gap_start = ocfs2_resv_end(prev_resv) + 1;
590 gap_len = resmap->m_bitmap_len - gap_start;
591 gap_end = resmap->m_bitmap_len - 1;
592 }
593
594 /*
595 * No need to check this gap if we have already found
596 * a larger region of free bits.
597 */
598 if (gap_len <= best_len)
599 goto next_resv;
600
601 clen = ocfs2_resmap_find_free_bits(resmap, wanted, gap_start,
602 gap_len, &cstart, &clen);
603 if (clen == wanted) {
604 best_len = clen;
605 best_start = cstart;
606 goto out_insert;
607 } else if (clen > best_len) {
608 best_len = clen;
609 best_start = cstart;
610 }
611
612next_resv:
613 if (!next)
614 break;
615
616 prev = next;
617 prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
618 r_node);
619 }
620
621out_insert:
622 if (best_len) {
623 resv->r_start = best_start;
624 resv->r_len = best_len;
625 ocfs2_resv_insert(resmap, resv);
626 }
627}
628
629static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
630 struct ocfs2_alloc_reservation *resv,
631 unsigned int wanted)
632{
633 struct ocfs2_alloc_reservation *lru_resv;
634 int tmpwindow = !!(resv->r_flags & OCFS2_RESV_FLAG_TMP);
635 unsigned int min_bits;
636
637 if (!tmpwindow)
638 min_bits = ocfs2_resv_window_bits(resmap, resv) >> 1;
639 else
640 min_bits = wanted; /* We at know the temp window will use all
641 * of these bits */
642
643 /*
644 * Take the first reservation off the LRU as our 'target'. We
645 * don't try to be smart about it. There might be a case for
646 * searching based on size but I don't have enough data to be
647 * sure. --Mark (3/16/2010)
648 */
649 lru_resv = list_first_entry(&resmap->m_lru,
650 struct ocfs2_alloc_reservation, r_lru);
651
652 mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start,
653 lru_resv->r_len, ocfs2_resv_end(lru_resv));
654
655 /*
656 * Cannibalize (some or all) of the target reservation and
657 * feed it to the current window.
658 */
659 if (lru_resv->r_len <= min_bits) {
660 /*
661 * Discard completely if size is less than or equal to a
662 * reasonable threshold - 50% of window bits for non temporary
663 * windows.
664 */
665 resv->r_start = lru_resv->r_start;
666 resv->r_len = lru_resv->r_len;
667
668 __ocfs2_resv_discard(resmap, lru_resv);
669 } else {
670 unsigned int shrink;
671 if (tmpwindow)
672 shrink = min_bits;
673 else
674 shrink = lru_resv->r_len / 2;
675
676 lru_resv->r_len -= shrink;
677
678 resv->r_start = ocfs2_resv_end(lru_resv) + 1;
679 resv->r_len = shrink;
680 }
681
682 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
683 "r_len: %u r_last_start: %u r_last_len: %u\n",
684 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
685 resv->r_last_start, resv->r_last_len);
686
687 ocfs2_resv_insert(resmap, resv);
688}
689
690static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
691 struct ocfs2_alloc_reservation *resv,
692 unsigned int wanted)
693{
694 unsigned int goal = 0;
695
696 BUG_ON(!ocfs2_resv_empty(resv));
697
698 /*
699 * Begin by trying to get a window as close to the previous
700 * one as possible. Using the most recent allocation as a
701 * start goal makes sense.
702 */
703 if (resv->r_last_len) {
704 goal = resv->r_last_start + resv->r_last_len;
705 if (goal >= resmap->m_bitmap_len)
706 goal = 0;
707 }
708
709 __ocfs2_resv_find_window(resmap, resv, goal, wanted);
710
711 /* Search from last alloc didn't work, try once more from beginning. */
712 if (ocfs2_resv_empty(resv) && goal != 0)
713 __ocfs2_resv_find_window(resmap, resv, 0, wanted);
714
715 if (ocfs2_resv_empty(resv)) {
716 /*
717 * Still empty? Pull oldest one off the LRU, remove it from
718 * tree, put this one in it's place.
719 */
720 ocfs2_cannibalize_resv(resmap, resv, wanted);
721 }
722
723 BUG_ON(ocfs2_resv_empty(resv));
724}
725
726int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
727 struct ocfs2_alloc_reservation *resv,
728 int *cstart, int *clen)
729{
730 unsigned int wanted = *clen;
731
732 if (resv == NULL || ocfs2_resmap_disabled(resmap))
733 return -ENOSPC;
734
735 spin_lock(&resv_lock);
736
737 /*
738 * We don't want to over-allocate for temporary
739 * windows. Otherwise, we run the risk of fragmenting the
740 * allocation space.
741 */
742 wanted = ocfs2_resv_window_bits(resmap, resv);
743 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
744 wanted = *clen;
745
746 if (ocfs2_resv_empty(resv)) {
747 mlog(0, "empty reservation, find new window\n");
748
749 /*
750 * Try to get a window here. If it works, we must fall
751 * through and test the bitmap . This avoids some
752 * ping-ponging of windows due to non-reserved space
753 * being allocation before we initialize a window for
754 * that inode.
755 */
756 ocfs2_resv_find_window(resmap, resv, wanted);
757 }
758
759 BUG_ON(ocfs2_resv_empty(resv));
760
761 *cstart = resv->r_start;
762 *clen = resv->r_len;
763
764 spin_unlock(&resv_lock);
765 return 0;
766}
767
768static void
769 ocfs2_adjust_resv_from_alloc(struct ocfs2_reservation_map *resmap,
770 struct ocfs2_alloc_reservation *resv,
771 unsigned int start, unsigned int end)
772{
773 unsigned int lhs = 0, rhs = 0;
774
775 BUG_ON(start < resv->r_start);
776
777 /*
778 * Completely used? We can remove it then.
779 */
780 if (ocfs2_resv_end(resv) <= end && resv->r_start >= start) {
781 __ocfs2_resv_discard(resmap, resv);
782 return;
783 }
784
785 if (end < ocfs2_resv_end(resv))
786 rhs = end - ocfs2_resv_end(resv);
787
788 if (start > resv->r_start)
789 lhs = start - resv->r_start;
790
791 /*
792 * This should have been trapped above. At the very least, rhs
793 * should be non zero.
794 */
795 BUG_ON(rhs == 0 && lhs == 0);
796
797 if (rhs >= lhs) {
798 unsigned int old_end = ocfs2_resv_end(resv);
799
800 resv->r_start = end + 1;
801 resv->r_len = old_end - resv->r_start + 1;
802 } else {
803 resv->r_len = start - resv->r_start;
804 }
805}
806
807void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
808 struct ocfs2_alloc_reservation *resv,
809 u32 cstart, u32 clen)
810{
811 unsigned int cend = cstart + clen - 1;
812
813 if (resmap == NULL || ocfs2_resmap_disabled(resmap))
814 return;
815
816 if (resv == NULL)
817 return;
818
819 spin_lock(&resv_lock);
820
821 mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
822 "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
823 cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
824 resv->r_len, resv->r_last_start, resv->r_last_len);
825
826 BUG_ON(cstart < resv->r_start);
827 BUG_ON(cstart > ocfs2_resv_end(resv));
828 BUG_ON(cend > ocfs2_resv_end(resv));
829
830 ocfs2_adjust_resv_from_alloc(resmap, resv, cstart, cend);
831 resv->r_last_start = cstart;
832 resv->r_last_len = clen;
833
834 /*
835 * May have been discarded above from
836 * ocfs2_adjust_resv_from_alloc().
837 */
838 if (!ocfs2_resv_empty(resv))
839 ocfs2_resv_mark_lru(resmap, resv);
840
841 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
842 "r_len: %u r_last_start: %u r_last_len: %u\n",
843 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
844 resv->r_last_start, resv->r_last_len);
845
846 ocfs2_check_resmap(resmap);
847
848 spin_unlock(&resv_lock);
849}
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
new file mode 100644
index 000000000000..8341cd0ef855
--- /dev/null
+++ b/fs/ocfs2/reservations.h
@@ -0,0 +1,154 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.h
5 *
6 * Allocation reservations function prototypes and structures.
7 *
8 * Copyright (C) 2010 Novell. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_RESERVATIONS_H
21#define OCFS2_RESERVATIONS_H
22
23#include <linux/rbtree.h>
24
25#define OCFS2_DEFAULT_RESV_LEVEL 4
26#define OCFS2_MAX_RESV_LEVEL 9
27#define OCFS2_MIN_RESV_LEVEL 0
28
29struct ocfs2_alloc_reservation {
30 struct rb_node r_node;
31
32 unsigned int r_start; /* Beginning of current window */
33 unsigned int r_len; /* Length of the window */
34
35 unsigned int r_last_len; /* Length of most recent alloc */
36 unsigned int r_last_start; /* Start of most recent alloc */
37 struct list_head r_lru; /* LRU list head */
38
39 unsigned int r_flags;
40};
41
42#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of a btree */
43#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
44 * destroyed immediately after use */
45
46struct ocfs2_reservation_map {
47 struct rb_root m_reservations;
48 char *m_disk_bitmap;
49
50 struct ocfs2_super *m_osb;
51
52 /* The following are not initialized to meaningful values until a disk
53 * bitmap is provided. */
54 u32 m_bitmap_len; /* Number of valid
55 * bits available */
56
57 struct list_head m_lru; /* LRU of reservations
58 * structures. */
59
60};
61
62void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
63
64#define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP)
65void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
66 unsigned int flags);
67
68/**
69 * ocfs2_resv_discard() - truncate a reservation
70 * @resmap: reservations bitmap
71 * @resv: the reservation to truncate.
72 *
73 * After this function is called, the reservation will be empty, and
74 * unlinked from the rbtree.
75 */
76void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
77 struct ocfs2_alloc_reservation *resv);
78
79
80/**
81 * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
82 * @resmap: struct ocfs2_reservation_map to initialize
83 * @obj: unused for now
84 * @ops: unused for now
85 * @max_bitmap_bytes: Maximum size of the bitmap (typically blocksize)
86 *
87 * Only possible return value other than '0' is -ENOMEM for failure to
88 * allocate mirror bitmap.
89 */
90int ocfs2_resmap_init(struct ocfs2_super *osb,
91 struct ocfs2_reservation_map *resmap);
92
93/**
94 * ocfs2_resmap_restart() - "restart" a reservation bitmap
95 * @resmap: reservations bitmap
96 * @clen: Number of valid bits in the bitmap
97 * @disk_bitmap: the disk bitmap this resmap should refer to.
98 *
99 * Re-initialize the parameters of a reservation bitmap. This is
100 * useful for local alloc window slides.
101 *
102 * This function will call ocfs2_trunc_resv against all existing
103 * reservations. A future version will recalculate existing
104 * reservations based on the new bitmap.
105 */
106void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
107 unsigned int clen, char *disk_bitmap);
108
109/**
110 * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
111 * @resmap: the struct ocfs2_reservation_map to uninitialize
112 */
113void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
114
115/**
116 * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
117 * @resmap: reservations bitmap
118 * @resv: reservation to base search from
119 * @cstart: start of proposed allocation
120 * @clen: length (in clusters) of proposed allocation
121 *
122 * Using the reservation data from resv, this function will compare
123 * resmap and resmap->m_disk_bitmap to determine what part (if any) of
124 * the reservation window is still clear to use. If resv is empty,
125 * this function will try to allocate a window for it.
126 *
127 * On success, zero is returned and the valid allocation area is set in cstart
128 * and clen.
129 *
130 * Returns -ENOSPC if reservations are disabled.
131 */
132int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
133 struct ocfs2_alloc_reservation *resv,
134 int *cstart, int *clen);
135
136/**
137 * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
138 * @resmap: reservations bitmap
139 * @resv: optional reservation to recalculate based on new bitmap
140 * @cstart: start of allocation in clusters
141 * @clen: length of allocation in clusters.
142 *
143 * Tell the reservation code that bits were used to fulfill allocation in
144 * resmap. The bits don't have to have been part of any existing
145 * reservation. But we must always call this function when bits are claimed.
146 * Internally, the reservations code will use this information to mark the
147 * reservations bitmap. If resv is passed, its next allocation window will be
148 * calculated.
149 */
150void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
151 struct ocfs2_alloc_reservation *resv,
152 u32 cstart, u32 clen);
153
154#endif /* OCFS2_RESERVATIONS_H */
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index e0f46df357e6..da2f29a55ec3 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -54,6 +54,8 @@ struct ocfs2_alloc_context {
54 u64 ac_last_group; 54 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is 55 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 is the same as ~0 - unlimited */ 56 is the same as ~0 - unlimited */
57
58 struct ocfs2_alloc_reservation *ac_resv;
57}; 59};
58 60
59void ocfs2_init_steal_slots(struct ocfs2_super *osb); 61void ocfs2_init_steal_slots(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index dee03197a494..cfe672e72b27 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -95,6 +95,7 @@ struct mount_options
95 unsigned int atime_quantum; 95 unsigned int atime_quantum;
96 signed short slot; 96 signed short slot;
97 unsigned int localalloc_opt; 97 unsigned int localalloc_opt;
98 unsigned int resv_level;
98 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; 99 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
99}; 100};
100 101
@@ -176,6 +177,7 @@ enum {
176 Opt_noacl, 177 Opt_noacl,
177 Opt_usrquota, 178 Opt_usrquota,
178 Opt_grpquota, 179 Opt_grpquota,
180 Opt_resv_level,
179 Opt_err, 181 Opt_err,
180}; 182};
181 183
@@ -202,6 +204,7 @@ static const match_table_t tokens = {
202 {Opt_noacl, "noacl"}, 204 {Opt_noacl, "noacl"},
203 {Opt_usrquota, "usrquota"}, 205 {Opt_usrquota, "usrquota"},
204 {Opt_grpquota, "grpquota"}, 206 {Opt_grpquota, "grpquota"},
207 {Opt_resv_level, "resv_level=%u"},
205 {Opt_err, NULL} 208 {Opt_err, NULL}
206}; 209};
207 210
@@ -1030,6 +1033,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1030 osb->osb_commit_interval = parsed_options.commit_interval; 1033 osb->osb_commit_interval = parsed_options.commit_interval;
1031 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1034 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
1032 osb->local_alloc_bits = osb->local_alloc_default_bits; 1035 osb->local_alloc_bits = osb->local_alloc_default_bits;
1036 osb->osb_resv_level = parsed_options.resv_level;
1033 1037
1034 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1038 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1035 if (status) 1039 if (status)
@@ -1290,6 +1294,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1290 mopt->slot = OCFS2_INVALID_SLOT; 1294 mopt->slot = OCFS2_INVALID_SLOT;
1291 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 1295 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
1292 mopt->cluster_stack[0] = '\0'; 1296 mopt->cluster_stack[0] = '\0';
1297 mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
1293 1298
1294 if (!options) { 1299 if (!options) {
1295 status = 1; 1300 status = 1;
@@ -1433,6 +1438,17 @@ static int ocfs2_parse_options(struct super_block *sb,
1433 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; 1438 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1434 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1439 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1435 break; 1440 break;
1441 case Opt_resv_level:
1442 if (is_remount)
1443 break;
1444 if (match_int(&args[0], &option)) {
1445 status = 0;
1446 goto bail;
1447 }
1448 if (option >= OCFS2_MIN_RESV_LEVEL &&
1449 option < OCFS2_MAX_RESV_LEVEL)
1450 mopt->resv_level = option;
1451 break;
1436 default: 1452 default:
1437 mlog(ML_ERROR, 1453 mlog(ML_ERROR,
1438 "Unrecognized mount option \"%s\" " 1454 "Unrecognized mount option \"%s\" "
@@ -1514,6 +1530,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1514 else 1530 else
1515 seq_printf(s, ",noacl"); 1531 seq_printf(s, ",noacl");
1516 1532
1533 if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
1534 seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
1535
1517 return 0; 1536 return 0;
1518} 1537}
1519 1538
@@ -2042,6 +2061,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
2042 2061
2043 init_waitqueue_head(&osb->osb_mount_event); 2062 init_waitqueue_head(&osb->osb_mount_event);
2044 2063
2064 status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
2065 if (status) {
2066 mlog_errno(status);
2067 goto bail;
2068 }
2069
2045 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 2070 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
2046 if (!osb->vol_label) { 2071 if (!osb->vol_label) {
2047 mlog(ML_ERROR, "unable to alloc vol label\n"); 2072 mlog(ML_ERROR, "unable to alloc vol label\n");