aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/Kconfig85
-rw-r--r--fs/ocfs2/Makefile7
-rw-r--r--fs/ocfs2/acl.c479
-rw-r--r--fs/ocfs2/acl.h58
-rw-r--r--fs/ocfs2/alloc.c712
-rw-r--r--fs/ocfs2/alloc.h30
-rw-r--r--fs/ocfs2/aops.c59
-rw-r--r--fs/ocfs2/blockcheck.c477
-rw-r--r--fs/ocfs2/blockcheck.h82
-rw-r--r--fs/ocfs2/buffer_head_io.c32
-rw-r--r--fs/ocfs2/buffer_head_io.h27
-rw-r--r--fs/ocfs2/cluster/heartbeat.c2
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/cluster/netdebug.c8
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c29
-rw-r--r--fs/ocfs2/dir.c399
-rw-r--r--fs/ocfs2/dir.h2
-rw-r--r--fs/ocfs2/dlm/dlmast.c52
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h3
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c53
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c1
-rw-r--r--fs/ocfs2/dlm/dlmfs.c10
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c42
-rw-r--r--fs/ocfs2/dlm/dlmthread.c3
-rw-r--r--fs/ocfs2/dlmglue.c172
-rw-r--r--fs/ocfs2/dlmglue.h19
-rw-r--r--fs/ocfs2/extent_map.c96
-rw-r--r--fs/ocfs2/extent_map.h24
-rw-r--r--fs/ocfs2/file.c211
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/inode.c175
-rw-r--r--fs/ocfs2/inode.h18
-rw-r--r--fs/ocfs2/journal.c364
-rw-r--r--fs/ocfs2/journal.h128
-rw-r--r--fs/ocfs2/localalloc.c26
-rw-r--r--fs/ocfs2/namei.c318
-rw-r--r--fs/ocfs2/ocfs2.h46
-rw-r--r--fs/ocfs2/ocfs2_fs.h213
-rw-r--r--fs/ocfs2/ocfs2_jbd_compat.h82
-rw-r--r--fs/ocfs2/ocfs2_lockid.h5
-rw-r--r--fs/ocfs2/quota.h119
-rw-r--r--fs/ocfs2/quota_global.c860
-rw-r--r--fs/ocfs2/quota_local.c1253
-rw-r--r--fs/ocfs2/resize.c76
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/suballoc.c363
-rw-r--r--fs/ocfs2/suballoc.h18
-rw-r--r--fs/ocfs2/super.c328
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c2984
-rw-r--r--fs/ocfs2/xattr.h45
53 files changed, 8250 insertions, 2358 deletions
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
new file mode 100644
index 000000000000..701b7a3a872e
--- /dev/null
+++ b/fs/ocfs2/Kconfig
@@ -0,0 +1,85 @@
1config OCFS2_FS
2 tristate "OCFS2 file system support"
3 depends on NET && SYSFS
4 select CONFIGFS_FS
5 select JBD2
6 select CRC32
7 select QUOTA
8 select QUOTA_TREE
9 help
10 OCFS2 is a general purpose extent based shared disk cluster file
11 system with many similarities to ext3. It supports 64 bit inode
12 numbers, and has automatically extending metadata groups which may
13 also make it attractive for non-clustered use.
14
15 You'll want to install the ocfs2-tools package in order to at least
16 get "mount.ocfs2".
17
18 Project web page: http://oss.oracle.com/projects/ocfs2
19 Tools web page: http://oss.oracle.com/projects/ocfs2-tools
20 OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
21
22 For more information on OCFS2, see the file
23 <file:Documentation/filesystems/ocfs2.txt>.
24
25config OCFS2_FS_O2CB
26 tristate "O2CB Kernelspace Clustering"
27 depends on OCFS2_FS
28 default y
29 help
30 OCFS2 includes a simple kernelspace clustering package, the OCFS2
31 Cluster Base. It only requires a very small userspace component
32 to configure it. This comes with the standard ocfs2-tools package.
33 O2CB is limited to maintaining a cluster for OCFS2 file systems.
34 It cannot manage any other cluster applications.
35
36 It is always safe to say Y here, as the clustering method is
37 run-time selectable.
38
39config OCFS2_FS_USERSPACE_CLUSTER
40 tristate "OCFS2 Userspace Clustering"
41 depends on OCFS2_FS && DLM
42 default y
43 help
44 This option will allow OCFS2 to use userspace clustering services
45 in conjunction with the DLM in fs/dlm. If you are using a
46 userspace cluster manager, say Y here.
47
48 It is safe to say Y, as the clustering method is run-time
49 selectable.
50
51config OCFS2_FS_STATS
52 bool "OCFS2 statistics"
53 depends on OCFS2_FS
54 default y
55 help
56 This option allows some fs statistics to be captured. Enabling
57 this option may increase the memory consumption.
58
59config OCFS2_DEBUG_MASKLOG
60 bool "OCFS2 logging support"
61 depends on OCFS2_FS
62 default y
63 help
64 The ocfs2 filesystem has an extensive logging system. The system
65 allows selection of events to log via files in /sys/o2cb/logmask/.
66 This option will enlarge your kernel, but it allows debugging of
67 ocfs2 filesystem issues.
68
69config OCFS2_DEBUG_FS
70 bool "OCFS2 expensive checks"
71 depends on OCFS2_FS
72 default n
73 help
74 This option will enable expensive consistency checks. Enable
75 this option for debugging only as it is likely to decrease
76 performance of the filesystem.
77
78config OCFS2_FS_POSIX_ACL
79 bool "OCFS2 POSIX Access Control Lists"
80 depends on OCFS2_FS
81 select FS_POSIX_ACL
82 default n
83 help
84 Posix Access Control Lists (ACLs) support permissions for users and
85 groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 589dcdfdfe3c..01596079dd63 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
12ocfs2-objs := \ 12ocfs2-objs := \
13 alloc.o \ 13 alloc.o \
14 aops.o \ 14 aops.o \
15 blockcheck.o \
15 buffer_head_io.o \ 16 buffer_head_io.o \
16 dcache.o \ 17 dcache.o \
17 dir.o \ 18 dir.o \
@@ -35,8 +36,14 @@ ocfs2-objs := \
35 sysfile.o \ 36 sysfile.o \
36 uptodate.o \ 37 uptodate.o \
37 ver.o \ 38 ver.o \
39 quota_local.o \
40 quota_global.o \
38 xattr.o 41 xattr.o
39 42
43ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
44ocfs2-objs += acl.o
45endif
46
40ocfs2_stackglue-objs := stackglue.o 47ocfs2_stackglue-objs := stackglue.o
41ocfs2_stack_o2cb-objs := stack_o2cb.o 48ocfs2_stack_o2cb-objs := stack_o2cb.o
42ocfs2_stack_user-objs := stack_user.o 49ocfs2_stack_user-objs := stack_user.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
new file mode 100644
index 000000000000..12dfb44c22e5
--- /dev/null
+++ b/fs/ocfs2/acl.c
@@ -0,0 +1,479 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * acl.c
5 *
6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 *
8 * CREDITS:
9 * Lots of code in this file is copied from linux/fs/ext3/acl.c.
10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public
14 * License version 2 as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 */
21
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/string.h>
25
26#define MLOG_MASK_PREFIX ML_INODE
27#include <cluster/masklog.h>
28
29#include "ocfs2.h"
30#include "alloc.h"
31#include "dlmglue.h"
32#include "file.h"
33#include "ocfs2_fs.h"
34
35#include "xattr.h"
36#include "acl.h"
37
38/*
39 * Convert from xattr value to acl struct.
40 */
41static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
42{
43 int n, count;
44 struct posix_acl *acl;
45
46 if (!value)
47 return NULL;
48 if (size < sizeof(struct posix_acl_entry))
49 return ERR_PTR(-EINVAL);
50
51 count = size / sizeof(struct posix_acl_entry);
52 if (count < 0)
53 return ERR_PTR(-EINVAL);
54 if (count == 0)
55 return NULL;
56
57 acl = posix_acl_alloc(count, GFP_NOFS);
58 if (!acl)
59 return ERR_PTR(-ENOMEM);
60 for (n = 0; n < count; n++) {
61 struct ocfs2_acl_entry *entry =
62 (struct ocfs2_acl_entry *)value;
63
64 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
65 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
66 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
67 value += sizeof(struct posix_acl_entry);
68
69 }
70 return acl;
71}
72
73/*
74 * Convert acl struct to xattr value.
75 */
76static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
77{
78 struct ocfs2_acl_entry *entry = NULL;
79 char *ocfs2_acl;
80 size_t n;
81
82 *size = acl->a_count * sizeof(struct posix_acl_entry);
83
84 ocfs2_acl = kmalloc(*size, GFP_NOFS);
85 if (!ocfs2_acl)
86 return ERR_PTR(-ENOMEM);
87
88 entry = (struct ocfs2_acl_entry *)ocfs2_acl;
89 for (n = 0; n < acl->a_count; n++, entry++) {
90 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
91 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
92 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
93 }
94 return ocfs2_acl;
95}
96
97static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
98 int type,
99 struct buffer_head *di_bh)
100{
101 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
102 int name_index;
103 char *value = NULL;
104 struct posix_acl *acl;
105 int retval;
106
107 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
108 return NULL;
109
110 switch (type) {
111 case ACL_TYPE_ACCESS:
112 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
113 break;
114 case ACL_TYPE_DEFAULT:
115 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
116 break;
117 default:
118 return ERR_PTR(-EINVAL);
119 }
120
121 retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
122 if (retval > 0) {
123 value = kmalloc(retval, GFP_NOFS);
124 if (!value)
125 return ERR_PTR(-ENOMEM);
126 retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
127 "", value, retval);
128 }
129
130 if (retval > 0)
131 acl = ocfs2_acl_from_xattr(value, retval);
132 else if (retval == -ENODATA || retval == 0)
133 acl = NULL;
134 else
135 acl = ERR_PTR(retval);
136
137 kfree(value);
138
139 return acl;
140}
141
142
143/*
144 * Get posix acl.
145 */
146static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
147{
148 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
149 struct buffer_head *di_bh = NULL;
150 struct posix_acl *acl;
151 int ret;
152
153 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
154 return NULL;
155
156 ret = ocfs2_inode_lock(inode, &di_bh, 0);
157 if (ret < 0) {
158 mlog_errno(ret);
159 acl = ERR_PTR(ret);
160 return acl;
161 }
162
163 acl = ocfs2_get_acl_nolock(inode, type, di_bh);
164
165 ocfs2_inode_unlock(inode, 0);
166
167 brelse(di_bh);
168
169 return acl;
170}
171
172/*
173 * Set the access or default ACL of an inode.
174 */
175static int ocfs2_set_acl(handle_t *handle,
176 struct inode *inode,
177 struct buffer_head *di_bh,
178 int type,
179 struct posix_acl *acl,
180 struct ocfs2_alloc_context *meta_ac,
181 struct ocfs2_alloc_context *data_ac)
182{
183 int name_index;
184 void *value = NULL;
185 size_t size = 0;
186 int ret;
187
188 if (S_ISLNK(inode->i_mode))
189 return -EOPNOTSUPP;
190
191 switch (type) {
192 case ACL_TYPE_ACCESS:
193 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
194 if (acl) {
195 mode_t mode = inode->i_mode;
196 ret = posix_acl_equiv_mode(acl, &mode);
197 if (ret < 0)
198 return ret;
199 else {
200 inode->i_mode = mode;
201 if (ret == 0)
202 acl = NULL;
203 }
204 }
205 break;
206 case ACL_TYPE_DEFAULT:
207 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
208 if (!S_ISDIR(inode->i_mode))
209 return acl ? -EACCES : 0;
210 break;
211 default:
212 return -EINVAL;
213 }
214
215 if (acl) {
216 value = ocfs2_acl_to_xattr(acl, &size);
217 if (IS_ERR(value))
218 return (int)PTR_ERR(value);
219 }
220
221 if (handle)
222 ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
223 "", value, size, 0,
224 meta_ac, data_ac);
225 else
226 ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
227
228 kfree(value);
229
230 return ret;
231}
232
233int ocfs2_check_acl(struct inode *inode, int mask)
234{
235 struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
236
237 if (IS_ERR(acl))
238 return PTR_ERR(acl);
239 if (acl) {
240 int ret = posix_acl_permission(inode, acl, mask);
241 posix_acl_release(acl);
242 return ret;
243 }
244
245 return -EAGAIN;
246}
247
248int ocfs2_acl_chmod(struct inode *inode)
249{
250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
251 struct posix_acl *acl, *clone;
252 int ret;
253
254 if (S_ISLNK(inode->i_mode))
255 return -EOPNOTSUPP;
256
257 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
258 return 0;
259
260 acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
261 if (IS_ERR(acl) || !acl)
262 return PTR_ERR(acl);
263 clone = posix_acl_clone(acl, GFP_KERNEL);
264 posix_acl_release(acl);
265 if (!clone)
266 return -ENOMEM;
267 ret = posix_acl_chmod_masq(clone, inode->i_mode);
268 if (!ret)
269 ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
270 clone, NULL, NULL);
271 posix_acl_release(clone);
272 return ret;
273}
274
275/*
276 * Initialize the ACLs of a new inode. If parent directory has default ACL,
277 * then clone to new inode. Called from ocfs2_mknod.
278 */
279int ocfs2_init_acl(handle_t *handle,
280 struct inode *inode,
281 struct inode *dir,
282 struct buffer_head *di_bh,
283 struct buffer_head *dir_bh,
284 struct ocfs2_alloc_context *meta_ac,
285 struct ocfs2_alloc_context *data_ac)
286{
287 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
288 struct posix_acl *acl = NULL;
289 int ret = 0;
290
291 if (!S_ISLNK(inode->i_mode)) {
292 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
293 acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
294 dir_bh);
295 if (IS_ERR(acl))
296 return PTR_ERR(acl);
297 }
298 if (!acl)
299 inode->i_mode &= ~current->fs->umask;
300 }
301 if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
302 struct posix_acl *clone;
303 mode_t mode;
304
305 if (S_ISDIR(inode->i_mode)) {
306 ret = ocfs2_set_acl(handle, inode, di_bh,
307 ACL_TYPE_DEFAULT, acl,
308 meta_ac, data_ac);
309 if (ret)
310 goto cleanup;
311 }
312 clone = posix_acl_clone(acl, GFP_NOFS);
313 ret = -ENOMEM;
314 if (!clone)
315 goto cleanup;
316
317 mode = inode->i_mode;
318 ret = posix_acl_create_masq(clone, &mode);
319 if (ret >= 0) {
320 inode->i_mode = mode;
321 if (ret > 0) {
322 ret = ocfs2_set_acl(handle, inode,
323 di_bh, ACL_TYPE_ACCESS,
324 clone, meta_ac, data_ac);
325 }
326 }
327 posix_acl_release(clone);
328 }
329cleanup:
330 posix_acl_release(acl);
331 return ret;
332}
333
334static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
335 char *list,
336 size_t list_len,
337 const char *name,
338 size_t name_len)
339{
340 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
341 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
342
343 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
344 return 0;
345
346 if (list && size <= list_len)
347 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
348 return size;
349}
350
351static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
352 char *list,
353 size_t list_len,
354 const char *name,
355 size_t name_len)
356{
357 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359
360 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
361 return 0;
362
363 if (list && size <= list_len)
364 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
365 return size;
366}
367
368static int ocfs2_xattr_get_acl(struct inode *inode,
369 int type,
370 void *buffer,
371 size_t size)
372{
373 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
374 struct posix_acl *acl;
375 int ret;
376
377 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
378 return -EOPNOTSUPP;
379
380 acl = ocfs2_get_acl(inode, type);
381 if (IS_ERR(acl))
382 return PTR_ERR(acl);
383 if (acl == NULL)
384 return -ENODATA;
385 ret = posix_acl_to_xattr(acl, buffer, size);
386 posix_acl_release(acl);
387
388 return ret;
389}
390
391static int ocfs2_xattr_get_acl_access(struct inode *inode,
392 const char *name,
393 void *buffer,
394 size_t size)
395{
396 if (strcmp(name, "") != 0)
397 return -EINVAL;
398 return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
399}
400
401static int ocfs2_xattr_get_acl_default(struct inode *inode,
402 const char *name,
403 void *buffer,
404 size_t size)
405{
406 if (strcmp(name, "") != 0)
407 return -EINVAL;
408 return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
409}
410
411static int ocfs2_xattr_set_acl(struct inode *inode,
412 int type,
413 const void *value,
414 size_t size)
415{
416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
417 struct posix_acl *acl;
418 int ret = 0;
419
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
421 return -EOPNOTSUPP;
422
423 if (!is_owner_or_cap(inode))
424 return -EPERM;
425
426 if (value) {
427 acl = posix_acl_from_xattr(value, size);
428 if (IS_ERR(acl))
429 return PTR_ERR(acl);
430 else if (acl) {
431 ret = posix_acl_valid(acl);
432 if (ret)
433 goto cleanup;
434 }
435 } else
436 acl = NULL;
437
438 ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
439
440cleanup:
441 posix_acl_release(acl);
442 return ret;
443}
444
445static int ocfs2_xattr_set_acl_access(struct inode *inode,
446 const char *name,
447 const void *value,
448 size_t size,
449 int flags)
450{
451 if (strcmp(name, "") != 0)
452 return -EINVAL;
453 return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
454}
455
456static int ocfs2_xattr_set_acl_default(struct inode *inode,
457 const char *name,
458 const void *value,
459 size_t size,
460 int flags)
461{
462 if (strcmp(name, "") != 0)
463 return -EINVAL;
464 return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
465}
466
467struct xattr_handler ocfs2_xattr_acl_access_handler = {
468 .prefix = POSIX_ACL_XATTR_ACCESS,
469 .list = ocfs2_xattr_list_acl_access,
470 .get = ocfs2_xattr_get_acl_access,
471 .set = ocfs2_xattr_set_acl_access,
472};
473
474struct xattr_handler ocfs2_xattr_acl_default_handler = {
475 .prefix = POSIX_ACL_XATTR_DEFAULT,
476 .list = ocfs2_xattr_list_acl_default,
477 .get = ocfs2_xattr_get_acl_default,
478 .set = ocfs2_xattr_set_acl_default,
479};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
new file mode 100644
index 000000000000..8f6389ed4da5
--- /dev/null
+++ b/fs/ocfs2/acl.h
@@ -0,0 +1,58 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * acl.h
5 *
6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17
18#ifndef OCFS2_ACL_H
19#define OCFS2_ACL_H
20
21#include <linux/posix_acl_xattr.h>
22
23struct ocfs2_acl_entry {
24 __le16 e_tag;
25 __le16 e_perm;
26 __le32 e_id;
27};
28
29#ifdef CONFIG_OCFS2_FS_POSIX_ACL
30
31extern int ocfs2_check_acl(struct inode *, int);
32extern int ocfs2_acl_chmod(struct inode *);
33extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
34 struct buffer_head *, struct buffer_head *,
35 struct ocfs2_alloc_context *,
36 struct ocfs2_alloc_context *);
37
38#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
39
40#define ocfs2_check_acl NULL
/* Stub used when CONFIG_OCFS2_FS_POSIX_ACL is off: always succeeds. */
static inline int ocfs2_acl_chmod(struct inode *inode)
{
	return 0;
}
45static inline int ocfs2_init_acl(handle_t *handle,
46 struct inode *inode,
47 struct inode *dir,
48 struct buffer_head *di_bh,
49 struct buffer_head *dir_bh,
50 struct ocfs2_alloc_context *meta_ac,
51 struct ocfs2_alloc_context *data_ac)
52{
53 return 0;
54}
55
56#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
57
58#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0cc2deb9394c..d861096c9d81 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -28,6 +28,7 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/swap.h> 30#include <linux/swap.h>
31#include <linux/quotaops.h>
31 32
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC 33#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
@@ -36,6 +37,7 @@
36 37
37#include "alloc.h" 38#include "alloc.h"
38#include "aops.h" 39#include "aops.h"
40#include "blockcheck.h"
39#include "dlmglue.h" 41#include "dlmglue.h"
40#include "extent_map.h" 42#include "extent_map.h"
41#include "inode.h" 43#include "inode.h"
@@ -46,6 +48,7 @@
46#include "file.h" 48#include "file.h"
47#include "super.h" 49#include "super.h"
48#include "uptodate.h" 50#include "uptodate.h"
51#include "xattr.h"
49 52
50#include "buffer_head_io.h" 53#include "buffer_head_io.h"
51 54
@@ -187,20 +190,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
187static int ocfs2_dinode_sanity_check(struct inode *inode, 190static int ocfs2_dinode_sanity_check(struct inode *inode,
188 struct ocfs2_extent_tree *et) 191 struct ocfs2_extent_tree *et)
189{ 192{
190 int ret = 0; 193 struct ocfs2_dinode *di = et->et_object;
191 struct ocfs2_dinode *di;
192 194
193 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); 195 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
196 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
194 197
195 di = et->et_object; 198 return 0;
196 if (!OCFS2_IS_VALID_DINODE(di)) {
197 ret = -EIO;
198 ocfs2_error(inode->i_sb,
199 "Inode %llu has invalid path root",
200 (unsigned long long)OCFS2_I(inode)->ip_blkno);
201 }
202
203 return ret;
204} 199}
205 200
206static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) 201static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
@@ -213,36 +208,33 @@ static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
213 208
214static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et) 209static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
215{ 210{
216 struct ocfs2_xattr_value_root *xv = et->et_object; 211 struct ocfs2_xattr_value_buf *vb = et->et_object;
217 212
218 et->et_root_el = &xv->xr_list; 213 et->et_root_el = &vb->vb_xv->xr_list;
219} 214}
220 215
221static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et, 216static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
222 u64 blkno) 217 u64 blkno)
223{ 218{
224 struct ocfs2_xattr_value_root *xv = 219 struct ocfs2_xattr_value_buf *vb = et->et_object;
225 (struct ocfs2_xattr_value_root *)et->et_object;
226 220
227 xv->xr_last_eb_blk = cpu_to_le64(blkno); 221 vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno);
228} 222}
229 223
230static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et) 224static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
231{ 225{
232 struct ocfs2_xattr_value_root *xv = 226 struct ocfs2_xattr_value_buf *vb = et->et_object;
233 (struct ocfs2_xattr_value_root *) et->et_object;
234 227
235 return le64_to_cpu(xv->xr_last_eb_blk); 228 return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
236} 229}
237 230
238static void ocfs2_xattr_value_update_clusters(struct inode *inode, 231static void ocfs2_xattr_value_update_clusters(struct inode *inode,
239 struct ocfs2_extent_tree *et, 232 struct ocfs2_extent_tree *et,
240 u32 clusters) 233 u32 clusters)
241{ 234{
242 struct ocfs2_xattr_value_root *xv = 235 struct ocfs2_xattr_value_buf *vb = et->et_object;
243 (struct ocfs2_xattr_value_root *)et->et_object;
244 236
245 le32_add_cpu(&xv->xr_clusters, clusters); 237 le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
246} 238}
247 239
248static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { 240static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
@@ -304,11 +296,13 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
304static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, 296static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
305 struct inode *inode, 297 struct inode *inode,
306 struct buffer_head *bh, 298 struct buffer_head *bh,
299 ocfs2_journal_access_func access,
307 void *obj, 300 void *obj,
308 struct ocfs2_extent_tree_operations *ops) 301 struct ocfs2_extent_tree_operations *ops)
309{ 302{
310 et->et_ops = ops; 303 et->et_ops = ops;
311 et->et_root_bh = bh; 304 et->et_root_bh = bh;
305 et->et_root_journal_access = access;
312 if (!obj) 306 if (!obj)
313 obj = (void *)bh->b_data; 307 obj = (void *)bh->b_data;
314 et->et_object = obj; 308 et->et_object = obj;
@@ -324,23 +318,23 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
324 struct inode *inode, 318 struct inode *inode,
325 struct buffer_head *bh) 319 struct buffer_head *bh)
326{ 320{
327 __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops); 321 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di,
322 NULL, &ocfs2_dinode_et_ops);
328} 323}
329 324
330void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 325void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
331 struct inode *inode, 326 struct inode *inode,
332 struct buffer_head *bh) 327 struct buffer_head *bh)
333{ 328{
334 __ocfs2_init_extent_tree(et, inode, bh, NULL, 329 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb,
335 &ocfs2_xattr_tree_et_ops); 330 NULL, &ocfs2_xattr_tree_et_ops);
336} 331}
337 332
338void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 333void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
339 struct inode *inode, 334 struct inode *inode,
340 struct buffer_head *bh, 335 struct ocfs2_xattr_value_buf *vb)
341 struct ocfs2_xattr_value_root *xv)
342{ 336{
343 __ocfs2_init_extent_tree(et, inode, bh, xv, 337 __ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb,
344 &ocfs2_xattr_value_et_ops); 338 &ocfs2_xattr_value_et_ops);
345} 339}
346 340
@@ -362,6 +356,15 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
362 et->et_ops->eo_update_clusters(inode, et, clusters); 356 et->et_ops->eo_update_clusters(inode, et, clusters);
363} 357}
364 358
359static inline int ocfs2_et_root_journal_access(handle_t *handle,
360 struct inode *inode,
361 struct ocfs2_extent_tree *et,
362 int type)
363{
364 return et->et_root_journal_access(handle, inode, et->et_root_bh,
365 type);
366}
367
365static inline int ocfs2_et_insert_check(struct inode *inode, 368static inline int ocfs2_et_insert_check(struct inode *inode,
366 struct ocfs2_extent_tree *et, 369 struct ocfs2_extent_tree *et,
367 struct ocfs2_extent_rec *rec) 370 struct ocfs2_extent_rec *rec)
@@ -402,12 +405,14 @@ struct ocfs2_path_item {
402#define OCFS2_MAX_PATH_DEPTH 5 405#define OCFS2_MAX_PATH_DEPTH 5
403 406
404struct ocfs2_path { 407struct ocfs2_path {
405 int p_tree_depth; 408 int p_tree_depth;
406 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH]; 409 ocfs2_journal_access_func p_root_access;
410 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
407}; 411};
408 412
409#define path_root_bh(_path) ((_path)->p_node[0].bh) 413#define path_root_bh(_path) ((_path)->p_node[0].bh)
410#define path_root_el(_path) ((_path)->p_node[0].el) 414#define path_root_el(_path) ((_path)->p_node[0].el)
415#define path_root_access(_path)((_path)->p_root_access)
411#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh) 416#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
412#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) 417#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
413#define path_num_items(_path) ((_path)->p_tree_depth + 1) 418#define path_num_items(_path) ((_path)->p_tree_depth + 1)
@@ -440,6 +445,8 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
440 */ 445 */
441 if (keep_root) 446 if (keep_root)
442 depth = le16_to_cpu(path_root_el(path)->l_tree_depth); 447 depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
448 else
449 path_root_access(path) = NULL;
443 450
444 path->p_tree_depth = depth; 451 path->p_tree_depth = depth;
445} 452}
@@ -465,6 +472,7 @@ static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
465 472
466 BUG_ON(path_root_bh(dest) != path_root_bh(src)); 473 BUG_ON(path_root_bh(dest) != path_root_bh(src));
467 BUG_ON(path_root_el(dest) != path_root_el(src)); 474 BUG_ON(path_root_el(dest) != path_root_el(src));
475 BUG_ON(path_root_access(dest) != path_root_access(src));
468 476
469 ocfs2_reinit_path(dest, 1); 477 ocfs2_reinit_path(dest, 1);
470 478
@@ -486,6 +494,7 @@ static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
486 int i; 494 int i;
487 495
488 BUG_ON(path_root_bh(dest) != path_root_bh(src)); 496 BUG_ON(path_root_bh(dest) != path_root_bh(src));
497 BUG_ON(path_root_access(dest) != path_root_access(src));
489 498
490 for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { 499 for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
491 brelse(dest->p_node[i].bh); 500 brelse(dest->p_node[i].bh);
@@ -521,7 +530,8 @@ static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
521} 530}
522 531
523static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, 532static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
524 struct ocfs2_extent_list *root_el) 533 struct ocfs2_extent_list *root_el,
534 ocfs2_journal_access_func access)
525{ 535{
526 struct ocfs2_path *path; 536 struct ocfs2_path *path;
527 537
@@ -533,11 +543,48 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
533 get_bh(root_bh); 543 get_bh(root_bh);
534 path_root_bh(path) = root_bh; 544 path_root_bh(path) = root_bh;
535 path_root_el(path) = root_el; 545 path_root_el(path) = root_el;
546 path_root_access(path) = access;
536 } 547 }
537 548
538 return path; 549 return path;
539} 550}
540 551
/*
 * Allocate a new path that shares the root buffer, root extent list
 * and root journal-access function of @path.
 */
static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
{
	struct buffer_head *root_bh = path_root_bh(path);
	struct ocfs2_extent_list *root_el = path_root_el(path);

	return ocfs2_new_path(root_bh, root_el, path_root_access(path));
}
557
558static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
559{
560 return ocfs2_new_path(et->et_root_bh, et->et_root_el,
561 et->et_root_journal_access);
562}
563
564/*
565 * Journal the buffer at depth idx. All idx>0 are extent_blocks,
566 * otherwise it's the root_access function.
567 *
568 * I don't like the way this function's name looks next to
569 * ocfs2_journal_access_path(), but I don't have a better one.
570 */
571static int ocfs2_path_bh_journal_access(handle_t *handle,
572 struct inode *inode,
573 struct ocfs2_path *path,
574 int idx)
575{
576 ocfs2_journal_access_func access = path_root_access(path);
577
578 if (!access)
579 access = ocfs2_journal_access;
580
581 if (idx)
582 access = ocfs2_journal_access_eb;
583
584 return access(handle, inode, path->p_node[idx].bh,
585 OCFS2_JOURNAL_ACCESS_WRITE);
586}
587
541/* 588/*
542 * Convenience function to journal all components in a path. 589 * Convenience function to journal all components in a path.
543 */ 590 */
@@ -550,8 +597,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
550 goto out; 597 goto out;
551 598
552 for(i = 0; i < path_num_items(path); i++) { 599 for(i = 0; i < path_num_items(path); i++) {
553 ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh, 600 ret = ocfs2_path_bh_journal_access(handle, inode, path, i);
554 OCFS2_JOURNAL_ACCESS_WRITE);
555 if (ret < 0) { 601 if (ret < 0) {
556 mlog_errno(ret); 602 mlog_errno(ret);
557 goto out; 603 goto out;
@@ -686,6 +732,80 @@ struct ocfs2_merge_ctxt {
686 int c_split_covers_rec; 732 int c_split_covers_rec;
687}; 733};
688 734
735static int ocfs2_validate_extent_block(struct super_block *sb,
736 struct buffer_head *bh)
737{
738 int rc;
739 struct ocfs2_extent_block *eb =
740 (struct ocfs2_extent_block *)bh->b_data;
741
742 mlog(0, "Validating extent block %llu\n",
743 (unsigned long long)bh->b_blocknr);
744
745 BUG_ON(!buffer_uptodate(bh));
746
747 /*
748 * If the ecc fails, we return the error but otherwise
749 * leave the filesystem running. We know any error is
750 * local to this block.
751 */
752 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
753 if (rc) {
754 mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
755 (unsigned long long)bh->b_blocknr);
756 return rc;
757 }
758
759 /*
760 * Errors after here are fatal.
761 */
762
763 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
764 ocfs2_error(sb,
765 "Extent block #%llu has bad signature %.*s",
766 (unsigned long long)bh->b_blocknr, 7,
767 eb->h_signature);
768 return -EINVAL;
769 }
770
771 if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
772 ocfs2_error(sb,
773 "Extent block #%llu has an invalid h_blkno "
774 "of %llu",
775 (unsigned long long)bh->b_blocknr,
776 (unsigned long long)le64_to_cpu(eb->h_blkno));
777 return -EINVAL;
778 }
779
780 if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
781 ocfs2_error(sb,
782 "Extent block #%llu has an invalid "
783 "h_fs_generation of #%u",
784 (unsigned long long)bh->b_blocknr,
785 le32_to_cpu(eb->h_fs_generation));
786 return -EINVAL;
787 }
788
789 return 0;
790}
791
792int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
793 struct buffer_head **bh)
794{
795 int rc;
796 struct buffer_head *tmp = *bh;
797
798 rc = ocfs2_read_block(inode, eb_blkno, &tmp,
799 ocfs2_validate_extent_block);
800
801 /* If ocfs2_read_block() got us a new bh, pass it up. */
802 if (!rc && !*bh)
803 *bh = tmp;
804
805 return rc;
806}
807
808
689/* 809/*
690 * How many free extents have we got before we need more meta data? 810 * How many free extents have we got before we need more meta data?
691 */ 811 */
@@ -705,8 +825,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
705 last_eb_blk = ocfs2_et_get_last_eb_blk(et); 825 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
706 826
707 if (last_eb_blk) { 827 if (last_eb_blk) {
708 retval = ocfs2_read_block(inode, last_eb_blk, 828 retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
709 &eb_bh);
710 if (retval < 0) { 829 if (retval < 0) {
711 mlog_errno(retval); 830 mlog_errno(retval);
712 goto bail; 831 goto bail;
@@ -768,8 +887,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
768 } 887 }
769 ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 888 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
770 889
771 status = ocfs2_journal_access(handle, inode, bhs[i], 890 status = ocfs2_journal_access_eb(handle, inode, bhs[i],
772 OCFS2_JOURNAL_ACCESS_CREATE); 891 OCFS2_JOURNAL_ACCESS_CREATE);
773 if (status < 0) { 892 if (status < 0) {
774 mlog_errno(status); 893 mlog_errno(status);
775 goto bail; 894 goto bail;
@@ -908,15 +1027,12 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
908 for(i = 0; i < new_blocks; i++) { 1027 for(i = 0; i < new_blocks; i++) {
909 bh = new_eb_bhs[i]; 1028 bh = new_eb_bhs[i];
910 eb = (struct ocfs2_extent_block *) bh->b_data; 1029 eb = (struct ocfs2_extent_block *) bh->b_data;
911 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1030 /* ocfs2_create_new_meta_bhs() should create it right! */
912 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1031 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
913 status = -EIO;
914 goto bail;
915 }
916 eb_el = &eb->h_list; 1032 eb_el = &eb->h_list;
917 1033
918 status = ocfs2_journal_access(handle, inode, bh, 1034 status = ocfs2_journal_access_eb(handle, inode, bh,
919 OCFS2_JOURNAL_ACCESS_CREATE); 1035 OCFS2_JOURNAL_ACCESS_CREATE);
920 if (status < 0) { 1036 if (status < 0) {
921 mlog_errno(status); 1037 mlog_errno(status);
922 goto bail; 1038 goto bail;
@@ -955,21 +1071,21 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
955 * journal_dirty erroring as it won't unless we've aborted the 1071 * journal_dirty erroring as it won't unless we've aborted the
956 * handle (in which case we would never be here) so reserving 1072 * handle (in which case we would never be here) so reserving
957 * the write with journal_access is all we need to do. */ 1073 * the write with journal_access is all we need to do. */
958 status = ocfs2_journal_access(handle, inode, *last_eb_bh, 1074 status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh,
959 OCFS2_JOURNAL_ACCESS_WRITE); 1075 OCFS2_JOURNAL_ACCESS_WRITE);
960 if (status < 0) { 1076 if (status < 0) {
961 mlog_errno(status); 1077 mlog_errno(status);
962 goto bail; 1078 goto bail;
963 } 1079 }
964 status = ocfs2_journal_access(handle, inode, et->et_root_bh, 1080 status = ocfs2_et_root_journal_access(handle, inode, et,
965 OCFS2_JOURNAL_ACCESS_WRITE); 1081 OCFS2_JOURNAL_ACCESS_WRITE);
966 if (status < 0) { 1082 if (status < 0) {
967 mlog_errno(status); 1083 mlog_errno(status);
968 goto bail; 1084 goto bail;
969 } 1085 }
970 if (eb_bh) { 1086 if (eb_bh) {
971 status = ocfs2_journal_access(handle, inode, eb_bh, 1087 status = ocfs2_journal_access_eb(handle, inode, eb_bh,
972 OCFS2_JOURNAL_ACCESS_WRITE); 1088 OCFS2_JOURNAL_ACCESS_WRITE);
973 if (status < 0) { 1089 if (status < 0) {
974 mlog_errno(status); 1090 mlog_errno(status);
975 goto bail; 1091 goto bail;
@@ -1052,17 +1168,14 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1052 } 1168 }
1053 1169
1054 eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; 1170 eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
1055 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 1171 /* ocfs2_create_new_meta_bhs() should create it right! */
1056 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 1172 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
1057 status = -EIO;
1058 goto bail;
1059 }
1060 1173
1061 eb_el = &eb->h_list; 1174 eb_el = &eb->h_list;
1062 root_el = et->et_root_el; 1175 root_el = et->et_root_el;
1063 1176
1064 status = ocfs2_journal_access(handle, inode, new_eb_bh, 1177 status = ocfs2_journal_access_eb(handle, inode, new_eb_bh,
1065 OCFS2_JOURNAL_ACCESS_CREATE); 1178 OCFS2_JOURNAL_ACCESS_CREATE);
1066 if (status < 0) { 1179 if (status < 0) {
1067 mlog_errno(status); 1180 mlog_errno(status);
1068 goto bail; 1181 goto bail;
@@ -1080,8 +1193,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1080 goto bail; 1193 goto bail;
1081 } 1194 }
1082 1195
1083 status = ocfs2_journal_access(handle, inode, et->et_root_bh, 1196 status = ocfs2_et_root_journal_access(handle, inode, et,
1084 OCFS2_JOURNAL_ACCESS_WRITE); 1197 OCFS2_JOURNAL_ACCESS_WRITE);
1085 if (status < 0) { 1198 if (status < 0) {
1086 mlog_errno(status); 1199 mlog_errno(status);
1087 goto bail; 1200 goto bail;
@@ -1176,18 +1289,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
1176 brelse(bh); 1289 brelse(bh);
1177 bh = NULL; 1290 bh = NULL;
1178 1291
1179 status = ocfs2_read_block(inode, blkno, &bh); 1292 status = ocfs2_read_extent_block(inode, blkno, &bh);
1180 if (status < 0) { 1293 if (status < 0) {
1181 mlog_errno(status); 1294 mlog_errno(status);
1182 goto bail; 1295 goto bail;
1183 } 1296 }
1184 1297
1185 eb = (struct ocfs2_extent_block *) bh->b_data; 1298 eb = (struct ocfs2_extent_block *) bh->b_data;
1186 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1187 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1188 status = -EIO;
1189 goto bail;
1190 }
1191 el = &eb->h_list; 1299 el = &eb->h_list;
1192 1300
1193 if (le16_to_cpu(el->l_next_free_rec) < 1301 if (le16_to_cpu(el->l_next_free_rec) <
@@ -1540,7 +1648,7 @@ static int __ocfs2_find_path(struct inode *inode,
1540 1648
1541 brelse(bh); 1649 brelse(bh);
1542 bh = NULL; 1650 bh = NULL;
1543 ret = ocfs2_read_block(inode, blkno, &bh); 1651 ret = ocfs2_read_extent_block(inode, blkno, &bh);
1544 if (ret) { 1652 if (ret) {
1545 mlog_errno(ret); 1653 mlog_errno(ret);
1546 goto out; 1654 goto out;
@@ -1548,11 +1656,6 @@ static int __ocfs2_find_path(struct inode *inode,
1548 1656
1549 eb = (struct ocfs2_extent_block *) bh->b_data; 1657 eb = (struct ocfs2_extent_block *) bh->b_data;
1550 el = &eb->h_list; 1658 el = &eb->h_list;
1551 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1552 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1553 ret = -EIO;
1554 goto out;
1555 }
1556 1659
1557 if (le16_to_cpu(el->l_next_free_rec) > 1660 if (le16_to_cpu(el->l_next_free_rec) >
1558 le16_to_cpu(el->l_count)) { 1661 le16_to_cpu(el->l_count)) {
@@ -1860,25 +1963,23 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
1860 root_bh = left_path->p_node[subtree_index].bh; 1963 root_bh = left_path->p_node[subtree_index].bh;
1861 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 1964 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
1862 1965
1863 ret = ocfs2_journal_access(handle, inode, root_bh, 1966 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
1864 OCFS2_JOURNAL_ACCESS_WRITE); 1967 subtree_index);
1865 if (ret) { 1968 if (ret) {
1866 mlog_errno(ret); 1969 mlog_errno(ret);
1867 goto out; 1970 goto out;
1868 } 1971 }
1869 1972
1870 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 1973 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
1871 ret = ocfs2_journal_access(handle, inode, 1974 ret = ocfs2_path_bh_journal_access(handle, inode,
1872 right_path->p_node[i].bh, 1975 right_path, i);
1873 OCFS2_JOURNAL_ACCESS_WRITE);
1874 if (ret) { 1976 if (ret) {
1875 mlog_errno(ret); 1977 mlog_errno(ret);
1876 goto out; 1978 goto out;
1877 } 1979 }
1878 1980
1879 ret = ocfs2_journal_access(handle, inode, 1981 ret = ocfs2_path_bh_journal_access(handle, inode,
1880 left_path->p_node[i].bh, 1982 left_path, i);
1881 OCFS2_JOURNAL_ACCESS_WRITE);
1882 if (ret) { 1983 if (ret) {
1883 mlog_errno(ret); 1984 mlog_errno(ret);
1884 goto out; 1985 goto out;
@@ -2102,8 +2203,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2102 2203
2103 *ret_left_path = NULL; 2204 *ret_left_path = NULL;
2104 2205
2105 left_path = ocfs2_new_path(path_root_bh(right_path), 2206 left_path = ocfs2_new_path_from_path(right_path);
2106 path_root_el(right_path));
2107 if (!left_path) { 2207 if (!left_path) {
2108 ret = -ENOMEM; 2208 ret = -ENOMEM;
2109 mlog_errno(ret); 2209 mlog_errno(ret);
@@ -2398,9 +2498,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2398 return -EAGAIN; 2498 return -EAGAIN;
2399 2499
2400 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { 2500 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
2401 ret = ocfs2_journal_access(handle, inode, 2501 ret = ocfs2_journal_access_eb(handle, inode,
2402 path_leaf_bh(right_path), 2502 path_leaf_bh(right_path),
2403 OCFS2_JOURNAL_ACCESS_WRITE); 2503 OCFS2_JOURNAL_ACCESS_WRITE);
2404 if (ret) { 2504 if (ret) {
2405 mlog_errno(ret); 2505 mlog_errno(ret);
2406 goto out; 2506 goto out;
@@ -2417,8 +2517,8 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2417 * We have to update i_last_eb_blk during the meta 2517 * We have to update i_last_eb_blk during the meta
2418 * data delete. 2518 * data delete.
2419 */ 2519 */
2420 ret = ocfs2_journal_access(handle, inode, et_root_bh, 2520 ret = ocfs2_et_root_journal_access(handle, inode, et,
2421 OCFS2_JOURNAL_ACCESS_WRITE); 2521 OCFS2_JOURNAL_ACCESS_WRITE);
2422 if (ret) { 2522 if (ret) {
2423 mlog_errno(ret); 2523 mlog_errno(ret);
2424 goto out; 2524 goto out;
@@ -2433,25 +2533,23 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2433 */ 2533 */
2434 BUG_ON(right_has_empty && !del_right_subtree); 2534 BUG_ON(right_has_empty && !del_right_subtree);
2435 2535
2436 ret = ocfs2_journal_access(handle, inode, root_bh, 2536 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
2437 OCFS2_JOURNAL_ACCESS_WRITE); 2537 subtree_index);
2438 if (ret) { 2538 if (ret) {
2439 mlog_errno(ret); 2539 mlog_errno(ret);
2440 goto out; 2540 goto out;
2441 } 2541 }
2442 2542
2443 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2543 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2444 ret = ocfs2_journal_access(handle, inode, 2544 ret = ocfs2_path_bh_journal_access(handle, inode,
2445 right_path->p_node[i].bh, 2545 right_path, i);
2446 OCFS2_JOURNAL_ACCESS_WRITE);
2447 if (ret) { 2546 if (ret) {
2448 mlog_errno(ret); 2547 mlog_errno(ret);
2449 goto out; 2548 goto out;
2450 } 2549 }
2451 2550
2452 ret = ocfs2_journal_access(handle, inode, 2551 ret = ocfs2_path_bh_journal_access(handle, inode,
2453 left_path->p_node[i].bh, 2552 left_path, i);
2454 OCFS2_JOURNAL_ACCESS_WRITE);
2455 if (ret) { 2553 if (ret) {
2456 mlog_errno(ret); 2554 mlog_errno(ret);
2457 goto out; 2555 goto out;
@@ -2596,16 +2694,17 @@ out:
2596 2694
2597static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, 2695static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
2598 handle_t *handle, 2696 handle_t *handle,
2599 struct buffer_head *bh, 2697 struct ocfs2_path *path)
2600 struct ocfs2_extent_list *el)
2601{ 2698{
2602 int ret; 2699 int ret;
2700 struct buffer_head *bh = path_leaf_bh(path);
2701 struct ocfs2_extent_list *el = path_leaf_el(path);
2603 2702
2604 if (!ocfs2_is_empty_extent(&el->l_recs[0])) 2703 if (!ocfs2_is_empty_extent(&el->l_recs[0]))
2605 return 0; 2704 return 0;
2606 2705
2607 ret = ocfs2_journal_access(handle, inode, bh, 2706 ret = ocfs2_path_bh_journal_access(handle, inode, path,
2608 OCFS2_JOURNAL_ACCESS_WRITE); 2707 path_num_items(path) - 1);
2609 if (ret) { 2708 if (ret) {
2610 mlog_errno(ret); 2709 mlog_errno(ret);
2611 goto out; 2710 goto out;
@@ -2644,8 +2743,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2644 goto out; 2743 goto out;
2645 } 2744 }
2646 2745
2647 left_path = ocfs2_new_path(path_root_bh(path), 2746 left_path = ocfs2_new_path_from_path(path);
2648 path_root_el(path));
2649 if (!left_path) { 2747 if (!left_path) {
2650 ret = -ENOMEM; 2748 ret = -ENOMEM;
2651 mlog_errno(ret); 2749 mlog_errno(ret);
@@ -2654,8 +2752,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2654 2752
2655 ocfs2_cp_path(left_path, path); 2753 ocfs2_cp_path(left_path, path);
2656 2754
2657 right_path = ocfs2_new_path(path_root_bh(path), 2755 right_path = ocfs2_new_path_from_path(path);
2658 path_root_el(path));
2659 if (!right_path) { 2756 if (!right_path) {
2660 ret = -ENOMEM; 2757 ret = -ENOMEM;
2661 mlog_errno(ret); 2758 mlog_errno(ret);
@@ -2689,9 +2786,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2689 * Caller might still want to make changes to the 2786 * Caller might still want to make changes to the
2690 * tree root, so re-add it to the journal here. 2787 * tree root, so re-add it to the journal here.
2691 */ 2788 */
2692 ret = ocfs2_journal_access(handle, inode, 2789 ret = ocfs2_path_bh_journal_access(handle, inode,
2693 path_root_bh(left_path), 2790 left_path, 0);
2694 OCFS2_JOURNAL_ACCESS_WRITE);
2695 if (ret) { 2791 if (ret) {
2696 mlog_errno(ret); 2792 mlog_errno(ret);
2697 goto out; 2793 goto out;
@@ -2785,8 +2881,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2785 * We have a path to the left of this one - it needs 2881 * We have a path to the left of this one - it needs
2786 * an update too. 2882 * an update too.
2787 */ 2883 */
2788 left_path = ocfs2_new_path(path_root_bh(path), 2884 left_path = ocfs2_new_path_from_path(path);
2789 path_root_el(path));
2790 if (!left_path) { 2885 if (!left_path) {
2791 ret = -ENOMEM; 2886 ret = -ENOMEM;
2792 mlog_errno(ret); 2887 mlog_errno(ret);
@@ -2875,8 +2970,7 @@ rightmost_no_delete:
2875 * it up front. 2970 * it up front.
2876 */ 2971 */
2877 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 2972 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
2878 path_leaf_bh(path), 2973 path);
2879 path_leaf_el(path));
2880 if (ret) 2974 if (ret)
2881 mlog_errno(ret); 2975 mlog_errno(ret);
2882 goto out; 2976 goto out;
@@ -3027,8 +3121,7 @@ static int ocfs2_get_right_path(struct inode *inode,
3027 /* This function shouldn't be called for the rightmost leaf. */ 3121 /* This function shouldn't be called for the rightmost leaf. */
3028 BUG_ON(right_cpos == 0); 3122 BUG_ON(right_cpos == 0);
3029 3123
3030 right_path = ocfs2_new_path(path_root_bh(left_path), 3124 right_path = ocfs2_new_path_from_path(left_path);
3031 path_root_el(left_path));
3032 if (!right_path) { 3125 if (!right_path) {
3033 ret = -ENOMEM; 3126 ret = -ENOMEM;
3034 mlog_errno(ret); 3127 mlog_errno(ret);
@@ -3111,8 +3204,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3111 root_bh = left_path->p_node[subtree_index].bh; 3204 root_bh = left_path->p_node[subtree_index].bh;
3112 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3205 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3113 3206
3114 ret = ocfs2_journal_access(handle, inode, root_bh, 3207 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
3115 OCFS2_JOURNAL_ACCESS_WRITE); 3208 subtree_index);
3116 if (ret) { 3209 if (ret) {
3117 mlog_errno(ret); 3210 mlog_errno(ret);
3118 goto out; 3211 goto out;
@@ -3120,17 +3213,15 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3120 3213
3121 for (i = subtree_index + 1; 3214 for (i = subtree_index + 1;
3122 i < path_num_items(right_path); i++) { 3215 i < path_num_items(right_path); i++) {
3123 ret = ocfs2_journal_access(handle, inode, 3216 ret = ocfs2_path_bh_journal_access(handle, inode,
3124 right_path->p_node[i].bh, 3217 right_path, i);
3125 OCFS2_JOURNAL_ACCESS_WRITE);
3126 if (ret) { 3218 if (ret) {
3127 mlog_errno(ret); 3219 mlog_errno(ret);
3128 goto out; 3220 goto out;
3129 } 3221 }
3130 3222
3131 ret = ocfs2_journal_access(handle, inode, 3223 ret = ocfs2_path_bh_journal_access(handle, inode,
3132 left_path->p_node[i].bh, 3224 left_path, i);
3133 OCFS2_JOURNAL_ACCESS_WRITE);
3134 if (ret) { 3225 if (ret) {
3135 mlog_errno(ret); 3226 mlog_errno(ret);
3136 goto out; 3227 goto out;
@@ -3142,8 +3233,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3142 right_rec = &el->l_recs[index + 1]; 3233 right_rec = &el->l_recs[index + 1];
3143 } 3234 }
3144 3235
3145 ret = ocfs2_journal_access(handle, inode, bh, 3236 ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
3146 OCFS2_JOURNAL_ACCESS_WRITE); 3237 path_num_items(left_path) - 1);
3147 if (ret) { 3238 if (ret) {
3148 mlog_errno(ret); 3239 mlog_errno(ret);
3149 goto out; 3240 goto out;
@@ -3199,8 +3290,7 @@ static int ocfs2_get_left_path(struct inode *inode,
3199 /* This function shouldn't be called for the leftmost leaf. */ 3290 /* This function shouldn't be called for the leftmost leaf. */
3200 BUG_ON(left_cpos == 0); 3291 BUG_ON(left_cpos == 0);
3201 3292
3202 left_path = ocfs2_new_path(path_root_bh(right_path), 3293 left_path = ocfs2_new_path_from_path(right_path);
3203 path_root_el(right_path));
3204 if (!left_path) { 3294 if (!left_path) {
3205 ret = -ENOMEM; 3295 ret = -ENOMEM;
3206 mlog_errno(ret); 3296 mlog_errno(ret);
@@ -3283,8 +3373,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3283 root_bh = left_path->p_node[subtree_index].bh; 3373 root_bh = left_path->p_node[subtree_index].bh;
3284 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3374 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3285 3375
3286 ret = ocfs2_journal_access(handle, inode, root_bh, 3376 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
3287 OCFS2_JOURNAL_ACCESS_WRITE); 3377 subtree_index);
3288 if (ret) { 3378 if (ret) {
3289 mlog_errno(ret); 3379 mlog_errno(ret);
3290 goto out; 3380 goto out;
@@ -3292,17 +3382,15 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3292 3382
3293 for (i = subtree_index + 1; 3383 for (i = subtree_index + 1;
3294 i < path_num_items(right_path); i++) { 3384 i < path_num_items(right_path); i++) {
3295 ret = ocfs2_journal_access(handle, inode, 3385 ret = ocfs2_path_bh_journal_access(handle, inode,
3296 right_path->p_node[i].bh, 3386 right_path, i);
3297 OCFS2_JOURNAL_ACCESS_WRITE);
3298 if (ret) { 3387 if (ret) {
3299 mlog_errno(ret); 3388 mlog_errno(ret);
3300 goto out; 3389 goto out;
3301 } 3390 }
3302 3391
3303 ret = ocfs2_journal_access(handle, inode, 3392 ret = ocfs2_path_bh_journal_access(handle, inode,
3304 left_path->p_node[i].bh, 3393 left_path, i);
3305 OCFS2_JOURNAL_ACCESS_WRITE);
3306 if (ret) { 3394 if (ret) {
3307 mlog_errno(ret); 3395 mlog_errno(ret);
3308 goto out; 3396 goto out;
@@ -3314,8 +3402,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3314 has_empty_extent = 1; 3402 has_empty_extent = 1;
3315 } 3403 }
3316 3404
3317 ret = ocfs2_journal_access(handle, inode, bh, 3405 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
3318 OCFS2_JOURNAL_ACCESS_WRITE); 3406 path_num_items(right_path) - 1);
3319 if (ret) { 3407 if (ret) {
3320 mlog_errno(ret); 3408 mlog_errno(ret);
3321 goto out; 3409 goto out;
@@ -3732,8 +3820,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
3732 * leftmost leaf. 3820 * leftmost leaf.
3733 */ 3821 */
3734 if (left_cpos) { 3822 if (left_cpos) {
3735 left_path = ocfs2_new_path(path_root_bh(right_path), 3823 left_path = ocfs2_new_path_from_path(right_path);
3736 path_root_el(right_path));
3737 if (!left_path) { 3824 if (!left_path) {
3738 ret = -ENOMEM; 3825 ret = -ENOMEM;
3739 mlog_errno(ret); 3826 mlog_errno(ret);
@@ -3781,7 +3868,7 @@ static void ocfs2_split_record(struct inode *inode,
3781 struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; 3868 struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
3782 struct ocfs2_extent_rec *rec, *tmprec; 3869 struct ocfs2_extent_rec *rec, *tmprec;
3783 3870
3784 right_el = path_leaf_el(right_path);; 3871 right_el = path_leaf_el(right_path);
3785 if (left_path) 3872 if (left_path)
3786 left_el = path_leaf_el(left_path); 3873 left_el = path_leaf_el(left_path);
3787 3874
@@ -3958,8 +4045,8 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3958 4045
3959 el = et->et_root_el; 4046 el = et->et_root_el;
3960 4047
3961 ret = ocfs2_journal_access(handle, inode, et->et_root_bh, 4048 ret = ocfs2_et_root_journal_access(handle, inode, et,
3962 OCFS2_JOURNAL_ACCESS_WRITE); 4049 OCFS2_JOURNAL_ACCESS_WRITE);
3963 if (ret) { 4050 if (ret) {
3964 mlog_errno(ret); 4051 mlog_errno(ret);
3965 goto out; 4052 goto out;
@@ -3970,7 +4057,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3970 goto out_update_clusters; 4057 goto out_update_clusters;
3971 } 4058 }
3972 4059
3973 right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 4060 right_path = ocfs2_new_path_from_et(et);
3974 if (!right_path) { 4061 if (!right_path) {
3975 ret = -ENOMEM; 4062 ret = -ENOMEM;
3976 mlog_errno(ret); 4063 mlog_errno(ret);
@@ -4020,8 +4107,8 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4020 * ocfs2_rotate_tree_right() might have extended the 4107 * ocfs2_rotate_tree_right() might have extended the
4021 * transaction without re-journaling our tree root. 4108 * transaction without re-journaling our tree root.
4022 */ 4109 */
4023 ret = ocfs2_journal_access(handle, inode, et->et_root_bh, 4110 ret = ocfs2_et_root_journal_access(handle, inode, et,
4024 OCFS2_JOURNAL_ACCESS_WRITE); 4111 OCFS2_JOURNAL_ACCESS_WRITE);
4025 if (ret) { 4112 if (ret) {
4026 mlog_errno(ret); 4113 mlog_errno(ret);
4027 goto out; 4114 goto out;
@@ -4082,8 +4169,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4082 goto out; 4169 goto out;
4083 4170
4084 if (left_cpos != 0) { 4171 if (left_cpos != 0) {
4085 left_path = ocfs2_new_path(path_root_bh(path), 4172 left_path = ocfs2_new_path_from_path(path);
4086 path_root_el(path));
4087 if (!left_path) 4173 if (!left_path)
4088 goto out; 4174 goto out;
4089 4175
@@ -4097,8 +4183,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4097 le16_to_cpu(new_el->l_count)) { 4183 le16_to_cpu(new_el->l_count)) {
4098 bh = path_leaf_bh(left_path); 4184 bh = path_leaf_bh(left_path);
4099 eb = (struct ocfs2_extent_block *)bh->b_data; 4185 eb = (struct ocfs2_extent_block *)bh->b_data;
4100 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 4186 ocfs2_error(inode->i_sb,
4101 eb); 4187 "Extent block #%llu has an "
4188 "invalid l_next_free_rec of "
4189 "%d. It should have "
4190 "matched the l_count of %d",
4191 (unsigned long long)le64_to_cpu(eb->h_blkno),
4192 le16_to_cpu(new_el->l_next_free_rec),
4193 le16_to_cpu(new_el->l_count));
4194 status = -EINVAL;
4102 goto out; 4195 goto out;
4103 } 4196 }
4104 rec = &new_el->l_recs[ 4197 rec = &new_el->l_recs[
@@ -4132,8 +4225,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4132 if (right_cpos == 0) 4225 if (right_cpos == 0)
4133 goto out; 4226 goto out;
4134 4227
4135 right_path = ocfs2_new_path(path_root_bh(path), 4228 right_path = ocfs2_new_path_from_path(path);
4136 path_root_el(path));
4137 if (!right_path) 4229 if (!right_path)
4138 goto out; 4230 goto out;
4139 4231
@@ -4147,8 +4239,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4147 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { 4239 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
4148 bh = path_leaf_bh(right_path); 4240 bh = path_leaf_bh(right_path);
4149 eb = (struct ocfs2_extent_block *)bh->b_data; 4241 eb = (struct ocfs2_extent_block *)bh->b_data;
4150 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 4242 ocfs2_error(inode->i_sb,
4151 eb); 4243 "Extent block #%llu has an "
4244 "invalid l_next_free_rec of %d",
4245 (unsigned long long)le64_to_cpu(eb->h_blkno),
4246 le16_to_cpu(new_el->l_next_free_rec));
4247 status = -EINVAL;
4152 goto out; 4248 goto out;
4153 } 4249 }
4154 rec = &new_el->l_recs[1]; 4250 rec = &new_el->l_recs[1];
@@ -4294,7 +4390,9 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4294 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4390 * ocfs2_figure_insert_type() and ocfs2_add_branch()
4295 * may want it later. 4391 * may want it later.
4296 */ 4392 */
4297 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); 4393 ret = ocfs2_read_extent_block(inode,
4394 ocfs2_et_get_last_eb_blk(et),
4395 &bh);
4298 if (ret) { 4396 if (ret) {
4299 mlog_exit(ret); 4397 mlog_exit(ret);
4300 goto out; 4398 goto out;
@@ -4320,7 +4418,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4320 return 0; 4418 return 0;
4321 } 4419 }
4322 4420
4323 path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 4421 path = ocfs2_new_path_from_et(et);
4324 if (!path) { 4422 if (!path) {
4325 ret = -ENOMEM; 4423 ret = -ENOMEM;
4326 mlog_errno(ret); 4424 mlog_errno(ret);
@@ -4531,9 +4629,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4531 4629
4532 BUG_ON(num_bits > clusters_to_add); 4630 BUG_ON(num_bits > clusters_to_add);
4533 4631
4534 /* reserve our write early -- insert_extent may update the inode */ 4632 /* reserve our write early -- insert_extent may update the tree root */
4535 status = ocfs2_journal_access(handle, inode, et->et_root_bh, 4633 status = ocfs2_et_root_journal_access(handle, inode, et,
4536 OCFS2_JOURNAL_ACCESS_WRITE); 4634 OCFS2_JOURNAL_ACCESS_WRITE);
4537 if (status < 0) { 4635 if (status < 0) {
4538 mlog_errno(status); 4636 mlog_errno(status);
4539 goto leave; 4637 goto leave;
@@ -4760,20 +4858,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4760 if (path->p_tree_depth) { 4858 if (path->p_tree_depth) {
4761 struct ocfs2_extent_block *eb; 4859 struct ocfs2_extent_block *eb;
4762 4860
4763 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), 4861 ret = ocfs2_read_extent_block(inode,
4764 &last_eb_bh); 4862 ocfs2_et_get_last_eb_blk(et),
4863 &last_eb_bh);
4765 if (ret) { 4864 if (ret) {
4766 mlog_exit(ret); 4865 mlog_exit(ret);
4767 goto out; 4866 goto out;
4768 } 4867 }
4769 4868
4770 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 4869 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
4771 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
4772 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
4773 ret = -EROFS;
4774 goto out;
4775 }
4776
4777 rightmost_el = &eb->h_list; 4870 rightmost_el = &eb->h_list;
4778 } else 4871 } else
4779 rightmost_el = path_root_el(path); 4872 rightmost_el = path_root_el(path);
@@ -4854,7 +4947,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
4854 if (et->et_ops == &ocfs2_dinode_et_ops) 4947 if (et->et_ops == &ocfs2_dinode_et_ops)
4855 ocfs2_extent_map_trunc(inode, 0); 4948 ocfs2_extent_map_trunc(inode, 0);
4856 4949
4857 left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 4950 left_path = ocfs2_new_path_from_et(et);
4858 if (!left_path) { 4951 if (!left_path) {
4859 ret = -ENOMEM; 4952 ret = -ENOMEM;
4860 mlog_errno(ret); 4953 mlog_errno(ret);
@@ -4918,8 +5011,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
4918 5011
4919 depth = path->p_tree_depth; 5012 depth = path->p_tree_depth;
4920 if (depth > 0) { 5013 if (depth > 0) {
4921 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), 5014 ret = ocfs2_read_extent_block(inode,
4922 &last_eb_bh); 5015 ocfs2_et_get_last_eb_blk(et),
5016 &last_eb_bh);
4923 if (ret < 0) { 5017 if (ret < 0) {
4924 mlog_errno(ret); 5018 mlog_errno(ret);
4925 goto out; 5019 goto out;
@@ -5025,8 +5119,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5025 } 5119 }
5026 5120
5027 if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) { 5121 if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
5028 left_path = ocfs2_new_path(path_root_bh(path), 5122 left_path = ocfs2_new_path_from_path(path);
5029 path_root_el(path));
5030 if (!left_path) { 5123 if (!left_path) {
5031 ret = -ENOMEM; 5124 ret = -ENOMEM;
5032 mlog_errno(ret); 5125 mlog_errno(ret);
@@ -5135,7 +5228,7 @@ int ocfs2_remove_extent(struct inode *inode,
5135 5228
5136 ocfs2_extent_map_trunc(inode, 0); 5229 ocfs2_extent_map_trunc(inode, 0);
5137 5230
5138 path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 5231 path = ocfs2_new_path_from_et(et);
5139 if (!path) { 5232 if (!path) {
5140 ret = -ENOMEM; 5233 ret = -ENOMEM;
5141 mlog_errno(ret); 5234 mlog_errno(ret);
@@ -5255,6 +5348,78 @@ out:
5255 return ret; 5348 return ret;
5256} 5349}
5257 5350
5351int ocfs2_remove_btree_range(struct inode *inode,
5352 struct ocfs2_extent_tree *et,
5353 u32 cpos, u32 phys_cpos, u32 len,
5354 struct ocfs2_cached_dealloc_ctxt *dealloc)
5355{
5356 int ret;
5357 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5358 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5359 struct inode *tl_inode = osb->osb_tl_inode;
5360 handle_t *handle;
5361 struct ocfs2_alloc_context *meta_ac = NULL;
5362
5363 ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac);
5364 if (ret) {
5365 mlog_errno(ret);
5366 return ret;
5367 }
5368
5369 mutex_lock(&tl_inode->i_mutex);
5370
5371 if (ocfs2_truncate_log_needs_flush(osb)) {
5372 ret = __ocfs2_flush_truncate_log(osb);
5373 if (ret < 0) {
5374 mlog_errno(ret);
5375 goto out;
5376 }
5377 }
5378
5379 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5380 if (IS_ERR(handle)) {
5381 ret = PTR_ERR(handle);
5382 mlog_errno(ret);
5383 goto out;
5384 }
5385
5386 ret = ocfs2_et_root_journal_access(handle, inode, et,
5387 OCFS2_JOURNAL_ACCESS_WRITE);
5388 if (ret) {
5389 mlog_errno(ret);
5390 goto out;
5391 }
5392
5393 ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
5394 dealloc);
5395 if (ret) {
5396 mlog_errno(ret);
5397 goto out_commit;
5398 }
5399
5400 ocfs2_et_update_clusters(inode, et, -len);
5401
5402 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
5403 if (ret) {
5404 mlog_errno(ret);
5405 goto out_commit;
5406 }
5407
5408 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
5409 if (ret)
5410 mlog_errno(ret);
5411
5412out_commit:
5413 ocfs2_commit_trans(osb, handle);
5414out:
5415 mutex_unlock(&tl_inode->i_mutex);
5416
5417 if (meta_ac)
5418 ocfs2_free_alloc_context(meta_ac);
5419
5420 return ret;
5421}
5422
5258int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 5423int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
5259{ 5424{
5260 struct buffer_head *tl_bh = osb->osb_tl_bh; 5425 struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -5308,13 +5473,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5308 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 5473 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
5309 5474
5310 di = (struct ocfs2_dinode *) tl_bh->b_data; 5475 di = (struct ocfs2_dinode *) tl_bh->b_data;
5311 tl = &di->id2.i_dealloc;
5312 if (!OCFS2_IS_VALID_DINODE(di)) {
5313 OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
5314 status = -EIO;
5315 goto bail;
5316 }
5317 5476
5477 /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
5478 * by the underlying call to ocfs2_read_inode_block(), so any
5479 * corruption is a code bug */
5480 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
5481
5482 tl = &di->id2.i_dealloc;
5318 tl_count = le16_to_cpu(tl->tl_count); 5483 tl_count = le16_to_cpu(tl->tl_count);
5319 mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 5484 mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
5320 tl_count == 0, 5485 tl_count == 0,
@@ -5332,8 +5497,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5332 goto bail; 5497 goto bail;
5333 } 5498 }
5334 5499
5335 status = ocfs2_journal_access(handle, tl_inode, tl_bh, 5500 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
5336 OCFS2_JOURNAL_ACCESS_WRITE); 5501 OCFS2_JOURNAL_ACCESS_WRITE);
5337 if (status < 0) { 5502 if (status < 0) {
5338 mlog_errno(status); 5503 mlog_errno(status);
5339 goto bail; 5504 goto bail;
@@ -5394,8 +5559,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5394 while (i >= 0) { 5559 while (i >= 0) {
5395 /* Caller has given us at least enough credits to 5560 /* Caller has given us at least enough credits to
5396 * update the truncate log dinode */ 5561 * update the truncate log dinode */
5397 status = ocfs2_journal_access(handle, tl_inode, tl_bh, 5562 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
5398 OCFS2_JOURNAL_ACCESS_WRITE); 5563 OCFS2_JOURNAL_ACCESS_WRITE);
5399 if (status < 0) { 5564 if (status < 0) {
5400 mlog_errno(status); 5565 mlog_errno(status);
5401 goto bail; 5566 goto bail;
@@ -5464,13 +5629,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5464 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5629 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
5465 5630
5466 di = (struct ocfs2_dinode *) tl_bh->b_data; 5631 di = (struct ocfs2_dinode *) tl_bh->b_data;
5467 tl = &di->id2.i_dealloc;
5468 if (!OCFS2_IS_VALID_DINODE(di)) {
5469 OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
5470 status = -EIO;
5471 goto out;
5472 }
5473 5632
5633 /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
5634 * by the underlying call to ocfs2_read_inode_block(), so any
5635 * corruption is a code bug */
5636 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
5637
5638 tl = &di->id2.i_dealloc;
5474 num_to_flush = le16_to_cpu(tl->tl_used); 5639 num_to_flush = le16_to_cpu(tl->tl_used);
5475 mlog(0, "Flush %u records from truncate log #%llu\n", 5640 mlog(0, "Flush %u records from truncate log #%llu\n",
5476 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 5641 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
@@ -5586,7 +5751,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
5586 goto bail; 5751 goto bail;
5587 } 5752 }
5588 5753
5589 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); 5754 status = ocfs2_read_inode_block(inode, &bh);
5590 if (status < 0) { 5755 if (status < 0) {
5591 iput(inode); 5756 iput(inode);
5592 mlog_errno(status); 5757 mlog_errno(status);
@@ -5625,13 +5790,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
5625 } 5790 }
5626 5791
5627 di = (struct ocfs2_dinode *) tl_bh->b_data; 5792 di = (struct ocfs2_dinode *) tl_bh->b_data;
5628 tl = &di->id2.i_dealloc;
5629 if (!OCFS2_IS_VALID_DINODE(di)) {
5630 OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
5631 status = -EIO;
5632 goto bail;
5633 }
5634 5793
5794 /* tl_bh is loaded from ocfs2_get_truncate_log_info(). It's
5795 * validated by the underlying call to ocfs2_read_inode_block(),
5796 * so any corruption is a code bug */
5797 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
5798
5799 tl = &di->id2.i_dealloc;
5635 if (le16_to_cpu(tl->tl_used)) { 5800 if (le16_to_cpu(tl->tl_used)) {
5636 mlog(0, "We'll have %u logs to recover\n", 5801 mlog(0, "We'll have %u logs to recover\n",
5637 le16_to_cpu(tl->tl_used)); 5802 le16_to_cpu(tl->tl_used));
@@ -5651,6 +5816,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
5651 * tl_used. */ 5816 * tl_used. */
5652 tl->tl_used = 0; 5817 tl->tl_used = 0;
5653 5818
5819 ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
5654 status = ocfs2_write_block(osb, tl_bh, tl_inode); 5820 status = ocfs2_write_block(osb, tl_bh, tl_inode);
5655 if (status < 0) { 5821 if (status < 0) {
5656 mlog_errno(status); 5822 mlog_errno(status);
@@ -5800,7 +5966,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
5800 */ 5966 */
5801 5967
5802/* 5968/*
5803 * Describes a single block free from a suballocator 5969 * Describe a single bit freed from a suballocator. For the block
5970 * suballocators, it represents one block. For the global cluster
5971 * allocator, it represents some clusters and free_bit indicates
5972 * clusters number.
5804 */ 5973 */
5805struct ocfs2_cached_block_free { 5974struct ocfs2_cached_block_free {
5806 struct ocfs2_cached_block_free *free_next; 5975 struct ocfs2_cached_block_free *free_next;
@@ -5815,10 +5984,10 @@ struct ocfs2_per_slot_free_list {
5815 struct ocfs2_cached_block_free *f_first; 5984 struct ocfs2_cached_block_free *f_first;
5816}; 5985};
5817 5986
5818static int ocfs2_free_cached_items(struct ocfs2_super *osb, 5987static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
5819 int sysfile_type, 5988 int sysfile_type,
5820 int slot, 5989 int slot,
5821 struct ocfs2_cached_block_free *head) 5990 struct ocfs2_cached_block_free *head)
5822{ 5991{
5823 int ret; 5992 int ret;
5824 u64 bg_blkno; 5993 u64 bg_blkno;
@@ -5893,6 +6062,82 @@ out:
5893 return ret; 6062 return ret;
5894} 6063}
5895 6064
6065int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6066 u64 blkno, unsigned int bit)
6067{
6068 int ret = 0;
6069 struct ocfs2_cached_block_free *item;
6070
6071 item = kmalloc(sizeof(*item), GFP_NOFS);
6072 if (item == NULL) {
6073 ret = -ENOMEM;
6074 mlog_errno(ret);
6075 return ret;
6076 }
6077
6078 mlog(0, "Insert clusters: (bit %u, blk %llu)\n",
6079 bit, (unsigned long long)blkno);
6080
6081 item->free_blk = blkno;
6082 item->free_bit = bit;
6083 item->free_next = ctxt->c_global_allocator;
6084
6085 ctxt->c_global_allocator = item;
6086 return ret;
6087}
6088
6089static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
6090 struct ocfs2_cached_block_free *head)
6091{
6092 struct ocfs2_cached_block_free *tmp;
6093 struct inode *tl_inode = osb->osb_tl_inode;
6094 handle_t *handle;
6095 int ret = 0;
6096
6097 mutex_lock(&tl_inode->i_mutex);
6098
6099 while (head) {
6100 if (ocfs2_truncate_log_needs_flush(osb)) {
6101 ret = __ocfs2_flush_truncate_log(osb);
6102 if (ret < 0) {
6103 mlog_errno(ret);
6104 break;
6105 }
6106 }
6107
6108 handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
6109 if (IS_ERR(handle)) {
6110 ret = PTR_ERR(handle);
6111 mlog_errno(ret);
6112 break;
6113 }
6114
6115 ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
6116 head->free_bit);
6117
6118 ocfs2_commit_trans(osb, handle);
6119 tmp = head;
6120 head = head->free_next;
6121 kfree(tmp);
6122
6123 if (ret < 0) {
6124 mlog_errno(ret);
6125 break;
6126 }
6127 }
6128
6129 mutex_unlock(&tl_inode->i_mutex);
6130
6131 while (head) {
6132 /* Premature exit may have left some dangling items. */
6133 tmp = head;
6134 head = head->free_next;
6135 kfree(tmp);
6136 }
6137
6138 return ret;
6139}
6140
5896int ocfs2_run_deallocs(struct ocfs2_super *osb, 6141int ocfs2_run_deallocs(struct ocfs2_super *osb,
5897 struct ocfs2_cached_dealloc_ctxt *ctxt) 6142 struct ocfs2_cached_dealloc_ctxt *ctxt)
5898{ 6143{
@@ -5908,8 +6153,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
5908 if (fl->f_first) { 6153 if (fl->f_first) {
5909 mlog(0, "Free items: (type %u, slot %d)\n", 6154 mlog(0, "Free items: (type %u, slot %d)\n",
5910 fl->f_inode_type, fl->f_slot); 6155 fl->f_inode_type, fl->f_slot);
5911 ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, 6156 ret2 = ocfs2_free_cached_blocks(osb,
5912 fl->f_slot, fl->f_first); 6157 fl->f_inode_type,
6158 fl->f_slot,
6159 fl->f_first);
5913 if (ret2) 6160 if (ret2)
5914 mlog_errno(ret2); 6161 mlog_errno(ret2);
5915 if (!ret) 6162 if (!ret)
@@ -5920,6 +6167,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
5920 kfree(fl); 6167 kfree(fl);
5921 } 6168 }
5922 6169
6170 if (ctxt->c_global_allocator) {
6171 ret2 = ocfs2_free_cached_clusters(osb,
6172 ctxt->c_global_allocator);
6173 if (ret2)
6174 mlog_errno(ret2);
6175 if (!ret)
6176 ret = ret2;
6177
6178 ctxt->c_global_allocator = NULL;
6179 }
6180
5923 return ret; 6181 return ret;
5924} 6182}
5925 6183
@@ -6075,11 +6333,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6075 6333
6076 eb = (struct ocfs2_extent_block *) bh->b_data; 6334 eb = (struct ocfs2_extent_block *) bh->b_data;
6077 el = &eb->h_list; 6335 el = &eb->h_list;
6078 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 6336
6079 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 6337 /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6080 ret = -EROFS; 6338 * Any corruption is a code bug. */
6081 goto out; 6339 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6082 }
6083 6340
6084 *new_last_eb = bh; 6341 *new_last_eb = bh;
6085 get_bh(*new_last_eb); 6342 get_bh(*new_last_eb);
@@ -6326,8 +6583,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6326 } 6583 }
6327 6584
6328 if (last_eb_bh) { 6585 if (last_eb_bh) {
6329 status = ocfs2_journal_access(handle, inode, last_eb_bh, 6586 status = ocfs2_journal_access_eb(handle, inode, last_eb_bh,
6330 OCFS2_JOURNAL_ACCESS_WRITE); 6587 OCFS2_JOURNAL_ACCESS_WRITE);
6331 if (status < 0) { 6588 if (status < 0) {
6332 mlog_errno(status); 6589 mlog_errno(status);
6333 goto bail; 6590 goto bail;
@@ -6350,6 +6607,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6350 goto bail; 6607 goto bail;
6351 } 6608 }
6352 6609
6610 vfs_dq_free_space_nodirty(inode,
6611 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6353 spin_lock(&OCFS2_I(inode)->ip_lock); 6612 spin_lock(&OCFS2_I(inode)->ip_lock);
6354 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 6613 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6355 clusters_to_del; 6614 clusters_to_del;
@@ -6436,11 +6695,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6436 mlog_errno(ret); 6695 mlog_errno(ret);
6437 else if (ocfs2_should_order_data(inode)) { 6696 else if (ocfs2_should_order_data(inode)) {
6438 ret = ocfs2_jbd2_file_inode(handle, inode); 6697 ret = ocfs2_jbd2_file_inode(handle, inode);
6439#ifdef CONFIG_OCFS2_COMPAT_JBD
6440 ret = walk_page_buffers(handle, page_buffers(page),
6441 from, to, &partial,
6442 ocfs2_journal_dirty_data);
6443#endif
6444 if (ret < 0) 6698 if (ret < 0)
6445 mlog_errno(ret); 6699 mlog_errno(ret);
6446 } 6700 }
@@ -6663,6 +6917,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6663 struct page **pages = NULL; 6917 struct page **pages = NULL;
6664 loff_t end = osb->s_clustersize; 6918 loff_t end = osb->s_clustersize;
6665 struct ocfs2_extent_tree et; 6919 struct ocfs2_extent_tree et;
6920 int did_quota = 0;
6666 6921
6667 has_data = i_size_read(inode) ? 1 : 0; 6922 has_data = i_size_read(inode) ? 1 : 0;
6668 6923
@@ -6682,15 +6937,16 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6682 } 6937 }
6683 } 6938 }
6684 6939
6685 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS); 6940 handle = ocfs2_start_trans(osb,
6941 ocfs2_inline_to_extents_credits(osb->sb));
6686 if (IS_ERR(handle)) { 6942 if (IS_ERR(handle)) {
6687 ret = PTR_ERR(handle); 6943 ret = PTR_ERR(handle);
6688 mlog_errno(ret); 6944 mlog_errno(ret);
6689 goto out_unlock; 6945 goto out_unlock;
6690 } 6946 }
6691 6947
6692 ret = ocfs2_journal_access(handle, inode, di_bh, 6948 ret = ocfs2_journal_access_di(handle, inode, di_bh,
6693 OCFS2_JOURNAL_ACCESS_WRITE); 6949 OCFS2_JOURNAL_ACCESS_WRITE);
6694 if (ret) { 6950 if (ret) {
6695 mlog_errno(ret); 6951 mlog_errno(ret);
6696 goto out_commit; 6952 goto out_commit;
@@ -6701,6 +6957,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6701 unsigned int page_end; 6957 unsigned int page_end;
6702 u64 phys; 6958 u64 phys;
6703 6959
6960 if (vfs_dq_alloc_space_nodirty(inode,
6961 ocfs2_clusters_to_bytes(osb->sb, 1))) {
6962 ret = -EDQUOT;
6963 goto out_commit;
6964 }
6965 did_quota = 1;
6966
6704 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 6967 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
6705 &num); 6968 &num);
6706 if (ret) { 6969 if (ret) {
@@ -6774,6 +7037,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6774 } 7037 }
6775 7038
6776out_commit: 7039out_commit:
7040 if (ret < 0 && did_quota)
7041 vfs_dq_free_space_nodirty(inode,
7042 ocfs2_clusters_to_bytes(osb->sb, 1));
7043
6777 ocfs2_commit_trans(osb, handle); 7044 ocfs2_commit_trans(osb, handle);
6778 7045
6779out_unlock: 7046out_unlock:
@@ -6813,7 +7080,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
6813 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 7080 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
6814 i_size_read(inode)); 7081 i_size_read(inode));
6815 7082
6816 path = ocfs2_new_path(fe_bh, &di->id2.i_list); 7083 path = ocfs2_new_path(fe_bh, &di->id2.i_list,
7084 ocfs2_journal_access_di);
6817 if (!path) { 7085 if (!path) {
6818 status = -ENOMEM; 7086 status = -ENOMEM;
6819 mlog_errno(status); 7087 mlog_errno(status);
@@ -6984,20 +7252,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
6984 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 7252 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
6985 7253
6986 if (fe->id2.i_list.l_tree_depth) { 7254 if (fe->id2.i_list.l_tree_depth) {
6987 status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), 7255 status = ocfs2_read_extent_block(inode,
6988 &last_eb_bh); 7256 le64_to_cpu(fe->i_last_eb_blk),
7257 &last_eb_bh);
6989 if (status < 0) { 7258 if (status < 0) {
6990 mlog_errno(status); 7259 mlog_errno(status);
6991 goto bail; 7260 goto bail;
6992 } 7261 }
6993 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 7262 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6994 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
6995 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
6996
6997 brelse(last_eb_bh);
6998 status = -EIO;
6999 goto bail;
7000 }
7001 } 7263 }
7002 7264
7003 (*tc)->tc_last_eb_bh = last_eb_bh; 7265 (*tc)->tc_last_eb_bh = last_eb_bh;
@@ -7052,8 +7314,8 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
7052 goto out; 7314 goto out;
7053 } 7315 }
7054 7316
7055 ret = ocfs2_journal_access(handle, inode, di_bh, 7317 ret = ocfs2_journal_access_di(handle, inode, di_bh,
7056 OCFS2_JOURNAL_ACCESS_WRITE); 7318 OCFS2_JOURNAL_ACCESS_WRITE);
7057 if (ret) { 7319 if (ret) {
7058 mlog_errno(ret); 7320 mlog_errno(ret);
7059 goto out_commit; 7321 goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 70257c84cfbe..cceff5c37f47 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,9 @@
45 * 45 *
46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a 46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree 47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
48 * functions. 48 * functions. With metadata ecc, we now call different journal_access
49 * functions for each type of metadata, so it must have the
50 * root_journal_access function.
49 * ocfs2_extent_tree_operations abstract the normal operations we do for 51 * ocfs2_extent_tree_operations abstract the normal operations we do for
50 * the root of extent b-tree. 52 * the root of extent b-tree.
51 */ 53 */
@@ -54,6 +56,7 @@ struct ocfs2_extent_tree {
54 struct ocfs2_extent_tree_operations *et_ops; 56 struct ocfs2_extent_tree_operations *et_ops;
55 struct buffer_head *et_root_bh; 57 struct buffer_head *et_root_bh;
56 struct ocfs2_extent_list *et_root_el; 58 struct ocfs2_extent_list *et_root_el;
59 ocfs2_journal_access_func et_root_journal_access;
57 void *et_object; 60 void *et_object;
58 unsigned int et_max_leaf_clusters; 61 unsigned int et_max_leaf_clusters;
59}; 62};
@@ -68,10 +71,18 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
68void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 71void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
69 struct inode *inode, 72 struct inode *inode,
70 struct buffer_head *bh); 73 struct buffer_head *bh);
74struct ocfs2_xattr_value_buf;
71void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 75void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
72 struct inode *inode, 76 struct inode *inode,
73 struct buffer_head *bh, 77 struct ocfs2_xattr_value_buf *vb);
74 struct ocfs2_xattr_value_root *xv); 78
79/*
80 * Read an extent block into *bh. If *bh is NULL, a bh will be
81 * allocated. This is a cached read. The extent block will be validated
82 * with ocfs2_validate_extent_block().
83 */
84int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
85 struct buffer_head **bh);
75 86
76struct ocfs2_alloc_context; 87struct ocfs2_alloc_context;
77int ocfs2_insert_extent(struct ocfs2_super *osb, 88int ocfs2_insert_extent(struct ocfs2_super *osb,
@@ -110,6 +121,11 @@ int ocfs2_remove_extent(struct inode *inode,
110 u32 cpos, u32 len, handle_t *handle, 121 u32 cpos, u32 len, handle_t *handle,
111 struct ocfs2_alloc_context *meta_ac, 122 struct ocfs2_alloc_context *meta_ac,
112 struct ocfs2_cached_dealloc_ctxt *dealloc); 123 struct ocfs2_cached_dealloc_ctxt *dealloc);
124int ocfs2_remove_btree_range(struct inode *inode,
125 struct ocfs2_extent_tree *et,
126 u32 cpos, u32 phys_cpos, u32 len,
127 struct ocfs2_cached_dealloc_ctxt *dealloc);
128
113int ocfs2_num_free_extents(struct ocfs2_super *osb, 129int ocfs2_num_free_extents(struct ocfs2_super *osb,
114 struct inode *inode, 130 struct inode *inode,
115 struct ocfs2_extent_tree *et); 131 struct ocfs2_extent_tree *et);
@@ -167,10 +183,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
167 */ 183 */
168struct ocfs2_cached_dealloc_ctxt { 184struct ocfs2_cached_dealloc_ctxt {
169 struct ocfs2_per_slot_free_list *c_first_suballocator; 185 struct ocfs2_per_slot_free_list *c_first_suballocator;
186 struct ocfs2_cached_block_free *c_global_allocator;
170}; 187};
171static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) 188static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
172{ 189{
173 c->c_first_suballocator = NULL; 190 c->c_first_suballocator = NULL;
191 c->c_global_allocator = NULL;
192}
193int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
194 u64 blkno, unsigned int bit);
195static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
196{
197 return c->c_global_allocator != NULL;
174} 198}
175int ocfs2_run_deallocs(struct ocfs2_super *osb, 199int ocfs2_run_deallocs(struct ocfs2_super *osb,
176 struct ocfs2_cached_dealloc_ctxt *ctxt); 200 struct ocfs2_cached_dealloc_ctxt *ctxt);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c22543b33420..a067a6cffb01 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -27,6 +27,7 @@
27#include <linux/swap.h> 27#include <linux/swap.h>
28#include <linux/pipe_fs_i.h> 28#include <linux/pipe_fs_i.h>
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h>
30 31
31#define MLOG_MASK_PREFIX ML_FILE_IO 32#define MLOG_MASK_PREFIX ML_FILE_IO
32#include <cluster/masklog.h> 33#include <cluster/masklog.h>
@@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
68 goto bail; 69 goto bail;
69 } 70 }
70 71
71 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); 72 status = ocfs2_read_inode_block(inode, &bh);
72 if (status < 0) { 73 if (status < 0) {
73 mlog_errno(status); 74 mlog_errno(status);
74 goto bail; 75 goto bail;
75 } 76 }
76 fe = (struct ocfs2_dinode *) bh->b_data; 77 fe = (struct ocfs2_dinode *) bh->b_data;
77 78
78 if (!OCFS2_IS_VALID_DINODE(fe)) {
79 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
80 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
81 fe->i_signature);
82 goto bail;
83 }
84
85 if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, 79 if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
86 le32_to_cpu(fe->i_clusters))) { 80 le32_to_cpu(fe->i_clusters))) {
87 mlog(ML_ERROR, "block offset is outside the allocated size: " 81 mlog(ML_ERROR, "block offset is outside the allocated size: "
@@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
262 BUG_ON(!PageLocked(page)); 256 BUG_ON(!PageLocked(page));
263 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); 257 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
264 258
265 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 259 ret = ocfs2_read_inode_block(inode, &di_bh);
266 if (ret) { 260 if (ret) {
267 mlog_errno(ret); 261 mlog_errno(ret);
268 goto out; 262 goto out;
@@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
481 475
482 if (ocfs2_should_order_data(inode)) { 476 if (ocfs2_should_order_data(inode)) {
483 ret = ocfs2_jbd2_file_inode(handle, inode); 477 ret = ocfs2_jbd2_file_inode(handle, inode);
484#ifdef CONFIG_OCFS2_COMPAT_JBD
485 ret = walk_page_buffers(handle,
486 page_buffers(page),
487 from, to, NULL,
488 ocfs2_journal_dirty_data);
489#endif
490 if (ret < 0) 478 if (ret < 0)
491 mlog_errno(ret); 479 mlog_errno(ret);
492 } 480 }
@@ -1072,15 +1060,8 @@ static void ocfs2_write_failure(struct inode *inode,
1072 tmppage = wc->w_pages[i]; 1060 tmppage = wc->w_pages[i];
1073 1061
1074 if (page_has_buffers(tmppage)) { 1062 if (page_has_buffers(tmppage)) {
1075 if (ocfs2_should_order_data(inode)) { 1063 if (ocfs2_should_order_data(inode))
1076 ocfs2_jbd2_file_inode(wc->w_handle, inode); 1064 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1077#ifdef CONFIG_OCFS2_COMPAT_JBD
1078 walk_page_buffers(wc->w_handle,
1079 page_buffers(tmppage),
1080 from, to, NULL,
1081 ocfs2_journal_dirty_data);
1082#endif
1083 }
1084 1065
1085 block_commit_write(tmppage, from, to); 1066 block_commit_write(tmppage, from, to);
1086 } 1067 }
@@ -1531,8 +1512,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
1531 goto out; 1512 goto out;
1532 } 1513 }
1533 1514
1534 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, 1515 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
1535 OCFS2_JOURNAL_ACCESS_WRITE); 1516 OCFS2_JOURNAL_ACCESS_WRITE);
1536 if (ret) { 1517 if (ret) {
1537 ocfs2_commit_trans(osb, handle); 1518 ocfs2_commit_trans(osb, handle);
1538 1519
@@ -1750,15 +1731,20 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1750 1731
1751 wc->w_handle = handle; 1732 wc->w_handle = handle;
1752 1733
1734 if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
1735 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
1736 ret = -EDQUOT;
1737 goto out_commit;
1738 }
1753 /* 1739 /*
1754 * We don't want this to fail in ocfs2_write_end(), so do it 1740 * We don't want this to fail in ocfs2_write_end(), so do it
1755 * here. 1741 * here.
1756 */ 1742 */
1757 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, 1743 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
1758 OCFS2_JOURNAL_ACCESS_WRITE); 1744 OCFS2_JOURNAL_ACCESS_WRITE);
1759 if (ret) { 1745 if (ret) {
1760 mlog_errno(ret); 1746 mlog_errno(ret);
1761 goto out_commit; 1747 goto out_quota;
1762 } 1748 }
1763 1749
1764 /* 1750 /*
@@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1771 mmap_page); 1757 mmap_page);
1772 if (ret) { 1758 if (ret) {
1773 mlog_errno(ret); 1759 mlog_errno(ret);
1774 goto out_commit; 1760 goto out_quota;
1775 } 1761 }
1776 1762
1777 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, 1763 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
1778 len); 1764 len);
1779 if (ret) { 1765 if (ret) {
1780 mlog_errno(ret); 1766 mlog_errno(ret);
1781 goto out_commit; 1767 goto out_quota;
1782 } 1768 }
1783 1769
1784 if (data_ac) 1770 if (data_ac)
@@ -1790,6 +1776,10 @@ success:
1790 *pagep = wc->w_target_page; 1776 *pagep = wc->w_target_page;
1791 *fsdata = wc; 1777 *fsdata = wc;
1792 return 0; 1778 return 0;
1779out_quota:
1780 if (clusters_to_alloc)
1781 vfs_dq_free_space(inode,
1782 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
1793out_commit: 1783out_commit:
1794 ocfs2_commit_trans(osb, handle); 1784 ocfs2_commit_trans(osb, handle);
1795 1785
@@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1919 } 1909 }
1920 1910
1921 if (page_has_buffers(tmppage)) { 1911 if (page_has_buffers(tmppage)) {
1922 if (ocfs2_should_order_data(inode)) { 1912 if (ocfs2_should_order_data(inode))
1923 ocfs2_jbd2_file_inode(wc->w_handle, inode); 1913 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1924#ifdef CONFIG_OCFS2_COMPAT_JBD
1925 walk_page_buffers(wc->w_handle,
1926 page_buffers(tmppage),
1927 from, to, NULL,
1928 ocfs2_journal_dirty_data);
1929#endif
1930 }
1931 block_commit_write(tmppage, from, to); 1914 block_commit_write(tmppage, from, to);
1932 } 1915 }
1933 } 1916 }
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
new file mode 100644
index 000000000000..2a947c44e594
--- /dev/null
+++ b/fs/ocfs2/blockcheck.c
@@ -0,0 +1,477 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * blockcheck.c
5 *
6 * Checksum and ECC codes for the OCFS2 userspace library.
7 *
8 * Copyright (C) 2006, 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License, version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#include <linux/kernel.h>
21#include <linux/types.h>
22#include <linux/crc32.h>
23#include <linux/buffer_head.h>
24#include <linux/bitops.h>
25#include <asm/byteorder.h>
26
27#include <cluster/masklog.h>
28
29#include "ocfs2.h"
30
31#include "blockcheck.h"
32
33
34/*
35 * We use the following conventions:
36 *
37 * d = # data bits
38 * p = # parity bits
39 * c = # total code bits (d + p)
40 */
41
42
/*
 * Calculate the bit offset in the hamming code buffer based on the bit's
 * offset in the data buffer.  Since the hamming code reserves all
 * power-of-two bits for parity, the data bit number and the code bit
 * number are offset by all the parity bits beforehand.
 *
 * Recall that bit numbers in hamming code are 1-based.  This function
 * takes the 0-based data bit @i from the caller and returns the 1-based
 * code bit.
 *
 * An example.  Take the first data bit (i == 0).  Code bit 1 is a power
 * of two (2^0), so it's a parity bit.  Code bit 2 is a power of two
 * (2^1), so it's a parity bit.  3 is not a power of two, so data bit 0
 * ends up as code bit 3.  Likewise data bit 1 skips parity bit 4 and
 * ends up as code bit 5.
 *
 * The caller can pass in @p_cache if it wants to keep track of the most
 * recent number of parity bits added.  This allows the function to start
 * the calculation at the last place.  The cached count is added to the
 * result up front, so it is only meaningful when it came from a call
 * for the same or a lower data bit.  @p_cache may be NULL.
 */
static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
{
	unsigned int b, p = 0;

	/*
	 * Data bits are 0-based, but we're talking code bits, which
	 * are 1-based.
	 */
	b = i + 1;

	/* Use the cache if it is there */
	if (p_cache)
		p = *p_cache;
	b += p;

	/*
	 * For every power of two below our bit number, bump our bit.
	 *
	 * We compare with (b + 1) because we have to compare with what b
	 * would be _if_ it were bumped up by the parity bit.
	 *
	 * The shift is done on an unsigned constant: b is unsigned, and
	 * a signed (1 << p) is undefined once p reaches 31.
	 */
	for (; (1U << p) < (b + 1); p++)
		b++;

	if (p_cache)
		*p_cache = p;

	return b;
}
92
93/*
94 * This is the low level encoder function. It can be called across
95 * multiple hunks just like the crc32 code. 'd' is the number of bits
96 * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
97 * two 512B buffers, you would do it like so:
98 *
99 * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
100 * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
101 *
102 * If you just have one buffer, use ocfs2_hamming_encode_block().
103 */
104u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
105{
106 unsigned int i, b, p = 0;
107
108 BUG_ON(!d);
109
110 /*
111 * b is the hamming code bit number. Hamming code specifies a
112 * 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is
113 * for the algorithm.
114 *
115 * The i++ in the for loop is so that the start offset passed
116 * to ocfs2_find_next_bit_set() is one greater than the previously
117 * found bit.
118 */
119 for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
120 {
121 /*
122 * i is the offset in this hunk, nr + i is the total bit
123 * offset.
124 */
125 b = calc_code_bit(nr + i, &p);
126
127 /*
128 * Data bits in the resultant code are checked by
129 * parity bits that are part of the bit number
130 * representation. Huh?
131 *
132 * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
133 * In other words, the parity bit at position 2^k
134 * checks bits in positions having bit k set in
135 * their binary representation. Conversely, for
136 * instance, bit 13, i.e. 1101(2), is checked by
137 * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
138 * </wikipedia>
139 *
140 * Note that 'k' is the _code_ bit number. 'b' in
141 * our loop.
142 */
143 parity ^= b;
144 }
145
146 /* While the data buffer was treated as little endian, the
147 * return value is in host endian. */
148 return parity;
149}
150
151u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
152{
153 return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
154}
155
156/*
157 * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
158 * offset of the current hunk. If bit to be fixed is not part of the
159 * current hunk, this does nothing.
160 *
161 * If you only have one hunk, use ocfs2_hamming_fix_block().
162 */
163void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
164 unsigned int fix)
165{
166 unsigned int i, b;
167
168 BUG_ON(!d);
169
170 /*
171 * If the bit to fix has an hweight of 1, it's a parity bit. One
172 * busted parity bit is its own error. Nothing to do here.
173 */
174 if (hweight32(fix) == 1)
175 return;
176
177 /*
178 * nr + d is the bit right past the data hunk we're looking at.
179 * If fix after that, nothing to do
180 */
181 if (fix >= calc_code_bit(nr + d, NULL))
182 return;
183
184 /*
185 * nr is the offset in the data hunk we're starting at. Let's
186 * start b at the offset in the code buffer. See hamming_encode()
187 * for a more detailed description of 'b'.
188 */
189 b = calc_code_bit(nr, NULL);
190 /* If the fix is before this hunk, nothing to do */
191 if (fix < b)
192 return;
193
194 for (i = 0; i < d; i++, b++)
195 {
196 /* Skip past parity bits */
197 while (hweight32(b) == 1)
198 b++;
199
200 /*
201 * i is the offset in this data hunk.
202 * nr + i is the offset in the total data buffer.
203 * b is the offset in the total code buffer.
204 *
205 * Thus, when b == fix, bit i in the current hunk needs
206 * fixing.
207 */
208 if (b == fix)
209 {
210 if (ocfs2_test_bit(i, data))
211 ocfs2_clear_bit(i, data);
212 else
213 ocfs2_set_bit(i, data);
214 break;
215 }
216 }
217}
218
/*
 * Convenience wrapper for a single buffer: apply 'fix' to 'blocksize'
 * bytes at 'data', treating the buffer as one hunk at bit offset zero.
 */
void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
			     unsigned int fix)
{
	unsigned int nbits = blocksize * 8;

	ocfs2_hamming_fix(data, nbits, 0, fix);
}
224
225/*
226 * This function generates check information for a block.
227 * data is the block to be checked. bc is a pointer to the
228 * ocfs2_block_check structure describing the crc32 and the ecc.
229 *
230 * bc should be a pointer inside data, as the function will
231 * take care of zeroing it before calculating the check information. If
232 * bc does not point inside data, the caller must make sure any inline
233 * ocfs2_block_check structures are zeroed.
234 *
235 * The data buffer must be in on-disk endian (little endian for ocfs2).
236 * bc will be filled with little-endian values and will be ready to go to
237 * disk.
238 */
239void ocfs2_block_check_compute(void *data, size_t blocksize,
240 struct ocfs2_block_check *bc)
241{
242 u32 crc;
243 u32 ecc;
244
245 memset(bc, 0, sizeof(struct ocfs2_block_check));
246
247 crc = crc32_le(~0, data, blocksize);
248 ecc = ocfs2_hamming_encode_block(data, blocksize);
249
250 /*
251 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
252 * larger than 16 bits.
253 */
254 BUG_ON(ecc > USHORT_MAX);
255
256 bc->bc_crc32e = cpu_to_le32(crc);
257 bc->bc_ecc = cpu_to_le16((u16)ecc);
258}
259
260/*
261 * This function validates existing check information. Like _compute,
262 * the function will take care of zeroing bc before calculating check codes.
263 * If bc is not a pointer inside data, the caller must have zeroed any
264 * inline ocfs2_block_check structures.
265 *
266 * Again, the data passed in should be the on-disk endian.
267 */
268int ocfs2_block_check_validate(void *data, size_t blocksize,
269 struct ocfs2_block_check *bc)
270{
271 int rc = 0;
272 struct ocfs2_block_check check;
273 u32 crc, ecc;
274
275 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
276 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
277
278 memset(bc, 0, sizeof(struct ocfs2_block_check));
279
280 /* Fast path - if the crc32 validates, we're good to go */
281 crc = crc32_le(~0, data, blocksize);
282 if (crc == check.bc_crc32e)
283 goto out;
284
285 mlog(ML_ERROR,
286 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
287 (unsigned int)check.bc_crc32e, (unsigned int)crc);
288
289 /* Ok, try ECC fixups */
290 ecc = ocfs2_hamming_encode_block(data, blocksize);
291 ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
292
293 /* And check the crc32 again */
294 crc = crc32_le(~0, data, blocksize);
295 if (crc == check.bc_crc32e)
296 goto out;
297
298 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
299 (unsigned int)check.bc_crc32e, (unsigned int)crc);
300
301 rc = -EIO;
302
303out:
304 bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
305 bc->bc_ecc = cpu_to_le16(check.bc_ecc);
306
307 return rc;
308}
309
310/*
311 * This function generates check information for a list of buffer_heads.
312 * bhs is the blocks to be checked. bc is a pointer to the
313 * ocfs2_block_check structure describing the crc32 and the ecc.
314 *
315 * bc should be a pointer inside data, as the function will
316 * take care of zeroing it before calculating the check information. If
317 * bc does not point inside data, the caller must make sure any inline
318 * ocfs2_block_check structures are zeroed.
319 *
320 * The data buffer must be in on-disk endian (little endian for ocfs2).
321 * bc will be filled with little-endian values and will be ready to go to
322 * disk.
323 */
324void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
325 struct ocfs2_block_check *bc)
326{
327 int i;
328 u32 crc, ecc;
329
330 BUG_ON(nr < 0);
331
332 if (!nr)
333 return;
334
335 memset(bc, 0, sizeof(struct ocfs2_block_check));
336
337 for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
338 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
339 /*
340 * The number of bits in a buffer is obviously b_size*8.
341 * The offset of this buffer is b_size*i, so the bit offset
342 * of this buffer is b_size*8*i.
343 */
344 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
345 bhs[i]->b_size * 8,
346 bhs[i]->b_size * 8 * i);
347 }
348
349 /*
350 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
351 * larger than 16 bits.
352 */
353 BUG_ON(ecc > USHORT_MAX);
354
355 bc->bc_crc32e = cpu_to_le32(crc);
356 bc->bc_ecc = cpu_to_le16((u16)ecc);
357}
358
359/*
360 * This function validates existing check information on a list of
361 * buffer_heads. Like _compute_bhs, the function will take care of
362 * zeroing bc before calculating check codes. If bc is not a pointer
363 * inside data, the caller must have zeroed any inline
364 * ocfs2_block_check structures.
365 *
366 * Again, the data passed in should be the on-disk endian.
367 */
368int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
369 struct ocfs2_block_check *bc)
370{
371 int i, rc = 0;
372 struct ocfs2_block_check check;
373 u32 crc, ecc, fix;
374
375 BUG_ON(nr < 0);
376
377 if (!nr)
378 return 0;
379
380 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
381 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
382
383 memset(bc, 0, sizeof(struct ocfs2_block_check));
384
385 /* Fast path - if the crc32 validates, we're good to go */
386 for (i = 0, crc = ~0; i < nr; i++)
387 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
388 if (crc == check.bc_crc32e)
389 goto out;
390
391 mlog(ML_ERROR,
392 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
393 (unsigned int)check.bc_crc32e, (unsigned int)crc);
394
395 /* Ok, try ECC fixups */
396 for (i = 0, ecc = 0; i < nr; i++) {
397 /*
398 * The number of bits in a buffer is obviously b_size*8.
399 * The offset of this buffer is b_size*i, so the bit offset
400 * of this buffer is b_size*8*i.
401 */
402 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
403 bhs[i]->b_size * 8,
404 bhs[i]->b_size * 8 * i);
405 }
406 fix = ecc ^ check.bc_ecc;
407 for (i = 0; i < nr; i++) {
408 /*
409 * Try the fix against each buffer. It will only affect
410 * one of them.
411 */
412 ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
413 bhs[i]->b_size * 8 * i, fix);
414 }
415
416 /* And check the crc32 again */
417 for (i = 0, crc = ~0; i < nr; i++)
418 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
419 if (crc == check.bc_crc32e)
420 goto out;
421
422 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
423 (unsigned int)check.bc_crc32e, (unsigned int)crc);
424
425 rc = -EIO;
426
427out:
428 bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
429 bc->bc_ecc = cpu_to_le16(check.bc_ecc);
430
431 return rc;
432}
433
434/*
435 * These are the main API. They check the superblock flag before
436 * calling the underlying operations.
437 *
438 * They expect the buffer(s) to be in disk format.
439 */
440void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
441 struct ocfs2_block_check *bc)
442{
443 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
444 ocfs2_block_check_compute(data, sb->s_blocksize, bc);
445}
446
447int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
448 struct ocfs2_block_check *bc)
449{
450 int rc = 0;
451
452 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
453 rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
454
455 return rc;
456}
457
/*
 * Compute check information across a list of nr buffer_heads, but only
 * if ocfs2_meta_ecc() reports it enabled for this filesystem.  Otherwise
 * bc is left untouched.
 */
void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
				struct buffer_head **bhs, int nr,
				struct ocfs2_block_check *bc)
{
	if (!ocfs2_meta_ecc(OCFS2_SB(sb)))
		return;

	ocfs2_block_check_compute_bhs(bhs, nr, bc);
}
465
/*
 * Validate check information across a list of nr buffer_heads.  Returns
 * 0 without looking at the data when ocfs2_meta_ecc() reports the
 * feature disabled; otherwise returns whatever
 * ocfs2_block_check_validate_bhs() returns.
 */
int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
				struct buffer_head **bhs, int nr,
				struct ocfs2_block_check *bc)
{
	if (!ocfs2_meta_ecc(OCFS2_SB(sb)))
		return 0;

	return ocfs2_block_check_validate_bhs(bhs, nr, bc);
}
477
diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
new file mode 100644
index 000000000000..70ec3feda32f
--- /dev/null
+++ b/fs/ocfs2/blockcheck.h
@@ -0,0 +1,82 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * blockcheck.h
5 *
6 * Checksum and ECC codes for the OCFS2 userspace library.
7 *
8 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License, version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_BLOCKCHECK_H
21#define OCFS2_BLOCKCHECK_H
22
23
24/* High level block API */
25void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
26 struct ocfs2_block_check *bc);
27int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
28 struct ocfs2_block_check *bc);
29void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
30 struct buffer_head **bhs, int nr,
31 struct ocfs2_block_check *bc);
32int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
33 struct buffer_head **bhs, int nr,
34 struct ocfs2_block_check *bc);
35
36/* Lower level API */
37void ocfs2_block_check_compute(void *data, size_t blocksize,
38 struct ocfs2_block_check *bc);
39int ocfs2_block_check_validate(void *data, size_t blocksize,
40 struct ocfs2_block_check *bc);
41void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
42 struct ocfs2_block_check *bc);
43int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
44 struct ocfs2_block_check *bc);
45
46/*
47 * Hamming code functions
48 */
49
50/*
51 * Encoding hamming code parity bits for a buffer.
52 *
53 * This is the low level encoder function. It can be called across
54 * multiple hunks just like the crc32 code. 'd' is the number of bits
55 * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
56 * two 512B buffers, you would do it like so:
57 *
58 * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
59 * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
60 *
61 * If you just have one buffer, use ocfs2_hamming_encode_block().
62 */
63u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d,
64 unsigned int nr);
65/*
66 * Fix a buffer with a bit error. The 'fix' is the original parity
67 * xor'd with the parity calculated now.
68 *
69 * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
70 * offset of the current hunk. If bit to be fixed is not part of the
71 * current hunk, this does nothing.
72 *
73 * If you only have one buffer, use ocfs2_hamming_fix_block().
74 */
75void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
76 unsigned int fix);
77
78/* Convenience wrappers for a single buffer of data */
79extern u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize);
80extern void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
81 unsigned int fix);
82#endif
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 3a178ec48d7c..15c8e6deee2e 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -39,6 +39,18 @@
39 39
40#include "buffer_head_io.h" 40#include "buffer_head_io.h"
41 41
42/*
43 * Bits on bh->b_state used by ocfs2.
44 *
45 * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
46 */
47enum ocfs2_state_bits {
48 BH_NeedsValidate = BH_JBDPrivateStart,
49};
50
51/* Expand the magic b_state functions */
52BUFFER_FNS(NeedsValidate, needs_validate);
53
42int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, 54int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
43 struct inode *inode) 55 struct inode *inode)
44{ 56{
@@ -166,7 +178,9 @@ bail:
166} 178}
167 179
168int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, 180int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
169 struct buffer_head *bhs[], int flags) 181 struct buffer_head *bhs[], int flags,
182 int (*validate)(struct super_block *sb,
183 struct buffer_head *bh))
170{ 184{
171 int status = 0; 185 int status = 0;
172 int i, ignore_cache = 0; 186 int i, ignore_cache = 0;
@@ -298,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
298 312
299 clear_buffer_uptodate(bh); 313 clear_buffer_uptodate(bh);
300 get_bh(bh); /* for end_buffer_read_sync() */ 314 get_bh(bh); /* for end_buffer_read_sync() */
315 if (validate)
316 set_buffer_needs_validate(bh);
301 bh->b_end_io = end_buffer_read_sync; 317 bh->b_end_io = end_buffer_read_sync;
302 submit_bh(READ, bh); 318 submit_bh(READ, bh);
303 continue; 319 continue;
@@ -328,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
328 bhs[i] = NULL; 344 bhs[i] = NULL;
329 continue; 345 continue;
330 } 346 }
347
348 if (buffer_needs_validate(bh)) {
349 /* We never set NeedsValidate if the
350 * buffer was held by the journal, so
351 * that better not have changed */
352 BUG_ON(buffer_jbd(bh));
353 clear_buffer_needs_validate(bh);
354 status = validate(inode->i_sb, bh);
355 if (status) {
356 put_bh(bh);
357 bhs[i] = NULL;
358 continue;
359 }
360 }
331 } 361 }
332 362
333 /* Always set the buffer in the cache, even if it was 363 /* Always set the buffer in the cache, even if it was
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 75e1dcb1ade7..c75d682dadd8 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,21 +31,24 @@
31void ocfs2_end_buffer_io_sync(struct buffer_head *bh, 31void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
32 int uptodate); 32 int uptodate);
33 33
34static inline int ocfs2_read_block(struct inode *inode,
35 u64 off,
36 struct buffer_head **bh);
37
38int ocfs2_write_block(struct ocfs2_super *osb, 34int ocfs2_write_block(struct ocfs2_super *osb,
39 struct buffer_head *bh, 35 struct buffer_head *bh,
40 struct inode *inode); 36 struct inode *inode);
41int ocfs2_read_blocks(struct inode *inode,
42 u64 block,
43 int nr,
44 struct buffer_head *bhs[],
45 int flags);
46int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, 37int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
47 unsigned int nr, struct buffer_head *bhs[]); 38 unsigned int nr, struct buffer_head *bhs[]);
48 39
40/*
41 * If not NULL, validate() will be called on a buffer that is freshly
42 * read from disk. It will not be called if the buffer was in cache.
43 * Note that if validate() is being used for this buffer, it needs to
44 * be set even for a READAHEAD call, as it marks the buffer for later
45 * validation.
46 */
47int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
48 struct buffer_head *bhs[], int flags,
49 int (*validate)(struct super_block *sb,
50 struct buffer_head *bh));
51
49int ocfs2_write_super_or_backup(struct ocfs2_super *osb, 52int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
50 struct buffer_head *bh); 53 struct buffer_head *bh);
51 54
@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
53#define OCFS2_BH_READAHEAD 8 56#define OCFS2_BH_READAHEAD 8
54 57
55static inline int ocfs2_read_block(struct inode *inode, u64 off, 58static inline int ocfs2_read_block(struct inode *inode, u64 off,
56 struct buffer_head **bh) 59 struct buffer_head **bh,
60 int (*validate)(struct super_block *sb,
61 struct buffer_head *bh))
57{ 62{
58 int status = 0; 63 int status = 0;
59 64
@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
63 goto bail; 68 goto bail;
64 } 69 }
65 70
66 status = ocfs2_read_blocks(inode, off, 1, bh, 0); 71 status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
67 72
68bail: 73bail:
69 return status; 74 return status;
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 6ebaa58e2c03..04697ba7f73e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -854,7 +854,7 @@ static int o2hb_thread(void *data)
854 854
855 while (!kthread_should_stop() && !reg->hr_unclean_stop) { 855 while (!kthread_should_stop() && !reg->hr_unclean_stop) {
856 /* We track the time spent inside 856 /* We track the time spent inside
857 * o2hb_do_disk_heartbeat so that we avoid more then 857 * o2hb_do_disk_heartbeat so that we avoid more than
858 * hr_timeout_ms between disk writes. On busy systems 858 * hr_timeout_ms between disk writes. On busy systems
859 * this should result in a heartbeat which is less 859 * this should result in a heartbeat which is less
860 * likely to time itself out. */ 860 * likely to time itself out. */
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index d8a0cb92cef6..96df5416993e 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -110,6 +110,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
110 define_mask(QUORUM), 110 define_mask(QUORUM),
111 define_mask(EXPORT), 111 define_mask(EXPORT),
112 define_mask(XATTR), 112 define_mask(XATTR),
113 define_mask(QUOTA),
113 define_mask(ERROR), 114 define_mask(ERROR),
114 define_mask(NOTICE), 115 define_mask(NOTICE),
115 define_mask(KTHREAD), 116 define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 57670c680471..7e72a81bc2d4 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ 113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ 114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
115#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 115#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
116#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116/* bits that are infrequently given and frequently matched in the high word */ 117/* bits that are infrequently given and frequently matched in the high word */
117#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 118#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
118#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 119#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index 52276c02f710..f8424874fa07 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -304,8 +304,8 @@ static int sc_seq_show(struct seq_file *seq, void *v)
304 * use of it here generates a warning with -Wbitwise */ 304 * use of it here generates a warning with -Wbitwise */
305 seq_printf(seq, "%p:\n" 305 seq_printf(seq, "%p:\n"
306 " krefs: %d\n" 306 " krefs: %d\n"
307 " sock: %u.%u.%u.%u:%u -> " 307 " sock: %pI4:%u -> "
308 "%u.%u.%u.%u:%u\n" 308 "%pI4:%u\n"
309 " remote node: %s\n" 309 " remote node: %s\n"
310 " page off: %zu\n" 310 " page off: %zu\n"
311 " handshake ok: %u\n" 311 " handshake ok: %u\n"
@@ -319,8 +319,8 @@ static int sc_seq_show(struct seq_file *seq, void *v)
319 " func type: %u\n", 319 " func type: %u\n",
320 sc, 320 sc,
321 atomic_read(&sc->sc_kref.refcount), 321 atomic_read(&sc->sc_kref.refcount),
322 NIPQUAD(saddr), inet ? ntohs(sport) : 0, 322 &saddr, inet ? ntohs(sport) : 0,
323 NIPQUAD(daddr), inet ? ntohs(dport) : 0, 323 &daddr, inet ? ntohs(dport) : 0,
324 sc->sc_node->nd_name, 324 sc->sc_node->nd_name,
325 sc->sc_page_off, 325 sc->sc_page_off,
326 sc->sc_handshake_ok, 326 sc->sc_handshake_ok,
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 816a3f61330c..70e8fa9e2539 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -250,7 +250,7 @@ static ssize_t o2nm_node_ipv4_port_write(struct o2nm_node *node,
250 250
251static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page) 251static ssize_t o2nm_node_ipv4_address_read(struct o2nm_node *node, char *page)
252{ 252{
253 return sprintf(page, "%u.%u.%u.%u\n", NIPQUAD(node->nd_ipv4_address)); 253 return sprintf(page, "%pI4\n", &node->nd_ipv4_address);
254} 254}
255 255
256static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node, 256static ssize_t o2nm_node_ipv4_address_write(struct o2nm_node *node,
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2bcf706d9dd3..9fbe849f6344 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1597,8 +1597,8 @@ static void o2net_start_connect(struct work_struct *work)
1597 ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr, 1597 ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr,
1598 sizeof(myaddr)); 1598 sizeof(myaddr));
1599 if (ret) { 1599 if (ret) {
1600 mlog(ML_ERROR, "bind failed with %d at address %u.%u.%u.%u\n", 1600 mlog(ML_ERROR, "bind failed with %d at address %pI4\n",
1601 ret, NIPQUAD(mynode->nd_ipv4_address)); 1601 ret, &mynode->nd_ipv4_address);
1602 goto out; 1602 goto out;
1603 } 1603 }
1604 1604
@@ -1790,17 +1790,16 @@ static int o2net_accept_one(struct socket *sock)
1790 1790
1791 node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); 1791 node = o2nm_get_node_by_ip(sin.sin_addr.s_addr);
1792 if (node == NULL) { 1792 if (node == NULL) {
1793 mlog(ML_NOTICE, "attempt to connect from unknown node at " 1793 mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n",
1794 "%u.%u.%u.%u:%d\n", NIPQUAD(sin.sin_addr.s_addr), 1794 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
1795 ntohs(sin.sin_port));
1796 ret = -EINVAL; 1795 ret = -EINVAL;
1797 goto out; 1796 goto out;
1798 } 1797 }
1799 1798
1800 if (o2nm_this_node() > node->nd_num) { 1799 if (o2nm_this_node() > node->nd_num) {
1801 mlog(ML_NOTICE, "unexpected connect attempted from a lower " 1800 mlog(ML_NOTICE, "unexpected connect attempted from a lower "
1802 "numbered node '%s' at " "%u.%u.%u.%u:%d with num %u\n", 1801 "numbered node '%s' at " "%pI4:%d with num %u\n",
1803 node->nd_name, NIPQUAD(sin.sin_addr.s_addr), 1802 node->nd_name, &sin.sin_addr.s_addr,
1804 ntohs(sin.sin_port), node->nd_num); 1803 ntohs(sin.sin_port), node->nd_num);
1805 ret = -EINVAL; 1804 ret = -EINVAL;
1806 goto out; 1805 goto out;
@@ -1810,8 +1809,8 @@ static int o2net_accept_one(struct socket *sock)
1810 * and tries to connect before we see their heartbeat */ 1809 * and tries to connect before we see their heartbeat */
1811 if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) { 1810 if (!o2hb_check_node_heartbeating_from_callback(node->nd_num)) {
1812 mlog(ML_CONN, "attempt to connect from node '%s' at " 1811 mlog(ML_CONN, "attempt to connect from node '%s' at "
1813 "%u.%u.%u.%u:%d but it isn't heartbeating\n", 1812 "%pI4:%d but it isn't heartbeating\n",
1814 node->nd_name, NIPQUAD(sin.sin_addr.s_addr), 1813 node->nd_name, &sin.sin_addr.s_addr,
1815 ntohs(sin.sin_port)); 1814 ntohs(sin.sin_port));
1816 ret = -EINVAL; 1815 ret = -EINVAL;
1817 goto out; 1816 goto out;
@@ -1827,8 +1826,8 @@ static int o2net_accept_one(struct socket *sock)
1827 spin_unlock(&nn->nn_lock); 1826 spin_unlock(&nn->nn_lock);
1828 if (ret) { 1827 if (ret) {
1829 mlog(ML_NOTICE, "attempt to connect from node '%s' at " 1828 mlog(ML_NOTICE, "attempt to connect from node '%s' at "
1830 "%u.%u.%u.%u:%d but it already has an open connection\n", 1829 "%pI4:%d but it already has an open connection\n",
1831 node->nd_name, NIPQUAD(sin.sin_addr.s_addr), 1830 node->nd_name, &sin.sin_addr.s_addr,
1832 ntohs(sin.sin_port)); 1831 ntohs(sin.sin_port));
1833 goto out; 1832 goto out;
1834 } 1833 }
@@ -1924,15 +1923,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
1924 sock->sk->sk_reuse = 1; 1923 sock->sk->sk_reuse = 1;
1925 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); 1924 ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
1926 if (ret < 0) { 1925 if (ret < 0) {
1927 mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, " 1926 mlog(ML_ERROR, "unable to bind socket at %pI4:%u, "
1928 "ret=%d\n", NIPQUAD(addr), ntohs(port), ret); 1927 "ret=%d\n", &addr, ntohs(port), ret);
1929 goto out; 1928 goto out;
1930 } 1929 }
1931 1930
1932 ret = sock->ops->listen(sock, 64); 1931 ret = sock->ops->listen(sock, 64);
1933 if (ret < 0) { 1932 if (ret < 0) {
1934 mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n", 1933 mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n",
1935 NIPQUAD(addr), ntohs(port), ret); 1934 &addr, ntohs(port), ret);
1936 } 1935 }
1937 1936
1938out: 1937out:
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 026e6eb85187..f2c4098cf337 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -40,6 +40,7 @@
40#include <linux/types.h> 40#include <linux/types.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/quotaops.h>
43 44
44#define MLOG_MASK_PREFIX ML_NAMEI 45#define MLOG_MASK_PREFIX ML_NAMEI
45#include <cluster/masklog.h> 46#include <cluster/masklog.h>
@@ -47,6 +48,7 @@
47#include "ocfs2.h" 48#include "ocfs2.h"
48 49
49#include "alloc.h" 50#include "alloc.h"
51#include "blockcheck.h"
50#include "dir.h" 52#include "dir.h"
51#include "dlmglue.h" 53#include "dlmglue.h"
52#include "extent_map.h" 54#include "extent_map.h"
@@ -82,47 +84,72 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
82 struct ocfs2_alloc_context *meta_ac, 84 struct ocfs2_alloc_context *meta_ac,
83 struct buffer_head **new_bh); 85 struct buffer_head **new_bh);
84 86
85static struct buffer_head *ocfs2_bread(struct inode *inode, 87/*
86 int block, int *err, int reada) 88 * These are distinct checks because future versions of the file system will
89 * want to have a trailing dirent structure independent of indexing.
90 */
91static int ocfs2_dir_has_trailer(struct inode *dir)
87{ 92{
88 struct buffer_head *bh = NULL; 93 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
89 int tmperr; 94 return 0;
90 u64 p_blkno;
91 int readflags = 0;
92 95
93 if (reada) 96 return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
94 readflags |= OCFS2_BH_READAHEAD; 97}
95 98
96 if (((u64)block << inode->i_sb->s_blocksize_bits) >= 99static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
97 i_size_read(inode)) { 100{
98 BUG_ON(!reada); 101 return ocfs2_meta_ecc(osb);
99 return NULL; 102}
100 }
101 103
102 down_read(&OCFS2_I(inode)->ip_alloc_sem); 104static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
103 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, 105{
104 NULL); 106 return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
105 up_read(&OCFS2_I(inode)->ip_alloc_sem); 107}
106 if (tmperr < 0) {
107 mlog_errno(tmperr);
108 goto fail;
109 }
110 108
111 tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); 109#define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
112 if (tmperr < 0)
113 goto fail;
114 110
115 tmperr = 0; 111/* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
112 * them more consistent? */
113struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
114 void *data)
115{
116 char *p = data;
116 117
117 *err = 0; 118 p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
118 return bh; 119 return (struct ocfs2_dir_block_trailer *)p;
120}
119 121
120fail: 122/*
121 brelse(bh); 123 * XXX: This is executed once on every dirent. We should consider optimizing
122 bh = NULL; 124 * it.
125 */
126static int ocfs2_skip_dir_trailer(struct inode *dir,
127 struct ocfs2_dir_entry *de,
128 unsigned long offset,
129 unsigned long blklen)
130{
131 unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
123 132
124 *err = -EIO; 133 if (!ocfs2_dir_has_trailer(dir))
125 return NULL; 134 return 0;
135
136 if (offset != toff)
137 return 0;
138
139 return 1;
140}
141
142static void ocfs2_init_dir_trailer(struct inode *inode,
143 struct buffer_head *bh)
144{
145 struct ocfs2_dir_block_trailer *trailer;
146
147 trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
148 strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
149 trailer->db_compat_rec_len =
150 cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
151 trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
152 trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
126} 153}
127 154
128/* 155/*
@@ -231,7 +258,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
231 struct ocfs2_dinode *di; 258 struct ocfs2_dinode *di;
232 struct ocfs2_inline_data *data; 259 struct ocfs2_inline_data *data;
233 260
234 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); 261 ret = ocfs2_read_inode_block(dir, &di_bh);
235 if (ret) { 262 if (ret) {
236 mlog_errno(ret); 263 mlog_errno(ret);
237 goto out; 264 goto out;
@@ -250,6 +277,108 @@ out:
250 return NULL; 277 return NULL;
251} 278}
252 279
280static int ocfs2_validate_dir_block(struct super_block *sb,
281 struct buffer_head *bh)
282{
283 int rc;
284 struct ocfs2_dir_block_trailer *trailer =
285 ocfs2_trailer_from_bh(bh, sb);
286
287
288 /*
289 * We don't validate dirents here, that's handled
290 * in-place when the code walks them.
291 */
292 mlog(0, "Validating dirblock %llu\n",
293 (unsigned long long)bh->b_blocknr);
294
295 BUG_ON(!buffer_uptodate(bh));
296
297 /*
298 * If the ecc fails, we return the error but otherwise
299 * leave the filesystem running. We know any error is
300 * local to this block.
301 *
302 * Note that we are safe to call this even if the directory
303 * doesn't have a trailer. Filesystems without metaecc will do
304 * nothing, and filesystems with it will have one.
305 */
306 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
307 if (rc)
308 mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
309 (unsigned long long)bh->b_blocknr);
310
311 return rc;
312}
313
314/*
315 * This function forces all errors to -EIO for consistency with its
316 * predecessor, ocfs2_bread(). We haven't audited what returning the
317 * real error codes would do to callers. We log the real codes with
318 * mlog_errno() before we squash them.
319 */
320static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
321 struct buffer_head **bh, int flags)
322{
323 int rc = 0;
324 struct buffer_head *tmp = *bh;
325 struct ocfs2_dir_block_trailer *trailer;
326
327 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
328 ocfs2_validate_dir_block);
329 if (rc) {
330 mlog_errno(rc);
331 goto out;
332 }
333
334 /*
335 * We check the trailer here rather than in
336 * ocfs2_validate_dir_block() because that function doesn't have
337 * the inode to test.
338 */
339 if (!(flags & OCFS2_BH_READAHEAD) &&
340 ocfs2_dir_has_trailer(inode)) {
341 trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
342 if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
343 rc = -EINVAL;
344 ocfs2_error(inode->i_sb,
345 "Invalid dirblock #%llu: "
346 "signature = %.*s\n",
347 (unsigned long long)tmp->b_blocknr, 7,
348 trailer->db_signature);
349 goto out;
350 }
351 if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
352 rc = -EINVAL;
353 ocfs2_error(inode->i_sb,
354 "Directory block #%llu has an invalid "
355 "db_blkno of %llu",
356 (unsigned long long)tmp->b_blocknr,
357 (unsigned long long)le64_to_cpu(trailer->db_blkno));
358 goto out;
359 }
360 if (le64_to_cpu(trailer->db_parent_dinode) !=
361 OCFS2_I(inode)->ip_blkno) {
362 rc = -EINVAL;
363 ocfs2_error(inode->i_sb,
364 "Directory block #%llu on dinode "
365 "#%llu has an invalid parent_dinode "
366 "of %llu",
367 (unsigned long long)tmp->b_blocknr,
368 (unsigned long long)OCFS2_I(inode)->ip_blkno,
369 (unsigned long long)le64_to_cpu(trailer->db_blkno));
370 goto out;
371 }
372 }
373
374 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
375 if (!*bh)
376 *bh = tmp;
377
378out:
379 return rc ? -EIO : 0;
380}
381
253static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, 382static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
254 struct inode *dir, 383 struct inode *dir,
255 struct ocfs2_dir_entry **res_dir) 384 struct ocfs2_dir_entry **res_dir)
@@ -296,15 +425,17 @@ restart:
296 } 425 }
297 num++; 426 num++;
298 427
299 bh = ocfs2_bread(dir, b++, &err, 1); 428 bh = NULL;
429 err = ocfs2_read_dir_block(dir, b++, &bh,
430 OCFS2_BH_READAHEAD);
300 bh_use[ra_max] = bh; 431 bh_use[ra_max] = bh;
301 } 432 }
302 } 433 }
303 if ((bh = bh_use[ra_ptr++]) == NULL) 434 if ((bh = bh_use[ra_ptr++]) == NULL)
304 goto next; 435 goto next;
305 if (ocfs2_read_block(dir, block, &bh)) { 436 if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
306 /* read error, skip block & hope for the best. 437 /* read error, skip block & hope for the best.
307 * ocfs2_read_block() has released the bh. */ 438 * ocfs2_read_dir_block() has released the bh. */
308 ocfs2_error(dir->i_sb, "reading directory %llu, " 439 ocfs2_error(dir->i_sb, "reading directory %llu, "
309 "offset %lu\n", 440 "offset %lu\n",
310 (unsigned long long)OCFS2_I(dir)->ip_blkno, 441 (unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -381,14 +512,18 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
381 struct inode *new_entry_inode) 512 struct inode *new_entry_inode)
382{ 513{
383 int ret; 514 int ret;
515 ocfs2_journal_access_func access = ocfs2_journal_access_db;
384 516
385 /* 517 /*
386 * The same code works fine for both inline-data and extent 518 * The same code works fine for both inline-data and extent
387 * based directories, so no need to split this up. 519 * based directories, so no need to split this up. The only
520 * difference is the journal_access function.
388 */ 521 */
389 522
390 ret = ocfs2_journal_access(handle, dir, de_bh, 523 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
391 OCFS2_JOURNAL_ACCESS_WRITE); 524 access = ocfs2_journal_access_di;
525
526 ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
392 if (ret) { 527 if (ret) {
393 mlog_errno(ret); 528 mlog_errno(ret);
394 goto out; 529 goto out;
@@ -410,9 +545,13 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
410{ 545{
411 struct ocfs2_dir_entry *de, *pde; 546 struct ocfs2_dir_entry *de, *pde;
412 int i, status = -ENOENT; 547 int i, status = -ENOENT;
548 ocfs2_journal_access_func access = ocfs2_journal_access_db;
413 549
414 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); 550 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
415 551
552 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
553 access = ocfs2_journal_access_di;
554
416 i = 0; 555 i = 0;
417 pde = NULL; 556 pde = NULL;
418 de = (struct ocfs2_dir_entry *) first_de; 557 de = (struct ocfs2_dir_entry *) first_de;
@@ -423,8 +562,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
423 goto bail; 562 goto bail;
424 } 563 }
425 if (de == de_del) { 564 if (de == de_del) {
426 status = ocfs2_journal_access(handle, dir, bh, 565 status = access(handle, dir, bh,
427 OCFS2_JOURNAL_ACCESS_WRITE); 566 OCFS2_JOURNAL_ACCESS_WRITE);
428 if (status < 0) { 567 if (status < 0) {
429 status = -EIO; 568 status = -EIO;
430 mlog_errno(status); 569 mlog_errno(status);
@@ -458,7 +597,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
458 struct ocfs2_dinode *di; 597 struct ocfs2_dinode *di;
459 struct ocfs2_inline_data *data; 598 struct ocfs2_inline_data *data;
460 599
461 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); 600 ret = ocfs2_read_inode_block(dir, &di_bh);
462 if (ret) { 601 if (ret) {
463 mlog_errno(ret); 602 mlog_errno(ret);
464 goto out; 603 goto out;
@@ -576,6 +715,16 @@ int __ocfs2_add_entry(handle_t *handle,
576 goto bail; 715 goto bail;
577 } 716 }
578 717
718 /* We're guaranteed that we should have space, so we
719 * can't possibly have hit the trailer...right? */
720 mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
721 "Hit dir trailer trying to insert %.*s "
722 "(namelen %d) into directory %llu. "
723 "offset is %lu, trailer offset is %d\n",
724 namelen, name, namelen,
725 (unsigned long long)parent_fe_bh->b_blocknr,
726 offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
727
579 if (ocfs2_dirent_would_fit(de, rec_len)) { 728 if (ocfs2_dirent_would_fit(de, rec_len)) {
580 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 729 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
581 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); 730 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
@@ -584,8 +733,14 @@ int __ocfs2_add_entry(handle_t *handle,
584 goto bail; 733 goto bail;
585 } 734 }
586 735
587 status = ocfs2_journal_access(handle, dir, insert_bh, 736 if (insert_bh == parent_fe_bh)
588 OCFS2_JOURNAL_ACCESS_WRITE); 737 status = ocfs2_journal_access_di(handle, dir,
738 insert_bh,
739 OCFS2_JOURNAL_ACCESS_WRITE);
740 else
741 status = ocfs2_journal_access_db(handle, dir,
742 insert_bh,
743 OCFS2_JOURNAL_ACCESS_WRITE);
589 /* By now the buffer is marked for journaling */ 744 /* By now the buffer is marked for journaling */
590 offset += le16_to_cpu(de->rec_len); 745 offset += le16_to_cpu(de->rec_len);
591 if (le64_to_cpu(de->inode)) { 746 if (le64_to_cpu(de->inode)) {
@@ -611,6 +766,7 @@ int __ocfs2_add_entry(handle_t *handle,
611 retval = 0; 766 retval = 0;
612 goto bail; 767 goto bail;
613 } 768 }
769
614 offset += le16_to_cpu(de->rec_len); 770 offset += le16_to_cpu(de->rec_len);
615 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); 771 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
616 } 772 }
@@ -636,7 +792,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
636 struct ocfs2_inline_data *data; 792 struct ocfs2_inline_data *data;
637 struct ocfs2_dir_entry *de; 793 struct ocfs2_dir_entry *de;
638 794
639 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 795 ret = ocfs2_read_inode_block(inode, &di_bh);
640 if (ret) { 796 if (ret) {
641 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", 797 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
642 (unsigned long long)OCFS2_I(inode)->ip_blkno); 798 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -724,7 +880,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
724 int i, stored; 880 int i, stored;
725 struct buffer_head * bh, * tmp; 881 struct buffer_head * bh, * tmp;
726 struct ocfs2_dir_entry * de; 882 struct ocfs2_dir_entry * de;
727 int err;
728 struct super_block * sb = inode->i_sb; 883 struct super_block * sb = inode->i_sb;
729 unsigned int ra_sectors = 16; 884 unsigned int ra_sectors = 16;
730 885
@@ -735,12 +890,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
735 890
736 while (!error && !stored && *f_pos < i_size_read(inode)) { 891 while (!error && !stored && *f_pos < i_size_read(inode)) {
737 blk = (*f_pos) >> sb->s_blocksize_bits; 892 blk = (*f_pos) >> sb->s_blocksize_bits;
738 bh = ocfs2_bread(inode, blk, &err, 0); 893 if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
739 if (!bh) { 894 /* Skip the corrupt dirblock and keep trying */
740 mlog(ML_ERROR,
741 "directory #%llu contains a hole at offset %lld\n",
742 (unsigned long long)OCFS2_I(inode)->ip_blkno,
743 *f_pos);
744 *f_pos += sb->s_blocksize - offset; 895 *f_pos += sb->s_blocksize - offset;
745 continue; 896 continue;
746 } 897 }
@@ -754,8 +905,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
754 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { 905 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
755 for (i = ra_sectors >> (sb->s_blocksize_bits - 9); 906 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
756 i > 0; i--) { 907 i > 0; i--) {
757 tmp = ocfs2_bread(inode, ++blk, &err, 1); 908 tmp = NULL;
758 brelse(tmp); 909 if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
910 OCFS2_BH_READAHEAD))
911 brelse(tmp);
759 } 912 }
760 last_ra_blk = blk; 913 last_ra_blk = blk;
761 ra_sectors = 8; 914 ra_sectors = 8;
@@ -828,6 +981,7 @@ revalidate:
828 } 981 }
829 offset = 0; 982 offset = 0;
830 brelse(bh); 983 brelse(bh);
984 bh = NULL;
831 } 985 }
832 986
833 stored = 0; 987 stored = 0;
@@ -1050,9 +1204,15 @@ int ocfs2_empty_dir(struct inode *inode)
1050 return !priv.seen_other; 1204 return !priv.seen_other;
1051} 1205}
1052 1206
1053static void ocfs2_fill_initial_dirents(struct inode *inode, 1207/*
1054 struct inode *parent, 1208 * Fills "." and ".." dirents in a new directory block. Returns dirent for
1055 char *start, unsigned int size) 1209 * "..", which might be used during creation of a directory with a trailing
1210 * header. It is otherwise safe to ignore the return code.
1211 */
1212static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
1213 struct inode *parent,
1214 char *start,
1215 unsigned int size)
1056{ 1216{
1057 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start; 1217 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
1058 1218
@@ -1069,6 +1229,8 @@ static void ocfs2_fill_initial_dirents(struct inode *inode,
1069 de->name_len = 2; 1229 de->name_len = 2;
1070 strcpy(de->name, ".."); 1230 strcpy(de->name, "..");
1071 ocfs2_set_de_type(de, S_IFDIR); 1231 ocfs2_set_de_type(de, S_IFDIR);
1232
1233 return de;
1072} 1234}
1073 1235
1074/* 1236/*
@@ -1086,8 +1248,8 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
1086 struct ocfs2_inline_data *data = &di->id2.i_data; 1248 struct ocfs2_inline_data *data = &di->id2.i_data;
1087 unsigned int size = le16_to_cpu(data->id_count); 1249 unsigned int size = le16_to_cpu(data->id_count);
1088 1250
1089 ret = ocfs2_journal_access(handle, inode, di_bh, 1251 ret = ocfs2_journal_access_di(handle, inode, di_bh,
1090 OCFS2_JOURNAL_ACCESS_WRITE); 1252 OCFS2_JOURNAL_ACCESS_WRITE);
1091 if (ret) { 1253 if (ret) {
1092 mlog_errno(ret); 1254 mlog_errno(ret);
1093 goto out; 1255 goto out;
@@ -1121,10 +1283,15 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1121 struct ocfs2_alloc_context *data_ac) 1283 struct ocfs2_alloc_context *data_ac)
1122{ 1284{
1123 int status; 1285 int status;
1286 unsigned int size = osb->sb->s_blocksize;
1124 struct buffer_head *new_bh = NULL; 1287 struct buffer_head *new_bh = NULL;
1288 struct ocfs2_dir_entry *de;
1125 1289
1126 mlog_entry_void(); 1290 mlog_entry_void();
1127 1291
1292 if (ocfs2_supports_dir_trailer(osb))
1293 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
1294
1128 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, 1295 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
1129 data_ac, NULL, &new_bh); 1296 data_ac, NULL, &new_bh);
1130 if (status < 0) { 1297 if (status < 0) {
@@ -1134,16 +1301,17 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1134 1301
1135 ocfs2_set_new_buffer_uptodate(inode, new_bh); 1302 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1136 1303
1137 status = ocfs2_journal_access(handle, inode, new_bh, 1304 status = ocfs2_journal_access_db(handle, inode, new_bh,
1138 OCFS2_JOURNAL_ACCESS_CREATE); 1305 OCFS2_JOURNAL_ACCESS_CREATE);
1139 if (status < 0) { 1306 if (status < 0) {
1140 mlog_errno(status); 1307 mlog_errno(status);
1141 goto bail; 1308 goto bail;
1142 } 1309 }
1143 memset(new_bh->b_data, 0, osb->sb->s_blocksize); 1310 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
1144 1311
1145 ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, 1312 de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
1146 osb->sb->s_blocksize); 1313 if (ocfs2_supports_dir_trailer(osb))
1314 ocfs2_init_dir_trailer(inode, new_bh);
1147 1315
1148 status = ocfs2_journal_dirty(handle, new_bh); 1316 status = ocfs2_journal_dirty(handle, new_bh);
1149 if (status < 0) { 1317 if (status < 0) {
@@ -1184,13 +1352,27 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
1184 data_ac); 1352 data_ac);
1185} 1353}
1186 1354
1355/*
1356 * Expand rec_len of the rightmost dirent in a directory block so that it
1357 * contains the end of our valid space for dirents. We do this during
1358 * expansion from an inline directory to one with extents. The first dir block
1359 * in that case is taken from the inline data portion of the inode block.
1360 *
1361 * We add the dir trailer if this filesystem wants it.
1362 */
1187static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, 1363static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
1188 unsigned int new_size) 1364 struct super_block *sb)
1189{ 1365{
1190 struct ocfs2_dir_entry *de; 1366 struct ocfs2_dir_entry *de;
1191 struct ocfs2_dir_entry *prev_de; 1367 struct ocfs2_dir_entry *prev_de;
1192 char *de_buf, *limit; 1368 char *de_buf, *limit;
1193 unsigned int bytes = new_size - old_size; 1369 unsigned int new_size = sb->s_blocksize;
1370 unsigned int bytes;
1371
1372 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
1373 new_size = ocfs2_dir_trailer_blk_off(sb);
1374
1375 bytes = new_size - old_size;
1194 1376
1195 limit = start + old_size; 1377 limit = start + old_size;
1196 de_buf = start; 1378 de_buf = start;
@@ -1216,9 +1398,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1216 unsigned int blocks_wanted, 1398 unsigned int blocks_wanted,
1217 struct buffer_head **first_block_bh) 1399 struct buffer_head **first_block_bh)
1218{ 1400{
1219 int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
1220 u32 alloc, bit_off, len; 1401 u32 alloc, bit_off, len;
1221 struct super_block *sb = dir->i_sb; 1402 struct super_block *sb = dir->i_sb;
1403 int ret, credits = ocfs2_inline_to_extents_credits(sb);
1222 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; 1404 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
1223 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 1405 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
1224 struct ocfs2_inode_info *oi = OCFS2_I(dir); 1406 struct ocfs2_inode_info *oi = OCFS2_I(dir);
@@ -1227,6 +1409,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1227 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1228 handle_t *handle; 1410 handle_t *handle;
1229 struct ocfs2_extent_tree et; 1411 struct ocfs2_extent_tree et;
1412 int did_quota = 0;
1230 1413
1231 ocfs2_init_dinode_extent_tree(&et, dir, di_bh); 1414 ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
1232 1415
@@ -1264,6 +1447,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1264 goto out_sem; 1447 goto out_sem;
1265 } 1448 }
1266 1449
1450 if (vfs_dq_alloc_space_nodirty(dir,
1451 ocfs2_clusters_to_bytes(osb->sb, alloc))) {
1452 ret = -EDQUOT;
1453 goto out_commit;
1454 }
1455 did_quota = 1;
1267 /* 1456 /*
1268 * Try to claim as many clusters as the bitmap can give though 1457 * Try to claim as many clusters as the bitmap can give though
1269 * if we only get one now, that's enough to continue. The rest 1458 * if we only get one now, that's enough to continue. The rest
@@ -1290,8 +1479,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1290 1479
1291 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); 1480 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
1292 1481
1293 ret = ocfs2_journal_access(handle, dir, dirdata_bh, 1482 ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
1294 OCFS2_JOURNAL_ACCESS_CREATE); 1483 OCFS2_JOURNAL_ACCESS_CREATE);
1295 if (ret) { 1484 if (ret) {
1296 mlog_errno(ret); 1485 mlog_errno(ret);
1297 goto out_commit; 1486 goto out_commit;
@@ -1300,8 +1489,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1300 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); 1489 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
1301 memset(dirdata_bh->b_data + i_size_read(dir), 0, 1490 memset(dirdata_bh->b_data + i_size_read(dir), 0,
1302 sb->s_blocksize - i_size_read(dir)); 1491 sb->s_blocksize - i_size_read(dir));
1303 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), 1492 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb);
1304 sb->s_blocksize); 1493 if (ocfs2_supports_dir_trailer(osb))
1494 ocfs2_init_dir_trailer(dir, dirdata_bh);
1305 1495
1306 ret = ocfs2_journal_dirty(handle, dirdata_bh); 1496 ret = ocfs2_journal_dirty(handle, dirdata_bh);
1307 if (ret) { 1497 if (ret) {
@@ -1317,8 +1507,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1317 * We let the later dirent insert modify c/mtime - to the user 1507 * We let the later dirent insert modify c/mtime - to the user
1318 * the data hasn't changed. 1508 * the data hasn't changed.
1319 */ 1509 */
1320 ret = ocfs2_journal_access(handle, dir, di_bh, 1510 ret = ocfs2_journal_access_di(handle, dir, di_bh,
1321 OCFS2_JOURNAL_ACCESS_CREATE); 1511 OCFS2_JOURNAL_ACCESS_CREATE);
1322 if (ret) { 1512 if (ret) {
1323 mlog_errno(ret); 1513 mlog_errno(ret);
1324 goto out_commit; 1514 goto out_commit;
@@ -1386,6 +1576,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1386 dirdata_bh = NULL; 1576 dirdata_bh = NULL;
1387 1577
1388out_commit: 1578out_commit:
1579 if (ret < 0 && did_quota)
1580 vfs_dq_free_space_nodirty(dir,
1581 ocfs2_clusters_to_bytes(osb->sb, 2));
1389 ocfs2_commit_trans(osb, handle); 1582 ocfs2_commit_trans(osb, handle);
1390 1583
1391out_sem: 1584out_sem:
@@ -1410,7 +1603,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1410 struct buffer_head **new_bh) 1603 struct buffer_head **new_bh)
1411{ 1604{
1412 int status; 1605 int status;
1413 int extend; 1606 int extend, did_quota = 0;
1414 u64 p_blkno, v_blkno; 1607 u64 p_blkno, v_blkno;
1415 1608
1416 spin_lock(&OCFS2_I(dir)->ip_lock); 1609 spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -1420,6 +1613,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1420 if (extend) { 1613 if (extend) {
1421 u32 offset = OCFS2_I(dir)->ip_clusters; 1614 u32 offset = OCFS2_I(dir)->ip_clusters;
1422 1615
1616 if (vfs_dq_alloc_space_nodirty(dir,
1617 ocfs2_clusters_to_bytes(sb, 1))) {
1618 status = -EDQUOT;
1619 goto bail;
1620 }
1621 did_quota = 1;
1622
1423 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, 1623 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1424 1, 0, parent_fe_bh, handle, 1624 1, 0, parent_fe_bh, handle,
1425 data_ac, meta_ac, NULL); 1625 data_ac, meta_ac, NULL);
@@ -1445,6 +1645,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1445 } 1645 }
1446 status = 0; 1646 status = 0;
1447bail: 1647bail:
1648 if (did_quota && status < 0)
1649 vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
1448 mlog_exit(status); 1650 mlog_exit(status);
1449 return status; 1651 return status;
1450} 1652}
@@ -1569,16 +1771,22 @@ do_extend:
1569 1771
1570 ocfs2_set_new_buffer_uptodate(dir, new_bh); 1772 ocfs2_set_new_buffer_uptodate(dir, new_bh);
1571 1773
1572 status = ocfs2_journal_access(handle, dir, new_bh, 1774 status = ocfs2_journal_access_db(handle, dir, new_bh,
1573 OCFS2_JOURNAL_ACCESS_CREATE); 1775 OCFS2_JOURNAL_ACCESS_CREATE);
1574 if (status < 0) { 1776 if (status < 0) {
1575 mlog_errno(status); 1777 mlog_errno(status);
1576 goto bail; 1778 goto bail;
1577 } 1779 }
1578 memset(new_bh->b_data, 0, sb->s_blocksize); 1780 memset(new_bh->b_data, 0, sb->s_blocksize);
1781
1579 de = (struct ocfs2_dir_entry *) new_bh->b_data; 1782 de = (struct ocfs2_dir_entry *) new_bh->b_data;
1580 de->inode = 0; 1783 de->inode = 0;
1581 de->rec_len = cpu_to_le16(sb->s_blocksize); 1784 if (ocfs2_dir_has_trailer(dir)) {
1785 de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
1786 ocfs2_init_dir_trailer(dir, new_bh);
1787 } else {
1788 de->rec_len = cpu_to_le16(sb->s_blocksize);
1789 }
1582 status = ocfs2_journal_dirty(handle, new_bh); 1790 status = ocfs2_journal_dirty(handle, new_bh);
1583 if (status < 0) { 1791 if (status < 0) {
1584 mlog_errno(status); 1792 mlog_errno(status);
@@ -1620,11 +1828,21 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1620 unsigned int *blocks_wanted) 1828 unsigned int *blocks_wanted)
1621{ 1829{
1622 int ret; 1830 int ret;
1831 struct super_block *sb = dir->i_sb;
1623 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1832 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1624 struct ocfs2_dir_entry *de, *last_de = NULL; 1833 struct ocfs2_dir_entry *de, *last_de = NULL;
1625 char *de_buf, *limit; 1834 char *de_buf, *limit;
1626 unsigned long offset = 0; 1835 unsigned long offset = 0;
1627 unsigned int rec_len, new_rec_len; 1836 unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
1837
1838 /*
1839 * This calculates how many free bytes we'd have in block zero, should
1840 * this function force expansion to an extent tree.
1841 */
1842 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
1843 free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
1844 else
1845 free_space = dir->i_sb->s_blocksize - i_size_read(dir);
1628 1846
1629 de_buf = di->id2.i_data.id_data; 1847 de_buf = di->id2.i_data.id_data;
1630 limit = de_buf + i_size_read(dir); 1848 limit = de_buf + i_size_read(dir);
@@ -1641,6 +1859,11 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1641 ret = -EEXIST; 1859 ret = -EEXIST;
1642 goto out; 1860 goto out;
1643 } 1861 }
1862 /*
1863 * No need to check for a trailing dirent record here as
1864 * they're not used for inline dirs.
1865 */
1866
1644 if (ocfs2_dirent_would_fit(de, rec_len)) { 1867 if (ocfs2_dirent_would_fit(de, rec_len)) {
1645 /* Ok, we found a spot. Return this bh and let 1868 /* Ok, we found a spot. Return this bh and let
1646 * the caller actually fill it in. */ 1869 * the caller actually fill it in. */
@@ -1661,7 +1884,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1661 * dirent can be found. 1884 * dirent can be found.
1662 */ 1885 */
1663 *blocks_wanted = 1; 1886 *blocks_wanted = 1;
1664 new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir)); 1887 new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
1665 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len))) 1888 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
1666 *blocks_wanted = 2; 1889 *blocks_wanted = 2;
1667 1890
@@ -1679,9 +1902,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1679 struct ocfs2_dir_entry *de; 1902 struct ocfs2_dir_entry *de;
1680 struct super_block *sb = dir->i_sb; 1903 struct super_block *sb = dir->i_sb;
1681 int status; 1904 int status;
1905 int blocksize = dir->i_sb->s_blocksize;
1682 1906
1683 bh = ocfs2_bread(dir, 0, &status, 0); 1907 status = ocfs2_read_dir_block(dir, 0, &bh, 0);
1684 if (!bh) { 1908 if (status) {
1685 mlog_errno(status); 1909 mlog_errno(status);
1686 goto bail; 1910 goto bail;
1687 } 1911 }
@@ -1702,11 +1926,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1702 status = -ENOSPC; 1926 status = -ENOSPC;
1703 goto bail; 1927 goto bail;
1704 } 1928 }
1705 bh = ocfs2_bread(dir, 1929 status = ocfs2_read_dir_block(dir,
1706 offset >> sb->s_blocksize_bits, 1930 offset >> sb->s_blocksize_bits,
1707 &status, 1931 &bh, 0);
1708 0); 1932 if (status) {
1709 if (!bh) {
1710 mlog_errno(status); 1933 mlog_errno(status);
1711 goto bail; 1934 goto bail;
1712 } 1935 }
@@ -1721,6 +1944,11 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1721 status = -EEXIST; 1944 status = -EEXIST;
1722 goto bail; 1945 goto bail;
1723 } 1946 }
1947
1948 if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
1949 blocksize))
1950 goto next;
1951
1724 if (ocfs2_dirent_would_fit(de, rec_len)) { 1952 if (ocfs2_dirent_would_fit(de, rec_len)) {
1725 /* Ok, we found a spot. Return this bh and let 1953 /* Ok, we found a spot. Return this bh and let
1726 * the caller actually fill it in. */ 1954 * the caller actually fill it in. */
@@ -1729,6 +1957,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1729 status = 0; 1957 status = 0;
1730 goto bail; 1958 goto bail;
1731 } 1959 }
1960next:
1732 offset += le16_to_cpu(de->rec_len); 1961 offset += le16_to_cpu(de->rec_len);
1733 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); 1962 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
1734 } 1963 }
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index ce48b9080d87..c511e2e18e9f 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -83,4 +83,6 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
83 struct buffer_head *fe_bh, 83 struct buffer_head *fe_bh,
84 struct ocfs2_alloc_context *data_ac); 84 struct ocfs2_alloc_context *data_ac);
85 85
86struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
87 void *data);
86#endif /* OCFS2_DIR_H */ 88#endif /* OCFS2_DIR_H */
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 644bee55d8ba..d07ddbe4b283 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
275 struct list_head *iter, *head=NULL; 275 struct list_head *iter, *head=NULL;
276 u64 cookie; 276 u64 cookie;
277 u32 flags; 277 u32 flags;
278 u8 node;
278 279
279 if (!dlm_grab(dlm)) { 280 if (!dlm_grab(dlm)) {
280 dlm_error(DLM_REJECTED); 281 dlm_error(DLM_REJECTED);
@@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
286 287
287 name = past->name; 288 name = past->name;
288 locklen = past->namelen; 289 locklen = past->namelen;
289 cookie = be64_to_cpu(past->cookie); 290 cookie = past->cookie;
290 flags = be32_to_cpu(past->flags); 291 flags = be32_to_cpu(past->flags);
292 node = past->node_idx;
291 293
292 if (locklen > DLM_LOCKID_NAME_MAX) { 294 if (locklen > DLM_LOCKID_NAME_MAX) {
293 ret = DLM_IVBUFLEN; 295 ret = DLM_IVBUFLEN;
294 mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n"); 296 mlog(ML_ERROR, "Invalid name length (%d) in proxy ast "
297 "handler!\n", locklen);
295 goto leave; 298 goto leave;
296 } 299 }
297 300
298 if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == 301 if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
299 (LKM_PUT_LVB|LKM_GET_LVB)) { 302 (LKM_PUT_LVB|LKM_GET_LVB)) {
300 mlog(ML_ERROR, "both PUT and GET lvb specified\n"); 303 mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n",
304 flags);
301 ret = DLM_BADARGS; 305 ret = DLM_BADARGS;
302 goto leave; 306 goto leave;
303 } 307 }
@@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
310 if (past->type != DLM_AST && 314 if (past->type != DLM_AST &&
311 past->type != DLM_BAST) { 315 past->type != DLM_BAST) {
312 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" 316 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
313 "name=%.*s\n", past->type, 317 "name=%.*s, node=%u\n", past->type,
314 dlm_get_lock_cookie_node(cookie), 318 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
315 dlm_get_lock_cookie_seq(cookie), 319 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
316 locklen, name); 320 locklen, name, node);
317 ret = DLM_IVLOCKID; 321 ret = DLM_IVLOCKID;
318 goto leave; 322 goto leave;
319 } 323 }
320 324
321 res = dlm_lookup_lockres(dlm, name, locklen); 325 res = dlm_lookup_lockres(dlm, name, locklen);
322 if (!res) { 326 if (!res) {
323 mlog(0, "got %sast for unknown lockres! " 327 mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, "
324 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 328 "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"),
325 past->type == DLM_AST ? "" : "b", 329 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
326 dlm_get_lock_cookie_node(cookie), 330 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
327 dlm_get_lock_cookie_seq(cookie), 331 locklen, name, node);
328 locklen, name, locklen);
329 ret = DLM_IVLOCKID; 332 ret = DLM_IVLOCKID;
330 goto leave; 333 goto leave;
331 } 334 }
@@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
337 340
338 spin_lock(&res->spinlock); 341 spin_lock(&res->spinlock);
339 if (res->state & DLM_LOCK_RES_RECOVERING) { 342 if (res->state & DLM_LOCK_RES_RECOVERING) {
340 mlog(0, "responding with DLM_RECOVERING!\n"); 343 mlog(0, "Responding with DLM_RECOVERING!\n");
341 ret = DLM_RECOVERING; 344 ret = DLM_RECOVERING;
342 goto unlock_out; 345 goto unlock_out;
343 } 346 }
344 if (res->state & DLM_LOCK_RES_MIGRATING) { 347 if (res->state & DLM_LOCK_RES_MIGRATING) {
345 mlog(0, "responding with DLM_MIGRATING!\n"); 348 mlog(0, "Responding with DLM_MIGRATING!\n");
346 ret = DLM_MIGRATING; 349 ret = DLM_MIGRATING;
347 goto unlock_out; 350 goto unlock_out;
348 } 351 }
@@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
351 lock = NULL; 354 lock = NULL;
352 list_for_each(iter, head) { 355 list_for_each(iter, head) {
353 lock = list_entry (iter, struct dlm_lock, list); 356 lock = list_entry (iter, struct dlm_lock, list);
354 if (be64_to_cpu(lock->ml.cookie) == cookie) 357 if (lock->ml.cookie == cookie)
355 goto do_ast; 358 goto do_ast;
356 } 359 }
357 360
@@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
363 366
364 list_for_each(iter, head) { 367 list_for_each(iter, head) {
365 lock = list_entry (iter, struct dlm_lock, list); 368 lock = list_entry (iter, struct dlm_lock, list);
366 if (be64_to_cpu(lock->ml.cookie) == cookie) 369 if (lock->ml.cookie == cookie)
367 goto do_ast; 370 goto do_ast;
368 } 371 }
369 372
370 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " 373 mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
371 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 374 "node=%u\n", past->type == DLM_AST ? "" : "b",
372 dlm_get_lock_cookie_node(cookie), 375 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
373 dlm_get_lock_cookie_seq(cookie), 376 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
374 locklen, name, locklen); 377 locklen, name, node);
375 378
376 ret = DLM_NORMAL; 379 ret = DLM_NORMAL;
377unlock_out: 380unlock_out:
@@ -383,8 +386,8 @@ do_ast:
383 if (past->type == DLM_AST) { 386 if (past->type == DLM_AST) {
384 /* do not alter lock refcount. switching lists. */ 387 /* do not alter lock refcount. switching lists. */
385 list_move_tail(&lock->list, &res->granted); 388 list_move_tail(&lock->list, &res->granted);
386 mlog(0, "ast: adding to granted list... type=%d, " 389 mlog(0, "ast: Adding to granted list... type=%d, "
387 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 390 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
388 if (lock->ml.convert_type != LKM_IVMODE) { 391 if (lock->ml.convert_type != LKM_IVMODE) {
389 lock->ml.type = lock->ml.convert_type; 392 lock->ml.type = lock->ml.convert_type;
390 lock->ml.convert_type = LKM_IVMODE; 393 lock->ml.convert_type = LKM_IVMODE;
@@ -408,7 +411,6 @@ do_ast:
408 dlm_do_local_bast(dlm, res, lock, past->blocked_type); 411 dlm_do_local_bast(dlm, res, lock, past->blocked_type);
409 412
410leave: 413leave:
411
412 if (res) 414 if (res)
413 dlm_lockres_put(res); 415 dlm_lockres_put(res);
414 416
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index d5a86fb81a49..bb53714813ab 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
140 unsigned int purge_count; 140 unsigned int purge_count;
141 spinlock_t spinlock; 141 spinlock_t spinlock;
142 spinlock_t ast_lock; 142 spinlock_t ast_lock;
143 spinlock_t track_lock;
143 char *name; 144 char *name;
144 u8 node_num; 145 u8 node_num;
145 u32 key; 146 u32 key;
@@ -316,6 +317,8 @@ struct dlm_lock_resource
316 * put on a list for the dlm thread to run. */ 317 * put on a list for the dlm thread to run. */
317 unsigned long last_used; 318 unsigned long last_used;
318 319
320 struct dlm_ctxt *dlm;
321
319 unsigned migration_pending:1; 322 unsigned migration_pending:1;
320 atomic_t asts_reserved; 323 atomic_t asts_reserved;
321 spinlock_t spinlock; 324 spinlock_t spinlock;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 1b81dcba175d..b32f60a5acfb 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
630{ 630{
631 struct debug_lockres *dl = m->private; 631 struct debug_lockres *dl = m->private;
632 struct dlm_ctxt *dlm = dl->dl_ctxt; 632 struct dlm_ctxt *dlm = dl->dl_ctxt;
633 struct dlm_lock_resource *oldres = dl->dl_res;
633 struct dlm_lock_resource *res = NULL; 634 struct dlm_lock_resource *res = NULL;
635 struct list_head *track_list;
634 636
635 spin_lock(&dlm->spinlock); 637 spin_lock(&dlm->track_lock);
638 if (oldres)
639 track_list = &oldres->tracking;
640 else
641 track_list = &dlm->tracking_list;
636 642
637 if (dl->dl_res) { 643 list_for_each_entry(res, track_list, tracking) {
638 list_for_each_entry(res, &dl->dl_res->tracking, tracking) { 644 if (&res->tracking == &dlm->tracking_list)
639 if (dl->dl_res) { 645 res = NULL;
640 dlm_lockres_put(dl->dl_res); 646 else
641 dl->dl_res = NULL;
642 }
643 if (&res->tracking == &dlm->tracking_list) {
644 mlog(0, "End of list found, %p\n", res);
645 dl = NULL;
646 break;
647 }
648 dlm_lockres_get(res); 647 dlm_lockres_get(res);
649 dl->dl_res = res; 648 break;
650 break;
651 }
652 } else {
653 if (!list_empty(&dlm->tracking_list)) {
654 list_for_each_entry(res, &dlm->tracking_list, tracking)
655 break;
656 dlm_lockres_get(res);
657 dl->dl_res = res;
658 } else
659 dl = NULL;
660 } 649 }
650 spin_unlock(&dlm->track_lock);
661 651
662 if (dl) { 652 if (oldres)
663 spin_lock(&dl->dl_res->spinlock); 653 dlm_lockres_put(oldres);
664 dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
665 spin_unlock(&dl->dl_res->spinlock);
666 }
667 654
668 spin_unlock(&dlm->spinlock); 655 dl->dl_res = res;
656
657 if (res) {
658 spin_lock(&res->spinlock);
659 dump_lockres(res, dl->dl_buf, dl->dl_len - 1);
660 spin_unlock(&res->spinlock);
661 } else
662 dl = NULL;
669 663
664 /* passed to seq_show */
670 return dl; 665 return dl;
671} 666}
672 667
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 63f8125824e8..d8d578f45613 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1550,6 +1550,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1550 spin_lock_init(&dlm->spinlock); 1550 spin_lock_init(&dlm->spinlock);
1551 spin_lock_init(&dlm->master_lock); 1551 spin_lock_init(&dlm->master_lock);
1552 spin_lock_init(&dlm->ast_lock); 1552 spin_lock_init(&dlm->ast_lock);
1553 spin_lock_init(&dlm->track_lock);
1553 INIT_LIST_HEAD(&dlm->list); 1554 INIT_LIST_HEAD(&dlm->list);
1554 INIT_LIST_HEAD(&dlm->dirty_list); 1555 INIT_LIST_HEAD(&dlm->dirty_list);
1555 INIT_LIST_HEAD(&dlm->reco.resources); 1556 INIT_LIST_HEAD(&dlm->reco.resources);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index ba962d71b34d..1c9efb406a96 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -339,9 +339,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
339 ip = DLMFS_I(inode); 339 ip = DLMFS_I(inode);
340 340
341 inode->i_mode = mode; 341 inode->i_mode = mode;
342 inode->i_uid = current->fsuid; 342 inode->i_uid = current_fsuid();
343 inode->i_gid = current->fsgid; 343 inode->i_gid = current_fsgid();
344 inode->i_blocks = 0;
345 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 344 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
346 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 345 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
347 inc_nlink(inode); 346 inc_nlink(inode);
@@ -365,9 +364,8 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
365 return NULL; 364 return NULL;
366 365
367 inode->i_mode = mode; 366 inode->i_mode = mode;
368 inode->i_uid = current->fsuid; 367 inode->i_uid = current_fsuid();
369 inode->i_gid = current->fsgid; 368 inode->i_gid = current_fsgid();
370 inode->i_blocks = 0;
371 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 369 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
372 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 370 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
373 371
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 44f87caf3683..54e182a27caf 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
505static void dlm_lockres_release(struct kref *kref) 505static void dlm_lockres_release(struct kref *kref)
506{ 506{
507 struct dlm_lock_resource *res; 507 struct dlm_lock_resource *res;
508 struct dlm_ctxt *dlm;
508 509
509 res = container_of(kref, struct dlm_lock_resource, refs); 510 res = container_of(kref, struct dlm_lock_resource, refs);
511 dlm = res->dlm;
510 512
511 /* This should not happen -- all lockres' have a name 513 /* This should not happen -- all lockres' have a name
512 * associated with them at init time. */ 514 * associated with them at init time. */
@@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref)
515 mlog(0, "destroying lockres %.*s\n", res->lockname.len, 517 mlog(0, "destroying lockres %.*s\n", res->lockname.len,
516 res->lockname.name); 518 res->lockname.name);
517 519
520 spin_lock(&dlm->track_lock);
518 if (!list_empty(&res->tracking)) 521 if (!list_empty(&res->tracking))
519 list_del_init(&res->tracking); 522 list_del_init(&res->tracking);
520 else { 523 else {
@@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref)
522 res->lockname.len, res->lockname.name); 525 res->lockname.len, res->lockname.name);
523 dlm_print_one_lock_resource(res); 526 dlm_print_one_lock_resource(res);
524 } 527 }
528 spin_unlock(&dlm->track_lock);
529
530 dlm_put(dlm);
525 531
526 if (!hlist_unhashed(&res->hash_node) || 532 if (!hlist_unhashed(&res->hash_node) ||
527 !list_empty(&res->granted) || 533 !list_empty(&res->granted) ||
@@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
595 res->migration_pending = 0; 601 res->migration_pending = 0;
596 res->inflight_locks = 0; 602 res->inflight_locks = 0;
597 603
604 /* put in dlm_lockres_release */
605 dlm_grab(dlm);
606 res->dlm = dlm;
607
598 kref_init(&res->refs); 608 kref_init(&res->refs);
599 609
600 /* just for consistency */ 610 /* just for consistency */
@@ -722,14 +732,21 @@ lookup:
722 if (tmpres) { 732 if (tmpres) {
723 int dropping_ref = 0; 733 int dropping_ref = 0;
724 734
735 spin_unlock(&dlm->spinlock);
736
725 spin_lock(&tmpres->spinlock); 737 spin_lock(&tmpres->spinlock);
738 /* We wait for the other thread that is mastering the resource */
739 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
740 __dlm_wait_on_lockres(tmpres);
741 BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
742 }
743
726 if (tmpres->owner == dlm->node_num) { 744 if (tmpres->owner == dlm->node_num) {
727 BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); 745 BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
728 dlm_lockres_grab_inflight_ref(dlm, tmpres); 746 dlm_lockres_grab_inflight_ref(dlm, tmpres);
729 } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) 747 } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
730 dropping_ref = 1; 748 dropping_ref = 1;
731 spin_unlock(&tmpres->spinlock); 749 spin_unlock(&tmpres->spinlock);
732 spin_unlock(&dlm->spinlock);
733 750
734 /* wait until done messaging the master, drop our ref to allow 751 /* wait until done messaging the master, drop our ref to allow
735 * the lockres to be purged, start over. */ 752 * the lockres to be purged, start over. */
@@ -2949,7 +2966,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2949 struct dlm_node_iter *iter) 2966 struct dlm_node_iter *iter)
2950{ 2967{
2951 struct dlm_migrate_request migrate; 2968 struct dlm_migrate_request migrate;
2952 int ret, status = 0; 2969 int ret, skip, status = 0;
2953 int nodenum; 2970 int nodenum;
2954 2971
2955 memset(&migrate, 0, sizeof(migrate)); 2972 memset(&migrate, 0, sizeof(migrate));
@@ -2966,12 +2983,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2966 nodenum == new_master) 2983 nodenum == new_master)
2967 continue; 2984 continue;
2968 2985
2986 /* We could race exit domain. If exited, skip. */
2987 spin_lock(&dlm->spinlock);
2988 skip = (!test_bit(nodenum, dlm->domain_map));
2989 spin_unlock(&dlm->spinlock);
2990 if (skip) {
2991 clear_bit(nodenum, iter->node_map);
2992 continue;
2993 }
2994
2969 ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key, 2995 ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
2970 &migrate, sizeof(migrate), nodenum, 2996 &migrate, sizeof(migrate), nodenum,
2971 &status); 2997 &status);
2972 if (ret < 0) 2998 if (ret < 0) {
2973 mlog_errno(ret); 2999 mlog(0, "migrate_request returned %d!\n", ret);
2974 else if (status < 0) { 3000 if (!dlm_is_host_down(ret)) {
3001 mlog(ML_ERROR, "unhandled error=%d!\n", ret);
3002 BUG();
3003 }
3004 clear_bit(nodenum, iter->node_map);
3005 ret = 0;
3006 } else if (status < 0) {
2975 mlog(0, "migrate request (node %u) returned %d!\n", 3007 mlog(0, "migrate request (node %u) returned %d!\n",
2976 nodenum, status); 3008 nodenum, status);
2977 ret = status; 3009 ret = status;
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 4060bb328bc8..d1295203029f 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
181 181
182 spin_lock(&res->spinlock); 182 spin_lock(&res->spinlock);
183 /* This ensures that clear refmap is sent after the set */ 183 /* This ensures that clear refmap is sent after the set */
184 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); 184 __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
185 DLM_LOCK_RES_MIGRATING));
185 spin_unlock(&res->spinlock); 186 spin_unlock(&res->spinlock);
186 187
187 /* clear our bit from the master's refmap, ignore errors */ 188 /* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6e6cc0a2e5f7..b0c4cadd4c45 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -32,6 +32,7 @@
32#include <linux/debugfs.h> 32#include <linux/debugfs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/quotaops.h>
35 36
36#define MLOG_MASK_PREFIX ML_DLM_GLUE 37#define MLOG_MASK_PREFIX ML_DLM_GLUE
37#include <cluster/masklog.h> 38#include <cluster/masklog.h>
@@ -51,6 +52,7 @@
51#include "slot_map.h" 52#include "slot_map.h"
52#include "super.h" 53#include "super.h"
53#include "uptodate.h" 54#include "uptodate.h"
55#include "quota.h"
54 56
55#include "buffer_head_io.h" 57#include "buffer_head_io.h"
56 58
@@ -68,6 +70,7 @@ struct ocfs2_mask_waiter {
68static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 70static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
69static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 71static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
70static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 72static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
73static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
71 74
72/* 75/*
73 * Return value from ->downconvert_worker functions. 76 * Return value from ->downconvert_worker functions.
@@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
102static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 105static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
103 struct ocfs2_lock_res *lockres); 106 struct ocfs2_lock_res *lockres);
104 107
108static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
105 109
106#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 110#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
107 111
@@ -111,8 +115,7 @@ static void ocfs2_dump_meta_lvb_info(u64 level,
111 unsigned int line, 115 unsigned int line,
112 struct ocfs2_lock_res *lockres) 116 struct ocfs2_lock_res *lockres)
113{ 117{
114 struct ocfs2_meta_lvb *lvb = 118 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
115 (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
116 119
117 mlog(level, "LVB information for %s (called from %s:%u):\n", 120 mlog(level, "LVB information for %s (called from %s:%u):\n",
118 lockres->l_name, function, line); 121 lockres->l_name, function, line);
@@ -258,6 +261,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
258 .flags = 0, 261 .flags = 0,
259}; 262};
260 263
264static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
265 .set_lvb = ocfs2_set_qinfo_lvb,
266 .get_osb = ocfs2_get_qinfo_osb,
267 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
268};
269
261static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 270static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
262{ 271{
263 return lockres->l_type == OCFS2_LOCK_TYPE_META || 272 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -279,6 +288,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
279 return (struct ocfs2_dentry_lock *)lockres->l_priv; 288 return (struct ocfs2_dentry_lock *)lockres->l_priv;
280} 289}
281 290
291static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
292{
293 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
294
295 return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
296}
297
282static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 298static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
283{ 299{
284 if (lockres->l_ops->get_osb) 300 if (lockres->l_ops->get_osb)
@@ -507,6 +523,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
507 return OCFS2_SB(inode->i_sb); 523 return OCFS2_SB(inode->i_sb);
508} 524}
509 525
526static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
527{
528 struct ocfs2_mem_dqinfo *info = lockres->l_priv;
529
530 return OCFS2_SB(info->dqi_gi.dqi_sb);
531}
532
510static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 533static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
511{ 534{
512 struct ocfs2_file_private *fp = lockres->l_priv; 535 struct ocfs2_file_private *fp = lockres->l_priv;
@@ -609,6 +632,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
609 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 632 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
610} 633}
611 634
635void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
636 struct ocfs2_mem_dqinfo *info)
637{
638 ocfs2_lock_res_init_once(lockres);
639 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
640 0, lockres->l_name);
641 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
642 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
643 info);
644}
645
612void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 646void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
613{ 647{
614 mlog_entry_void(); 648 mlog_entry_void();
@@ -1290,7 +1324,7 @@ again:
1290 goto out; 1324 goto out;
1291 } 1325 }
1292 1326
1293 mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", 1327 mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
1294 lockres->l_name); 1328 lockres->l_name);
1295 1329
1296 /* At this point we've gone inside the dlm and need to 1330 /* At this point we've gone inside the dlm and need to
@@ -1829,7 +1863,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1829 1863
1830 mlog_entry_void(); 1864 mlog_entry_void();
1831 1865
1832 lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1866 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1833 1867
1834 /* 1868 /*
1835 * Invalidate the LVB of a deleted inode - this way other 1869 * Invalidate the LVB of a deleted inode - this way other
@@ -1881,7 +1915,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1881 1915
1882 mlog_meta_lvb(0, lockres); 1916 mlog_meta_lvb(0, lockres);
1883 1917
1884 lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1918 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1885 1919
1886 /* We're safe here without the lockres lock... */ 1920 /* We're safe here without the lockres lock... */
1887 spin_lock(&oi->ip_lock); 1921 spin_lock(&oi->ip_lock);
@@ -1916,8 +1950,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1916static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1950static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1917 struct ocfs2_lock_res *lockres) 1951 struct ocfs2_lock_res *lockres)
1918{ 1952{
1919 struct ocfs2_meta_lvb *lvb = 1953 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1920 (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
1921 1954
1922 if (lvb->lvb_version == OCFS2_LVB_VERSION 1955 if (lvb->lvb_version == OCFS2_LVB_VERSION
1923 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1956 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
@@ -2024,7 +2057,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2024 } else { 2057 } else {
2025 /* Boo, we have to go to disk. */ 2058 /* Boo, we have to go to disk. */
2026 /* read bh, cast, ocfs2_refresh_inode */ 2059 /* read bh, cast, ocfs2_refresh_inode */
2027 status = ocfs2_read_block(inode, oi->ip_blkno, bh); 2060 status = ocfs2_read_inode_block(inode, bh);
2028 if (status < 0) { 2061 if (status < 0) {
2029 mlog_errno(status); 2062 mlog_errno(status);
2030 goto bail_refresh; 2063 goto bail_refresh;
@@ -2032,18 +2065,14 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2032 fe = (struct ocfs2_dinode *) (*bh)->b_data; 2065 fe = (struct ocfs2_dinode *) (*bh)->b_data;
2033 2066
2034 /* This is a good chance to make sure we're not 2067 /* This is a good chance to make sure we're not
2035 * locking an invalid object. 2068 * locking an invalid object. ocfs2_read_inode_block()
2069 * already checked that the inode block is sane.
2036 * 2070 *
2037 * We bug on a stale inode here because we checked 2071 * We bug on a stale inode here because we checked
2038 * above whether it was wiped from disk. The wiping 2072 * above whether it was wiped from disk. The wiping
2039 * node provides a guarantee that we receive that 2073 * node provides a guarantee that we receive that
2040 * message and can mark the inode before dropping any 2074 * message and can mark the inode before dropping any
2041 * locks associated with it. */ 2075 * locks associated with it. */
2042 if (!OCFS2_IS_VALID_DINODE(fe)) {
2043 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
2044 status = -EIO;
2045 goto bail_refresh;
2046 }
2047 mlog_bug_on_msg(inode->i_generation != 2076 mlog_bug_on_msg(inode->i_generation !=
2048 le32_to_cpu(fe->i_generation), 2077 le32_to_cpu(fe->i_generation),
2049 "Invalid dinode %llu disk generation: %u " 2078 "Invalid dinode %llu disk generation: %u "
@@ -2085,7 +2114,7 @@ static int ocfs2_assign_bh(struct inode *inode,
2085 return 0; 2114 return 0;
2086 } 2115 }
2087 2116
2088 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); 2117 status = ocfs2_read_inode_block(inode, ret_bh);
2089 if (status < 0) 2118 if (status < 0)
2090 mlog_errno(status); 2119 mlog_errno(status);
2091 2120
@@ -2922,7 +2951,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
2922 ocfs2_dlm_dump_lksb(&lockres->l_lksb); 2951 ocfs2_dlm_dump_lksb(&lockres->l_lksb);
2923 BUG(); 2952 BUG();
2924 } 2953 }
2925 mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", 2954 mlog(0, "lock %s, successful return from ocfs2_dlm_unlock\n",
2926 lockres->l_name); 2955 lockres->l_name);
2927 2956
2928 ocfs2_wait_on_busy_lock(lockres); 2957 ocfs2_wait_on_busy_lock(lockres);
@@ -3449,6 +3478,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3449 return UNBLOCK_CONTINUE_POST; 3478 return UNBLOCK_CONTINUE_POST;
3450} 3479}
3451 3480
3481static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3482{
3483 struct ocfs2_qinfo_lvb *lvb;
3484 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
3485 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3486 oinfo->dqi_gi.dqi_type);
3487
3488 mlog_entry_void();
3489
3490 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3491 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
3492 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
3493 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
3494 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
3495 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
3496 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
3497 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
3498
3499 mlog_exit_void();
3500}
3501
3502void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3503{
3504 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3505 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3506 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3507
3508 mlog_entry_void();
3509 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3510 ocfs2_cluster_unlock(osb, lockres, level);
3511 mlog_exit_void();
3512}
3513
3514static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3515{
3516 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3517 oinfo->dqi_gi.dqi_type);
3518 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3519 struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3520 struct buffer_head *bh = NULL;
3521 struct ocfs2_global_disk_dqinfo *gdinfo;
3522 int status = 0;
3523
3524 if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3525 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3526 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3527 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
3528 oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
3529 oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
3530 oinfo->dqi_gi.dqi_free_entry =
3531 be32_to_cpu(lvb->lvb_free_entry);
3532 } else {
3533 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
3534 if (status) {
3535 mlog_errno(status);
3536 goto bail;
3537 }
3538 gdinfo = (struct ocfs2_global_disk_dqinfo *)
3539 (bh->b_data + OCFS2_GLOBAL_INFO_OFF);
3540 info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
3541 info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
3542 oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
3543 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
3544 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
3545 oinfo->dqi_gi.dqi_free_entry =
3546 le32_to_cpu(gdinfo->dqi_free_entry);
3547 brelse(bh);
3548 ocfs2_track_lock_refresh(lockres);
3549 }
3550
3551bail:
3552 return status;
3553}
3554
3555/* Lock quota info, this function expects at least shared lock on the quota file
3556 * so that we can safely refresh quota info from disk. */
3557int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3558{
3559 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3560 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3561 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3562 int status = 0;
3563
3564 mlog_entry_void();
3565
3566 /* On RO devices, locking really isn't needed... */
3567 if (ocfs2_is_hard_readonly(osb)) {
3568 if (ex)
3569 status = -EROFS;
3570 goto bail;
3571 }
3572 if (ocfs2_mount_local(osb))
3573 goto bail;
3574
3575 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3576 if (status < 0) {
3577 mlog_errno(status);
3578 goto bail;
3579 }
3580 if (!ocfs2_should_refresh_lock_res(lockres))
3581 goto bail;
3582 /* OK, we have the lock but we need to refresh the quota info */
3583 status = ocfs2_refresh_qinfo(oinfo);
3584 if (status)
3585 ocfs2_qinfo_unlock(oinfo, ex);
3586 ocfs2_complete_lock_res_refresh(lockres, status);
3587bail:
3588 mlog_exit(status);
3589 return status;
3590}
3591
3452/* 3592/*
3453 * This is the filesystem locking protocol. It provides the lock handling 3593 * This is the filesystem locking protocol. It provides the lock handling
3454 * hooks for the underlying DLM. It has a maximum version number. 3594 * hooks for the underlying DLM. It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 2bb01f09c1b1..3f8d9986b8e0 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
49 __be32 lvb_reserved2; 49 __be32 lvb_reserved2;
50}; 50};
51 51
52#define OCFS2_QINFO_LVB_VERSION 1
53
54struct ocfs2_qinfo_lvb {
55 __u8 lvb_version;
56 __u8 lvb_reserved[3];
57 __be32 lvb_bgrace;
58 __be32 lvb_igrace;
59 __be32 lvb_syncms;
60 __be32 lvb_blocks;
61 __be32 lvb_free_blk;
62 __be32 lvb_free_entry;
63};
64
52/* ocfs2_inode_lock_full() 'arg_flags' flags */ 65/* ocfs2_inode_lock_full() 'arg_flags' flags */
53/* don't wait on recovery. */ 66/* don't wait on recovery. */
54#define OCFS2_META_LOCK_RECOVERY (0x01) 67#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
69struct ocfs2_file_private; 82struct ocfs2_file_private;
70void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 83void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
71 struct ocfs2_file_private *fp); 84 struct ocfs2_file_private *fp);
85struct ocfs2_mem_dqinfo;
86void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
87 struct ocfs2_mem_dqinfo *info);
72void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 88void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
73int ocfs2_create_new_inode_locks(struct inode *inode); 89int ocfs2_create_new_inode_locks(struct inode *inode);
74int ocfs2_drop_inode_locks(struct inode *inode); 90int ocfs2_drop_inode_locks(struct inode *inode);
@@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
103void ocfs2_dentry_unlock(struct dentry *dentry, int ex); 119void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
104int ocfs2_file_lock(struct file *file, int ex, int trylock); 120int ocfs2_file_lock(struct file *file, int ex, int trylock);
105void ocfs2_file_unlock(struct file *file); 121void ocfs2_file_unlock(struct file *file);
122int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
123void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
124
106 125
107void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 126void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
108void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 127void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2baedac58234..f2bb1a04d253 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
293 struct ocfs2_extent_block *eb; 293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el; 294 struct ocfs2_extent_list *el;
295 295
296 ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); 296 ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
297 if (ret) { 297 if (ret) {
298 mlog_errno(ret); 298 mlog_errno(ret);
299 goto out; 299 goto out;
@@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
302 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 302 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
303 el = &eb->h_list; 303 el = &eb->h_list;
304 304
305 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
306 ret = -EROFS;
307 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
308 goto out;
309 }
310
311 if (el->l_tree_depth) { 305 if (el->l_tree_depth) {
312 ocfs2_error(inode->i_sb, 306 ocfs2_error(inode->i_sb,
313 "Inode %lu has non zero tree depth in " 307 "Inode %lu has non zero tree depth in "
@@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
381 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 375 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
382 goto no_more_extents; 376 goto no_more_extents;
383 377
384 ret = ocfs2_read_block(inode, 378 ret = ocfs2_read_extent_block(inode,
385 le64_to_cpu(eb->h_next_leaf_blk), 379 le64_to_cpu(eb->h_next_leaf_blk),
386 &next_eb_bh); 380 &next_eb_bh);
387 if (ret) { 381 if (ret) {
388 mlog_errno(ret); 382 mlog_errno(ret);
389 goto out; 383 goto out;
390 } 384 }
391 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
392
393 if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
394 ret = -EROFS;
395 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
396 goto out;
397 }
398 385
386 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
399 el = &next_eb->h_list; 387 el = &next_eb->h_list;
400
401 i = ocfs2_search_for_hole_index(el, v_cluster); 388 i = ocfs2_search_for_hole_index(el, v_cluster);
402 } 389 }
403 390
@@ -630,7 +617,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
630 if (ret == 0) 617 if (ret == 0)
631 goto out; 618 goto out;
632 619
633 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 620 ret = ocfs2_read_inode_block(inode, &di_bh);
634 if (ret) { 621 if (ret) {
635 mlog_errno(ret); 622 mlog_errno(ret);
636 goto out; 623 goto out;
@@ -819,3 +806,74 @@ out:
819 806
820 return ret; 807 return ret;
821} 808}
809
810int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
811 struct buffer_head *bhs[], int flags,
812 int (*validate)(struct super_block *sb,
813 struct buffer_head *bh))
814{
815 int rc = 0;
816 u64 p_block, p_count;
817 int i, count, done = 0;
818
819 mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
820 "flags = %x, validate = %p)\n",
821 inode, (unsigned long long)v_block, nr, bhs, flags,
822 validate);
823
824 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
825 i_size_read(inode)) {
826 BUG_ON(!(flags & OCFS2_BH_READAHEAD));
827 goto out;
828 }
829
830 while (done < nr) {
831 down_read(&OCFS2_I(inode)->ip_alloc_sem);
832 rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
833 &p_block, &p_count, NULL);
834 up_read(&OCFS2_I(inode)->ip_alloc_sem);
835 if (rc) {
836 mlog_errno(rc);
837 break;
838 }
839
840 if (!p_block) {
841 rc = -EIO;
842 mlog(ML_ERROR,
843 "Inode #%llu contains a hole at offset %llu\n",
844 (unsigned long long)OCFS2_I(inode)->ip_blkno,
845 (unsigned long long)(v_block + done) <<
846 inode->i_sb->s_blocksize_bits);
847 break;
848 }
849
850 count = nr - done;
851 if (p_count < count)
852 count = p_count;
853
854 /*
855 * If the caller passed us bhs, they should have come
856 * from a previous readahead call to this function. Thus,
857 * they should have the right b_blocknr.
858 */
859 for (i = 0; i < count; i++) {
860 if (!bhs[done + i])
861 continue;
862 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
863 }
864
865 rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
866 flags, validate);
867 if (rc) {
868 mlog_errno(rc);
869 break;
870 }
871 done += count;
872 }
873
874out:
875 mlog_exit(rc);
876 return rc;
877}
878
879
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1c4aa8b06f34..b7dd9731b462 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
57 u32 *p_cluster, u32 *num_clusters, 57 u32 *p_cluster, u32 *num_clusters,
58 struct ocfs2_extent_list *el); 58 struct ocfs2_extent_list *el);
59 59
60int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
61 struct buffer_head *bhs[], int flags,
62 int (*validate)(struct super_block *sb,
63 struct buffer_head *bh));
64static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
65 struct buffer_head **bh,
66 int (*validate)(struct super_block *sb,
67 struct buffer_head *bh))
68{
69 int status = 0;
70
71 if (bh == NULL) {
72 printk("ocfs2: bh == NULL\n");
73 status = -EINVAL;
74 goto bail;
75 }
76
77 status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
78
79bail:
80 return status;
81}
82
83
60#endif /* _EXTENT_MAP_H */ 84#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e2570a3bc2b2..a5887df2cd8a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -35,6 +35,7 @@
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37#include <linux/falloc.h> 37#include <linux/falloc.h>
38#include <linux/quotaops.h>
38 39
39#define MLOG_MASK_PREFIX ML_INODE 40#define MLOG_MASK_PREFIX ML_INODE
40#include <cluster/masklog.h> 41#include <cluster/masklog.h>
@@ -56,6 +57,8 @@
56#include "suballoc.h" 57#include "suballoc.h"
57#include "super.h" 58#include "super.h"
58#include "xattr.h" 59#include "xattr.h"
60#include "acl.h"
61#include "quota.h"
59 62
60#include "buffer_head_io.h" 63#include "buffer_head_io.h"
61 64
@@ -253,8 +256,8 @@ int ocfs2_update_inode_atime(struct inode *inode,
253 goto out; 256 goto out;
254 } 257 }
255 258
256 ret = ocfs2_journal_access(handle, inode, bh, 259 ret = ocfs2_journal_access_di(handle, inode, bh,
257 OCFS2_JOURNAL_ACCESS_WRITE); 260 OCFS2_JOURNAL_ACCESS_WRITE);
258 if (ret) { 261 if (ret) {
259 mlog_errno(ret); 262 mlog_errno(ret);
260 goto out_commit; 263 goto out_commit;
@@ -303,9 +306,9 @@ bail:
303 return status; 306 return status;
304} 307}
305 308
306static int ocfs2_simple_size_update(struct inode *inode, 309int ocfs2_simple_size_update(struct inode *inode,
307 struct buffer_head *di_bh, 310 struct buffer_head *di_bh,
308 u64 new_i_size) 311 u64 new_i_size)
309{ 312{
310 int ret; 313 int ret;
311 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 314 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -350,8 +353,8 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
350 goto out; 353 goto out;
351 } 354 }
352 355
353 status = ocfs2_journal_access(handle, inode, fe_bh, 356 status = ocfs2_journal_access_di(handle, inode, fe_bh,
354 OCFS2_JOURNAL_ACCESS_WRITE); 357 OCFS2_JOURNAL_ACCESS_WRITE);
355 if (status < 0) { 358 if (status < 0) {
356 mlog_errno(status); 359 mlog_errno(status);
357 goto out_commit; 360 goto out_commit;
@@ -401,12 +404,9 @@ static int ocfs2_truncate_file(struct inode *inode,
401 (unsigned long long)OCFS2_I(inode)->ip_blkno, 404 (unsigned long long)OCFS2_I(inode)->ip_blkno,
402 (unsigned long long)new_i_size); 405 (unsigned long long)new_i_size);
403 406
407 /* We trust di_bh because it comes from ocfs2_inode_lock(), which
408 * already validated it */
404 fe = (struct ocfs2_dinode *) di_bh->b_data; 409 fe = (struct ocfs2_dinode *) di_bh->b_data;
405 if (!OCFS2_IS_VALID_DINODE(fe)) {
406 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
407 status = -EIO;
408 goto bail;
409 }
410 410
411 mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), 411 mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
412 "Inode %llu, inode i_size = %lld != di " 412 "Inode %llu, inode i_size = %lld != di "
@@ -536,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
536 enum ocfs2_alloc_restarted why; 536 enum ocfs2_alloc_restarted why;
537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
538 struct ocfs2_extent_tree et; 538 struct ocfs2_extent_tree et;
539 int did_quota = 0;
539 540
540 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); 541 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
541 542
@@ -545,18 +546,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
545 */ 546 */
546 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); 547 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
547 548
548 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); 549 status = ocfs2_read_inode_block(inode, &bh);
549 if (status < 0) { 550 if (status < 0) {
550 mlog_errno(status); 551 mlog_errno(status);
551 goto leave; 552 goto leave;
552 } 553 }
553
554 fe = (struct ocfs2_dinode *) bh->b_data; 554 fe = (struct ocfs2_dinode *) bh->b_data;
555 if (!OCFS2_IS_VALID_DINODE(fe)) {
556 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
557 status = -EIO;
558 goto leave;
559 }
560 555
561restart_all: 556restart_all:
562 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); 557 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
@@ -585,11 +580,18 @@ restart_all:
585 } 580 }
586 581
587restarted_transaction: 582restarted_transaction:
583 if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
584 clusters_to_add))) {
585 status = -EDQUOT;
586 goto leave;
587 }
588 did_quota = 1;
589
588 /* reserve a write to the file entry early on - that we if we 590 /* reserve a write to the file entry early on - that we if we
589 * run out of credits in the allocation path, we can still 591 * run out of credits in the allocation path, we can still
590 * update i_size. */ 592 * update i_size. */
591 status = ocfs2_journal_access(handle, inode, bh, 593 status = ocfs2_journal_access_di(handle, inode, bh,
592 OCFS2_JOURNAL_ACCESS_WRITE); 594 OCFS2_JOURNAL_ACCESS_WRITE);
593 if (status < 0) { 595 if (status < 0) {
594 mlog_errno(status); 596 mlog_errno(status);
595 goto leave; 597 goto leave;
@@ -622,6 +624,10 @@ restarted_transaction:
622 spin_lock(&OCFS2_I(inode)->ip_lock); 624 spin_lock(&OCFS2_I(inode)->ip_lock);
623 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 625 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
624 spin_unlock(&OCFS2_I(inode)->ip_lock); 626 spin_unlock(&OCFS2_I(inode)->ip_lock);
627 /* Release unused quota reservation */
628 vfs_dq_free_space(inode,
629 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
630 did_quota = 0;
625 631
626 if (why != RESTART_NONE && clusters_to_add) { 632 if (why != RESTART_NONE && clusters_to_add) {
627 if (why == RESTART_META) { 633 if (why == RESTART_META) {
@@ -654,6 +660,9 @@ restarted_transaction:
654 OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); 660 OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
655 661
656leave: 662leave:
663 if (status < 0 && did_quota)
664 vfs_dq_free_space(inode,
665 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
657 if (handle) { 666 if (handle) {
658 ocfs2_commit_trans(osb, handle); 667 ocfs2_commit_trans(osb, handle);
659 handle = NULL; 668 handle = NULL;
@@ -885,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
885 struct ocfs2_super *osb = OCFS2_SB(sb); 894 struct ocfs2_super *osb = OCFS2_SB(sb);
886 struct buffer_head *bh = NULL; 895 struct buffer_head *bh = NULL;
887 handle_t *handle = NULL; 896 handle_t *handle = NULL;
897 int locked[MAXQUOTAS] = {0, 0};
898 int credits, qtype;
899 struct ocfs2_mem_dqinfo *oinfo;
888 900
889 mlog_entry("(0x%p, '%.*s')\n", dentry, 901 mlog_entry("(0x%p, '%.*s')\n", dentry,
890 dentry->d_name.len, dentry->d_name.name); 902 dentry->d_name.len, dentry->d_name.name);
@@ -955,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
955 } 967 }
956 } 968 }
957 969
958 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 970 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
959 if (IS_ERR(handle)) { 971 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
960 status = PTR_ERR(handle); 972 credits = OCFS2_INODE_UPDATE_CREDITS;
961 mlog_errno(status); 973 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
962 goto bail_unlock; 974 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
975 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
976 oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
977 status = ocfs2_lock_global_qf(oinfo, 1);
978 if (status < 0)
979 goto bail_unlock;
980 credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
981 ocfs2_calc_qdel_credits(sb, USRQUOTA);
982 locked[USRQUOTA] = 1;
983 }
984 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
985 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
986 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
987 oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
988 status = ocfs2_lock_global_qf(oinfo, 1);
989 if (status < 0)
990 goto bail_unlock;
991 credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
992 ocfs2_calc_qdel_credits(sb, GRPQUOTA);
993 locked[GRPQUOTA] = 1;
994 }
995 handle = ocfs2_start_trans(osb, credits);
996 if (IS_ERR(handle)) {
997 status = PTR_ERR(handle);
998 mlog_errno(status);
999 goto bail_unlock;
1000 }
1001 status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
1002 if (status < 0)
1003 goto bail_commit;
1004 } else {
1005 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1006 if (IS_ERR(handle)) {
1007 status = PTR_ERR(handle);
1008 mlog_errno(status);
1009 goto bail_unlock;
1010 }
963 } 1011 }
964 1012
965 /* 1013 /*
@@ -982,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
982bail_commit: 1030bail_commit:
983 ocfs2_commit_trans(osb, handle); 1031 ocfs2_commit_trans(osb, handle);
984bail_unlock: 1032bail_unlock:
1033 for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
1034 if (!locked[qtype])
1035 continue;
1036 oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
1037 ocfs2_unlock_global_qf(oinfo, 1);
1038 }
985 ocfs2_inode_unlock(inode, 1); 1039 ocfs2_inode_unlock(inode, 1);
986bail_unlock_rw: 1040bail_unlock_rw:
987 if (size_change) 1041 if (size_change)
@@ -989,6 +1043,12 @@ bail_unlock_rw:
989bail: 1043bail:
990 brelse(bh); 1044 brelse(bh);
991 1045
1046 if (!status && attr->ia_valid & ATTR_MODE) {
1047 status = ocfs2_acl_chmod(inode);
1048 if (status < 0)
1049 mlog_errno(status);
1050 }
1051
992 mlog_exit(status); 1052 mlog_exit(status);
993 return status; 1053 return status;
994} 1054}
@@ -1035,7 +1095,7 @@ int ocfs2_permission(struct inode *inode, int mask)
1035 goto out; 1095 goto out;
1036 } 1096 }
1037 1097
1038 ret = generic_permission(inode, mask, NULL); 1098 ret = generic_permission(inode, mask, ocfs2_check_acl);
1039 1099
1040 ocfs2_inode_unlock(inode, 0); 1100 ocfs2_inode_unlock(inode, 0);
1041out: 1101out:
@@ -1061,8 +1121,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1061 goto out; 1121 goto out;
1062 } 1122 }
1063 1123
1064 ret = ocfs2_journal_access(handle, inode, bh, 1124 ret = ocfs2_journal_access_di(handle, inode, bh,
1065 OCFS2_JOURNAL_ACCESS_WRITE); 1125 OCFS2_JOURNAL_ACCESS_WRITE);
1066 if (ret < 0) { 1126 if (ret < 0) {
1067 mlog_errno(ret); 1127 mlog_errno(ret);
1068 goto out_trans; 1128 goto out_trans;
@@ -1128,9 +1188,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
1128{ 1188{
1129 int ret; 1189 int ret;
1130 struct buffer_head *bh = NULL; 1190 struct buffer_head *bh = NULL;
1131 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1132 1191
1133 ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); 1192 ret = ocfs2_read_inode_block(inode, &bh);
1134 if (ret < 0) { 1193 if (ret < 0) {
1135 mlog_errno(ret); 1194 mlog_errno(ret);
1136 goto out; 1195 goto out;
@@ -1156,8 +1215,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1156 struct buffer_head *di_bh = NULL; 1215 struct buffer_head *di_bh = NULL;
1157 1216
1158 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1217 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1159 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, 1218 ret = ocfs2_read_inode_block(inode, &di_bh);
1160 &di_bh);
1161 if (ret) { 1219 if (ret) {
1162 mlog_errno(ret); 1220 mlog_errno(ret);
1163 goto out; 1221 goto out;
@@ -1226,83 +1284,6 @@ out:
1226 return ret; 1284 return ret;
1227} 1285}
1228 1286
/*
 * Old-side helper: these lines carry only pre-change line numbers, i.e.
 * this hunk deletes the function.
 *
 * Removes @len clusters starting at logical cluster @cpos from the extent
 * tree rooted in the dinode held by @di_bh, inside a single transaction:
 *   - initialises a dinode extent tree and locks allocators (meta_ac),
 *   - takes the truncate-log inode's i_mutex and flushes the truncate log
 *     when ocfs2_truncate_log_needs_flush() says so,
 *   - starts a transaction with OCFS2_REMOVE_EXTENT_CREDITS,
 *   - calls ocfs2_remove_extent(), then subtracts @len from ip_clusters and
 *     mirrors the result into di->i_clusters,
 *   - appends the physical range (@phys_cpos converted to blocks, @len) to
 *     the truncate log.
 *
 * Returns 0 or the first error encountered; every error is logged with
 * mlog_errno().  meta_ac is freed on all paths that reach the "out" label.
 */
1229static int __ocfs2_remove_inode_range(struct inode *inode,
1230 struct buffer_head *di_bh,
1231 u32 cpos, u32 phys_cpos, u32 len,
1232 struct ocfs2_cached_dealloc_ctxt *dealloc)
1233{
1234 int ret;
1235 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
1236 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1237 struct inode *tl_inode = osb->osb_tl_inode;
1238 handle_t *handle;
1239 struct ocfs2_alloc_context *meta_ac = NULL;
1240 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1241 struct ocfs2_extent_tree et;
1242
1243 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
1244
1245 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
1246 if (ret) {
1247 mlog_errno(ret);
1248 return ret;
1249 }
1250
/* Held from here until the "out" label. */
1251 mutex_lock(&tl_inode->i_mutex);
1252
1253 if (ocfs2_truncate_log_needs_flush(osb)) {
1254 ret = __ocfs2_flush_truncate_log(osb);
1255 if (ret < 0) {
1256 mlog_errno(ret);
1257 goto out;
1258 }
1259 }
1260
1261 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1262 if (IS_ERR(handle)) {
1263 ret = PTR_ERR(handle);
1264 mlog_errno(ret);
1265 goto out;
1266 }
1267
1268 ret = ocfs2_journal_access(handle, inode, di_bh,
1269 OCFS2_JOURNAL_ACCESS_WRITE);
/*
 * NOTE(review): the transaction has already been started at this point,
 * yet this error path jumps to "out" rather than "out_commit", so
 * ocfs2_commit_trans() is not called for the handle on this path.
 */
1270 if (ret) {
1271 mlog_errno(ret);
1272 goto out;
1273 }
1274
1275 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
1276 dealloc);
1277 if (ret) {
1278 mlog_errno(ret);
1279 goto out_commit;
1280 }
1281
/* Keep the in-memory cluster count and the on-disk dinode field in step. */
1282 OCFS2_I(inode)->ip_clusters -= len;
1283 di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1284
1285 ret = ocfs2_journal_dirty(handle, di_bh);
1286 if (ret) {
1287 mlog_errno(ret);
1288 goto out_commit;
1289 }
1290
1291 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
1292 if (ret)
1293 mlog_errno(ret);
1294
1295out_commit:
1296 ocfs2_commit_trans(osb, handle);
1297out:
1298 mutex_unlock(&tl_inode->i_mutex);
1299
1300 if (meta_ac)
1301 ocfs2_free_alloc_context(meta_ac);
1302
1303 return ret;
1304}
1305
1306/* 1287/*
1307 * Truncate a byte range, avoiding pages within partial clusters. This 1288 * Truncate a byte range, avoiding pages within partial clusters. This
1308 * preserves those pages for the zeroing code to write to. 1289 * preserves those pages for the zeroing code to write to.
@@ -1402,7 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1402 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1383 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1403 struct ocfs2_cached_dealloc_ctxt dealloc; 1384 struct ocfs2_cached_dealloc_ctxt dealloc;
1404 struct address_space *mapping = inode->i_mapping; 1385 struct address_space *mapping = inode->i_mapping;
1386 struct ocfs2_extent_tree et;
1405 1387
1388 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
1406 ocfs2_init_dealloc_ctxt(&dealloc); 1389 ocfs2_init_dealloc_ctxt(&dealloc);
1407 1390
1408 if (byte_len == 0) 1391 if (byte_len == 0)
@@ -1458,9 +1441,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1458 1441
1459 /* Only do work for non-holes */ 1442 /* Only do work for non-holes */
1460 if (phys_cpos != 0) { 1443 if (phys_cpos != 0) {
1461 ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, 1444 ret = ocfs2_remove_btree_range(inode, &et, cpos,
1462 phys_cpos, alloc_size, 1445 phys_cpos, alloc_size,
1463 &dealloc); 1446 &dealloc);
1464 if (ret) { 1447 if (ret) {
1465 mlog_errno(ret); 1448 mlog_errno(ret);
1466 goto out; 1449 goto out;
@@ -1622,7 +1605,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1622 struct ocfs2_space_resv *sr) 1605 struct ocfs2_space_resv *sr)
1623{ 1606{
1624 struct inode *inode = file->f_path.dentry->d_inode; 1607 struct inode *inode = file->f_path.dentry->d_inode;
1625 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);; 1608 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1626 1609
1627 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && 1610 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
1628 !ocfs2_writes_unwritten_extents(osb)) 1611 !ocfs2_writes_unwritten_extents(osb))
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e92382cbca5f..172f9fbc9fc7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
51 struct ocfs2_alloc_context *data_ac, 51 struct ocfs2_alloc_context *data_ac,
52 struct ocfs2_alloc_context *meta_ac, 52 struct ocfs2_alloc_context *meta_ac,
53 enum ocfs2_alloc_restarted *reason_ret); 53 enum ocfs2_alloc_restarted *reason_ret);
54int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh,
56 u64 new_i_size);
54int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 57int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
55 u64 zero_to); 58 u64 zero_to);
56int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 59int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7aa00d511874..229e707bc050 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,6 +28,7 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/quotaops.h>
31 32
32#include <asm/byteorder.h> 33#include <asm/byteorder.h>
33 34
@@ -37,6 +38,7 @@
37#include "ocfs2.h" 38#include "ocfs2.h"
38 39
39#include "alloc.h" 40#include "alloc.h"
41#include "blockcheck.h"
40#include "dlmglue.h" 42#include "dlmglue.h"
41#include "extent_map.h" 43#include "extent_map.h"
42#include "file.h" 44#include "file.h"
@@ -214,12 +216,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
214 return 0; 216 return 0;
215} 217}
216 218
217int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, 219void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
218 int create_ino) 220 int create_ino)
219{ 221{
220 struct super_block *sb; 222 struct super_block *sb;
221 struct ocfs2_super *osb; 223 struct ocfs2_super *osb;
222 int status = -EINVAL;
223 int use_plocks = 1; 224 int use_plocks = 1;
224 225
225 mlog_entry("(0x%p, size:%llu)\n", inode, 226 mlog_entry("(0x%p, size:%llu)\n", inode,
@@ -232,25 +233,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
232 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) 233 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
233 use_plocks = 0; 234 use_plocks = 0;
234 235
235 /* this means that read_inode cannot create a superblock inode 236 /*
236 * today. change if needed. */ 237 * These have all been checked by ocfs2_read_inode_block() or set
237 if (!OCFS2_IS_VALID_DINODE(fe) || 238 * by ocfs2_mknod_locked(), so a failure is a code bug.
238 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { 239 */
239 mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " 240 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode
240 "signature = %.*s, flags = 0x%x\n", 241 cannot create a superblock
241 inode->i_ino, 242 inode today. change if
242 (unsigned long long)le64_to_cpu(fe->i_blkno), 7, 243 that is needed. */
243 fe->i_signature, le32_to_cpu(fe->i_flags)); 244 BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
244 goto bail; 245 BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
245 }
246 246
247 if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
248 mlog(ML_ERROR, "file entry generation does not match "
249 "superblock! osb->fs_generation=%x, "
250 "fe->i_fs_generation=%x\n",
251 osb->fs_generation, le32_to_cpu(fe->i_fs_generation));
252 goto bail;
253 }
254 247
255 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 248 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
256 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 249 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
@@ -284,14 +277,18 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
284 277
285 inode->i_nlink = le16_to_cpu(fe->i_links_count); 278 inode->i_nlink = le16_to_cpu(fe->i_links_count);
286 279
287 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 280 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
288 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; 281 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
282 inode->i_flags |= S_NOQUOTA;
283 }
289 284
290 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 285 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
291 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 286 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
292 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 287 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
293 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { 288 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
294 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 289 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
290 } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
291 inode->i_flags |= S_NOQUOTA;
295 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { 292 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
296 mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino); 293 mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
297 /* we can't actually hit this as read_inode can't 294 /* we can't actually hit this as read_inode can't
@@ -354,10 +351,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
354 351
355 ocfs2_set_inode_flags(inode); 352 ocfs2_set_inode_flags(inode);
356 353
357 status = 0; 354 mlog_exit_void();
358bail:
359 mlog_exit(status);
360 return status;
361} 355}
362 356
363static int ocfs2_read_locked_inode(struct inode *inode, 357static int ocfs2_read_locked_inode(struct inode *inode,
@@ -460,11 +454,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
460 } 454 }
461 } 455 }
462 456
463 if (can_lock) 457 if (can_lock) {
464 status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, 458 status = ocfs2_read_inode_block_full(inode, &bh,
465 OCFS2_BH_IGNORE_CACHE); 459 OCFS2_BH_IGNORE_CACHE);
466 else 460 } else {
467 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); 461 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
462 if (!status)
463 status = ocfs2_validate_inode_block(osb->sb, bh);
464 }
468 if (status < 0) { 465 if (status < 0) {
469 mlog_errno(status); 466 mlog_errno(status);
470 goto bail; 467 goto bail;
@@ -472,12 +469,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
472 469
473 status = -EINVAL; 470 status = -EINVAL;
474 fe = (struct ocfs2_dinode *) bh->b_data; 471 fe = (struct ocfs2_dinode *) bh->b_data;
475 if (!OCFS2_IS_VALID_DINODE(fe)) {
476 mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
477 (unsigned long long)args->fi_blkno, 7,
478 fe->i_signature);
479 goto bail;
480 }
481 472
482 /* 473 /*
483 * This is a code bug. Right now the caller needs to 474 * This is a code bug. Right now the caller needs to
@@ -491,10 +482,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
491 482
492 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 483 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
493 S_ISBLK(le16_to_cpu(fe->i_mode))) 484 S_ISBLK(le16_to_cpu(fe->i_mode)))
494 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 485 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
495 486
496 if (ocfs2_populate_inode(inode, fe, 0) < 0) 487 ocfs2_populate_inode(inode, fe, 0);
497 goto bail;
498 488
499 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 489 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
500 490
@@ -547,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
547 goto out; 537 goto out;
548 } 538 }
549 539
550 status = ocfs2_journal_access(handle, inode, fe_bh, 540 status = ocfs2_journal_access_di(handle, inode, fe_bh,
551 OCFS2_JOURNAL_ACCESS_WRITE); 541 OCFS2_JOURNAL_ACCESS_WRITE);
552 if (status < 0) { 542 if (status < 0) {
553 mlog_errno(status); 543 mlog_errno(status);
554 goto out; 544 goto out;
@@ -615,7 +605,8 @@ static int ocfs2_remove_inode(struct inode *inode,
615 goto bail; 605 goto bail;
616 } 606 }
617 607
618 handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); 608 handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
609 ocfs2_quota_trans_credits(inode->i_sb));
619 if (IS_ERR(handle)) { 610 if (IS_ERR(handle)) {
620 status = PTR_ERR(handle); 611 status = PTR_ERR(handle);
621 mlog_errno(status); 612 mlog_errno(status);
@@ -630,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode,
630 } 621 }
631 622
632 /* set the inodes dtime */ 623 /* set the inodes dtime */
633 status = ocfs2_journal_access(handle, inode, di_bh, 624 status = ocfs2_journal_access_di(handle, inode, di_bh,
634 OCFS2_JOURNAL_ACCESS_WRITE); 625 OCFS2_JOURNAL_ACCESS_WRITE);
635 if (status < 0) { 626 if (status < 0) {
636 mlog_errno(status); 627 mlog_errno(status);
637 goto bail_commit; 628 goto bail_commit;
@@ -647,6 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode,
647 } 638 }
648 639
649 ocfs2_remove_from_cache(inode, di_bh); 640 ocfs2_remove_from_cache(inode, di_bh);
641 vfs_dq_free_inode(inode);
650 642
651 status = ocfs2_free_dinode(handle, inode_alloc_inode, 643 status = ocfs2_free_dinode(handle, inode_alloc_inode,
652 inode_alloc_bh, di); 644 inode_alloc_bh, di);
@@ -929,7 +921,10 @@ void ocfs2_delete_inode(struct inode *inode)
929 921
930 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 922 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
931 923
932 if (is_bad_inode(inode)) { 924 /* When we fail in read_inode() we mark inode as bad. The second test
925 * catches the case when inode allocation fails before allocating
926 * a block for inode. */
927 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
933 mlog(0, "Skipping delete of bad inode\n"); 928 mlog(0, "Skipping delete of bad inode\n");
934 goto bail; 929 goto bail;
935 } 930 }
@@ -1195,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1195 mlog_entry("(inode %llu)\n", 1190 mlog_entry("(inode %llu)\n",
1196 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1191 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1197 1192
1198 status = ocfs2_journal_access(handle, inode, bh, 1193 status = ocfs2_journal_access_di(handle, inode, bh,
1199 OCFS2_JOURNAL_ACCESS_WRITE); 1194 OCFS2_JOURNAL_ACCESS_WRITE);
1200 if (status < 0) { 1195 if (status < 0) {
1201 mlog_errno(status); 1196 mlog_errno(status);
1202 goto leave; 1197 goto leave;
@@ -1264,3 +1259,89 @@ void ocfs2_refresh_inode(struct inode *inode,
1264 1259
1265 spin_unlock(&OCFS2_I(inode)->ip_lock); 1260 spin_unlock(&OCFS2_I(inode)->ip_lock);
1266} 1261}
1262
/*
 * Check that @bh holds a valid dinode for the filesystem @sb.
 *
 * The buffer must already be uptodate (BUG_ON otherwise).  Checks run in
 * this order and stop at the first failure:
 *   1. ocfs2_validate_meta_ecc() on the block data and di->i_check; its
 *      error code is returned unchanged and logged with mlog(ML_ERROR).
 *   2. OCFS2_IS_VALID_DINODE() on the block.
 *   3. di->i_blkno equals bh->b_blocknr.
 *   4. OCFS2_VALID_FL is set in di->i_flags.
 *   5. di->i_fs_generation equals OCFS2_SB(sb)->fs_generation.
 * Failures of checks 2-5 are reported through ocfs2_error() and return
 * -EINVAL.
 *
 * Returns 0 when every check passes.
 */
1263int ocfs2_validate_inode_block(struct super_block *sb,
1264 struct buffer_head *bh)
1265{
1266 int rc;
1267 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1268
1269 mlog(0, "Validating dinode %llu\n",
1270 (unsigned long long)bh->b_blocknr);
1271
1272 BUG_ON(!buffer_uptodate(bh));
1273
1274 /*
1275 * If the ecc fails, we return the error but otherwise
1276 * leave the filesystem running. We know any error is
1277 * local to this block.
1278 */
1279 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
1280 if (rc) {
1281 mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
1282 (unsigned long long)bh->b_blocknr);
1283 goto bail;
1284 }
1285
1286 /*
1287 * Errors after here are fatal.
1288 */
1289
/* Preset the result so each failing check below can simply jump to bail. */
1290 rc = -EINVAL;
1291
1292 if (!OCFS2_IS_VALID_DINODE(di)) {
1293 ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
1294 (unsigned long long)bh->b_blocknr, 7,
1295 di->i_signature);
1296 goto bail;
1297 }
1298
/* The block must describe itself as living at the block we read. */
1299 if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
1300 ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
1301 (unsigned long long)bh->b_blocknr,
1302 (unsigned long long)le64_to_cpu(di->i_blkno));
1303 goto bail;
1304 }
1305
1306 if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
1307 ocfs2_error(sb,
1308 "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
1309 (unsigned long long)bh->b_blocknr);
1310 goto bail;
1311 }
1312
1313 if (le32_to_cpu(di->i_fs_generation) !=
1314 OCFS2_SB(sb)->fs_generation) {
1315 ocfs2_error(sb,
1316 "Invalid dinode #%llu: fs_generation is %u\n",
1317 (unsigned long long)bh->b_blocknr,
1318 le32_to_cpu(di->i_fs_generation));
1319 goto bail;
1320 }
1321
1322 rc = 0;
1323
1324bail:
1325 return rc;
1326}
1327
/*
 * Read the dinode block of @inode (block OCFS2_I(inode)->ip_blkno) through
 * ocfs2_read_blocks(), with ocfs2_validate_inode_block() as the validation
 * callback and @flags passed through unchanged.
 *
 * ocfs2_read_blocks() works on a local copy of *bh, so the caller's pointer
 * is written only when the read succeeded and the caller passed *bh == NULL.
 *
 * Returns the result of ocfs2_read_blocks().
 */
1328int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
1329 int flags)
1330{
1331 int rc;
1332 struct buffer_head *tmp = *bh;
1333
1334 rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
1335 flags, ocfs2_validate_inode_block);
1336
1337 /* If ocfs2_read_blocks() got us a new bh, pass it up. */
1338 if (!rc && !*bh)
1339 *bh = tmp;
1340
1341 return rc;
1342}
1343
/*
 * Read and validate the dinode block of @inode into *bh.  Equivalent to
 * ocfs2_read_inode_block_full() with flags == 0; returns its result.
 */
1344int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
1345{
1346 return ocfs2_read_inode_block_full(inode, bh, 0);
1347}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f37af9bcc4a..eb3c302b38d3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
128 int sysfile_type); 128 int sysfile_type);
129int ocfs2_inode_init_private(struct inode *inode); 129int ocfs2_inode_init_private(struct inode *inode);
130int ocfs2_inode_revalidate(struct dentry *dentry); 130int ocfs2_inode_revalidate(struct dentry *dentry);
131int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, 131void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
132 int create_ino); 132 int create_ino);
133void ocfs2_read_inode(struct inode *inode); 133void ocfs2_read_inode(struct inode *inode);
134void ocfs2_read_inode2(struct inode *inode, void *opaque); 134void ocfs2_read_inode2(struct inode *inode, void *opaque);
135ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf, 135ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
@@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
142 struct buffer_head *bh); 142 struct buffer_head *bh);
143int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); 143int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
144int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); 144int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
145struct buffer_head *ocfs2_bread(struct inode *inode,
146 int block, int *err, int reada);
145 147
146void ocfs2_set_inode_flags(struct inode *inode); 148void ocfs2_set_inode_flags(struct inode *inode);
147void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi); 149void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
@@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
153 return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); 155 return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
154} 156}
155 157
158/* Validate that a bh contains a valid inode */
159int ocfs2_validate_inode_block(struct super_block *sb,
160 struct buffer_head *bh);
161/*
162 * Read an inode block into *bh. If *bh is NULL, a bh will be allocated.
163 * This is a cached read. The inode will be validated with
164 * ocfs2_validate_inode_block().
165 */
166int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
167/* The same, but can be passed OCFS2_BH_* flags */
168int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
169 int flags);
156#endif /* OCFS2_INODE_H */ 170#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 99fe9d584f3c..57d7d25a2b9a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -35,6 +35,7 @@
35#include "ocfs2.h" 35#include "ocfs2.h"
36 36
37#include "alloc.h" 37#include "alloc.h"
38#include "blockcheck.h"
38#include "dir.h" 39#include "dir.h"
39#include "dlmglue.h" 40#include "dlmglue.h"
40#include "extent_map.h" 41#include "extent_map.h"
@@ -45,6 +46,7 @@
45#include "slot_map.h" 46#include "slot_map.h"
46#include "super.h" 47#include "super.h"
47#include "sysfile.h" 48#include "sysfile.h"
49#include "quota.h"
48 50
49#include "buffer_head_io.h" 51#include "buffer_head_io.h"
50 52
@@ -52,10 +54,10 @@ DEFINE_SPINLOCK(trans_inc_lock);
52 54
53static int ocfs2_force_read_journal(struct inode *inode); 55static int ocfs2_force_read_journal(struct inode *inode);
54static int ocfs2_recover_node(struct ocfs2_super *osb, 56static int ocfs2_recover_node(struct ocfs2_super *osb,
55 int node_num); 57 int node_num, int slot_num);
56static int __ocfs2_recovery_thread(void *arg); 58static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 59static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 60static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 61static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty, int replayed); 62 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 63static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@@ -64,6 +66,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
64 int slot); 66 int slot);
65static int ocfs2_commit_thread(void *arg); 67static int ocfs2_commit_thread(void *arg);
66 68
69static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
70{
71 return __ocfs2_wait_on_mount(osb, 0);
72}
73
74static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
75{
76 return __ocfs2_wait_on_mount(osb, 1);
77}
78
79
67 80
68/* 81/*
69 * The recovery_list is a simple linked list of node numbers to recover. 82 * The recovery_list is a simple linked list of node numbers to recover.
@@ -256,11 +269,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
256 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); 269 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
257 BUG_ON(max_buffs <= 0); 270 BUG_ON(max_buffs <= 0);
258 271
259 /* JBD might support this, but our journalling code doesn't yet. */ 272 /* Nested transaction? Just return the handle... */
260 if (journal_current_handle()) { 273 if (journal_current_handle())
261 mlog(ML_ERROR, "Recursive transaction attempted!\n"); 274 return jbd2_journal_start(journal, max_buffs);
262 BUG();
263 }
264 275
265 down_read(&osb->journal->j_trans_barrier); 276 down_read(&osb->journal->j_trans_barrier);
266 277
@@ -285,16 +296,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
285int ocfs2_commit_trans(struct ocfs2_super *osb, 296int ocfs2_commit_trans(struct ocfs2_super *osb,
286 handle_t *handle) 297 handle_t *handle)
287{ 298{
288 int ret; 299 int ret, nested;
289 struct ocfs2_journal *journal = osb->journal; 300 struct ocfs2_journal *journal = osb->journal;
290 301
291 BUG_ON(!handle); 302 BUG_ON(!handle);
292 303
304 nested = handle->h_ref > 1;
293 ret = jbd2_journal_stop(handle); 305 ret = jbd2_journal_stop(handle);
294 if (ret < 0) 306 if (ret < 0)
295 mlog_errno(ret); 307 mlog_errno(ret);
296 308
297 up_read(&journal->j_trans_barrier); 309 if (!nested)
310 up_read(&journal->j_trans_barrier);
298 311
299 return ret; 312 return ret;
300} 313}
@@ -357,10 +370,137 @@ bail:
357 return status; 370 return status;
358} 371}
359 372
360int ocfs2_journal_access(handle_t *handle, 373struct ocfs2_triggers {
361 struct inode *inode, 374 struct jbd2_buffer_trigger_type ot_triggers;
362 struct buffer_head *bh, 375 int ot_offset;
363 int type) 376};
377
378static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
379{
380 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
381}
382
383static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
384 struct buffer_head *bh,
385 void *data, size_t size)
386{
387 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
388
389 /*
390 * We aren't guaranteed to have the superblock here, so we
391 * must unconditionally compute the ecc data.
392 * __ocfs2_journal_access() will only set the triggers if
393 * metaecc is enabled.
394 */
395 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
396}
397
398/*
399 * Quota blocks have their own trigger because the struct ocfs2_block_check
400 * offset depends on the blocksize.
401 */
402static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
403 struct buffer_head *bh,
404 void *data, size_t size)
405{
406 struct ocfs2_disk_dqtrailer *dqt =
407 ocfs2_block_dqtrailer(size, data);
408
409 /*
410 * We aren't guaranteed to have the superblock here, so we
411 * must unconditionally compute the ecc data.
412 * __ocfs2_journal_access() will only set the triggers if
413 * metaecc is enabled.
414 */
415 ocfs2_block_check_compute(data, size, &dqt->dq_check);
416}
417
418/*
419 * Directory blocks also have their own trigger because the
420 * struct ocfs2_block_check offset depends on the blocksize.
421 */
422static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
423 struct buffer_head *bh,
424 void *data, size_t size)
425{
426 struct ocfs2_dir_block_trailer *trailer =
427 ocfs2_dir_trailer_from_size(size, data);
428
429 /*
430 * We aren't guaranteed to have the superblock here, so we
431 * must unconditionally compute the ecc data.
432 * __ocfs2_journal_access() will only set the triggers if
433 * metaecc is enabled.
434 */
435 ocfs2_block_check_compute(data, size, &trailer->db_check);
436}
437
438static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
439 struct buffer_head *bh)
440{
441 mlog(ML_ERROR,
442 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
443 "bh->b_blocknr = %llu\n",
444 (unsigned long)bh,
445 (unsigned long long)bh->b_blocknr);
446
447 /* We aren't guaranteed to have the superblock here - but if we
448 * don't, it'll just crash. */
449 ocfs2_error(bh->b_assoc_map->host->i_sb,
450 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
451}
452
453static struct ocfs2_triggers di_triggers = {
454 .ot_triggers = {
455 .t_commit = ocfs2_commit_trigger,
456 .t_abort = ocfs2_abort_trigger,
457 },
458 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
459};
460
461static struct ocfs2_triggers eb_triggers = {
462 .ot_triggers = {
463 .t_commit = ocfs2_commit_trigger,
464 .t_abort = ocfs2_abort_trigger,
465 },
466 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
467};
468
469static struct ocfs2_triggers gd_triggers = {
470 .ot_triggers = {
471 .t_commit = ocfs2_commit_trigger,
472 .t_abort = ocfs2_abort_trigger,
473 },
474 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
475};
476
477static struct ocfs2_triggers db_triggers = {
478 .ot_triggers = {
479 .t_commit = ocfs2_db_commit_trigger,
480 .t_abort = ocfs2_abort_trigger,
481 },
482};
483
484static struct ocfs2_triggers xb_triggers = {
485 .ot_triggers = {
486 .t_commit = ocfs2_commit_trigger,
487 .t_abort = ocfs2_abort_trigger,
488 },
489 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
490};
491
492static struct ocfs2_triggers dq_triggers = {
493 .ot_triggers = {
494 .t_commit = ocfs2_dq_commit_trigger,
495 .t_abort = ocfs2_abort_trigger,
496 },
497};
498
499static int __ocfs2_journal_access(handle_t *handle,
500 struct inode *inode,
501 struct buffer_head *bh,
502 struct ocfs2_triggers *triggers,
503 int type)
364{ 504{
365 int status; 505 int status;
366 506
@@ -406,6 +546,8 @@ int ocfs2_journal_access(handle_t *handle,
406 status = -EINVAL; 546 status = -EINVAL;
407 mlog(ML_ERROR, "Uknown access type!\n"); 547 mlog(ML_ERROR, "Uknown access type!\n");
408 } 548 }
549 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
550 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
409 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 551 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
410 552
411 if (status < 0) 553 if (status < 0)
@@ -416,6 +558,54 @@ int ocfs2_journal_access(handle_t *handle,
416 return status; 558 return status;
417} 559}
418 560
561int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
562 struct buffer_head *bh, int type)
563{
564 return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
565 type);
566}
567
568int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
569 struct buffer_head *bh, int type)
570{
571 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
572 type);
573}
574
575int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
576 struct buffer_head *bh, int type)
577{
578 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
579 type);
580}
581
582int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
583 struct buffer_head *bh, int type)
584{
585 return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
586 type);
587}
588
589int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
590 struct buffer_head *bh, int type)
591{
592 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
593 type);
594}
595
596int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
597 struct buffer_head *bh, int type)
598{
599 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
600 type);
601}
602
603int ocfs2_journal_access(handle_t *handle, struct inode *inode,
604 struct buffer_head *bh, int type)
605{
606 return __ocfs2_journal_access(handle, inode, bh, NULL, type);
607}
608
419int ocfs2_journal_dirty(handle_t *handle, 609int ocfs2_journal_dirty(handle_t *handle,
420 struct buffer_head *bh) 610 struct buffer_head *bh)
421{ 611{
@@ -434,20 +624,6 @@ int ocfs2_journal_dirty(handle_t *handle,
434 return status; 624 return status;
435} 625}
436 626
437#ifdef CONFIG_OCFS2_COMPAT_JBD
438int ocfs2_journal_dirty_data(handle_t *handle,
439 struct buffer_head *bh)
440{
441 int err = journal_dirty_data(handle, bh);
442 if (err)
443 mlog_errno(err);
444 /* TODO: When we can handle it, abort the handle and go RO on
445 * error here. */
446
447 return err;
448}
449#endif
450
451#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 627#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
452 628
453void ocfs2_set_journal_params(struct ocfs2_super *osb) 629void ocfs2_set_journal_params(struct ocfs2_super *osb)
@@ -587,17 +763,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
587 mlog_entry_void(); 763 mlog_entry_void();
588 764
589 fe = (struct ocfs2_dinode *)bh->b_data; 765 fe = (struct ocfs2_dinode *)bh->b_data;
590 if (!OCFS2_IS_VALID_DINODE(fe)) { 766
591 /* This is called from startup/shutdown which will 767 /* The journal bh on the osb always comes from ocfs2_journal_init()
592 * handle the errors in a specific manner, so no need 768 * and was validated there inside ocfs2_inode_lock_full(). It's a
593 * to call ocfs2_error() here. */ 769 * code bug if we mess it up. */
594 mlog(ML_ERROR, "Journal dinode %llu has invalid " 770 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
595 "signature: %.*s",
596 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
597 fe->i_signature);
598 status = -EIO;
599 goto out;
600 }
601 771
602 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 772 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
603 if (dirty) 773 if (dirty)
@@ -609,11 +779,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
609 if (replayed) 779 if (replayed)
610 ocfs2_bump_recovery_generation(fe); 780 ocfs2_bump_recovery_generation(fe);
611 781
782 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
612 status = ocfs2_write_block(osb, bh, journal->j_inode); 783 status = ocfs2_write_block(osb, bh, journal->j_inode);
613 if (status < 0) 784 if (status < 0)
614 mlog_errno(status); 785 mlog_errno(status);
615 786
616out:
617 mlog_exit(status); 787 mlog_exit(status);
618 return status; 788 return status;
619} 789}
@@ -878,6 +1048,7 @@ struct ocfs2_la_recovery_item {
878 int lri_slot; 1048 int lri_slot;
879 struct ocfs2_dinode *lri_la_dinode; 1049 struct ocfs2_dinode *lri_la_dinode;
880 struct ocfs2_dinode *lri_tl_dinode; 1050 struct ocfs2_dinode *lri_tl_dinode;
1051 struct ocfs2_quota_recovery *lri_qrec;
881}; 1052};
882 1053
883/* Does the second half of the recovery process. By this point, the 1054/* Does the second half of the recovery process. By this point, the
@@ -898,6 +1069,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
898 struct ocfs2_super *osb = journal->j_osb; 1069 struct ocfs2_super *osb = journal->j_osb;
899 struct ocfs2_dinode *la_dinode, *tl_dinode; 1070 struct ocfs2_dinode *la_dinode, *tl_dinode;
900 struct ocfs2_la_recovery_item *item, *n; 1071 struct ocfs2_la_recovery_item *item, *n;
1072 struct ocfs2_quota_recovery *qrec;
901 LIST_HEAD(tmp_la_list); 1073 LIST_HEAD(tmp_la_list);
902 1074
903 mlog_entry_void(); 1075 mlog_entry_void();
@@ -913,6 +1085,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
913 1085
914 mlog(0, "Complete recovery for slot %d\n", item->lri_slot); 1086 mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
915 1087
1088 ocfs2_wait_on_quotas(osb);
1089
916 la_dinode = item->lri_la_dinode; 1090 la_dinode = item->lri_la_dinode;
917 if (la_dinode) { 1091 if (la_dinode) {
918 mlog(0, "Clean up local alloc %llu\n", 1092 mlog(0, "Clean up local alloc %llu\n",
@@ -943,6 +1117,16 @@ void ocfs2_complete_recovery(struct work_struct *work)
943 if (ret < 0) 1117 if (ret < 0)
944 mlog_errno(ret); 1118 mlog_errno(ret);
945 1119
1120 qrec = item->lri_qrec;
1121 if (qrec) {
1122 mlog(0, "Recovering quota files");
1123 ret = ocfs2_finish_quota_recovery(osb, qrec,
1124 item->lri_slot);
1125 if (ret < 0)
1126 mlog_errno(ret);
1127 /* Recovery info is already freed now */
1128 }
1129
946 kfree(item); 1130 kfree(item);
947 } 1131 }
948 1132
@@ -956,7 +1140,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
956static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, 1140static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
957 int slot_num, 1141 int slot_num,
958 struct ocfs2_dinode *la_dinode, 1142 struct ocfs2_dinode *la_dinode,
959 struct ocfs2_dinode *tl_dinode) 1143 struct ocfs2_dinode *tl_dinode,
1144 struct ocfs2_quota_recovery *qrec)
960{ 1145{
961 struct ocfs2_la_recovery_item *item; 1146 struct ocfs2_la_recovery_item *item;
962 1147
@@ -971,6 +1156,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
971 if (tl_dinode) 1156 if (tl_dinode)
972 kfree(tl_dinode); 1157 kfree(tl_dinode);
973 1158
1159 if (qrec)
1160 ocfs2_free_quota_recovery(qrec);
1161
974 mlog_errno(-ENOMEM); 1162 mlog_errno(-ENOMEM);
975 return; 1163 return;
976 } 1164 }
@@ -979,6 +1167,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
979 item->lri_la_dinode = la_dinode; 1167 item->lri_la_dinode = la_dinode;
980 item->lri_slot = slot_num; 1168 item->lri_slot = slot_num;
981 item->lri_tl_dinode = tl_dinode; 1169 item->lri_tl_dinode = tl_dinode;
1170 item->lri_qrec = qrec;
982 1171
983 spin_lock(&journal->j_lock); 1172 spin_lock(&journal->j_lock);
984 list_add_tail(&item->lri_list, &journal->j_la_cleanups); 1173 list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -998,6 +1187,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
998 ocfs2_queue_recovery_completion(journal, 1187 ocfs2_queue_recovery_completion(journal,
999 osb->slot_num, 1188 osb->slot_num,
1000 osb->local_alloc_copy, 1189 osb->local_alloc_copy,
1190 NULL,
1001 NULL); 1191 NULL);
1002 ocfs2_schedule_truncate_log_flush(osb, 0); 1192 ocfs2_schedule_truncate_log_flush(osb, 0);
1003 1193
@@ -1006,11 +1196,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1006 } 1196 }
1007} 1197}
1008 1198
1199void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
1200{
1201 if (osb->quota_rec) {
1202 ocfs2_queue_recovery_completion(osb->journal,
1203 osb->slot_num,
1204 NULL,
1205 NULL,
1206 osb->quota_rec);
1207 osb->quota_rec = NULL;
1208 }
1209}
1210
1009static int __ocfs2_recovery_thread(void *arg) 1211static int __ocfs2_recovery_thread(void *arg)
1010{ 1212{
1011 int status, node_num; 1213 int status, node_num, slot_num;
1012 struct ocfs2_super *osb = arg; 1214 struct ocfs2_super *osb = arg;
1013 struct ocfs2_recovery_map *rm = osb->recovery_map; 1215 struct ocfs2_recovery_map *rm = osb->recovery_map;
1216 int *rm_quota = NULL;
1217 int rm_quota_used = 0, i;
1218 struct ocfs2_quota_recovery *qrec;
1014 1219
1015 mlog_entry_void(); 1220 mlog_entry_void();
1016 1221
@@ -1019,6 +1224,11 @@ static int __ocfs2_recovery_thread(void *arg)
1019 goto bail; 1224 goto bail;
1020 } 1225 }
1021 1226
1227 rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
1228 if (!rm_quota) {
1229 status = -ENOMEM;
1230 goto bail;
1231 }
1022restart: 1232restart:
1023 status = ocfs2_super_lock(osb, 1); 1233 status = ocfs2_super_lock(osb, 1);
1024 if (status < 0) { 1234 if (status < 0) {
@@ -1032,8 +1242,28 @@ restart:
1032 * clear it until ocfs2_recover_node() has succeeded. */ 1242 * clear it until ocfs2_recover_node() has succeeded. */
1033 node_num = rm->rm_entries[0]; 1243 node_num = rm->rm_entries[0];
1034 spin_unlock(&osb->osb_lock); 1244 spin_unlock(&osb->osb_lock);
1035 1245 mlog(0, "checking node %d\n", node_num);
1036 status = ocfs2_recover_node(osb, node_num); 1246 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1247 if (slot_num == -ENOENT) {
1248 status = 0;
1249 mlog(0, "no slot for this node, so no recovery"
1250 "required.\n");
1251 goto skip_recovery;
1252 }
1253 mlog(0, "node %d was using slot %d\n", node_num, slot_num);
1254
1255 /* It is a bit subtle with quota recovery. We cannot do it
1256 * immediately because we have to obtain cluster locks from
1257 * quota files and we also don't want to just skip it because
1258 * then quota usage would be out of sync until some node takes
1259 * the slot. So we remember which nodes need quota recovery
1260 * and when everything else is done, we recover quotas. */
1261 for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
1262 if (i == rm_quota_used)
1263 rm_quota[rm_quota_used++] = slot_num;
1264
1265 status = ocfs2_recover_node(osb, node_num, slot_num);
1266skip_recovery:
1037 if (!status) { 1267 if (!status) {
1038 ocfs2_recovery_map_clear(osb, node_num); 1268 ocfs2_recovery_map_clear(osb, node_num);
1039 } else { 1269 } else {
@@ -1055,13 +1285,27 @@ restart:
1055 if (status < 0) 1285 if (status < 0)
1056 mlog_errno(status); 1286 mlog_errno(status);
1057 1287
1288 /* Now it is right time to recover quotas... We have to do this under
1289 * superblock lock so that noone can start using the slot (and crash)
1290 * before we recover it */
1291 for (i = 0; i < rm_quota_used; i++) {
1292 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
1293 if (IS_ERR(qrec)) {
1294 status = PTR_ERR(qrec);
1295 mlog_errno(status);
1296 continue;
1297 }
1298 ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
1299 NULL, NULL, qrec);
1300 }
1301
1058 ocfs2_super_unlock(osb, 1); 1302 ocfs2_super_unlock(osb, 1);
1059 1303
1060 /* We always run recovery on our own orphan dir - the dead 1304 /* We always run recovery on our own orphan dir - the dead
1061 * node(s) may have disallowd a previos inode delete. Re-processing 1305 * node(s) may have disallowd a previos inode delete. Re-processing
1062 * is therefore required. */ 1306 * is therefore required. */
1063 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, 1307 ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
1064 NULL); 1308 NULL, NULL);
1065 1309
1066bail: 1310bail:
1067 mutex_lock(&osb->recovery_lock); 1311 mutex_lock(&osb->recovery_lock);
@@ -1076,6 +1320,9 @@ bail:
1076 1320
1077 mutex_unlock(&osb->recovery_lock); 1321 mutex_unlock(&osb->recovery_lock);
1078 1322
1323 if (rm_quota)
1324 kfree(rm_quota);
1325
1079 mlog_exit(status); 1326 mlog_exit(status);
1080 /* no one is callint kthread_stop() for us so the kthread() api 1327 /* no one is callint kthread_stop() for us so the kthread() api
1081 * requires that we call do_exit(). And it isn't exported, but 1328 * requires that we call do_exit(). And it isn't exported, but
@@ -1135,8 +1382,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1135 } 1382 }
1136 SET_INODE_JOURNAL(inode); 1383 SET_INODE_JOURNAL(inode);
1137 1384
1138 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, 1385 status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
1139 OCFS2_BH_IGNORE_CACHE);
1140 if (status < 0) { 1386 if (status < 0) {
1141 mlog_errno(status); 1387 mlog_errno(status);
1142 goto bail; 1388 goto bail;
@@ -1268,6 +1514,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1268 osb->slot_recovery_generations[slot_num] = 1514 osb->slot_recovery_generations[slot_num] =
1269 ocfs2_get_recovery_generation(fe); 1515 ocfs2_get_recovery_generation(fe);
1270 1516
1517 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
1271 status = ocfs2_write_block(osb, bh, inode); 1518 status = ocfs2_write_block(osb, bh, inode);
1272 if (status < 0) 1519 if (status < 0)
1273 mlog_errno(status); 1520 mlog_errno(status);
@@ -1304,31 +1551,19 @@ done:
1304 * far less concerning. 1551 * far less concerning.
1305 */ 1552 */
1306static int ocfs2_recover_node(struct ocfs2_super *osb, 1553static int ocfs2_recover_node(struct ocfs2_super *osb,
1307 int node_num) 1554 int node_num, int slot_num)
1308{ 1555{
1309 int status = 0; 1556 int status = 0;
1310 int slot_num;
1311 struct ocfs2_dinode *la_copy = NULL; 1557 struct ocfs2_dinode *la_copy = NULL;
1312 struct ocfs2_dinode *tl_copy = NULL; 1558 struct ocfs2_dinode *tl_copy = NULL;
1313 1559
1314 mlog_entry("(node_num=%d, osb->node_num = %d)\n", 1560 mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
1315 node_num, osb->node_num); 1561 node_num, slot_num, osb->node_num);
1316
1317 mlog(0, "checking node %d\n", node_num);
1318 1562
1319 /* Should not ever be called to recover ourselves -- in that 1563 /* Should not ever be called to recover ourselves -- in that
1320 * case we should've called ocfs2_journal_load instead. */ 1564 * case we should've called ocfs2_journal_load instead. */
1321 BUG_ON(osb->node_num == node_num); 1565 BUG_ON(osb->node_num == node_num);
1322 1566
1323 slot_num = ocfs2_node_num_to_slot(osb, node_num);
1324 if (slot_num == -ENOENT) {
1325 status = 0;
1326 mlog(0, "no slot for this node, so no recovery required.\n");
1327 goto done;
1328 }
1329
1330 mlog(0, "node %d was using slot %d\n", node_num, slot_num);
1331
1332 status = ocfs2_replay_journal(osb, node_num, slot_num); 1567 status = ocfs2_replay_journal(osb, node_num, slot_num);
1333 if (status < 0) { 1568 if (status < 0) {
1334 if (status == -EBUSY) { 1569 if (status == -EBUSY) {
@@ -1364,7 +1599,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1364 1599
1365 /* This will kfree the memory pointed to by la_copy and tl_copy */ 1600 /* This will kfree the memory pointed to by la_copy and tl_copy */
1366 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, 1601 ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
1367 tl_copy); 1602 tl_copy, NULL);
1368 1603
1369 status = 0; 1604 status = 0;
1370done: 1605done:
@@ -1659,13 +1894,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
1659 return ret; 1894 return ret;
1660} 1895}
1661 1896
1662static int ocfs2_wait_on_mount(struct ocfs2_super *osb) 1897static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
1663{ 1898{
1664 /* This check is good because ocfs2 will wait on our recovery 1899 /* This check is good because ocfs2 will wait on our recovery
1665 * thread before changing it to something other than MOUNTED 1900 * thread before changing it to something other than MOUNTED
1666 * or DISABLED. */ 1901 * or DISABLED. */
1667 wait_event(osb->osb_mount_event, 1902 wait_event(osb->osb_mount_event,
1668 atomic_read(&osb->vol_state) == VOLUME_MOUNTED || 1903 (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
1904 atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
1669 atomic_read(&osb->vol_state) == VOLUME_DISABLED); 1905 atomic_read(&osb->vol_state) == VOLUME_DISABLED);
1670 1906
1671 /* If there's an error on mount, then we may never get to the 1907 /* If there's an error on mount, then we may never get to the
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4d14e9a3cea..3c3532e1307c 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,12 +27,7 @@
27#define OCFS2_JOURNAL_H 27#define OCFS2_JOURNAL_H
28 28
29#include <linux/fs.h> 29#include <linux/fs.h>
30#ifndef CONFIG_OCFS2_COMPAT_JBD 30#include <linux/jbd2.h>
31# include <linux/jbd2.h>
32#else
33# include <linux/jbd.h>
34# include "ocfs2_jbd_compat.h"
35#endif
36 31
37enum ocfs2_journal_state { 32enum ocfs2_journal_state {
38 OCFS2_JOURNAL_FREE = 0, 33 OCFS2_JOURNAL_FREE = 0,
@@ -173,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb,
173 int node_num); 168 int node_num);
174int ocfs2_mark_dead_nodes(struct ocfs2_super *osb); 169int ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
175void ocfs2_complete_mount_recovery(struct ocfs2_super *osb); 170void ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
171void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);
176 172
177static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) 173static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
178{ 174{
@@ -216,9 +212,12 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
216 * ocfs2_extend_trans - Extend a handle by nblocks credits. This may 212 * ocfs2_extend_trans - Extend a handle by nblocks credits. This may
217 * commit the handle to disk in the process, but will 213 * commit the handle to disk in the process, but will
218 * not release any locks taken during the transaction. 214 * not release any locks taken during the transaction.
219 * ocfs2_journal_access - Notify the handle that we want to journal this 215 * ocfs2_journal_access* - Notify the handle that we want to journal this
220 * buffer. Will have to call ocfs2_journal_dirty once 216 * buffer. Will have to call ocfs2_journal_dirty once
221 * we've actually dirtied it. Type is one of . or . 217 * we've actually dirtied it. Type is one of . or .
218 * Always call the specific flavor of
219 * ocfs2_journal_access_*() unless you intend to
220 * manage the checksum by hand.
222 * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. 221 * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
223 * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before 222 * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
224 * the current handle commits. 223 * the current handle commits.
@@ -248,10 +247,29 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks);
248#define OCFS2_JOURNAL_ACCESS_WRITE 1 247#define OCFS2_JOURNAL_ACCESS_WRITE 1
249#define OCFS2_JOURNAL_ACCESS_UNDO 2 248#define OCFS2_JOURNAL_ACCESS_UNDO 2
250 249
251int ocfs2_journal_access(handle_t *handle, 250
252 struct inode *inode, 251/* ocfs2_inode */
253 struct buffer_head *bh, 252int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
254 int type); 253 struct buffer_head *bh, int type);
254/* ocfs2_extent_block */
255int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
256 struct buffer_head *bh, int type);
257/* ocfs2_group_desc */
258int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
259 struct buffer_head *bh, int type);
260/* ocfs2_xattr_block */
261int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
262 struct buffer_head *bh, int type);
263/* quota blocks */
264int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
265 struct buffer_head *bh, int type);
266/* dirblock */
267int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
268 struct buffer_head *bh, int type);
269/* Anything that has no ecc */
270int ocfs2_journal_access(handle_t *handle, struct inode *inode,
271 struct buffer_head *bh, int type);
272
255/* 273/*
256 * A word about the journal_access/journal_dirty "dance". It is 274 * A word about the journal_access/journal_dirty "dance". It is
257 * entirely legal to journal_access a buffer more than once (as long 275 * entirely legal to journal_access a buffer more than once (as long
@@ -273,10 +291,6 @@ int ocfs2_journal_access(handle_t *handle,
273 */ 291 */
274int ocfs2_journal_dirty(handle_t *handle, 292int ocfs2_journal_dirty(handle_t *handle,
275 struct buffer_head *bh); 293 struct buffer_head *bh);
276#ifdef CONFIG_OCFS2_COMPAT_JBD
277int ocfs2_journal_dirty_data(handle_t *handle,
278 struct buffer_head *bh);
279#endif
280 294
281/* 295/*
282 * Credit Macros: 296 * Credit Macros:
@@ -293,6 +307,37 @@ int ocfs2_journal_dirty_data(handle_t *handle,
293/* extended attribute block update */ 307/* extended attribute block update */
294#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 308#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
295 309
310/* global quotafile inode update, data block */
311#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
312
313/*
314 * The two writes below can accidentally see global info dirty due
315 * to set_info() quotactl so make them prepared for the writes.
316 */
317/* quota data block, global info */
318/* Write to local quota file */
319#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
320
321/* global quota data block, local quota data block, global quota inode,
322 * global quota info */
323#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
324
325static inline int ocfs2_quota_trans_credits(struct super_block *sb)
326{
327 int credits = 0;
328
329 if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA))
330 credits += OCFS2_QWRITE_CREDITS;
331 if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA))
332 credits += OCFS2_QWRITE_CREDITS;
333 return credits;
334}
335
336/* Number of credits needed for removing quota structure from file */
337int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
338/* Number of credits needed for initialization of new quota structure */
339int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
340
296/* group extend. inode update and last group update. */ 341/* group extend. inode update and last group update. */
297#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 342#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
298 343
@@ -303,8 +348,11 @@ int ocfs2_journal_dirty_data(handle_t *handle,
303 * prev. group desc. if we relink. */ 348 * prev. group desc. if we relink. */
304#define OCFS2_SUBALLOC_ALLOC (3) 349#define OCFS2_SUBALLOC_ALLOC (3)
305 350
306#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \ 351static inline int ocfs2_inline_to_extents_credits(struct super_block *sb)
307 + OCFS2_INODE_UPDATE_CREDITS) 352{
353 return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS +
354 ocfs2_quota_trans_credits(sb);
355}
308 356
309/* dinode + group descriptor update. We don't relink on free yet. */ 357/* dinode + group descriptor update. We don't relink on free yet. */
310#define OCFS2_SUBALLOC_FREE (2) 358#define OCFS2_SUBALLOC_FREE (2)
@@ -313,16 +361,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
313#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ 361#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
314 + OCFS2_TRUNCATE_LOG_UPDATE) 362 + OCFS2_TRUNCATE_LOG_UPDATE)
315 363
316#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) 364static inline int ocfs2_remove_extent_credits(struct super_block *sb)
365{
366 return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS +
367 ocfs2_quota_trans_credits(sb);
368}
317 369
318/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + 370/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
319 * bitmap block for the new bit) */ 371 * bitmap block for the new bit) */
320#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) 372#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
321 373
322/* parent fe, parent block, new file entry, inode alloc fe, inode alloc 374/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
323 * group descriptor + mkdir/symlink blocks */ 375 * group descriptor + mkdir/symlink blocks + quota update */
324#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \ 376static inline int ocfs2_mknod_credits(struct super_block *sb)
325 + OCFS2_DIR_LINK_ADDITIONAL_CREDITS) 377{
378 return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
379 ocfs2_quota_trans_credits(sb);
380}
326 381
327/* local alloc metadata change + main bitmap updates */ 382/* local alloc metadata change + main bitmap updates */
328#define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \ 383#define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \
@@ -332,13 +387,21 @@ int ocfs2_journal_dirty_data(handle_t *handle,
332 * for the dinode, one for the new block. */ 387 * for the dinode, one for the new block. */
333#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) 388#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
334 389
335/* file update (nlink, etc) + directory mtime/ctime + dir entry block */ 390/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
336#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1) 391 * update on dir */
392static inline int ocfs2_link_credits(struct super_block *sb)
393{
394 return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
395 ocfs2_quota_trans_credits(sb);
396}
337 397
338/* inode + dir inode (if we unlink a dir), + dir entry block + orphan 398/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
339 * dir inode link */ 399 * dir inode link */
340#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \ 400static inline int ocfs2_unlink_credits(struct super_block *sb)
341 + OCFS2_LINK_CREDITS) 401{
402 /* The quota update from ocfs2_link_credits is unused here... */
403 return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
404}
342 405
343/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + 406/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
344 * inode alloc group descriptor */ 407 * inode alloc group descriptor */
@@ -347,8 +410,10 @@ int ocfs2_journal_dirty_data(handle_t *handle,
347/* dinode update, old dir dinode update, new dir dinode update, old 410/* dinode update, old dir dinode update, new dir dinode update, old
348 * dir dir entry, new dir dir entry, dir entry update for renaming 411 * dir dir entry, new dir dir entry, dir entry update for renaming
349 * directory + target unlink */ 412 * directory + target unlink */
350#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ 413static inline int ocfs2_rename_credits(struct super_block *sb)
351 + OCFS2_UNLINK_CREDITS) 414{
415 return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
416}
352 417
353/* global bitmap dinode, group desc., relinked group, 418/* global bitmap dinode, group desc., relinked group,
354 * suballocator dinode, group desc., relinked group, 419 * suballocator dinode, group desc., relinked group,
@@ -386,18 +451,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
386 * credit for the dinode there. */ 451 * credit for the dinode there. */
387 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); 452 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
388 453
389 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks; 454 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
455 ocfs2_quota_trans_credits(sb);
390} 456}
391 457
392static inline int ocfs2_calc_symlink_credits(struct super_block *sb) 458static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
393{ 459{
394 int blocks = OCFS2_MKNOD_CREDITS; 460 int blocks = ocfs2_mknod_credits(sb);
395 461
396 /* links can be longer than one block so we may update many 462 /* links can be longer than one block so we may update many
397 * within our single allocated extent. */ 463 * within our single allocated extent. */
398 blocks += ocfs2_clusters_to_blocks(sb, 1); 464 blocks += ocfs2_clusters_to_blocks(sb, 1);
399 465
400 return blocks; 466 return blocks + ocfs2_quota_trans_credits(sb);
401} 467}
402 468
403static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb, 469static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
@@ -434,6 +500,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
434 /* update to the truncate log. */ 500 /* update to the truncate log. */
435 credits += OCFS2_TRUNCATE_LOG_UPDATE; 501 credits += OCFS2_TRUNCATE_LOG_UPDATE;
436 502
503 credits += ocfs2_quota_trans_credits(sb);
504
437 return credits; 505 return credits;
438} 506}
439 507
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 687b28713c32..ec70cdbe77fc 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -36,6 +36,7 @@
36#include "ocfs2.h" 36#include "ocfs2.h"
37 37
38#include "alloc.h" 38#include "alloc.h"
39#include "blockcheck.h"
39#include "dlmglue.h" 40#include "dlmglue.h"
40#include "inode.h" 41#include "inode.h"
41#include "journal.h" 42#include "journal.h"
@@ -248,8 +249,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
248 goto bail; 249 goto bail;
249 } 250 }
250 251
251 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, 252 status = ocfs2_read_inode_block_full(inode, &alloc_bh,
252 &alloc_bh, OCFS2_BH_IGNORE_CACHE); 253 OCFS2_BH_IGNORE_CACHE);
253 if (status < 0) { 254 if (status < 0) {
254 mlog_errno(status); 255 mlog_errno(status);
255 goto bail; 256 goto bail;
@@ -382,8 +383,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
382 } 383 }
383 memcpy(alloc_copy, alloc, bh->b_size); 384 memcpy(alloc_copy, alloc, bh->b_size);
384 385
385 status = ocfs2_journal_access(handle, local_alloc_inode, bh, 386 status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
386 OCFS2_JOURNAL_ACCESS_WRITE); 387 OCFS2_JOURNAL_ACCESS_WRITE);
387 if (status < 0) { 388 if (status < 0) {
388 mlog_errno(status); 389 mlog_errno(status);
389 goto out_commit; 390 goto out_commit;
@@ -459,8 +460,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
459 460
460 mutex_lock(&inode->i_mutex); 461 mutex_lock(&inode->i_mutex);
461 462
462 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, 463 status = ocfs2_read_inode_block_full(inode, &alloc_bh,
463 &alloc_bh, OCFS2_BH_IGNORE_CACHE); 464 OCFS2_BH_IGNORE_CACHE);
464 if (status < 0) { 465 if (status < 0) {
465 mlog_errno(status); 466 mlog_errno(status);
466 goto bail; 467 goto bail;
@@ -476,6 +477,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
476 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 477 alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
477 ocfs2_clear_local_alloc(alloc); 478 ocfs2_clear_local_alloc(alloc);
478 479
480 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
479 status = ocfs2_write_block(osb, alloc_bh, inode); 481 status = ocfs2_write_block(osb, alloc_bh, inode);
480 if (status < 0) 482 if (status < 0)
481 mlog_errno(status); 483 mlog_errno(status);
@@ -762,9 +764,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
762 * delete bits from it! */ 764 * delete bits from it! */
763 *num_bits = bits_wanted; 765 *num_bits = bits_wanted;
764 766
765 status = ocfs2_journal_access(handle, local_alloc_inode, 767 status = ocfs2_journal_access_di(handle, local_alloc_inode,
766 osb->local_alloc_bh, 768 osb->local_alloc_bh,
767 OCFS2_JOURNAL_ACCESS_WRITE); 769 OCFS2_JOURNAL_ACCESS_WRITE);
768 if (status < 0) { 770 if (status < 0) {
769 mlog_errno(status); 771 mlog_errno(status);
770 goto bail; 772 goto bail;
@@ -1240,9 +1242,9 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1240 } 1242 }
1241 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1243 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
1242 1244
1243 status = ocfs2_journal_access(handle, local_alloc_inode, 1245 status = ocfs2_journal_access_di(handle, local_alloc_inode,
1244 osb->local_alloc_bh, 1246 osb->local_alloc_bh,
1245 OCFS2_JOURNAL_ACCESS_WRITE); 1247 OCFS2_JOURNAL_ACCESS_WRITE);
1246 if (status < 0) { 1248 if (status < 0) {
1247 mlog_errno(status); 1249 mlog_errno(status);
1248 goto bail; 1250 goto bail;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f4967e634ffd..084aba86c3b2 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -40,6 +40,7 @@
40#include <linux/types.h> 40#include <linux/types.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/quotaops.h>
43 44
44#define MLOG_MASK_PREFIX ML_NAMEI 45#define MLOG_MASK_PREFIX ML_NAMEI
45#include <cluster/masklog.h> 46#include <cluster/masklog.h>
@@ -61,17 +62,18 @@
61#include "sysfile.h" 62#include "sysfile.h"
62#include "uptodate.h" 63#include "uptodate.h"
63#include "xattr.h" 64#include "xattr.h"
65#include "acl.h"
64 66
65#include "buffer_head_io.h" 67#include "buffer_head_io.h"
66 68
67static int ocfs2_mknod_locked(struct ocfs2_super *osb, 69static int ocfs2_mknod_locked(struct ocfs2_super *osb,
68 struct inode *dir, 70 struct inode *dir,
69 struct dentry *dentry, int mode, 71 struct inode *inode,
72 struct dentry *dentry,
70 dev_t dev, 73 dev_t dev,
71 struct buffer_head **new_fe_bh, 74 struct buffer_head **new_fe_bh,
72 struct buffer_head *parent_fe_bh, 75 struct buffer_head *parent_fe_bh,
73 handle_t *handle, 76 handle_t *handle,
74 struct inode **ret_inode,
75 struct ocfs2_alloc_context *inode_ac); 77 struct ocfs2_alloc_context *inode_ac);
76 78
77static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 79static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
@@ -186,6 +188,35 @@ bail:
186 return ret; 188 return ret;
187} 189}
188 190
191static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
192{
193 struct inode *inode;
194
195 inode = new_inode(dir->i_sb);
196 if (!inode) {
197 mlog(ML_ERROR, "new_inode failed!\n");
198 return NULL;
199 }
200
201 /* populate as many fields early on as possible - many of
202 * these are used by the support functions here and in
203 * callers. */
204 if (S_ISDIR(mode))
205 inode->i_nlink = 2;
206 else
207 inode->i_nlink = 1;
208 inode->i_uid = current_fsuid();
209 if (dir->i_mode & S_ISGID) {
210 inode->i_gid = dir->i_gid;
211 if (S_ISDIR(mode))
212 mode |= S_ISGID;
213 } else
214 inode->i_gid = current_fsgid();
215 inode->i_mode = mode;
216 vfs_dq_init(inode);
217 return inode;
218}
219
189static int ocfs2_mknod(struct inode *dir, 220static int ocfs2_mknod(struct inode *dir,
190 struct dentry *dentry, 221 struct dentry *dentry,
191 int mode, 222 int mode,
@@ -201,6 +232,13 @@ static int ocfs2_mknod(struct inode *dir,
201 struct inode *inode = NULL; 232 struct inode *inode = NULL;
202 struct ocfs2_alloc_context *inode_ac = NULL; 233 struct ocfs2_alloc_context *inode_ac = NULL;
203 struct ocfs2_alloc_context *data_ac = NULL; 234 struct ocfs2_alloc_context *data_ac = NULL;
235 struct ocfs2_alloc_context *xattr_ac = NULL;
236 int want_clusters = 0;
237 int xattr_credits = 0;
238 struct ocfs2_security_xattr_info si = {
239 .enable = 1,
240 };
241 int did_quota_inode = 0;
204 242
205 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 243 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
206 (unsigned long)dev, dentry->d_name.len, 244 (unsigned long)dev, dentry->d_name.len,
@@ -250,17 +288,46 @@ static int ocfs2_mknod(struct inode *dir,
250 goto leave; 288 goto leave;
251 } 289 }
252 290
253 /* Reserve a cluster if creating an extent based directory. */ 291 inode = ocfs2_get_init_inode(dir, mode);
254 if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) { 292 if (!inode) {
255 status = ocfs2_reserve_clusters(osb, 1, &data_ac); 293 status = -ENOMEM;
256 if (status < 0) { 294 mlog_errno(status);
257 if (status != -ENOSPC) 295 goto leave;
258 mlog_errno(status); 296 }
297
298 /* get security xattr */
299 status = ocfs2_init_security_get(inode, dir, &si);
300 if (status) {
301 if (status == -EOPNOTSUPP)
302 si.enable = 0;
303 else {
304 mlog_errno(status);
259 goto leave; 305 goto leave;
260 } 306 }
261 } 307 }
262 308
263 handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS); 309 /* calculate meta data/clusters for setting security and acl xattr */
310 status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
311 &si, &want_clusters,
312 &xattr_credits, &xattr_ac);
313 if (status < 0) {
314 mlog_errno(status);
315 goto leave;
316 }
317
318 /* Reserve a cluster if creating an extent based directory. */
319 if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
320 want_clusters += 1;
321
322 status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
323 if (status < 0) {
324 if (status != -ENOSPC)
325 mlog_errno(status);
326 goto leave;
327 }
328
329 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
330 xattr_credits);
264 if (IS_ERR(handle)) { 331 if (IS_ERR(handle)) {
265 status = PTR_ERR(handle); 332 status = PTR_ERR(handle);
266 handle = NULL; 333 handle = NULL;
@@ -268,10 +335,19 @@ static int ocfs2_mknod(struct inode *dir,
268 goto leave; 335 goto leave;
269 } 336 }
270 337
338 /* We don't use standard VFS wrapper because we don't want vfs_dq_init
339 * to be called. */
340 if (sb_any_quota_active(osb->sb) &&
341 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
342 status = -EDQUOT;
343 goto leave;
344 }
345 did_quota_inode = 1;
346
271 /* do the real work now. */ 347 /* do the real work now. */
272 status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev, 348 status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
273 &new_fe_bh, parent_fe_bh, handle, 349 &new_fe_bh, parent_fe_bh, handle,
274 &inode, inode_ac); 350 inode_ac);
275 if (status < 0) { 351 if (status < 0) {
276 mlog_errno(status); 352 mlog_errno(status);
277 goto leave; 353 goto leave;
@@ -285,8 +361,8 @@ static int ocfs2_mknod(struct inode *dir,
285 goto leave; 361 goto leave;
286 } 362 }
287 363
288 status = ocfs2_journal_access(handle, dir, parent_fe_bh, 364 status = ocfs2_journal_access_di(handle, dir, parent_fe_bh,
289 OCFS2_JOURNAL_ACCESS_WRITE); 365 OCFS2_JOURNAL_ACCESS_WRITE);
290 if (status < 0) { 366 if (status < 0) {
291 mlog_errno(status); 367 mlog_errno(status);
292 goto leave; 368 goto leave;
@@ -300,6 +376,22 @@ static int ocfs2_mknod(struct inode *dir,
300 inc_nlink(dir); 376 inc_nlink(dir);
301 } 377 }
302 378
379 status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
380 xattr_ac, data_ac);
381 if (status < 0) {
382 mlog_errno(status);
383 goto leave;
384 }
385
386 if (si.enable) {
387 status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
388 xattr_ac, data_ac);
389 if (status < 0) {
390 mlog_errno(status);
391 goto leave;
392 }
393 }
394
303 status = ocfs2_add_entry(handle, dentry, inode, 395 status = ocfs2_add_entry(handle, dentry, inode,
304 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 396 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
305 de_bh); 397 de_bh);
@@ -320,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir,
320 d_instantiate(dentry, inode); 412 d_instantiate(dentry, inode);
321 status = 0; 413 status = 0;
322leave: 414leave:
415 if (status < 0 && did_quota_inode)
416 vfs_dq_free_inode(inode);
323 if (handle) 417 if (handle)
324 ocfs2_commit_trans(osb, handle); 418 ocfs2_commit_trans(osb, handle);
325 419
@@ -331,9 +425,13 @@ leave:
331 brelse(new_fe_bh); 425 brelse(new_fe_bh);
332 brelse(de_bh); 426 brelse(de_bh);
333 brelse(parent_fe_bh); 427 brelse(parent_fe_bh);
428 kfree(si.name);
429 kfree(si.value);
334 430
335 if ((status < 0) && inode) 431 if ((status < 0) && inode) {
432 clear_nlink(inode);
336 iput(inode); 433 iput(inode);
434 }
337 435
338 if (inode_ac) 436 if (inode_ac)
339 ocfs2_free_alloc_context(inode_ac); 437 ocfs2_free_alloc_context(inode_ac);
@@ -341,6 +439,9 @@ leave:
341 if (data_ac) 439 if (data_ac)
342 ocfs2_free_alloc_context(data_ac); 440 ocfs2_free_alloc_context(data_ac);
343 441
442 if (xattr_ac)
443 ocfs2_free_alloc_context(xattr_ac);
444
344 mlog_exit(status); 445 mlog_exit(status);
345 446
346 return status; 447 return status;
@@ -348,12 +449,12 @@ leave:
348 449
349static int ocfs2_mknod_locked(struct ocfs2_super *osb, 450static int ocfs2_mknod_locked(struct ocfs2_super *osb,
350 struct inode *dir, 451 struct inode *dir,
351 struct dentry *dentry, int mode, 452 struct inode *inode,
453 struct dentry *dentry,
352 dev_t dev, 454 dev_t dev,
353 struct buffer_head **new_fe_bh, 455 struct buffer_head **new_fe_bh,
354 struct buffer_head *parent_fe_bh, 456 struct buffer_head *parent_fe_bh,
355 handle_t *handle, 457 handle_t *handle,
356 struct inode **ret_inode,
357 struct ocfs2_alloc_context *inode_ac) 458 struct ocfs2_alloc_context *inode_ac)
358{ 459{
359 int status = 0; 460 int status = 0;
@@ -361,14 +462,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
361 struct ocfs2_extent_list *fel; 462 struct ocfs2_extent_list *fel;
362 u64 fe_blkno = 0; 463 u64 fe_blkno = 0;
363 u16 suballoc_bit; 464 u16 suballoc_bit;
364 struct inode *inode = NULL;
365 465
366 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 466 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
367 (unsigned long)dev, dentry->d_name.len, 467 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
368 dentry->d_name.name); 468 dentry->d_name.name);
369 469
370 *new_fe_bh = NULL; 470 *new_fe_bh = NULL;
371 *ret_inode = NULL;
372 471
373 status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, 472 status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
374 &fe_blkno); 473 &fe_blkno);
@@ -377,23 +476,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
377 goto leave; 476 goto leave;
378 } 477 }
379 478
380 inode = new_inode(dir->i_sb);
381 if (!inode) {
382 status = -ENOMEM;
383 mlog(ML_ERROR, "new_inode failed!\n");
384 goto leave;
385 }
386
387 /* populate as many fields early on as possible - many of 479 /* populate as many fields early on as possible - many of
388 * these are used by the support functions here and in 480 * these are used by the support functions here and in
389 * callers. */ 481 * callers. */
390 inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); 482 inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
391 OCFS2_I(inode)->ip_blkno = fe_blkno; 483 OCFS2_I(inode)->ip_blkno = fe_blkno;
392 if (S_ISDIR(mode))
393 inode->i_nlink = 2;
394 else
395 inode->i_nlink = 1;
396 inode->i_mode = mode;
397 spin_lock(&osb->osb_lock); 484 spin_lock(&osb->osb_lock);
398 inode->i_generation = osb->s_next_generation++; 485 inode->i_generation = osb->s_next_generation++;
399 spin_unlock(&osb->osb_lock); 486 spin_unlock(&osb->osb_lock);
@@ -406,8 +493,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
406 } 493 }
407 ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); 494 ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
408 495
409 status = ocfs2_journal_access(handle, inode, *new_fe_bh, 496 status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
410 OCFS2_JOURNAL_ACCESS_CREATE); 497 OCFS2_JOURNAL_ACCESS_CREATE);
411 if (status < 0) { 498 if (status < 0) {
412 mlog_errno(status); 499 mlog_errno(status);
413 goto leave; 500 goto leave;
@@ -421,17 +508,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
421 fe->i_blkno = cpu_to_le64(fe_blkno); 508 fe->i_blkno = cpu_to_le64(fe_blkno);
422 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 509 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
423 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 510 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
424 fe->i_uid = cpu_to_le32(current->fsuid); 511 fe->i_uid = cpu_to_le32(inode->i_uid);
425 if (dir->i_mode & S_ISGID) { 512 fe->i_gid = cpu_to_le32(inode->i_gid);
426 fe->i_gid = cpu_to_le32(dir->i_gid); 513 fe->i_mode = cpu_to_le16(inode->i_mode);
427 if (S_ISDIR(mode)) 514 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
428 mode |= S_ISGID;
429 } else
430 fe->i_gid = cpu_to_le32(current->fsgid);
431 fe->i_mode = cpu_to_le16(mode);
432 if (S_ISCHR(mode) || S_ISBLK(mode))
433 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); 515 fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
434
435 fe->i_links_count = cpu_to_le16(inode->i_nlink); 516 fe->i_links_count = cpu_to_le16(inode->i_nlink);
436 517
437 fe->i_last_eb_blk = 0; 518 fe->i_last_eb_blk = 0;
@@ -446,7 +527,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
446 /* 527 /*
447 * If supported, directories start with inline data. 528 * If supported, directories start with inline data.
448 */ 529 */
449 if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) { 530 if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
450 u16 feat = le16_to_cpu(fe->i_dyn_features); 531 u16 feat = le16_to_cpu(fe->i_dyn_features);
451 532
452 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL); 533 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
@@ -465,15 +546,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
465 goto leave; 546 goto leave;
466 } 547 }
467 548
468 if (ocfs2_populate_inode(inode, fe, 1) < 0) { 549 ocfs2_populate_inode(inode, fe, 1);
469 mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
470 "i_blkno=%llu, i_ino=%lu\n",
471 (unsigned long long)(*new_fe_bh)->b_blocknr,
472 (unsigned long long)le64_to_cpu(fe->i_blkno),
473 inode->i_ino);
474 BUG();
475 }
476
477 ocfs2_inode_set_new(osb, inode); 550 ocfs2_inode_set_new(osb, inode);
478 if (!ocfs2_mount_local(osb)) { 551 if (!ocfs2_mount_local(osb)) {
479 status = ocfs2_create_new_inode_locks(inode); 552 status = ocfs2_create_new_inode_locks(inode);
@@ -484,17 +557,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
484 status = 0; /* error in ocfs2_create_new_inode_locks is not 557 status = 0; /* error in ocfs2_create_new_inode_locks is not
485 * critical */ 558 * critical */
486 559
487 *ret_inode = inode;
488leave: 560leave:
489 if (status < 0) { 561 if (status < 0) {
490 if (*new_fe_bh) { 562 if (*new_fe_bh) {
491 brelse(*new_fe_bh); 563 brelse(*new_fe_bh);
492 *new_fe_bh = NULL; 564 *new_fe_bh = NULL;
493 } 565 }
494 if (inode) {
495 clear_nlink(inode);
496 iput(inode);
497 }
498 } 566 }
499 567
500 mlog_exit(status); 568 mlog_exit(status);
@@ -588,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry,
588 goto out_unlock_inode; 656 goto out_unlock_inode;
589 } 657 }
590 658
591 handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS); 659 handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
592 if (IS_ERR(handle)) { 660 if (IS_ERR(handle)) {
593 err = PTR_ERR(handle); 661 err = PTR_ERR(handle);
594 handle = NULL; 662 handle = NULL;
@@ -596,8 +664,8 @@ static int ocfs2_link(struct dentry *old_dentry,
596 goto out_unlock_inode; 664 goto out_unlock_inode;
597 } 665 }
598 666
599 err = ocfs2_journal_access(handle, inode, fe_bh, 667 err = ocfs2_journal_access_di(handle, inode, fe_bh,
600 OCFS2_JOURNAL_ACCESS_WRITE); 668 OCFS2_JOURNAL_ACCESS_WRITE);
601 if (err < 0) { 669 if (err < 0) {
602 mlog_errno(err); 670 mlog_errno(err);
603 goto out_commit; 671 goto out_commit;
@@ -775,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir,
775 } 843 }
776 } 844 }
777 845
778 handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS); 846 handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
779 if (IS_ERR(handle)) { 847 if (IS_ERR(handle)) {
780 status = PTR_ERR(handle); 848 status = PTR_ERR(handle);
781 handle = NULL; 849 handle = NULL;
@@ -783,8 +851,8 @@ static int ocfs2_unlink(struct inode *dir,
783 goto leave; 851 goto leave;
784 } 852 }
785 853
786 status = ocfs2_journal_access(handle, inode, fe_bh, 854 status = ocfs2_journal_access_di(handle, inode, fe_bh,
787 OCFS2_JOURNAL_ACCESS_WRITE); 855 OCFS2_JOURNAL_ACCESS_WRITE);
788 if (status < 0) { 856 if (status < 0) {
789 mlog_errno(status); 857 mlog_errno(status);
790 goto leave; 858 goto leave;
@@ -1181,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir,
1181 } 1249 }
1182 } 1250 }
1183 1251
1184 handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS); 1252 handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
1185 if (IS_ERR(handle)) { 1253 if (IS_ERR(handle)) {
1186 status = PTR_ERR(handle); 1254 status = PTR_ERR(handle);
1187 handle = NULL; 1255 handle = NULL;
@@ -1197,8 +1265,8 @@ static int ocfs2_rename(struct inode *old_dir,
1197 goto bail; 1265 goto bail;
1198 } 1266 }
1199 } 1267 }
1200 status = ocfs2_journal_access(handle, new_inode, newfe_bh, 1268 status = ocfs2_journal_access_di(handle, new_inode, newfe_bh,
1201 OCFS2_JOURNAL_ACCESS_WRITE); 1269 OCFS2_JOURNAL_ACCESS_WRITE);
1202 if (status < 0) { 1270 if (status < 0) {
1203 mlog_errno(status); 1271 mlog_errno(status);
1204 goto bail; 1272 goto bail;
@@ -1244,8 +1312,8 @@ static int ocfs2_rename(struct inode *old_dir,
1244 old_inode->i_ctime = CURRENT_TIME; 1312 old_inode->i_ctime = CURRENT_TIME;
1245 mark_inode_dirty(old_inode); 1313 mark_inode_dirty(old_inode);
1246 1314
1247 status = ocfs2_journal_access(handle, old_inode, old_inode_bh, 1315 status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh,
1248 OCFS2_JOURNAL_ACCESS_WRITE); 1316 OCFS2_JOURNAL_ACCESS_WRITE);
1249 if (status >= 0) { 1317 if (status >= 0) {
1250 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; 1318 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
1251 1319
@@ -1321,9 +1389,9 @@ static int ocfs2_rename(struct inode *old_dir,
1321 (int)old_dir_nlink, old_dir->i_nlink); 1389 (int)old_dir_nlink, old_dir->i_nlink);
1322 } else { 1390 } else {
1323 struct ocfs2_dinode *fe; 1391 struct ocfs2_dinode *fe;
1324 status = ocfs2_journal_access(handle, old_dir, 1392 status = ocfs2_journal_access_di(handle, old_dir,
1325 old_dir_bh, 1393 old_dir_bh,
1326 OCFS2_JOURNAL_ACCESS_WRITE); 1394 OCFS2_JOURNAL_ACCESS_WRITE);
1327 fe = (struct ocfs2_dinode *) old_dir_bh->b_data; 1395 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1328 fe->i_links_count = cpu_to_le16(old_dir->i_nlink); 1396 fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
1329 status = ocfs2_journal_dirty(handle, old_dir_bh); 1397 status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1496,6 +1564,13 @@ static int ocfs2_symlink(struct inode *dir,
1496 handle_t *handle = NULL; 1564 handle_t *handle = NULL;
1497 struct ocfs2_alloc_context *inode_ac = NULL; 1565 struct ocfs2_alloc_context *inode_ac = NULL;
1498 struct ocfs2_alloc_context *data_ac = NULL; 1566 struct ocfs2_alloc_context *data_ac = NULL;
1567 struct ocfs2_alloc_context *xattr_ac = NULL;
1568 int want_clusters = 0;
1569 int xattr_credits = 0;
1570 struct ocfs2_security_xattr_info si = {
1571 .enable = 1,
1572 };
1573 int did_quota = 0, did_quota_inode = 0;
1499 1574
1500 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1575 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1501 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1576 dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1542,17 +1617,46 @@ static int ocfs2_symlink(struct inode *dir,
1542 goto bail; 1617 goto bail;
1543 } 1618 }
1544 1619
1545 /* don't reserve bitmap space for fast symlinks. */ 1620 inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
1546 if (l > ocfs2_fast_symlink_chars(sb)) { 1621 if (!inode) {
1547 status = ocfs2_reserve_clusters(osb, 1, &data_ac); 1622 status = -ENOMEM;
1623 mlog_errno(status);
1624 goto bail;
1625 }
1626
1627 /* get security xattr */
1628 status = ocfs2_init_security_get(inode, dir, &si);
1629 if (status) {
1630 if (status == -EOPNOTSUPP)
1631 si.enable = 0;
1632 else {
1633 mlog_errno(status);
1634 goto bail;
1635 }
1636 }
1637
1638 /* calculate meta data/clusters for setting security xattr */
1639 if (si.enable) {
1640 status = ocfs2_calc_security_init(dir, &si, &want_clusters,
1641 &xattr_credits, &xattr_ac);
1548 if (status < 0) { 1642 if (status < 0) {
1549 if (status != -ENOSPC) 1643 mlog_errno(status);
1550 mlog_errno(status);
1551 goto bail; 1644 goto bail;
1552 } 1645 }
1553 } 1646 }
1554 1647
1555 handle = ocfs2_start_trans(osb, credits); 1648 /* don't reserve bitmap space for fast symlinks. */
1649 if (l > ocfs2_fast_symlink_chars(sb))
1650 want_clusters += 1;
1651
1652 status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
1653 if (status < 0) {
1654 if (status != -ENOSPC)
1655 mlog_errno(status);
1656 goto bail;
1657 }
1658
1659 handle = ocfs2_start_trans(osb, credits + xattr_credits);
1556 if (IS_ERR(handle)) { 1660 if (IS_ERR(handle)) {
1557 status = PTR_ERR(handle); 1661 status = PTR_ERR(handle);
1558 handle = NULL; 1662 handle = NULL;
@@ -1560,10 +1664,18 @@ static int ocfs2_symlink(struct inode *dir,
1560 goto bail; 1664 goto bail;
1561 } 1665 }
1562 1666
1563 status = ocfs2_mknod_locked(osb, dir, dentry, 1667 /* We don't use standard VFS wrapper because we don't want vfs_dq_init
1564 S_IFLNK | S_IRWXUGO, 0, 1668 * to be called. */
1565 &new_fe_bh, parent_fe_bh, handle, 1669 if (sb_any_quota_active(osb->sb) &&
1566 &inode, inode_ac); 1670 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
1671 status = -EDQUOT;
1672 goto bail;
1673 }
1674 did_quota_inode = 1;
1675
1676 status = ocfs2_mknod_locked(osb, dir, inode, dentry,
1677 0, &new_fe_bh, parent_fe_bh, handle,
1678 inode_ac);
1567 if (status < 0) { 1679 if (status < 0) {
1568 mlog_errno(status); 1680 mlog_errno(status);
1569 goto bail; 1681 goto bail;
@@ -1576,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir,
1576 u32 offset = 0; 1688 u32 offset = 0;
1577 1689
1578 inode->i_op = &ocfs2_symlink_inode_operations; 1690 inode->i_op = &ocfs2_symlink_inode_operations;
1691 if (vfs_dq_alloc_space_nodirty(inode,
1692 ocfs2_clusters_to_bytes(osb->sb, 1))) {
1693 status = -EDQUOT;
1694 goto bail;
1695 }
1696 did_quota = 1;
1579 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, 1697 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
1580 new_fe_bh, 1698 new_fe_bh,
1581 handle, data_ac, NULL, 1699 handle, data_ac, NULL,
@@ -1614,6 +1732,15 @@ static int ocfs2_symlink(struct inode *dir,
1614 } 1732 }
1615 } 1733 }
1616 1734
1735 if (si.enable) {
1736 status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
1737 xattr_ac, data_ac);
1738 if (status < 0) {
1739 mlog_errno(status);
1740 goto bail;
1741 }
1742 }
1743
1617 status = ocfs2_add_entry(handle, dentry, inode, 1744 status = ocfs2_add_entry(handle, dentry, inode,
1618 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1745 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1619 de_bh); 1746 de_bh);
@@ -1632,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir,
1632 dentry->d_op = &ocfs2_dentry_ops; 1759 dentry->d_op = &ocfs2_dentry_ops;
1633 d_instantiate(dentry, inode); 1760 d_instantiate(dentry, inode);
1634bail: 1761bail:
1762 if (status < 0 && did_quota)
1763 vfs_dq_free_space_nodirty(inode,
1764 ocfs2_clusters_to_bytes(osb->sb, 1));
1765 if (status < 0 && did_quota_inode)
1766 vfs_dq_free_inode(inode);
1635 if (handle) 1767 if (handle)
1636 ocfs2_commit_trans(osb, handle); 1768 ocfs2_commit_trans(osb, handle);
1637 1769
@@ -1640,12 +1772,18 @@ bail:
1640 brelse(new_fe_bh); 1772 brelse(new_fe_bh);
1641 brelse(parent_fe_bh); 1773 brelse(parent_fe_bh);
1642 brelse(de_bh); 1774 brelse(de_bh);
1775 kfree(si.name);
1776 kfree(si.value);
1643 if (inode_ac) 1777 if (inode_ac)
1644 ocfs2_free_alloc_context(inode_ac); 1778 ocfs2_free_alloc_context(inode_ac);
1645 if (data_ac) 1779 if (data_ac)
1646 ocfs2_free_alloc_context(data_ac); 1780 ocfs2_free_alloc_context(data_ac);
1647 if ((status < 0) && inode) 1781 if (xattr_ac)
1782 ocfs2_free_alloc_context(xattr_ac);
1783 if ((status < 0) && inode) {
1784 clear_nlink(inode);
1648 iput(inode); 1785 iput(inode);
1786 }
1649 1787
1650 mlog_exit(status); 1788 mlog_exit(status);
1651 1789
@@ -1754,16 +1892,14 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1754 1892
1755 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 1893 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
1756 1894
1757 status = ocfs2_read_block(orphan_dir_inode, 1895 status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
1758 OCFS2_I(orphan_dir_inode)->ip_blkno,
1759 &orphan_dir_bh);
1760 if (status < 0) { 1896 if (status < 0) {
1761 mlog_errno(status); 1897 mlog_errno(status);
1762 goto leave; 1898 goto leave;
1763 } 1899 }
1764 1900
1765 status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh, 1901 status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh,
1766 OCFS2_JOURNAL_ACCESS_WRITE); 1902 OCFS2_JOURNAL_ACCESS_WRITE);
1767 if (status < 0) { 1903 if (status < 0) {
1768 mlog_errno(status); 1904 mlog_errno(status);
1769 goto leave; 1905 goto leave;
@@ -1850,8 +1986,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
1850 goto leave; 1986 goto leave;
1851 } 1987 }
1852 1988
1853 status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh, 1989 status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh,
1854 OCFS2_JOURNAL_ACCESS_WRITE); 1990 OCFS2_JOURNAL_ACCESS_WRITE);
1855 if (status < 0) { 1991 if (status < 0) {
1856 mlog_errno(status); 1992 mlog_errno(status);
1857 goto leave; 1993 goto leave;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3fed9e3d8992..ad5c24a29edd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -161,6 +161,7 @@ enum ocfs2_vol_state
161{ 161{
162 VOLUME_INIT = 0, 162 VOLUME_INIT = 0,
163 VOLUME_MOUNTED, 163 VOLUME_MOUNTED,
164 VOLUME_MOUNTED_QUOTAS,
164 VOLUME_DISMOUNTED, 165 VOLUME_DISMOUNTED,
165 VOLUME_DISABLED 166 VOLUME_DISABLED
166}; 167};
@@ -195,6 +196,9 @@ enum ocfs2_mount_options
195 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 196 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
196 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ 197 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
197 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ 198 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
199 OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
200 OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
201 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
198}; 202};
199 203
200#define OCFS2_OSB_SOFT_RO 0x0001 204#define OCFS2_OSB_SOFT_RO 0x0001
@@ -205,6 +209,7 @@ enum ocfs2_mount_options
205struct ocfs2_journal; 209struct ocfs2_journal;
206struct ocfs2_slot_info; 210struct ocfs2_slot_info;
207struct ocfs2_recovery_map; 211struct ocfs2_recovery_map;
212struct ocfs2_quota_recovery;
208struct ocfs2_super 213struct ocfs2_super
209{ 214{
210 struct task_struct *commit_task; 215 struct task_struct *commit_task;
@@ -286,10 +291,11 @@ struct ocfs2_super
286 char *local_alloc_debug_buf; 291 char *local_alloc_debug_buf;
287#endif 292#endif
288 293
289 /* Next two fields are for local node slot recovery during 294 /* Next three fields are for local node slot recovery during
290 * mount. */ 295 * mount. */
291 int dirty; 296 int dirty;
292 struct ocfs2_dinode *local_alloc_copy; 297 struct ocfs2_dinode *local_alloc_copy;
298 struct ocfs2_quota_recovery *quota_rec;
293 299
294 struct ocfs2_alloc_stats alloc_stats; 300 struct ocfs2_alloc_stats alloc_stats;
295 char dev_str[20]; /* "major,minor" of the device */ 301 char dev_str[20]; /* "major,minor" of the device */
@@ -333,6 +339,10 @@ struct ocfs2_super
333 339
334#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 340#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
335 341
342/* Useful typedef for passing around journal access functions */
343typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode,
344 struct buffer_head *bh, int type);
345
336static inline int ocfs2_should_order_data(struct inode *inode) 346static inline int ocfs2_should_order_data(struct inode *inode)
337{ 347{
338 if (!S_ISREG(inode->i_mode)) 348 if (!S_ISREG(inode->i_mode))
@@ -376,6 +386,13 @@ static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
376 return 0; 386 return 0;
377} 387}
378 388
389static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
390{
391 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC)
392 return 1;
393 return 0;
394}
395
379/* set / clear functions because cluster events can make these happen 396/* set / clear functions because cluster events can make these happen
380 * in parallel so we want the transitions to be atomic. this also 397 * in parallel so we want the transitions to be atomic. this also
381 * means that any future flags osb_flags must be protected by spinlock 398 * means that any future flags osb_flags must be protected by spinlock
@@ -443,39 +460,19 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
443#define OCFS2_IS_VALID_DINODE(ptr) \ 460#define OCFS2_IS_VALID_DINODE(ptr) \
444 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) 461 (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
445 462
446#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \
447 typeof(__di) ____di = (__di); \
448 ocfs2_error((__sb), \
449 "Dinode # %llu has bad signature %.*s", \
450 (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \
451 (____di)->i_signature); \
452} while (0)
453
454#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ 463#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \
455 (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) 464 (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
456 465
457#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \
458 typeof(__eb) ____eb = (__eb); \
459 ocfs2_error((__sb), \
460 "Extent Block # %llu has bad signature %.*s", \
461 (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \
462 (____eb)->h_signature); \
463} while (0)
464
465#define OCFS2_IS_VALID_GROUP_DESC(ptr) \ 466#define OCFS2_IS_VALID_GROUP_DESC(ptr) \
466 (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) 467 (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
467 468
468#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \
469 typeof(__gd) ____gd = (__gd); \
470 ocfs2_error((__sb), \
471 "Group Descriptor # %llu has bad signature %.*s", \
472 (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \
473 (____gd)->bg_signature); \
474} while (0)
475 469
476#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ 470#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
477 (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) 471 (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
478 472
473#define OCFS2_IS_VALID_DIR_TRAILER(ptr) \
474 (!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
475
479static inline unsigned long ino_from_blkno(struct super_block *sb, 476static inline unsigned long ino_from_blkno(struct super_block *sb,
480 u64 blkno) 477 u64 blkno)
481{ 478{
@@ -632,5 +629,6 @@ static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
632#define ocfs2_clear_bit ext2_clear_bit 629#define ocfs2_clear_bit ext2_clear_bit
633#define ocfs2_test_bit ext2_test_bit 630#define ocfs2_test_bit ext2_test_bit
634#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit 631#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
632#define ocfs2_find_next_bit ext2_find_next_bit
635#endif /* OCFS2_H */ 633#endif /* OCFS2_H */
636 634
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5e0c0d0aef7d..c7ae45aaa36c 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -65,6 +65,7 @@
65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" 65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" 66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
67#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" 67#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
68#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
68 69
69/* Compatibility flags */ 70/* Compatibility flags */
70#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ 71#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -93,8 +94,11 @@
93 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ 94 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
94 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ 95 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
95 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ 96 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
96 | OCFS2_FEATURE_INCOMPAT_XATTR) 97 | OCFS2_FEATURE_INCOMPAT_XATTR \
97#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 98 | OCFS2_FEATURE_INCOMPAT_META_ECC)
99#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
100 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
101 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
98 102
99/* 103/*
100 * Heartbeat-only devices are missing journals and other files. The 104 * Heartbeat-only devices are missing journals and other files. The
@@ -147,6 +151,9 @@
147/* Support for extended attributes */ 151/* Support for extended attributes */
148#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 152#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
149 153
154/* Metadata checksum and error correction */
155#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
156
150/* 157/*
151 * backup superblock flag is used to indicate that this volume 158 * backup superblock flag is used to indicate that this volume
152 * has backup superblocks. 159 * has backup superblocks.
@@ -163,6 +170,12 @@
163 */ 170 */
164#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 171#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
165 172
173/*
174 * Maintain quota information for this filesystem
175 */
176#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
177#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
178
166/* The byte offset of the first backup block will be 1G. 179/* The byte offset of the first backup block will be 1G.
167 * The following will be 4G, 16G, 64G, 256G and 1T. 180 * The following will be 4G, 16G, 64G, 256G and 1T.
168 */ 181 */
@@ -192,6 +205,7 @@
192#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */ 205#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
193#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ 206#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
194#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ 207#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
208#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
195 209
196/* 210/*
197 * Flags on ocfs2_dinode.i_dyn_features 211 * Flags on ocfs2_dinode.i_dyn_features
@@ -329,13 +343,17 @@ enum {
329#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE 343#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
330 HEARTBEAT_SYSTEM_INODE, 344 HEARTBEAT_SYSTEM_INODE,
331 GLOBAL_BITMAP_SYSTEM_INODE, 345 GLOBAL_BITMAP_SYSTEM_INODE,
332#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE 346 USER_QUOTA_SYSTEM_INODE,
347 GROUP_QUOTA_SYSTEM_INODE,
348#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
333 ORPHAN_DIR_SYSTEM_INODE, 349 ORPHAN_DIR_SYSTEM_INODE,
334 EXTENT_ALLOC_SYSTEM_INODE, 350 EXTENT_ALLOC_SYSTEM_INODE,
335 INODE_ALLOC_SYSTEM_INODE, 351 INODE_ALLOC_SYSTEM_INODE,
336 JOURNAL_SYSTEM_INODE, 352 JOURNAL_SYSTEM_INODE,
337 LOCAL_ALLOC_SYSTEM_INODE, 353 LOCAL_ALLOC_SYSTEM_INODE,
338 TRUNCATE_LOG_SYSTEM_INODE, 354 TRUNCATE_LOG_SYSTEM_INODE,
355 LOCAL_USER_QUOTA_SYSTEM_INODE,
356 LOCAL_GROUP_QUOTA_SYSTEM_INODE,
339 NUM_SYSTEM_INODES 357 NUM_SYSTEM_INODES
340}; 358};
341 359
@@ -349,6 +367,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
349 [SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 }, 367 [SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 },
350 [HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 }, 368 [HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
351 [GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 }, 369 [GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 },
370 [USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 },
371 [GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 },
352 372
353 /* Slot-specific system inodes (one copy per slot) */ 373 /* Slot-specific system inodes (one copy per slot) */
354 [ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 }, 374 [ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
@@ -356,7 +376,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
356 [INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 }, 376 [INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
357 [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 }, 377 [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
358 [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }, 378 [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
359 [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 } 379 [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 },
380 [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
381 [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
360}; 382};
361 383
362/* Parameter passed from mount.ocfs2 to module */ 384/* Parameter passed from mount.ocfs2 to module */
@@ -410,6 +432,22 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
410#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super)) 432#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
411 433
412/* 434/*
435 * Block checking structure. This is used in metadata to validate the
436 * contents. If OCFS2_FEATURE_INCOMPAT_META_ECC is not set, it is all
437 * zeros.
438 */
439struct ocfs2_block_check {
440/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */
441 __le16 bc_ecc; /* Single-error-correction parity vector.
442 This is a simple Hamming code dependent
443 on the blocksize. OCFS2's maximum
444 blocksize, 4K, requires 16 parity bits,
445 so we fit in __le16. */
446 __le16 bc_reserved1;
447/*08*/
448};
449
450/*
413 * On disk extent record for OCFS2 451 * On disk extent record for OCFS2
414 * It describes a range of clusters on disk. 452 * It describes a range of clusters on disk.
415 * 453 *
@@ -496,7 +534,7 @@ struct ocfs2_truncate_log {
496struct ocfs2_extent_block 534struct ocfs2_extent_block
497{ 535{
498/*00*/ __u8 h_signature[8]; /* Signature for verification */ 536/*00*/ __u8 h_signature[8]; /* Signature for verification */
499 __le64 h_reserved1; 537 struct ocfs2_block_check h_check; /* Error checking */
500/*10*/ __le16 h_suballoc_slot; /* Slot suballocator this 538/*10*/ __le16 h_suballoc_slot; /* Slot suballocator this
501 extent_header belongs to */ 539 extent_header belongs to */
502 __le16 h_suballoc_bit; /* Bit offset in suballocator 540 __le16 h_suballoc_bit; /* Bit offset in suballocator
@@ -666,7 +704,8 @@ struct ocfs2_dinode {
666 was set in i_flags */ 704 was set in i_flags */
667 __le16 i_dyn_features; 705 __le16 i_dyn_features;
668 __le64 i_xattr_loc; 706 __le64 i_xattr_loc;
669/*80*/ __le64 i_reserved2[7]; 707/*80*/ struct ocfs2_block_check i_check; /* Error checking */
708/*88*/ __le64 i_reserved2[6];
670/*B8*/ union { 709/*B8*/ union {
671 __le64 i_pad1; /* Generic way to refer to this 710 __le64 i_pad1; /* Generic way to refer to this
672 64bit union */ 711 64bit union */
@@ -715,6 +754,34 @@ struct ocfs2_dir_entry {
715} __attribute__ ((packed)); 754} __attribute__ ((packed));
716 755
717/* 756/*
757 * Per-block record for the unindexed directory btree. This is carefully
758 * crafted so that the rec_len and name_len records of an ocfs2_dir_entry are
759 * mirrored. That way, the directory manipulation code needs a minimal amount
760 * of update.
761 *
762 * NOTE: Keep this structure aligned to a multiple of 4 bytes.
763 */
764struct ocfs2_dir_block_trailer {
765/*00*/ __le64 db_compat_inode; /* Always zero. Was inode */
766
767 __le16 db_compat_rec_len; /* Backwards compatible with
768 * ocfs2_dir_entry. */
769 __u8 db_compat_name_len; /* Always zero. Was name_len */
770 __u8 db_reserved0;
771 __le16 db_reserved1;
772 __le16 db_free_rec_len; /* Size of largest empty hole
773 * in this block. (unused) */
774/*10*/ __u8 db_signature[8]; /* Signature for verification */
775 __le64 db_reserved2;
776/*20*/ __le64 db_free_next; /* Next block in list (unused) */
777 __le64 db_blkno; /* Offset on disk, in blocks */
778/*30*/ __le64 db_parent_dinode; /* dinode which owns me, in
779 blocks */
780 struct ocfs2_block_check db_check; /* Error checking */
781/*40*/
782};
783
784/*
718 * On disk allocator group structure for OCFS2 785 * On disk allocator group structure for OCFS2
719 */ 786 */
720struct ocfs2_group_desc 787struct ocfs2_group_desc
@@ -733,7 +800,8 @@ struct ocfs2_group_desc
733/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in 800/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
734 blocks */ 801 blocks */
735 __le64 bg_blkno; /* Offset on disk, in blocks */ 802 __le64 bg_blkno; /* Offset on disk, in blocks */
736/*30*/ __le64 bg_reserved2[2]; 803/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
804 __le64 bg_reserved2;
737/*40*/ __u8 bg_bitmap[0]; 805/*40*/ __u8 bg_bitmap[0];
738}; 806};
739 807
@@ -776,7 +844,12 @@ struct ocfs2_xattr_header {
776 in this extent record, 844 in this extent record,
777 only valid in the first 845 only valid in the first
778 bucket. */ 846 bucket. */
779 __le64 xh_csum; 847 struct ocfs2_block_check xh_check; /* Error checking
848 (Note, this is only
849 used for xattr
850 buckets. A block uses
851 xb_check and sets
852 this field to zero.) */
780 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ 853 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
781}; 854};
782 855
@@ -827,7 +900,7 @@ struct ocfs2_xattr_block {
827 block group */ 900 block group */
828 __le32 xb_fs_generation; /* Must match super block */ 901 __le32 xb_fs_generation; /* Must match super block */
829/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ 902/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
830 __le64 xb_csum; 903 struct ocfs2_block_check xb_check; /* Error checking */
831/*20*/ __le16 xb_flags; /* Indicates whether this block contains 904/*20*/ __le16 xb_flags; /* Indicates whether this block contains
832 real xattr or a xattr tree. */ 905 real xattr or a xattr tree. */
833 __le16 xb_reserved0; 906 __le16 xb_reserved0;
@@ -868,6 +941,128 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
868 return xe->xe_type & OCFS2_XATTR_TYPE_MASK; 941 return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
869} 942}
870 943
944/*
945 * On disk structures for global quota file
946 */
947
948/* Magic numbers and known versions for global quota files */
949#define OCFS2_GLOBAL_QMAGICS {\
950 0x0cf52470, /* USRQUOTA */ \
951 0x0cf52471 /* GRPQUOTA */ \
952}
953
954#define OCFS2_GLOBAL_QVERSIONS {\
955 0, \
956 0, \
957}
958
959
960/* Each block of each quota file has a certain fixed number of bytes reserved
961 * for OCFS2 internal use at its end. OCFS2 can use it for things like
962 * checksums, etc. */
963#define OCFS2_QBLK_RESERVED_SPACE 8
964
965/* Generic header of all quota files */
966struct ocfs2_disk_dqheader {
967 __le32 dqh_magic; /* Magic number identifying file */
968 __le32 dqh_version; /* Quota format version */
969};
970
971#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
972
973/* Information header of global quota file (immediately follows the generic
974 * header) */
975struct ocfs2_global_disk_dqinfo {
976/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */
977 __le32 dqi_igrace; /* Grace time for inode softlimit excess */
978 __le32 dqi_syncms; /* Time after which we sync local changes to
979 * global quota file */
980 __le32 dqi_blocks; /* Number of blocks in quota file */
981/*10*/ __le32 dqi_free_blk; /* First free block in quota file */
982 __le32 dqi_free_entry; /* First block with free dquot entry in quota
983 * file */
984};
985
986/* Structure with global user / group information. We reserve some space
987 * for future use. */
988struct ocfs2_global_disk_dqblk {
989/*00*/ __le32 dqb_id; /* ID the structure belongs to */
990 __le32 dqb_use_count; /* Number of nodes having reference to this structure */
991 __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
992/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */
993 __le64 dqb_curinodes; /* current # allocated inodes */
994/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */
995 __le64 dqb_bsoftlimit; /* preferred limit on disk space */
996/*30*/ __le64 dqb_curspace; /* current space occupied */
997 __le64 dqb_btime; /* time limit for excessive disk use */
998/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */
999 __le64 dqb_pad1;
1000/*50*/ __le64 dqb_pad2;
1001};
1002
1003/*
1004 * On-disk structures for local quota file
1005 */
1006
1007/* Magic numbers and known versions for local quota files */
1008#define OCFS2_LOCAL_QMAGICS {\
1009 0x0cf524c0, /* USRQUOTA */ \
1010 0x0cf524c1 /* GRPQUOTA */ \
1011}
1012
1013#define OCFS2_LOCAL_QVERSIONS {\
1014 0, \
1015 0, \
1016}
1017
1018/* Quota flags in dqinfo header */
1019#define OLQF_CLEAN 0x0001 /* Quota file is empty (this should be after\
1020 * quota has been cleanly turned off) */
1021
1022#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
1023
1024/* Information header of local quota file (immediately follows the generic
1025 * header) */
1026struct ocfs2_local_disk_dqinfo {
1027 __le32 dqi_flags; /* Flags for quota file */
1028 __le32 dqi_chunks; /* Number of chunks of quota structures
1029 * with a bitmap */
1030 __le32 dqi_blocks; /* Number of blocks allocated for quota file */
1031};
1032
1033/* Header of one chunk of a quota file */
1034struct ocfs2_local_disk_chunk {
1035 __le32 dqc_free; /* Number of free entries in the bitmap */
1036 u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
1037 * chunk of quota file */
1038};
1039
1040/* One entry in local quota file */
1041struct ocfs2_local_disk_dqblk {
1042/*00*/ __le64 dqb_id; /* id this quota applies to */
1043 __le64 dqb_spacemod; /* Change in the amount of used space */
1044/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */
1045};
1046
1047
1048/*
1049 * The quota trailer lives at the end of each quota block.
1050 */
1051
1052struct ocfs2_disk_dqtrailer {
1053/*00*/ struct ocfs2_block_check dq_check; /* Error checking */
1054/*08*/ /* Cannot be larger than OCFS2_QBLK_RESERVED_SPACE */
1055};
1056
1057static inline struct ocfs2_disk_dqtrailer *ocfs2_block_dqtrailer(int blocksize,
1058 void *buf)
1059{
1060 char *ptr = buf;
1061 ptr += blocksize - OCFS2_QBLK_RESERVED_SPACE;
1062
1063 return (struct ocfs2_disk_dqtrailer *)ptr;
1064}
1065
871#ifdef __KERNEL__ 1066#ifdef __KERNEL__
872static inline int ocfs2_fast_symlink_chars(struct super_block *sb) 1067static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
873{ 1068{
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
deleted file mode 100644
index b91c78f8f558..000000000000
--- a/fs/ocfs2/ocfs2_jbd_compat.h
+++ /dev/null
@@ -1,82 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * ocfs2_jbd_compat.h
5 *
6 * Compatibility defines for JBD.
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_JBD_COMPAT_H
21#define OCFS2_JBD_COMPAT_H
22
23#ifndef CONFIG_OCFS2_COMPAT_JBD
24# error Should not have been included
25#endif
26
27struct jbd2_inode {
28 unsigned int dummy;
29};
30
31#define JBD2_BARRIER JFS_BARRIER
32#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
33
34#define jbd2_journal_ack_err journal_ack_err
35#define jbd2_journal_clear_err journal_clear_err
36#define jbd2_journal_destroy journal_destroy
37#define jbd2_journal_dirty_metadata journal_dirty_metadata
38#define jbd2_journal_errno journal_errno
39#define jbd2_journal_extend journal_extend
40#define jbd2_journal_flush journal_flush
41#define jbd2_journal_force_commit journal_force_commit
42#define jbd2_journal_get_write_access journal_get_write_access
43#define jbd2_journal_get_undo_access journal_get_undo_access
44#define jbd2_journal_init_inode journal_init_inode
45#define jbd2_journal_invalidatepage journal_invalidatepage
46#define jbd2_journal_load journal_load
47#define jbd2_journal_lock_updates journal_lock_updates
48#define jbd2_journal_restart journal_restart
49#define jbd2_journal_start journal_start
50#define jbd2_journal_start_commit journal_start_commit
51#define jbd2_journal_stop journal_stop
52#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
53#define jbd2_journal_unlock_updates journal_unlock_updates
54#define jbd2_journal_wipe journal_wipe
55#define jbd2_log_wait_commit log_wait_commit
56
57static inline int jbd2_journal_file_inode(handle_t *handle,
58 struct jbd2_inode *inode)
59{
60 return 0;
61}
62
63static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
64 loff_t new_size)
65{
66 return 0;
67}
68
69static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
70 struct inode *inode)
71{
72 return;
73}
74
75static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
76 struct jbd2_inode *jinode)
77{
78 return;
79}
80
81
82#endif /* OCFS2_JBD_COMPAT_H */
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 82c200f7a8f1..eb6f50c9ceca 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -46,6 +46,7 @@ enum ocfs2_lock_type {
46 OCFS2_LOCK_TYPE_DENTRY, 46 OCFS2_LOCK_TYPE_DENTRY,
47 OCFS2_LOCK_TYPE_OPEN, 47 OCFS2_LOCK_TYPE_OPEN,
48 OCFS2_LOCK_TYPE_FLOCK, 48 OCFS2_LOCK_TYPE_FLOCK,
49 OCFS2_LOCK_TYPE_QINFO,
49 OCFS2_NUM_LOCK_TYPES 50 OCFS2_NUM_LOCK_TYPES
50}; 51};
51 52
@@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
77 case OCFS2_LOCK_TYPE_FLOCK: 78 case OCFS2_LOCK_TYPE_FLOCK:
78 c = 'F'; 79 c = 'F';
79 break; 80 break;
81 case OCFS2_LOCK_TYPE_QINFO:
82 c = 'Q';
83 break;
80 default: 84 default:
81 c = '\0'; 85 c = '\0';
82 } 86 }
@@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
95 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", 99 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
96 [OCFS2_LOCK_TYPE_OPEN] = "Open", 100 [OCFS2_LOCK_TYPE_OPEN] = "Open",
97 [OCFS2_LOCK_TYPE_FLOCK] = "Flock", 101 [OCFS2_LOCK_TYPE_FLOCK] = "Flock",
102 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
98}; 103};
99 104
100static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 105static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
new file mode 100644
index 000000000000..7365e2e08706
--- /dev/null
+++ b/fs/ocfs2/quota.h
@@ -0,0 +1,119 @@
1/*
2 * quota.h for OCFS2
3 *
4 * In-memory quota structures and prototypes for local and global quota
5 * files (the on-disk structures are defined in ocfs2_fs.h).
6 *
7 */
8
9#ifndef _OCFS2_QUOTA_H
10#define _OCFS2_QUOTA_H
11
12#include <linux/types.h>
13#include <linux/slab.h>
14#include <linux/quota.h>
15#include <linux/list.h>
16#include <linux/dqblk_qtree.h>
17
18#include "ocfs2.h"
19
20/* Common stuff */
21/* id number of quota format */
22#define QFMT_OCFS2 3
23
24/*
25 * In-memory structures
26 */
27struct ocfs2_dquot {
28 struct dquot dq_dquot; /* Generic VFS dquot */
29 loff_t dq_local_off; /* Offset in the local quota file */
30 struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
31 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
32 s64 dq_origspace; /* Last globally synced space usage */
33 s64 dq_originodes; /* Last globally synced inode usage */
34};
35
36/* Description of one chunk to recover in memory */
37struct ocfs2_recovery_chunk {
38 struct list_head rc_list; /* List of chunks */
39 int rc_chunk; /* Chunk number */
40 unsigned long *rc_bitmap; /* Bitmap of entries to recover */
41};
42
43struct ocfs2_quota_recovery {
44 struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */
45};
46
47/* In-memory structure with quota header information */
48struct ocfs2_mem_dqinfo {
49 unsigned int dqi_type; /* Quota type this structure describes */
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
57 struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
58 int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
59 struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
60 struct buffer_head *dqi_ibh; /* Buffer with information header */
61 struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
62 struct delayed_work dqi_sync_work; /* Work for syncing dquots */
63 struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
64 * information, in case we
65 * enable quotas on file
66 * needing it */
67};
68
69static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
70{
71 return container_of(dquot, struct ocfs2_dquot, dq_dquot);
72}
73
74struct ocfs2_quota_chunk {
75 struct list_head qc_chunk; /* List of quotafile chunks */
76 int qc_num; /* Number of quota chunk */
77 struct buffer_head *qc_headerbh; /* Buffer head with chunk header */
78};
79
80extern struct kmem_cache *ocfs2_dquot_cachep;
81extern struct kmem_cache *ocfs2_qf_chunk_cachep;
82
83extern struct qtree_fmt_operations ocfs2_global_ops;
84
85struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
86 struct ocfs2_super *osb, int slot_num);
87int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
88 struct ocfs2_quota_recovery *rec,
89 int slot_num);
90void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec);
91ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
92 size_t len, loff_t off);
93ssize_t ocfs2_quota_write(struct super_block *sb, int type,
94 const char *data, size_t len, loff_t off);
95int ocfs2_global_read_info(struct super_block *sb, int type);
96int ocfs2_global_write_info(struct super_block *sb, int type);
97int ocfs2_global_read_dquot(struct dquot *dquot);
98int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
99static inline int ocfs2_sync_dquot(struct dquot *dquot)
100{
101 return __ocfs2_sync_dquot(dquot, 0);
102}
103static inline int ocfs2_global_release_dquot(struct dquot *dquot)
104{
105 return __ocfs2_sync_dquot(dquot, 1);
106}
107
108int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
109void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
110int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
111 struct buffer_head **bh);
112
113extern struct dquot_operations ocfs2_quota_operations;
114extern struct quota_format_type ocfs2_quota_format;
115
116int ocfs2_quota_setup(void);
117void ocfs2_quota_shutdown(void);
118
119#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
new file mode 100644
index 000000000000..f4efa89baee5
--- /dev/null
+++ b/fs/ocfs2/quota_global.c
@@ -0,0 +1,860 @@
1/*
2 * Implementation of operations over global quota file
3 */
4#include <linux/spinlock.h>
5#include <linux/fs.h>
6#include <linux/quota.h>
7#include <linux/quotaops.h>
8#include <linux/dqblk_qtree.h>
9#include <linux/jiffies.h>
10#include <linux/writeback.h>
11#include <linux/workqueue.h>
12
13#define MLOG_MASK_PREFIX ML_QUOTA
14#include <cluster/masklog.h>
15
16#include "ocfs2_fs.h"
17#include "ocfs2.h"
18#include "alloc.h"
19#include "blockcheck.h"
20#include "inode.h"
21#include "journal.h"
22#include "file.h"
23#include "sysfile.h"
24#include "dlmglue.h"
25#include "uptodate.h"
26#include "quota.h"
27
/*
 * Workqueue used for the delayed quota sync work. Created by
 * ocfs2_quota_setup() and torn down by ocfs2_quota_shutdown(); statics
 * are zero-initialized, so no explicit NULL initializer is needed.
 */
static struct workqueue_struct *ocfs2_quota_wq;
29
30static void qsync_work_fn(struct work_struct *work);
31
32static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
33{
34 struct ocfs2_global_disk_dqblk *d = dp;
35 struct mem_dqblk *m = &dquot->dq_dqb;
36
37 /* Update from disk only entries not set by the admin */
38 if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) {
39 m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
40 m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
41 }
42 if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
43 m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
44 if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) {
45 m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
46 m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
47 }
48 if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
49 m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
50 if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags))
51 m->dqb_btime = le64_to_cpu(d->dqb_btime);
52 if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags))
53 m->dqb_itime = le64_to_cpu(d->dqb_itime);
54 OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count);
55}
56
57static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
58{
59 struct ocfs2_global_disk_dqblk *d = dp;
60 struct mem_dqblk *m = &dquot->dq_dqb;
61
62 d->dqb_id = cpu_to_le32(dquot->dq_id);
63 d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
64 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
65 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
66 d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
67 d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
68 d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72}
73
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
75{
76 struct ocfs2_global_disk_dqblk *d = dp;
77 struct ocfs2_mem_dqinfo *oinfo =
78 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
79
80 if (qtree_entry_unused(&oinfo->dqi_gi, dp))
81 return 0;
82 return le32_to_cpu(d->dqb_id) == dquot->dq_id;
83}
84
85struct qtree_fmt_operations ocfs2_global_ops = {
86 .mem2disk_dqblk = ocfs2_global_mem2diskdqb,
87 .disk2mem_dqblk = ocfs2_global_disk2memdqb,
88 .is_id = ocfs2_global_is_id,
89};
90
91static int ocfs2_validate_quota_block(struct super_block *sb,
92 struct buffer_head *bh)
93{
94 struct ocfs2_disk_dqtrailer *dqt =
95 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
96
97 mlog(0, "Validating quota block %llu\n",
98 (unsigned long long)bh->b_blocknr);
99
100 BUG_ON(!buffer_uptodate(bh));
101
102 /*
103 * If the ecc fails, we return the error but otherwise
104 * leave the filesystem running. We know any error is
105 * local to this block.
106 */
107 return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check);
108}
109
110int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
111 struct buffer_head **bh)
112{
113 int rc = 0;
114 struct buffer_head *tmp = *bh;
115
116 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
117 ocfs2_validate_quota_block);
118 if (rc)
119 mlog_errno(rc);
120
121 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
122 if (!rc && !*bh)
123 *bh = tmp;
124
125 return rc;
126}
127
128static int ocfs2_get_quota_block(struct inode *inode, int block,
129 struct buffer_head **bh)
130{
131 u64 pblock, pcount;
132 int err;
133
134 down_read(&OCFS2_I(inode)->ip_alloc_sem);
135 err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
136 up_read(&OCFS2_I(inode)->ip_alloc_sem);
137 if (err) {
138 mlog_errno(err);
139 return err;
140 }
141 *bh = sb_getblk(inode->i_sb, pblock);
142 if (!*bh) {
143 err = -EIO;
144 mlog_errno(err);
145 }
146 return err;;
147}
148
149/* Read data from global quotafile - avoid pagecache and such because we cannot
150 * afford acquiring the locks... We use quota cluster lock to serialize
151 * operations. Caller is responsible for acquiring it. */
152ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
153 size_t len, loff_t off)
154{
155 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
156 struct inode *gqinode = oinfo->dqi_gqinode;
157 loff_t i_size = i_size_read(gqinode);
158 int offset = off & (sb->s_blocksize - 1);
159 sector_t blk = off >> sb->s_blocksize_bits;
160 int err = 0;
161 struct buffer_head *bh;
162 size_t toread, tocopy;
163
164 if (off > i_size)
165 return 0;
166 if (off + len > i_size)
167 len = i_size - off;
168 toread = len;
169 while (toread > 0) {
170 tocopy = min_t(size_t, (sb->s_blocksize - offset), toread);
171 bh = NULL;
172 err = ocfs2_read_quota_block(gqinode, blk, &bh);
173 if (err) {
174 mlog_errno(err);
175 return err;
176 }
177 memcpy(data, bh->b_data + offset, tocopy);
178 brelse(bh);
179 offset = 0;
180 toread -= tocopy;
181 data += tocopy;
182 blk++;
183 }
184 return len;
185}
186
187/* Write to quotafile (we know the transaction is already started and has
188 * enough credits) */
189ssize_t ocfs2_quota_write(struct super_block *sb, int type,
190 const char *data, size_t len, loff_t off)
191{
192 struct mem_dqinfo *info = sb_dqinfo(sb, type);
193 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
194 struct inode *gqinode = oinfo->dqi_gqinode;
195 int offset = off & (sb->s_blocksize - 1);
196 sector_t blk = off >> sb->s_blocksize_bits;
197 int err = 0, new = 0, ja_type;
198 struct buffer_head *bh = NULL;
199 handle_t *handle = journal_current_handle();
200
201 if (!handle) {
202 mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
203 "because transaction was not started.\n",
204 (unsigned long long)off, (unsigned long long)len);
205 return -EIO;
206 }
207 if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) {
208 WARN_ON(1);
209 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
210 }
211
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
215 err = ocfs2_extend_no_holes(gqinode, off + len, off);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
217 if (err < 0)
218 goto out;
219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh,
221 off + len);
222 if (err < 0)
223 goto out;
224 new = 1;
225 }
226 /* Not rewriting whole block? */
227 if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
228 !new) {
229 err = ocfs2_read_quota_block(gqinode, blk, &bh);
230 ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
231 } else {
232 err = ocfs2_get_quota_block(gqinode, blk, &bh);
233 ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
234 }
235 if (err) {
236 mlog_errno(err);
237 return err;
238 }
239 lock_buffer(bh);
240 if (new)
241 memset(bh->b_data, 0, sb->s_blocksize);
242 memcpy(bh->b_data + offset, data, len);
243 flush_dcache_page(bh->b_page);
244 set_buffer_uptodate(bh);
245 unlock_buffer(bh);
246 ocfs2_set_buffer_uptodate(gqinode, bh);
247 err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
248 if (err < 0) {
249 brelse(bh);
250 goto out;
251 }
252 err = ocfs2_journal_dirty(handle, bh);
253 brelse(bh);
254 if (err < 0)
255 goto out;
256out:
257 if (err) {
258 mutex_unlock(&gqinode->i_mutex);
259 mlog_errno(err);
260 return err;
261 }
262 gqinode->i_version++;
263 ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
264 mutex_unlock(&gqinode->i_mutex);
265 return len;
266}
267
268int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
269{
270 int status;
271 struct buffer_head *bh = NULL;
272
273 status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
274 if (status < 0)
275 return status;
276 spin_lock(&dq_data_lock);
277 if (!oinfo->dqi_gqi_count++)
278 oinfo->dqi_gqi_bh = bh;
279 else
280 WARN_ON(bh != oinfo->dqi_gqi_bh);
281 spin_unlock(&dq_data_lock);
282 return 0;
283}
284
285void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
286{
287 ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
288 brelse(oinfo->dqi_gqi_bh);
289 spin_lock(&dq_data_lock);
290 if (!--oinfo->dqi_gqi_count)
291 oinfo->dqi_gqi_bh = NULL;
292 spin_unlock(&dq_data_lock);
293}
294
295/* Read information header from global quota file */
296int ocfs2_global_read_info(struct super_block *sb, int type)
297{
298 struct inode *gqinode = NULL;
299 unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
300 GROUP_QUOTA_SYSTEM_INODE };
301 struct ocfs2_global_disk_dqinfo dinfo;
302 struct mem_dqinfo *info = sb_dqinfo(sb, type);
303 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
304 int status;
305
306 mlog_entry_void();
307
308 /* Read global header */
309 gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
310 OCFS2_INVALID_SLOT);
311 if (!gqinode) {
312 mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n",
313 type);
314 status = -EINVAL;
315 goto out_err;
316 }
317 oinfo->dqi_gi.dqi_sb = sb;
318 oinfo->dqi_gi.dqi_type = type;
319 ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
320 oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
321 oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
322 oinfo->dqi_gqi_bh = NULL;
323 oinfo->dqi_gqi_count = 0;
324 oinfo->dqi_gqinode = gqinode;
325 status = ocfs2_lock_global_qf(oinfo, 0);
326 if (status < 0) {
327 mlog_errno(status);
328 goto out_err;
329 }
330 status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
331 sizeof(struct ocfs2_global_disk_dqinfo),
332 OCFS2_GLOBAL_INFO_OFF);
333 ocfs2_unlock_global_qf(oinfo, 0);
334 if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
335 mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
336 status);
337 if (status >= 0)
338 status = -EIO;
339 mlog_errno(status);
340 goto out_err;
341 }
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
349 oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits;
350 oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize -
351 OCFS2_QBLK_RESERVED_SPACE;
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff);
356
357out_err:
358 mlog_exit(status);
359 return status;
360}
361
362/* Write information to global quota file. Expects exlusive lock on quota
363 * file inode and quota info */
364static int __ocfs2_global_write_info(struct super_block *sb, int type)
365{
366 struct mem_dqinfo *info = sb_dqinfo(sb, type);
367 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
368 struct ocfs2_global_disk_dqinfo dinfo;
369 ssize_t size;
370
371 spin_lock(&dq_data_lock);
372 info->dqi_flags &= ~DQF_INFO_DIRTY;
373 dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
374 dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
375 spin_unlock(&dq_data_lock);
376 dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
377 dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
378 dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
379 dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry);
380 size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
381 sizeof(struct ocfs2_global_disk_dqinfo),
382 OCFS2_GLOBAL_INFO_OFF);
383 if (size != sizeof(struct ocfs2_global_disk_dqinfo)) {
384 mlog(ML_ERROR, "Cannot write global quota info structure\n");
385 if (size >= 0)
386 size = -EIO;
387 return size;
388 }
389 return 0;
390}
391
392int ocfs2_global_write_info(struct super_block *sb, int type)
393{
394 int err;
395 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
396
397 err = ocfs2_qinfo_lock(info, 1);
398 if (err < 0)
399 return err;
400 err = __ocfs2_global_write_info(sb, type);
401 ocfs2_qinfo_unlock(info, 1);
402 return err;
403}
404
405/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot)
408{
409 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info =
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
412
413 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0)
415 goto out;
416 err = qtree_read_dquot(&info->dqi_gi, dquot);
417 if (err < 0)
418 goto out_qlock;
419 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
422 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 err = ocfs2_qinfo_lock(info, 1);
425 if (err < 0)
426 goto out_qlock;
427 ex = 1;
428 }
429 err = qtree_write_dquot(&info->dqi_gi, dquot);
430 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
431 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
432 if (!err)
433 err = err2;
434 }
435out_qlock:
436 if (ex)
437 ocfs2_qinfo_unlock(info, 1);
438 ocfs2_qinfo_unlock(info, 0);
439out:
440 if (err < 0)
441 mlog_errno(err);
442 return err;
443}
444
445/* Sync local information about quota modifications with global quota file.
446 * Caller must have started the transaction and obtained exclusive lock for
447 * global quota file inode */
448int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
449{
450 int err, err2;
451 struct super_block *sb = dquot->dq_sb;
452 int type = dquot->dq_type;
453 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
454 struct ocfs2_global_disk_dqblk dqblk;
455 s64 spacechange, inodechange;
456 time_t olditime, oldbtime;
457
458 err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
459 sizeof(struct ocfs2_global_disk_dqblk),
460 dquot->dq_off);
461 if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
462 if (err >= 0) {
463 mlog(ML_ERROR, "Short read from global quota file "
464 "(%u read)\n", err);
465 err = -EIO;
466 }
467 goto out;
468 }
469
470 /* Update space and inode usage. Get also other information from
471 * global quota file so that we don't overwrite any changes there.
472 * We are */
473 spin_lock(&dq_data_lock);
474 spacechange = dquot->dq_dqb.dqb_curspace -
475 OCFS2_DQUOT(dquot)->dq_origspace;
476 inodechange = dquot->dq_dqb.dqb_curinodes -
477 OCFS2_DQUOT(dquot)->dq_originodes;
478 olditime = dquot->dq_dqb.dqb_itime;
479 oldbtime = dquot->dq_dqb.dqb_btime;
480 ocfs2_global_disk2memdqb(dquot, &dqblk);
481 mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
482 dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
483 dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
484 if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
485 dquot->dq_dqb.dqb_curspace += spacechange;
486 if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
487 dquot->dq_dqb.dqb_curinodes += inodechange;
488 /* Set properly space grace time... */
489 if (dquot->dq_dqb.dqb_bsoftlimit &&
490 dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
491 if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
492 oldbtime > 0) {
493 if (dquot->dq_dqb.dqb_btime > 0)
494 dquot->dq_dqb.dqb_btime =
495 min(dquot->dq_dqb.dqb_btime, oldbtime);
496 else
497 dquot->dq_dqb.dqb_btime = oldbtime;
498 }
499 } else {
500 dquot->dq_dqb.dqb_btime = 0;
501 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
502 }
503 /* Set properly inode grace time... */
504 if (dquot->dq_dqb.dqb_isoftlimit &&
505 dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
506 if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
507 olditime > 0) {
508 if (dquot->dq_dqb.dqb_itime > 0)
509 dquot->dq_dqb.dqb_itime =
510 min(dquot->dq_dqb.dqb_itime, olditime);
511 else
512 dquot->dq_dqb.dqb_itime = olditime;
513 }
514 } else {
515 dquot->dq_dqb.dqb_itime = 0;
516 clear_bit(DQ_INODES_B, &dquot->dq_flags);
517 }
518 /* All information is properly updated, clear the flags */
519 __clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
520 __clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
521 __clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
522 __clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
523 __clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
524 __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
525 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
526 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
527 spin_unlock(&dq_data_lock);
528 err = ocfs2_qinfo_lock(info, freeing);
529 if (err < 0) {
530 mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
531 " (type=%d, id=%u)\n", dquot->dq_type,
532 (unsigned)dquot->dq_id);
533 goto out;
534 }
535 if (freeing)
536 OCFS2_DQUOT(dquot)->dq_use_count--;
537 err = qtree_write_dquot(&info->dqi_gi, dquot);
538 if (err < 0)
539 goto out_qlock;
540 if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
541 err = qtree_release_dquot(&info->dqi_gi, dquot);
542 if (info_dirty(sb_dqinfo(sb, type))) {
543 err2 = __ocfs2_global_write_info(sb, type);
544 if (!err)
545 err = err2;
546 }
547 }
548out_qlock:
549 ocfs2_qinfo_unlock(info, freeing);
550out:
551 if (err < 0)
552 mlog_errno(err);
553 return err;
554}
555
556/*
557 * Functions for periodic syncing of dquots with global file
558 */
559static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
560{
561 handle_t *handle;
562 struct super_block *sb = dquot->dq_sb;
563 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
564 struct ocfs2_super *osb = OCFS2_SB(sb);
565 int status = 0;
566
567 mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id,
568 dquot->dq_type, type, sb->s_id);
569 if (type != dquot->dq_type)
570 goto out;
571 status = ocfs2_lock_global_qf(oinfo, 1);
572 if (status < 0)
573 goto out;
574
575 handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
576 if (IS_ERR(handle)) {
577 status = PTR_ERR(handle);
578 mlog_errno(status);
579 goto out_ilock;
580 }
581 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
582 status = ocfs2_sync_dquot(dquot);
583 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
584 if (status < 0)
585 mlog_errno(status);
586 /* We have to write local structure as well... */
587 dquot_mark_dquot_dirty(dquot);
588 status = dquot_commit(dquot);
589 if (status < 0)
590 mlog_errno(status);
591 ocfs2_commit_trans(osb, handle);
592out_ilock:
593 ocfs2_unlock_global_qf(oinfo, 1);
594out:
595 mlog_exit(status);
596 return status;
597}
598
599static void qsync_work_fn(struct work_struct *work)
600{
601 struct ocfs2_mem_dqinfo *oinfo = container_of(work,
602 struct ocfs2_mem_dqinfo,
603 dqi_sync_work.work);
604 struct super_block *sb = oinfo->dqi_gqinode->i_sb;
605
606 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
607 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
608 oinfo->dqi_syncjiff);
609}
610
611/*
612 * Wrappers for generic quota functions
613 */
614
615static int ocfs2_write_dquot(struct dquot *dquot)
616{
617 handle_t *handle;
618 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
619 int status = 0;
620
621 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
622
623 handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
624 if (IS_ERR(handle)) {
625 status = PTR_ERR(handle);
626 mlog_errno(status);
627 goto out;
628 }
629 status = dquot_commit(dquot);
630 ocfs2_commit_trans(osb, handle);
631out:
632 mlog_exit(status);
633 return status;
634}
635
636int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
637{
638 struct ocfs2_mem_dqinfo *oinfo;
639 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
640 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
641
642 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
643 return 0;
644
645 oinfo = sb_dqinfo(sb, type)->dqi_priv;
646 /* We modify tree, leaf block, global info, local chunk header,
647 * global and local inode */
648 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
649 2 * OCFS2_INODE_UPDATE_CREDITS;
650}
651
652static int ocfs2_release_dquot(struct dquot *dquot)
653{
654 handle_t *handle;
655 struct ocfs2_mem_dqinfo *oinfo =
656 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
657 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
658 int status = 0;
659
660 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
661
662 status = ocfs2_lock_global_qf(oinfo, 1);
663 if (status < 0)
664 goto out;
665 handle = ocfs2_start_trans(osb,
666 ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
667 if (IS_ERR(handle)) {
668 status = PTR_ERR(handle);
669 mlog_errno(status);
670 goto out_ilock;
671 }
672 status = dquot_release(dquot);
673 ocfs2_commit_trans(osb, handle);
674out_ilock:
675 ocfs2_unlock_global_qf(oinfo, 1);
676out:
677 mlog_exit(status);
678 return status;
679}
680
681int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
682{
683 struct ocfs2_mem_dqinfo *oinfo;
684 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
685 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
686 struct ocfs2_dinode *lfe, *gfe;
687
688 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
689 return 0;
690
691 oinfo = sb_dqinfo(sb, type)->dqi_priv;
692 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
693 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
694 /* We can extend local file + global file. In local file we
695 * can modify info, chunk header block and dquot block. In
696 * global file we can modify info, tree and leaf block */
697 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
698 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
699 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
700}
701
702static int ocfs2_acquire_dquot(struct dquot *dquot)
703{
704 handle_t *handle;
705 struct ocfs2_mem_dqinfo *oinfo =
706 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
707 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
708 int status = 0;
709
710 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
711 /* We need an exclusive lock, because we're going to update use count
712 * and instantiate possibly new dquot structure */
713 status = ocfs2_lock_global_qf(oinfo, 1);
714 if (status < 0)
715 goto out;
716 handle = ocfs2_start_trans(osb,
717 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
718 if (IS_ERR(handle)) {
719 status = PTR_ERR(handle);
720 mlog_errno(status);
721 goto out_ilock;
722 }
723 status = dquot_acquire(dquot);
724 ocfs2_commit_trans(osb, handle);
725out_ilock:
726 ocfs2_unlock_global_qf(oinfo, 1);
727out:
728 mlog_exit(status);
729 return status;
730}
731
732static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
733{
734 unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
735 (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) |
736 (1 << (DQ_LASTSET_B + QIF_INODES_B)) |
737 (1 << (DQ_LASTSET_B + QIF_SPACE_B)) |
738 (1 << (DQ_LASTSET_B + QIF_BTIME_B)) |
739 (1 << (DQ_LASTSET_B + QIF_ITIME_B));
740 int sync = 0;
741 int status;
742 struct super_block *sb = dquot->dq_sb;
743 int type = dquot->dq_type;
744 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
745 handle_t *handle;
746 struct ocfs2_super *osb = OCFS2_SB(sb);
747
748 mlog_entry("id=%u, type=%d", dquot->dq_id, type);
749 dquot_mark_dquot_dirty(dquot);
750
751 /* In case user set some limits, sync dquot immediately to global
752 * quota file so that information propagates quicker */
753 spin_lock(&dq_data_lock);
754 if (dquot->dq_flags & mask)
755 sync = 1;
756 spin_unlock(&dq_data_lock);
757 if (!sync) {
758 status = ocfs2_write_dquot(dquot);
759 goto out;
760 }
761 status = ocfs2_lock_global_qf(oinfo, 1);
762 if (status < 0)
763 goto out;
764 handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
765 if (IS_ERR(handle)) {
766 status = PTR_ERR(handle);
767 mlog_errno(status);
768 goto out_ilock;
769 }
770 status = ocfs2_sync_dquot(dquot);
771 if (status < 0) {
772 mlog_errno(status);
773 goto out_trans;
774 }
775 /* Now write updated local dquot structure */
776 status = dquot_commit(dquot);
777out_trans:
778 ocfs2_commit_trans(osb, handle);
779out_ilock:
780 ocfs2_unlock_global_qf(oinfo, 1);
781out:
782 mlog_exit(status);
783 return status;
784}
785
786/* This should happen only after set_dqinfo(). */
787static int ocfs2_write_info(struct super_block *sb, int type)
788{
789 handle_t *handle;
790 int status = 0;
791 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
792
793 mlog_entry_void();
794
795 status = ocfs2_lock_global_qf(oinfo, 1);
796 if (status < 0)
797 goto out;
798 handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS);
799 if (IS_ERR(handle)) {
800 status = PTR_ERR(handle);
801 mlog_errno(status);
802 goto out_ilock;
803 }
804 status = dquot_commit_info(sb, type);
805 ocfs2_commit_trans(OCFS2_SB(sb), handle);
806out_ilock:
807 ocfs2_unlock_global_qf(oinfo, 1);
808out:
809 mlog_exit(status);
810 return status;
811}
812
813static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
814{
815 struct ocfs2_dquot *dquot =
816 kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS);
817
818 if (!dquot)
819 return NULL;
820 return &dquot->dq_dquot;
821}
822
823static void ocfs2_destroy_dquot(struct dquot *dquot)
824{
825 kmem_cache_free(ocfs2_dquot_cachep, dquot);
826}
827
828struct dquot_operations ocfs2_quota_operations = {
829 .initialize = dquot_initialize,
830 .drop = dquot_drop,
831 .alloc_space = dquot_alloc_space,
832 .alloc_inode = dquot_alloc_inode,
833 .free_space = dquot_free_space,
834 .free_inode = dquot_free_inode,
835 .transfer = dquot_transfer,
836 .write_dquot = ocfs2_write_dquot,
837 .acquire_dquot = ocfs2_acquire_dquot,
838 .release_dquot = ocfs2_release_dquot,
839 .mark_dirty = ocfs2_mark_dquot_dirty,
840 .write_info = ocfs2_write_info,
841 .alloc_dquot = ocfs2_alloc_dquot,
842 .destroy_dquot = ocfs2_destroy_dquot,
843};
844
845int ocfs2_quota_setup(void)
846{
847 ocfs2_quota_wq = create_workqueue("o2quot");
848 if (!ocfs2_quota_wq)
849 return -ENOMEM;
850 return 0;
851}
852
853void ocfs2_quota_shutdown(void)
854{
855 if (ocfs2_quota_wq) {
856 flush_workqueue(ocfs2_quota_wq);
857 destroy_workqueue(ocfs2_quota_wq);
858 ocfs2_quota_wq = NULL;
859 }
860}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
new file mode 100644
index 000000000000..07deec5e9721
--- /dev/null
+++ b/fs/ocfs2/quota_local.c
@@ -0,0 +1,1253 @@
1/*
2 * Implementation of operations over local quota file
3 */
4
5#include <linux/fs.h>
6#include <linux/quota.h>
7#include <linux/quotaops.h>
8#include <linux/module.h>
9
10#define MLOG_MASK_PREFIX ML_QUOTA
11#include <cluster/masklog.h>
12
13#include "ocfs2_fs.h"
14#include "ocfs2.h"
15#include "inode.h"
16#include "alloc.h"
17#include "file.h"
18#include "buffer_head_io.h"
19#include "journal.h"
20#include "sysfile.h"
21#include "dlmglue.h"
22#include "quota.h"
23
24/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
26{
27 return ((sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) /
28 sizeof(struct ocfs2_local_disk_dqblk));
29}
30
31/* Number of blocks with entries in one chunk */
32static inline unsigned int ol_chunk_blocks(struct super_block *sb)
33{
34 return ((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
35 OCFS2_QBLK_RESERVED_SPACE) << 3) /
36 ol_quota_entries_per_block(sb);
37}
38
/* Number of entries covered by the bitmap of one chunk */
static unsigned int ol_chunk_entries(struct super_block *sb)
{
	unsigned int blocks = ol_chunk_blocks(sb);
	unsigned int per_block = ol_quota_entries_per_block(sb);

	return blocks * per_block;
}
44
/* Block number of the header of chunk @c in the quota file */
static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
{
	/*
	 * Block 0 holds the local quota file info; every chunk takes one
	 * header block followed by its entry blocks.
	 */
	unsigned int blocks_per_chunk = ol_chunk_blocks(sb) + 1;

	return 1 + blocks_per_chunk * c;
}
51
/* Block number of the block holding entry @off of chunk @c */
static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off)
{
	int per_block = ol_quota_entries_per_block(sb);

	/* Skip the chunk header block, then whole blocks of entries */
	return ol_quota_chunk_block(sb, c) + 1 + off / per_block;
}
58
59static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off)
60{
61 int epb = ol_quota_entries_per_block(sb);
62
63 return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
64}
65
66/* Offset of the dquot structure in the quota file */
67static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
68{
69 return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) +
70 ol_dqblk_block_off(sb, c, off);
71}
72
73/* Compute block number from given offset */
74static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
75{
76 return off >> sb->s_blocksize_bits;
77}
78
79static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
80{
81 return off & ((1 << sb->s_blocksize_bits) - 1);
82}
83
84/* Compute offset in the chunk of a structure with the given offset */
85static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off)
86{
87 int epb = ol_quota_entries_per_block(sb);
88
89 return ((off >> sb->s_blocksize_bits) -
90 ol_quota_chunk_block(sb, c) - 1) * epb
91 + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) /
92 sizeof(struct ocfs2_local_disk_dqblk);
93}
94
95/* Write bufferhead into the fs */
96static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
97 void (*modify)(struct buffer_head *, void *), void *private)
98{
99 struct super_block *sb = inode->i_sb;
100 handle_t *handle;
101 int status;
102
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
104 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle);
106 mlog_errno(status);
107 return status;
108 }
109 status = ocfs2_journal_access_dq(handle, inode, bh,
110 OCFS2_JOURNAL_ACCESS_WRITE);
111 if (status < 0) {
112 mlog_errno(status);
113 ocfs2_commit_trans(OCFS2_SB(sb), handle);
114 return status;
115 }
116 lock_buffer(bh);
117 modify(bh, private);
118 unlock_buffer(bh);
119 status = ocfs2_journal_dirty(handle, bh);
120 if (status < 0) {
121 mlog_errno(status);
122 ocfs2_commit_trans(OCFS2_SB(sb), handle);
123 return status;
124 }
125 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
126 if (status < 0) {
127 mlog_errno(status);
128 return status;
129 }
130 return 0;
131}
132
133/* Check whether we understand format of quota files */
134static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
135{
136 unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS;
137 unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS;
138 unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS;
139 unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
140 unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
141 GROUP_QUOTA_SYSTEM_INODE };
142 struct buffer_head *bh = NULL;
143 struct inode *linode = sb_dqopt(sb)->files[type];
144 struct inode *ginode = NULL;
145 struct ocfs2_disk_dqheader *dqhead;
146 int status, ret = 0;
147
148 /* First check whether we understand local quota file */
149 status = ocfs2_read_quota_block(linode, 0, &bh);
150 if (status) {
151 mlog_errno(status);
152 mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
153 type);
154 goto out_err;
155 }
156 dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
157 if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) {
158 mlog(ML_ERROR, "quota file magic does not match (%u != %u),"
159 " type=%d\n", le32_to_cpu(dqhead->dqh_magic),
160 lmagics[type], type);
161 goto out_err;
162 }
163 if (le32_to_cpu(dqhead->dqh_version) != lversions[type]) {
164 mlog(ML_ERROR, "quota file version does not match (%u != %u),"
165 " type=%d\n", le32_to_cpu(dqhead->dqh_version),
166 lversions[type], type);
167 goto out_err;
168 }
169 brelse(bh);
170 bh = NULL;
171
172 /* Next check whether we understand global quota file */
173 ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
174 OCFS2_INVALID_SLOT);
175 if (!ginode) {
176 mlog(ML_ERROR, "cannot get global quota file inode "
177 "(type=%d)\n", type);
178 goto out_err;
179 }
180 /* Since the header is read only, we don't care about locking */
181 status = ocfs2_read_quota_block(ginode, 0, &bh);
182 if (status) {
183 mlog_errno(status);
184 mlog(ML_ERROR, "failed to read global quota file header "
185 "(type=%d)\n", type);
186 goto out_err;
187 }
188 dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
189 if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) {
190 mlog(ML_ERROR, "global quota file magic does not match "
191 "(%u != %u), type=%d\n",
192 le32_to_cpu(dqhead->dqh_magic), gmagics[type], type);
193 goto out_err;
194 }
195 if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) {
196 mlog(ML_ERROR, "global quota file version does not match "
197 "(%u != %u), type=%d\n",
198 le32_to_cpu(dqhead->dqh_version), gversions[type],
199 type);
200 goto out_err;
201 }
202
203 ret = 1;
204out_err:
205 brelse(bh);
206 iput(ginode);
207 return ret;
208}
209
210/* Release given list of quota file chunks */
211static void ocfs2_release_local_quota_bitmaps(struct list_head *head)
212{
213 struct ocfs2_quota_chunk *pos, *next;
214
215 list_for_each_entry_safe(pos, next, head, qc_chunk) {
216 list_del(&pos->qc_chunk);
217 brelse(pos->qc_headerbh);
218 kmem_cache_free(ocfs2_qf_chunk_cachep, pos);
219 }
220}
221
222/* Load quota bitmaps into memory */
223static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
224 struct ocfs2_local_disk_dqinfo *ldinfo,
225 struct list_head *head)
226{
227 struct ocfs2_quota_chunk *newchunk;
228 int i, status;
229
230 INIT_LIST_HEAD(head);
231 for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) {
232 newchunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
233 if (!newchunk) {
234 ocfs2_release_local_quota_bitmaps(head);
235 return -ENOMEM;
236 }
237 newchunk->qc_num = i;
238 newchunk->qc_headerbh = NULL;
239 status = ocfs2_read_quota_block(inode,
240 ol_quota_chunk_block(inode->i_sb, i),
241 &newchunk->qc_headerbh);
242 if (status) {
243 mlog_errno(status);
244 kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
245 ocfs2_release_local_quota_bitmaps(head);
246 return status;
247 }
248 list_add_tail(&newchunk->qc_chunk, head);
249 }
250 return 0;
251}
252
253static void olq_update_info(struct buffer_head *bh, void *private)
254{
255 struct mem_dqinfo *info = private;
256 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
257 struct ocfs2_local_disk_dqinfo *ldinfo;
258
259 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
260 OCFS2_LOCAL_INFO_OFF);
261 spin_lock(&dq_data_lock);
262 ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
263 ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
264 ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
265 spin_unlock(&dq_data_lock);
266}
267
268static int ocfs2_add_recovery_chunk(struct super_block *sb,
269 struct ocfs2_local_disk_chunk *dchunk,
270 int chunk,
271 struct list_head *head)
272{
273 struct ocfs2_recovery_chunk *rc;
274
275 rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS);
276 if (!rc)
277 return -ENOMEM;
278 rc->rc_chunk = chunk;
279 rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
280 if (!rc->rc_bitmap) {
281 kfree(rc);
282 return -ENOMEM;
283 }
284 memcpy(rc->rc_bitmap, dchunk->dqc_bitmap,
285 (ol_chunk_entries(sb) + 7) >> 3);
286 list_add_tail(&rc->rc_list, head);
287 return 0;
288}
289
290static void free_recovery_list(struct list_head *head)
291{
292 struct ocfs2_recovery_chunk *next;
293 struct ocfs2_recovery_chunk *rchunk;
294
295 list_for_each_entry_safe(rchunk, next, head, rc_list) {
296 list_del(&rchunk->rc_list);
297 kfree(rchunk->rc_bitmap);
298 kfree(rchunk);
299 }
300}
301
302void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec)
303{
304 int type;
305
306 for (type = 0; type < MAXQUOTAS; type++)
307 free_recovery_list(&(rec->r_list[type]));
308 kfree(rec);
309}
310
311/* Load entries in our quota file we have to recover*/
312static int ocfs2_recovery_load_quota(struct inode *lqinode,
313 struct ocfs2_local_disk_dqinfo *ldinfo,
314 int type,
315 struct list_head *head)
316{
317 struct super_block *sb = lqinode->i_sb;
318 struct buffer_head *hbh;
319 struct ocfs2_local_disk_chunk *dchunk;
320 int i, chunks = le32_to_cpu(ldinfo->dqi_chunks);
321 int status = 0;
322
323 for (i = 0; i < chunks; i++) {
324 hbh = NULL;
325 status = ocfs2_read_quota_block(lqinode,
326 ol_quota_chunk_block(sb, i),
327 &hbh);
328 if (status) {
329 mlog_errno(status);
330 break;
331 }
332 dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
333 if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb))
334 status = ocfs2_add_recovery_chunk(sb, dchunk, i, head);
335 brelse(hbh);
336 if (status < 0)
337 break;
338 }
339 if (status < 0)
340 free_recovery_list(head);
341 return status;
342}
343
344static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void)
345{
346 int type;
347 struct ocfs2_quota_recovery *rec;
348
349 rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS);
350 if (!rec)
351 return NULL;
352 for (type = 0; type < MAXQUOTAS; type++)
353 INIT_LIST_HEAD(&(rec->r_list[type]));
354 return rec;
355}
356
357/* Load information we need for quota recovery into memory */
358struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
359 struct ocfs2_super *osb,
360 int slot_num)
361{
362 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
363 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
364 unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
365 LOCAL_GROUP_QUOTA_SYSTEM_INODE };
366 struct super_block *sb = osb->sb;
367 struct ocfs2_local_disk_dqinfo *ldinfo;
368 struct inode *lqinode;
369 struct buffer_head *bh;
370 int type;
371 int status = 0;
372 struct ocfs2_quota_recovery *rec;
373
374 mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num);
375 rec = ocfs2_alloc_quota_recovery();
376 if (!rec)
377 return ERR_PTR(-ENOMEM);
378 /* First init... */
379
380 for (type = 0; type < MAXQUOTAS; type++) {
381 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
382 continue;
383 /* At this point, journal of the slot is already replayed so
384 * we can trust metadata and data of the quota file */
385 lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
386 if (!lqinode) {
387 status = -ENOENT;
388 goto out;
389 }
390 status = ocfs2_inode_lock_full(lqinode, NULL, 1,
391 OCFS2_META_LOCK_RECOVERY);
392 if (status < 0) {
393 mlog_errno(status);
394 goto out_put;
395 }
396 /* Now read local header */
397 bh = NULL;
398 status = ocfs2_read_quota_block(lqinode, 0, &bh);
399 if (status) {
400 mlog_errno(status);
401 mlog(ML_ERROR, "failed to read quota file info header "
402 "(slot=%d type=%d)\n", slot_num, type);
403 goto out_lock;
404 }
405 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
406 OCFS2_LOCAL_INFO_OFF);
407 status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
408 &rec->r_list[type]);
409 brelse(bh);
410out_lock:
411 ocfs2_inode_unlock(lqinode, 1);
412out_put:
413 iput(lqinode);
414 if (status < 0)
415 break;
416 }
417out:
418 if (status < 0) {
419 ocfs2_free_quota_recovery(rec);
420 rec = ERR_PTR(status);
421 }
422 return rec;
423}
424
425/* Sync changes in local quota file into global quota file and
426 * reinitialize local quota file.
427 * The function expects local quota file to be already locked and
428 * dqonoff_mutex locked. */
429static int ocfs2_recover_local_quota_file(struct inode *lqinode,
430 int type,
431 struct ocfs2_quota_recovery *rec)
432{
433 struct super_block *sb = lqinode->i_sb;
434 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
435 struct ocfs2_local_disk_chunk *dchunk;
436 struct ocfs2_local_disk_dqblk *dqblk;
437 struct dquot *dquot;
438 handle_t *handle;
439 struct buffer_head *hbh = NULL, *qbh = NULL;
440 int status = 0;
441 int bit, chunk;
442 struct ocfs2_recovery_chunk *rchunk, *next;
443 qsize_t spacechange, inodechange;
444
445 mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
446
447 status = ocfs2_lock_global_qf(oinfo, 1);
448 if (status < 0)
449 goto out;
450
451 list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
452 chunk = rchunk->rc_chunk;
453 hbh = NULL;
454 status = ocfs2_read_quota_block(lqinode,
455 ol_quota_chunk_block(sb, chunk),
456 &hbh);
457 if (status) {
458 mlog_errno(status);
459 break;
460 }
461 dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
462 for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
463 qbh = NULL;
464 status = ocfs2_read_quota_block(lqinode,
465 ol_dqblk_block(sb, chunk, bit),
466 &qbh);
467 if (status) {
468 mlog_errno(status);
469 break;
470 }
471 dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
472 ol_dqblk_block_off(sb, chunk, bit));
473 dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type);
474 if (!dquot) {
475 status = -EIO;
476 mlog(ML_ERROR, "Failed to get quota structure "
477 "for id %u, type %d. Cannot finish quota "
478 "file recovery.\n",
479 (unsigned)le64_to_cpu(dqblk->dqb_id),
480 type);
481 goto out_put_bh;
482 }
483 handle = ocfs2_start_trans(OCFS2_SB(sb),
484 OCFS2_QSYNC_CREDITS);
485 if (IS_ERR(handle)) {
486 status = PTR_ERR(handle);
487 mlog_errno(status);
488 goto out_put_dquot;
489 }
490 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
491 spin_lock(&dq_data_lock);
492 /* Add usage from quota entry into quota changes
493 * of our node. Auxiliary variables are important
494 * due to signedness */
495 spacechange = le64_to_cpu(dqblk->dqb_spacemod);
496 inodechange = le64_to_cpu(dqblk->dqb_inodemod);
497 dquot->dq_dqb.dqb_curspace += spacechange;
498 dquot->dq_dqb.dqb_curinodes += inodechange;
499 spin_unlock(&dq_data_lock);
500 /* We want to drop reference held by the crashed
501 * node. Since we have our own reference we know
502 * global structure actually won't be freed. */
503 status = ocfs2_global_release_dquot(dquot);
504 if (status < 0) {
505 mlog_errno(status);
506 goto out_commit;
507 }
508 /* Release local quota file entry */
509 status = ocfs2_journal_access_dq(handle, lqinode,
510 qbh, OCFS2_JOURNAL_ACCESS_WRITE);
511 if (status < 0) {
512 mlog_errno(status);
513 goto out_commit;
514 }
515 lock_buffer(qbh);
516 WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap));
517 ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
518 le32_add_cpu(&dchunk->dqc_free, 1);
519 unlock_buffer(qbh);
520 status = ocfs2_journal_dirty(handle, qbh);
521 if (status < 0)
522 mlog_errno(status);
523out_commit:
524 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
525 ocfs2_commit_trans(OCFS2_SB(sb), handle);
526out_put_dquot:
527 dqput(dquot);
528out_put_bh:
529 brelse(qbh);
530 if (status < 0)
531 break;
532 }
533 brelse(hbh);
534 list_del(&rchunk->rc_list);
535 kfree(rchunk->rc_bitmap);
536 kfree(rchunk);
537 if (status < 0)
538 break;
539 }
540 ocfs2_unlock_global_qf(oinfo, 1);
541out:
542 if (status < 0)
543 free_recovery_list(&(rec->r_list[type]));
544 mlog_exit(status);
545 return status;
546}
547
548/* Recover local quota files for given node different from us */
549int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
550 struct ocfs2_quota_recovery *rec,
551 int slot_num)
552{
553 unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
554 LOCAL_GROUP_QUOTA_SYSTEM_INODE };
555 struct super_block *sb = osb->sb;
556 struct ocfs2_local_disk_dqinfo *ldinfo;
557 struct buffer_head *bh;
558 handle_t *handle;
559 int type;
560 int status = 0;
561 struct inode *lqinode;
562 unsigned int flags;
563
564 mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num);
565 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
566 for (type = 0; type < MAXQUOTAS; type++) {
567 if (list_empty(&(rec->r_list[type])))
568 continue;
569 mlog(0, "Recovering quota in slot %d\n", slot_num);
570 lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
571 if (!lqinode) {
572 status = -ENOENT;
573 goto out;
574 }
575 status = ocfs2_inode_lock_full(lqinode, NULL, 1,
576 OCFS2_META_LOCK_NOQUEUE);
577 /* Someone else is holding the lock? Then he must be
578 * doing the recovery. Just skip the file... */
579 if (status == -EAGAIN) {
580 mlog(ML_NOTICE, "skipping quota recovery for slot %d "
581 "because quota file is locked.\n", slot_num);
582 status = 0;
583 goto out_put;
584 } else if (status < 0) {
585 mlog_errno(status);
586 goto out_put;
587 }
588 /* Now read local header */
589 bh = NULL;
590 status = ocfs2_read_quota_block(lqinode, 0, &bh);
591 if (status) {
592 mlog_errno(status);
593 mlog(ML_ERROR, "failed to read quota file info header "
594 "(slot=%d type=%d)\n", slot_num, type);
595 goto out_lock;
596 }
597 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
598 OCFS2_LOCAL_INFO_OFF);
599 /* Is recovery still needed? */
600 flags = le32_to_cpu(ldinfo->dqi_flags);
601 if (!(flags & OLQF_CLEAN))
602 status = ocfs2_recover_local_quota_file(lqinode,
603 type,
604 rec);
605 /* We don't want to mark file as clean when it is actually
606 * active */
607 if (slot_num == osb->slot_num)
608 goto out_bh;
609 /* Mark quota file as clean if we are recovering quota file of
610 * some other node. */
611 handle = ocfs2_start_trans(osb, 1);
612 if (IS_ERR(handle)) {
613 status = PTR_ERR(handle);
614 mlog_errno(status);
615 goto out_bh;
616 }
617 status = ocfs2_journal_access_dq(handle, lqinode, bh,
618 OCFS2_JOURNAL_ACCESS_WRITE);
619 if (status < 0) {
620 mlog_errno(status);
621 goto out_trans;
622 }
623 lock_buffer(bh);
624 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
625 unlock_buffer(bh);
626 status = ocfs2_journal_dirty(handle, bh);
627 if (status < 0)
628 mlog_errno(status);
629out_trans:
630 ocfs2_commit_trans(osb, handle);
631out_bh:
632 brelse(bh);
633out_lock:
634 ocfs2_inode_unlock(lqinode, 1);
635out_put:
636 iput(lqinode);
637 if (status < 0)
638 break;
639 }
640out:
641 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
642 kfree(rec);
643 return status;
644}
645
646/* Read information header from quota file */
647static int ocfs2_local_read_info(struct super_block *sb, int type)
648{
649 struct ocfs2_local_disk_dqinfo *ldinfo;
650 struct mem_dqinfo *info = sb_dqinfo(sb, type);
651 struct ocfs2_mem_dqinfo *oinfo;
652 struct inode *lqinode = sb_dqopt(sb)->files[type];
653 int status;
654 struct buffer_head *bh = NULL;
655 struct ocfs2_quota_recovery *rec;
656 int locked = 0;
657
658 info->dqi_maxblimit = 0x7fffffffffffffffLL;
659 info->dqi_maxilimit = 0x7fffffffffffffffLL;
660 oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
661 if (!oinfo) {
662 mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
663 " info.");
664 goto out_err;
665 }
666 info->dqi_priv = oinfo;
667 oinfo->dqi_type = type;
668 INIT_LIST_HEAD(&oinfo->dqi_chunk);
669 oinfo->dqi_rec = NULL;
670 oinfo->dqi_lqi_bh = NULL;
671 oinfo->dqi_ibh = NULL;
672
673 status = ocfs2_global_read_info(sb, type);
674 if (status < 0)
675 goto out_err;
676
677 status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
678 if (status < 0) {
679 mlog_errno(status);
680 goto out_err;
681 }
682 locked = 1;
683
684 /* Now read local header */
685 status = ocfs2_read_quota_block(lqinode, 0, &bh);
686 if (status) {
687 mlog_errno(status);
688 mlog(ML_ERROR, "failed to read quota file info header "
689 "(type=%d)\n", type);
690 goto out_err;
691 }
692 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
693 OCFS2_LOCAL_INFO_OFF);
694 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
695 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
696 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
697 oinfo->dqi_ibh = bh;
698
699 /* We crashed when using local quota file? */
700 if (!(info->dqi_flags & OLQF_CLEAN)) {
701 rec = OCFS2_SB(sb)->quota_rec;
702 if (!rec) {
703 rec = ocfs2_alloc_quota_recovery();
704 if (!rec) {
705 status = -ENOMEM;
706 mlog_errno(status);
707 goto out_err;
708 }
709 OCFS2_SB(sb)->quota_rec = rec;
710 }
711
712 status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
713 &rec->r_list[type]);
714 if (status < 0) {
715 mlog_errno(status);
716 goto out_err;
717 }
718 }
719
720 status = ocfs2_load_local_quota_bitmaps(lqinode,
721 ldinfo,
722 &oinfo->dqi_chunk);
723 if (status < 0) {
724 mlog_errno(status);
725 goto out_err;
726 }
727
728 /* Now mark quota file as used */
729 info->dqi_flags &= ~OLQF_CLEAN;
730 status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
731 if (status < 0) {
732 mlog_errno(status);
733 goto out_err;
734 }
735
736 return 0;
737out_err:
738 if (oinfo) {
739 iput(oinfo->dqi_gqinode);
740 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
741 ocfs2_lock_res_free(&oinfo->dqi_gqlock);
742 brelse(oinfo->dqi_lqi_bh);
743 if (locked)
744 ocfs2_inode_unlock(lqinode, 1);
745 ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
746 kfree(oinfo);
747 }
748 brelse(bh);
749 return -1;
750}
751
752/* Write local info to quota file */
753static int ocfs2_local_write_info(struct super_block *sb, int type)
754{
755 struct mem_dqinfo *info = sb_dqinfo(sb, type);
756 struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
757 ->dqi_ibh;
758 int status;
759
760 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
761 info);
762 if (status < 0) {
763 mlog_errno(status);
764 return -1;
765 }
766
767 return 0;
768}
769
770/* Release info from memory */
771static int ocfs2_local_free_info(struct super_block *sb, int type)
772{
773 struct mem_dqinfo *info = sb_dqinfo(sb, type);
774 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
775 struct ocfs2_quota_chunk *chunk;
776 struct ocfs2_local_disk_chunk *dchunk;
777 int mark_clean = 1, len;
778 int status;
779
780 /* At this point we know there are no more dquots and thus
781 * even if there's some sync in the pdflush queue, it won't
782 * find any dquots and return without doing anything */
783 cancel_delayed_work_sync(&oinfo->dqi_sync_work);
784 iput(oinfo->dqi_gqinode);
785 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
786 ocfs2_lock_res_free(&oinfo->dqi_gqlock);
787 list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
788 dchunk = (struct ocfs2_local_disk_chunk *)
789 (chunk->qc_headerbh->b_data);
790 if (chunk->qc_num < oinfo->dqi_chunks - 1) {
791 len = ol_chunk_entries(sb);
792 } else {
793 len = (oinfo->dqi_blocks -
794 ol_quota_chunk_block(sb, chunk->qc_num) - 1)
795 * ol_quota_entries_per_block(sb);
796 }
797 /* Not all entries free? Bug! */
798 if (le32_to_cpu(dchunk->dqc_free) != len) {
799 mlog(ML_ERROR, "releasing quota file with used "
800 "entries (type=%d)\n", type);
801 mark_clean = 0;
802 }
803 }
804 ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
805
806 /* dqonoff_mutex protects us against racing with recovery thread... */
807 if (oinfo->dqi_rec) {
808 ocfs2_free_quota_recovery(oinfo->dqi_rec);
809 mark_clean = 0;
810 }
811
812 if (!mark_clean)
813 goto out;
814
815 /* Mark local file as clean */
816 info->dqi_flags |= OLQF_CLEAN;
817 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
818 oinfo->dqi_ibh,
819 olq_update_info,
820 info);
821 if (status < 0) {
822 mlog_errno(status);
823 goto out;
824 }
825
826out:
827 ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
828 brelse(oinfo->dqi_ibh);
829 brelse(oinfo->dqi_lqi_bh);
830 kfree(oinfo);
831 return 0;
832}
833
834static void olq_set_dquot(struct buffer_head *bh, void *private)
835{
836 struct ocfs2_dquot *od = private;
837 struct ocfs2_local_disk_dqblk *dqblk;
838 struct super_block *sb = od->dq_dquot.dq_sb;
839
840 dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
841 + ol_dqblk_block_offset(sb, od->dq_local_off));
842
843 dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
844 spin_lock(&dq_data_lock);
845 dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
846 od->dq_origspace);
847 dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
848 od->dq_originodes);
849 spin_unlock(&dq_data_lock);
850 mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
851 od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
852 (long long)le64_to_cpu(dqblk->dqb_inodemod));
853}
854
855/* Write dquot to local quota file */
856static int ocfs2_local_write_dquot(struct dquot *dquot)
857{
858 struct super_block *sb = dquot->dq_sb;
859 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
860 struct buffer_head *bh = NULL;
861 int status;
862
863 status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
864 ol_dqblk_file_block(sb, od->dq_local_off),
865 &bh);
866 if (status) {
867 mlog_errno(status);
868 goto out;
869 }
870 status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh,
871 olq_set_dquot, od);
872 if (status < 0) {
873 mlog_errno(status);
874 goto out;
875 }
876out:
877 brelse(bh);
878 return status;
879}
880
881/* Find free entry in local quota file */
882static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
883 int type,
884 int *offset)
885{
886 struct mem_dqinfo *info = sb_dqinfo(sb, type);
887 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
888 struct ocfs2_quota_chunk *chunk;
889 struct ocfs2_local_disk_chunk *dchunk;
890 int found = 0, len;
891
892 list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
893 dchunk = (struct ocfs2_local_disk_chunk *)
894 chunk->qc_headerbh->b_data;
895 if (le32_to_cpu(dchunk->dqc_free) > 0) {
896 found = 1;
897 break;
898 }
899 }
900 if (!found)
901 return NULL;
902
903 if (chunk->qc_num < oinfo->dqi_chunks - 1) {
904 len = ol_chunk_entries(sb);
905 } else {
906 len = (oinfo->dqi_blocks -
907 ol_quota_chunk_block(sb, chunk->qc_num) - 1)
908 * ol_quota_entries_per_block(sb);
909 }
910
911 found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0);
912 /* We failed? */
913 if (found == len) {
914 mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u"
915 " entries free (type=%d)\n", chunk->qc_num,
916 le32_to_cpu(dchunk->dqc_free), type);
917 return ERR_PTR(-EIO);
918 }
919 *offset = found;
920 return chunk;
921}
922
923/* Add new chunk to the local quota file */
924static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
925 struct super_block *sb,
926 int type,
927 int *offset)
928{
929 struct mem_dqinfo *info = sb_dqinfo(sb, type);
930 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
931 struct inode *lqinode = sb_dqopt(sb)->files[type];
932 struct ocfs2_quota_chunk *chunk = NULL;
933 struct ocfs2_local_disk_chunk *dchunk;
934 int status;
935 handle_t *handle;
936 struct buffer_head *bh = NULL;
937 u64 p_blkno;
938
939 /* We are protected by dqio_sem so no locking needed */
940 status = ocfs2_extend_no_holes(lqinode,
941 lqinode->i_size + 2 * sb->s_blocksize,
942 lqinode->i_size);
943 if (status < 0) {
944 mlog_errno(status);
945 goto out;
946 }
947 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
948 lqinode->i_size + 2 * sb->s_blocksize);
949 if (status < 0) {
950 mlog_errno(status);
951 goto out;
952 }
953
954 chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
955 if (!chunk) {
956 status = -ENOMEM;
957 mlog_errno(status);
958 goto out;
959 }
960
961 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
962 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
963 &p_blkno, NULL, NULL);
964 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
965 if (status < 0) {
966 mlog_errno(status);
967 goto out;
968 }
969 bh = sb_getblk(sb, p_blkno);
970 if (!bh) {
971 status = -ENOMEM;
972 mlog_errno(status);
973 goto out;
974 }
975 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
976
977 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
978 if (IS_ERR(handle)) {
979 status = PTR_ERR(handle);
980 mlog_errno(status);
981 goto out;
982 }
983
984 status = ocfs2_journal_access_dq(handle, lqinode, bh,
985 OCFS2_JOURNAL_ACCESS_WRITE);
986 if (status < 0) {
987 mlog_errno(status);
988 goto out_trans;
989 }
990 lock_buffer(bh);
991 dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb));
992 memset(dchunk->dqc_bitmap, 0,
993 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
994 OCFS2_QBLK_RESERVED_SPACE);
995 set_buffer_uptodate(bh);
996 unlock_buffer(bh);
997 status = ocfs2_journal_dirty(handle, bh);
998 if (status < 0) {
999 mlog_errno(status);
1000 goto out_trans;
1001 }
1002
1003 oinfo->dqi_blocks += 2;
1004 oinfo->dqi_chunks++;
1005 status = ocfs2_local_write_info(sb, type);
1006 if (status < 0) {
1007 mlog_errno(status);
1008 goto out_trans;
1009 }
1010 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
1011 if (status < 0) {
1012 mlog_errno(status);
1013 goto out;
1014 }
1015
1016 list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk);
1017 chunk->qc_num = list_entry(chunk->qc_chunk.prev,
1018 struct ocfs2_quota_chunk,
1019 qc_chunk)->qc_num + 1;
1020 chunk->qc_headerbh = bh;
1021 *offset = 0;
1022 return chunk;
1023out_trans:
1024 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1025out:
1026 brelse(bh);
1027 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1028 return ERR_PTR(status);
1029}
1030
1031/* Find free entry in local quota file */
1032static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1033 struct super_block *sb,
1034 int type,
1035 int *offset)
1036{
1037 struct mem_dqinfo *info = sb_dqinfo(sb, type);
1038 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
1039 struct ocfs2_quota_chunk *chunk;
1040 struct inode *lqinode = sb_dqopt(sb)->files[type];
1041 struct ocfs2_local_disk_chunk *dchunk;
1042 int epb = ol_quota_entries_per_block(sb);
1043 unsigned int chunk_blocks;
1044 int status;
1045 handle_t *handle;
1046
1047 if (list_empty(&oinfo->dqi_chunk))
1048 return ocfs2_local_quota_add_chunk(sb, type, offset);
1049 /* Is the last chunk full? */
1050 chunk = list_entry(oinfo->dqi_chunk.prev,
1051 struct ocfs2_quota_chunk, qc_chunk);
1052 chunk_blocks = oinfo->dqi_blocks -
1053 ol_quota_chunk_block(sb, chunk->qc_num) - 1;
1054 if (ol_chunk_blocks(sb) == chunk_blocks)
1055 return ocfs2_local_quota_add_chunk(sb, type, offset);
1056
1057 /* We are protected by dqio_sem so no locking needed */
1058 status = ocfs2_extend_no_holes(lqinode,
1059 lqinode->i_size + sb->s_blocksize,
1060 lqinode->i_size);
1061 if (status < 0) {
1062 mlog_errno(status);
1063 goto out;
1064 }
1065 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
1066 lqinode->i_size + sb->s_blocksize);
1067 if (status < 0) {
1068 mlog_errno(status);
1069 goto out;
1070 }
1071 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
1072 if (IS_ERR(handle)) {
1073 status = PTR_ERR(handle);
1074 mlog_errno(status);
1075 goto out;
1076 }
1077 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1078 OCFS2_JOURNAL_ACCESS_WRITE);
1079 if (status < 0) {
1080 mlog_errno(status);
1081 goto out_trans;
1082 }
1083
1084 dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data;
1085 lock_buffer(chunk->qc_headerbh);
1086 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
1087 unlock_buffer(chunk->qc_headerbh);
1088 status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
1089 if (status < 0) {
1090 mlog_errno(status);
1091 goto out_trans;
1092 }
1093 oinfo->dqi_blocks++;
1094 status = ocfs2_local_write_info(sb, type);
1095 if (status < 0) {
1096 mlog_errno(status);
1097 goto out_trans;
1098 }
1099
1100 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
1101 if (status < 0) {
1102 mlog_errno(status);
1103 goto out;
1104 }
1105 *offset = chunk_blocks * epb;
1106 return chunk;
1107out_trans:
1108 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1109out:
1110 return ERR_PTR(status);
1111}
1112
1113static void olq_alloc_dquot(struct buffer_head *bh, void *private)
1114{
1115 int *offset = private;
1116 struct ocfs2_local_disk_chunk *dchunk;
1117
1118 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
1119 ocfs2_set_bit(*offset, dchunk->dqc_bitmap);
1120 le32_add_cpu(&dchunk->dqc_free, -1);
1121}
1122
1123/* Create dquot in the local file for given id */
1124static int ocfs2_create_local_dquot(struct dquot *dquot)
1125{
1126 struct super_block *sb = dquot->dq_sb;
1127 int type = dquot->dq_type;
1128 struct inode *lqinode = sb_dqopt(sb)->files[type];
1129 struct ocfs2_quota_chunk *chunk;
1130 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1131 int offset;
1132 int status;
1133
1134 chunk = ocfs2_find_free_entry(sb, type, &offset);
1135 if (!chunk) {
1136 chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
1137 if (IS_ERR(chunk))
1138 return PTR_ERR(chunk);
1139 } else if (IS_ERR(chunk)) {
1140 return PTR_ERR(chunk);
1141 }
1142 od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
1143 od->dq_chunk = chunk;
1144
1145 /* Initialize dquot structure on disk */
1146 status = ocfs2_local_write_dquot(dquot);
1147 if (status < 0) {
1148 mlog_errno(status);
1149 goto out;
1150 }
1151
1152 /* Mark structure as allocated */
1153 status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot,
1154 &offset);
1155 if (status < 0) {
1156 mlog_errno(status);
1157 goto out;
1158 }
1159out:
1160 return status;
1161}
1162
1163/* Create entry in local file for dquot, load data from the global file */
1164static int ocfs2_local_read_dquot(struct dquot *dquot)
1165{
1166 int status;
1167
1168 mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
1169
1170 status = ocfs2_global_read_dquot(dquot);
1171 if (status < 0) {
1172 mlog_errno(status);
1173 goto out_err;
1174 }
1175
1176 /* Now create entry in the local quota file */
1177 status = ocfs2_create_local_dquot(dquot);
1178 if (status < 0) {
1179 mlog_errno(status);
1180 goto out_err;
1181 }
1182 mlog_exit(0);
1183 return 0;
1184out_err:
1185 mlog_exit(status);
1186 return status;
1187}
1188
1189/* Release dquot structure from local quota file. ocfs2_release_dquot() has
1190 * already started a transaction and obtained exclusive lock for global
1191 * quota file. */
1192static int ocfs2_local_release_dquot(struct dquot *dquot)
1193{
1194 int status;
1195 int type = dquot->dq_type;
1196 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1197 struct super_block *sb = dquot->dq_sb;
1198 struct ocfs2_local_disk_chunk *dchunk;
1199 int offset;
1200 handle_t *handle = journal_current_handle();
1201
1202 BUG_ON(!handle);
1203 /* First write all local changes to global file */
1204 status = ocfs2_global_release_dquot(dquot);
1205 if (status < 0) {
1206 mlog_errno(status);
1207 goto out;
1208 }
1209
1210 status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type],
1211 od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
1212 if (status < 0) {
1213 mlog_errno(status);
1214 goto out;
1215 }
1216 offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num,
1217 od->dq_local_off);
1218 dchunk = (struct ocfs2_local_disk_chunk *)
1219 (od->dq_chunk->qc_headerbh->b_data);
1220 /* Mark structure as freed */
1221 lock_buffer(od->dq_chunk->qc_headerbh);
1222 ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
1223 le32_add_cpu(&dchunk->dqc_free, 1);
1224 unlock_buffer(od->dq_chunk->qc_headerbh);
1225 status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
1226 if (status < 0) {
1227 mlog_errno(status);
1228 goto out;
1229 }
1230 status = 0;
1231out:
1232 /* Clear the read bit so that next time someone uses this
1233 * dquot he reads fresh info from disk and allocates local
1234 * dquot structure */
1235 clear_bit(DQ_READ_B, &dquot->dq_flags);
1236 return status;
1237}
1238
1239static struct quota_format_ops ocfs2_format_ops = {
1240 .check_quota_file = ocfs2_local_check_quota_file,
1241 .read_file_info = ocfs2_local_read_info,
1242 .write_file_info = ocfs2_global_write_info,
1243 .free_file_info = ocfs2_local_free_info,
1244 .read_dqblk = ocfs2_local_read_dquot,
1245 .commit_dqblk = ocfs2_local_write_dquot,
1246 .release_dqblk = ocfs2_local_release_dquot,
1247};
1248
1249struct quota_format_type ocfs2_quota_format = {
1250 .qf_fmt_id = QFMT_OCFS2,
1251 .qf_ops = &ocfs2_format_ops,
1252 .qf_owner = THIS_MODULE
1253};
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index ffd48db229a7..424adaa5f900 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", 106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
107 new_clusters, first_new_cluster); 107 new_clusters, first_new_cluster);
108 108
109 ret = ocfs2_journal_access(handle, bm_inode, group_bh, 109 ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh,
110 OCFS2_JOURNAL_ACCESS_WRITE); 110 OCFS2_JOURNAL_ACCESS_WRITE);
111 if (ret < 0) { 111 if (ret < 0) {
112 mlog_errno(ret); 112 mlog_errno(ret);
113 goto out; 113 goto out;
@@ -141,8 +141,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
141 } 141 }
142 142
143 /* update the inode accordingly. */ 143 /* update the inode accordingly. */
144 ret = ocfs2_journal_access(handle, bm_inode, bm_bh, 144 ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh,
145 OCFS2_JOURNAL_ACCESS_WRITE); 145 OCFS2_JOURNAL_ACCESS_WRITE);
146 if (ret < 0) { 146 if (ret < 0) {
147 mlog_errno(ret); 147 mlog_errno(ret);
148 goto out_rollback; 148 goto out_rollback;
@@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
314 314
315 fe = (struct ocfs2_dinode *)main_bm_bh->b_data; 315 fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
316 316
317 /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
318 * so any corruption is a code bug. */
319 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
320
317 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 321 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
318 ocfs2_group_bitmap_size(osb->sb) * 8) { 322 ocfs2_group_bitmap_size(osb->sb) * 8) {
319 mlog(ML_ERROR, "The disk is too old and small. " 323 mlog(ML_ERROR, "The disk is too old and small. "
@@ -322,30 +326,18 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
322 goto out_unlock; 326 goto out_unlock;
323 } 327 }
324 328
325 if (!OCFS2_IS_VALID_DINODE(fe)) {
326 OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
327 ret = -EIO;
328 goto out_unlock;
329 }
330
331 first_new_cluster = le32_to_cpu(fe->i_clusters); 329 first_new_cluster = le32_to_cpu(fe->i_clusters);
332 lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, 330 lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
333 first_new_cluster - 1); 331 first_new_cluster - 1);
334 332
335 ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); 333 ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
334 &group_bh);
336 if (ret < 0) { 335 if (ret < 0) {
337 mlog_errno(ret); 336 mlog_errno(ret);
338 goto out_unlock; 337 goto out_unlock;
339 } 338 }
340
341 group = (struct ocfs2_group_desc *)group_bh->b_data; 339 group = (struct ocfs2_group_desc *)group_bh->b_data;
342 340
343 ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
344 if (ret) {
345 mlog_errno(ret);
346 goto out_unlock;
347 }
348
349 cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); 341 cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
350 if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > 342 if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
351 le16_to_cpu(fe->id2.i_chain.cl_cpg)) { 343 le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
@@ -398,41 +390,16 @@ static int ocfs2_check_new_group(struct inode *inode,
398 struct buffer_head *group_bh) 390 struct buffer_head *group_bh)
399{ 391{
400 int ret; 392 int ret;
401 struct ocfs2_group_desc *gd; 393 struct ocfs2_group_desc *gd =
394 (struct ocfs2_group_desc *)group_bh->b_data;
402 u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); 395 u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
403 unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
404 le16_to_cpu(di->id2.i_chain.cl_bpc);
405
406 396
407 gd = (struct ocfs2_group_desc *)group_bh->b_data; 397 ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
398 if (ret)
399 goto out;
408 400
409 ret = -EIO; 401 ret = -EINVAL;
410 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) 402 if (le16_to_cpu(gd->bg_chain) != input->chain)
411 mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
412 (unsigned long long)le64_to_cpu(gd->bg_blkno));
413 else if (di->i_blkno != gd->bg_parent_dinode)
414 mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
415 "pointer (%llu, expected %llu)\n",
416 (unsigned long long)le64_to_cpu(gd->bg_blkno),
417 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
418 (unsigned long long)le64_to_cpu(di->i_blkno));
419 else if (le16_to_cpu(gd->bg_bits) > max_bits)
420 mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
421 (unsigned long long)le64_to_cpu(gd->bg_blkno),
422 le16_to_cpu(gd->bg_bits));
423 else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
424 mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
425 "claims that %u are free\n",
426 (unsigned long long)le64_to_cpu(gd->bg_blkno),
427 le16_to_cpu(gd->bg_bits),
428 le16_to_cpu(gd->bg_free_bits_count));
429 else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
430 mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
431 "max bitmap bits of %u\n",
432 (unsigned long long)le64_to_cpu(gd->bg_blkno),
433 le16_to_cpu(gd->bg_bits),
434 8 * le16_to_cpu(gd->bg_size));
435 else if (le16_to_cpu(gd->bg_chain) != input->chain)
436 mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " 403 mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
437 "while input has %u set.\n", 404 "while input has %u set.\n",
438 (unsigned long long)le64_to_cpu(gd->bg_blkno), 405 (unsigned long long)le64_to_cpu(gd->bg_blkno),
@@ -451,6 +418,7 @@ static int ocfs2_check_new_group(struct inode *inode,
451 else 418 else
452 ret = 0; 419 ret = 0;
453 420
421out:
454 return ret; 422 return ret;
455} 423}
456 424
@@ -568,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
568 cl = &fe->id2.i_chain; 536 cl = &fe->id2.i_chain;
569 cr = &cl->cl_recs[input->chain]; 537 cr = &cl->cl_recs[input->chain];
570 538
571 ret = ocfs2_journal_access(handle, main_bm_inode, group_bh, 539 ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh,
572 OCFS2_JOURNAL_ACCESS_WRITE); 540 OCFS2_JOURNAL_ACCESS_WRITE);
573 if (ret < 0) { 541 if (ret < 0) {
574 mlog_errno(ret); 542 mlog_errno(ret);
575 goto out_commit; 543 goto out_commit;
@@ -584,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
584 goto out_commit; 552 goto out_commit;
585 } 553 }
586 554
587 ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh, 555 ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh,
588 OCFS2_JOURNAL_ACCESS_WRITE); 556 OCFS2_JOURNAL_ACCESS_WRITE);
589 if (ret < 0) { 557 if (ret < 0) {
590 mlog_errno(ret); 558 mlog_errno(ret);
591 goto out_commit; 559 goto out_commit;
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bdda2d8f8508..40661e7824e9 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
151 * this is not true, the read of -1 (UINT64_MAX) will fail. 151 * this is not true, the read of -1 (UINT64_MAX) will fail.
152 */ 152 */
153 ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, 153 ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
154 OCFS2_BH_IGNORE_CACHE); 154 OCFS2_BH_IGNORE_CACHE, NULL);
155 if (ret == 0) { 155 if (ret == 0) {
156 spin_lock(&osb->osb_lock); 156 spin_lock(&osb->osb_lock);
157 ocfs2_update_slot_info(si); 157 ocfs2_update_slot_info(si);
@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
405 405
406 bh = NULL; /* Acquire a fresh bh */ 406 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, 407 status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
408 OCFS2_BH_IGNORE_CACHE); 408 OCFS2_BH_IGNORE_CACHE, NULL);
409 if (status < 0) { 409 if (status < 0) {
410 mlog_errno(status); 410 mlog_errno(status);
411 goto bail; 411 goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c5ff18b46b57..a69628603e18 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -35,6 +35,7 @@
35#include "ocfs2.h" 35#include "ocfs2.h"
36 36
37#include "alloc.h" 37#include "alloc.h"
38#include "blockcheck.h"
38#include "dlmglue.h" 39#include "dlmglue.h"
39#include "inode.h" 40#include "inode.h"
40#include "journal.h" 41#include "journal.h"
@@ -145,62 +146,183 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
145 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 146 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
146} 147}
147 148
148/* somewhat more expensive than our other checks, so use sparingly. */ 149#define do_error(fmt, ...) \
149int ocfs2_check_group_descriptor(struct super_block *sb, 150 do{ \
150 struct ocfs2_dinode *di, 151 if (clean_error) \
151 struct ocfs2_group_desc *gd) 152 mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
153 else \
154 ocfs2_error(sb, fmt, ##__VA_ARGS__); \
155 } while (0)
156
157static int ocfs2_validate_gd_self(struct super_block *sb,
158 struct buffer_head *bh,
159 int clean_error)
152{ 160{
153 unsigned int max_bits; 161 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
154 162
155 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { 163 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
156 OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); 164 do_error("Group descriptor #%llu has bad signature %.*s",
157 return -EIO; 165 (unsigned long long)bh->b_blocknr, 7,
166 gd->bg_signature);
167 return -EINVAL;
158 } 168 }
159 169
170 if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
171 do_error("Group descriptor #%llu has an invalid bg_blkno "
172 "of %llu",
173 (unsigned long long)bh->b_blocknr,
174 (unsigned long long)le64_to_cpu(gd->bg_blkno));
175 return -EINVAL;
176 }
177
178 if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
179 do_error("Group descriptor #%llu has an invalid "
180 "fs_generation of #%u",
181 (unsigned long long)bh->b_blocknr,
182 le32_to_cpu(gd->bg_generation));
183 return -EINVAL;
184 }
185
186 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
187 do_error("Group descriptor #%llu has bit count %u but "
188 "claims that %u are free",
189 (unsigned long long)bh->b_blocknr,
190 le16_to_cpu(gd->bg_bits),
191 le16_to_cpu(gd->bg_free_bits_count));
192 return -EINVAL;
193 }
194
195 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
196 do_error("Group descriptor #%llu has bit count %u but "
197 "max bitmap bits of %u",
198 (unsigned long long)bh->b_blocknr,
199 le16_to_cpu(gd->bg_bits),
200 8 * le16_to_cpu(gd->bg_size));
201 return -EINVAL;
202 }
203
204 return 0;
205}
206
207static int ocfs2_validate_gd_parent(struct super_block *sb,
208 struct ocfs2_dinode *di,
209 struct buffer_head *bh,
210 int clean_error)
211{
212 unsigned int max_bits;
213 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
214
160 if (di->i_blkno != gd->bg_parent_dinode) { 215 if (di->i_blkno != gd->bg_parent_dinode) {
161 ocfs2_error(sb, "Group descriptor # %llu has bad parent " 216 do_error("Group descriptor #%llu has bad parent "
162 "pointer (%llu, expected %llu)", 217 "pointer (%llu, expected %llu)",
163 (unsigned long long)le64_to_cpu(gd->bg_blkno), 218 (unsigned long long)bh->b_blocknr,
164 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), 219 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
165 (unsigned long long)le64_to_cpu(di->i_blkno)); 220 (unsigned long long)le64_to_cpu(di->i_blkno));
166 return -EIO; 221 return -EINVAL;
167 } 222 }
168 223
169 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); 224 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
170 if (le16_to_cpu(gd->bg_bits) > max_bits) { 225 if (le16_to_cpu(gd->bg_bits) > max_bits) {
171 ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", 226 do_error("Group descriptor #%llu has bit count of %u",
172 (unsigned long long)le64_to_cpu(gd->bg_blkno), 227 (unsigned long long)bh->b_blocknr,
173 le16_to_cpu(gd->bg_bits)); 228 le16_to_cpu(gd->bg_bits));
174 return -EIO; 229 return -EINVAL;
175 } 230 }
176 231
177 if (le16_to_cpu(gd->bg_chain) >= 232 if (le16_to_cpu(gd->bg_chain) >=
178 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { 233 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
179 ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", 234 do_error("Group descriptor #%llu has bad chain %u",
180 (unsigned long long)le64_to_cpu(gd->bg_blkno), 235 (unsigned long long)bh->b_blocknr,
181 le16_to_cpu(gd->bg_chain)); 236 le16_to_cpu(gd->bg_chain));
182 return -EIO; 237 return -EINVAL;
183 } 238 }
184 239
185 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { 240 return 0;
186 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " 241}
187 "claims that %u are free",
188 (unsigned long long)le64_to_cpu(gd->bg_blkno),
189 le16_to_cpu(gd->bg_bits),
190 le16_to_cpu(gd->bg_free_bits_count));
191 return -EIO;
192 }
193 242
194 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { 243#undef do_error
195 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " 244
196 "max bitmap bits of %u", 245/*
197 (unsigned long long)le64_to_cpu(gd->bg_blkno), 246 * This version only prints errors. It does not fail the filesystem, and
198 le16_to_cpu(gd->bg_bits), 247 * exists only for resize.
199 8 * le16_to_cpu(gd->bg_size)); 248 */
200 return -EIO; 249int ocfs2_check_group_descriptor(struct super_block *sb,
250 struct ocfs2_dinode *di,
251 struct buffer_head *bh)
252{
253 int rc;
254 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
255
256 BUG_ON(!buffer_uptodate(bh));
257
258 /*
259 * If the ecc fails, we return the error but otherwise
260 * leave the filesystem running. We know any error is
261 * local to this block.
262 */
263 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
264 if (rc) {
265 mlog(ML_ERROR,
266 "Checksum failed for group descriptor %llu\n",
267 (unsigned long long)bh->b_blocknr);
268 } else
269 rc = ocfs2_validate_gd_self(sb, bh, 1);
270 if (!rc)
271 rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
272
273 return rc;
274}
275
276static int ocfs2_validate_group_descriptor(struct super_block *sb,
277 struct buffer_head *bh)
278{
279 int rc;
280 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
281
282 mlog(0, "Validating group descriptor %llu\n",
283 (unsigned long long)bh->b_blocknr);
284
285 BUG_ON(!buffer_uptodate(bh));
286
287 /*
288 * If the ecc fails, we return the error but otherwise
289 * leave the filesystem running. We know any error is
290 * local to this block.
291 */
292 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
293 if (rc)
294 return rc;
295
296 /*
297 * Errors after here are fatal.
298 */
299
300 return ocfs2_validate_gd_self(sb, bh, 0);
301}
302
303int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
304 u64 gd_blkno, struct buffer_head **bh)
305{
306 int rc;
307 struct buffer_head *tmp = *bh;
308
309 rc = ocfs2_read_block(inode, gd_blkno, &tmp,
310 ocfs2_validate_group_descriptor);
311 if (rc)
312 goto out;
313
314 rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
315 if (rc) {
316 brelse(tmp);
317 goto out;
201 } 318 }
202 319
203 return 0; 320 /* If ocfs2_read_block() got us a new bh, pass it up. */
321 if (!*bh)
322 *bh = tmp;
323
324out:
325 return rc;
204} 326}
205 327
206static int ocfs2_block_group_fill(handle_t *handle, 328static int ocfs2_block_group_fill(handle_t *handle,
@@ -225,10 +347,10 @@ static int ocfs2_block_group_fill(handle_t *handle,
225 goto bail; 347 goto bail;
226 } 348 }
227 349
228 status = ocfs2_journal_access(handle, 350 status = ocfs2_journal_access_gd(handle,
229 alloc_inode, 351 alloc_inode,
230 bg_bh, 352 bg_bh,
231 OCFS2_JOURNAL_ACCESS_CREATE); 353 OCFS2_JOURNAL_ACCESS_CREATE);
232 if (status < 0) { 354 if (status < 0) {
233 mlog_errno(status); 355 mlog_errno(status);
234 goto bail; 356 goto bail;
@@ -358,8 +480,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
358 480
359 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 481 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
360 482
361 status = ocfs2_journal_access(handle, alloc_inode, 483 status = ocfs2_journal_access_di(handle, alloc_inode,
362 bh, OCFS2_JOURNAL_ACCESS_WRITE); 484 bh, OCFS2_JOURNAL_ACCESS_WRITE);
363 if (status < 0) { 485 if (status < 0) {
364 mlog_errno(status); 486 mlog_errno(status);
365 goto bail; 487 goto bail;
@@ -441,11 +563,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
441 ac->ac_alloc_slot = slot; 563 ac->ac_alloc_slot = slot;
442 564
443 fe = (struct ocfs2_dinode *) bh->b_data; 565 fe = (struct ocfs2_dinode *) bh->b_data;
444 if (!OCFS2_IS_VALID_DINODE(fe)) { 566
445 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 567 /* The bh was validated by the inode read inside
446 status = -EIO; 568 * ocfs2_inode_lock(). Any corruption is a code bug. */
447 goto bail; 569 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
448 } 570
449 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { 571 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
450 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", 572 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
451 (unsigned long long)le64_to_cpu(fe->i_blkno)); 573 (unsigned long long)le64_to_cpu(fe->i_blkno));
@@ -790,10 +912,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
790 int offset, start, found, status = 0; 912 int offset, start, found, status = 0;
791 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 913 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
792 914
793 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 915 /* Callers got this descriptor from
794 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); 916 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
795 return -EIO; 917 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
796 }
797 918
798 found = start = best_offset = best_size = 0; 919 found = start = best_offset = best_size = 0;
799 bitmap = bg->bg_bitmap; 920 bitmap = bg->bg_bitmap;
@@ -858,11 +979,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
858 979
859 mlog_entry_void(); 980 mlog_entry_void();
860 981
861 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 982 /* All callers get the descriptor via
862 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 983 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
863 status = -EIO; 984 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
864 goto bail;
865 }
866 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 985 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
867 986
868 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 987 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
@@ -871,10 +990,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
871 if (ocfs2_is_cluster_bitmap(alloc_inode)) 990 if (ocfs2_is_cluster_bitmap(alloc_inode))
872 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 991 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
873 992
874 status = ocfs2_journal_access(handle, 993 status = ocfs2_journal_access_gd(handle,
875 alloc_inode, 994 alloc_inode,
876 group_bh, 995 group_bh,
877 journal_type); 996 journal_type);
878 if (status < 0) { 997 if (status < 0) {
879 mlog_errno(status); 998 mlog_errno(status);
880 goto bail; 999 goto bail;
@@ -931,21 +1050,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
931 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 1050 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
932 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; 1051 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
933 1052
934 if (!OCFS2_IS_VALID_DINODE(fe)) { 1053 /* The caller got these descriptors from
935 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1054 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
936 status = -EIO; 1055 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
937 goto out; 1056 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
938 }
939 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
940 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
941 status = -EIO;
942 goto out;
943 }
944 if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
945 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
946 status = -EIO;
947 goto out;
948 }
949 1057
950 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", 1058 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
951 (unsigned long long)le64_to_cpu(fe->i_blkno), chain, 1059 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
@@ -956,8 +1064,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
956 bg_ptr = le64_to_cpu(bg->bg_next_group); 1064 bg_ptr = le64_to_cpu(bg->bg_next_group);
957 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 1065 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
958 1066
959 status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, 1067 status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh,
960 OCFS2_JOURNAL_ACCESS_WRITE); 1068 OCFS2_JOURNAL_ACCESS_WRITE);
961 if (status < 0) { 1069 if (status < 0) {
962 mlog_errno(status); 1070 mlog_errno(status);
963 goto out_rollback; 1071 goto out_rollback;
@@ -971,8 +1079,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
971 goto out_rollback; 1079 goto out_rollback;
972 } 1080 }
973 1081
974 status = ocfs2_journal_access(handle, alloc_inode, bg_bh, 1082 status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh,
975 OCFS2_JOURNAL_ACCESS_WRITE); 1083 OCFS2_JOURNAL_ACCESS_WRITE);
976 if (status < 0) { 1084 if (status < 0) {
977 mlog_errno(status); 1085 mlog_errno(status);
978 goto out_rollback; 1086 goto out_rollback;
@@ -986,8 +1094,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
986 goto out_rollback; 1094 goto out_rollback;
987 } 1095 }
988 1096
989 status = ocfs2_journal_access(handle, alloc_inode, fe_bh, 1097 status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh,
990 OCFS2_JOURNAL_ACCESS_WRITE); 1098 OCFS2_JOURNAL_ACCESS_WRITE);
991 if (status < 0) { 1099 if (status < 0) {
992 mlog_errno(status); 1100 mlog_errno(status);
993 goto out_rollback; 1101 goto out_rollback;
@@ -1008,7 +1116,7 @@ out_rollback:
1008 bg->bg_next_group = cpu_to_le64(bg_ptr); 1116 bg->bg_next_group = cpu_to_le64(bg_ptr);
1009 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 1117 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1010 } 1118 }
1011out: 1119
1012 mlog_exit(status); 1120 mlog_exit(status);
1013 return status; 1121 return status;
1014} 1122}
@@ -1138,8 +1246,8 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1138 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 1246 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1139 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; 1247 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1140 1248
1141 ret = ocfs2_journal_access(handle, inode, di_bh, 1249 ret = ocfs2_journal_access_di(handle, inode, di_bh,
1142 OCFS2_JOURNAL_ACCESS_WRITE); 1250 OCFS2_JOURNAL_ACCESS_WRITE);
1143 if (ret < 0) { 1251 if (ret < 0) {
1144 mlog_errno(ret); 1252 mlog_errno(ret);
1145 goto out; 1253 goto out;
@@ -1170,21 +1278,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1170 u16 found; 1278 u16 found;
1171 struct buffer_head *group_bh = NULL; 1279 struct buffer_head *group_bh = NULL;
1172 struct ocfs2_group_desc *gd; 1280 struct ocfs2_group_desc *gd;
1281 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1173 struct inode *alloc_inode = ac->ac_inode; 1282 struct inode *alloc_inode = ac->ac_inode;
1174 1283
1175 ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); 1284 ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
1285 &group_bh);
1176 if (ret < 0) { 1286 if (ret < 0) {
1177 mlog_errno(ret); 1287 mlog_errno(ret);
1178 return ret; 1288 return ret;
1179 } 1289 }
1180 1290
1181 gd = (struct ocfs2_group_desc *) group_bh->b_data; 1291 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1182 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
1183 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
1184 ret = -EIO;
1185 goto out;
1186 }
1187
1188 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1292 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1189 ac->ac_max_block, bit_off, &found); 1293 ac->ac_max_block, bit_off, &found);
1190 if (ret < 0) { 1294 if (ret < 0) {
@@ -1241,19 +1345,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1241 bits_wanted, chain, 1345 bits_wanted, chain,
1242 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1346 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1243 1347
1244 status = ocfs2_read_block(alloc_inode, 1348 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1245 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1349 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1246 &group_bh); 1350 &group_bh);
1247 if (status < 0) { 1351 if (status < 0) {
1248 mlog_errno(status); 1352 mlog_errno(status);
1249 goto bail; 1353 goto bail;
1250 } 1354 }
1251 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1355 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1252 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1253 if (status) {
1254 mlog_errno(status);
1255 goto bail;
1256 }
1257 1356
1258 status = -ENOSPC; 1357 status = -ENOSPC;
1259 /* for now, the chain search is a bit simplistic. We just use 1358 /* for now, the chain search is a bit simplistic. We just use
@@ -1271,18 +1370,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1271 next_group = le64_to_cpu(bg->bg_next_group); 1370 next_group = le64_to_cpu(bg->bg_next_group);
1272 prev_group_bh = group_bh; 1371 prev_group_bh = group_bh;
1273 group_bh = NULL; 1372 group_bh = NULL;
1274 status = ocfs2_read_block(alloc_inode, 1373 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1275 next_group, &group_bh); 1374 next_group, &group_bh);
1276 if (status < 0) { 1375 if (status < 0) {
1277 mlog_errno(status); 1376 mlog_errno(status);
1278 goto bail; 1377 goto bail;
1279 } 1378 }
1280 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1379 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1281 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1282 if (status) {
1283 mlog_errno(status);
1284 goto bail;
1285 }
1286 } 1380 }
1287 if (status < 0) { 1381 if (status < 0) {
1288 if (status != -ENOSPC) 1382 if (status != -ENOSPC)
@@ -1324,10 +1418,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1324 1418
1325 /* Ok, claim our bits now: set the info on dinode, chainlist 1419 /* Ok, claim our bits now: set the info on dinode, chainlist
1326 * and then the group */ 1420 * and then the group */
1327 status = ocfs2_journal_access(handle, 1421 status = ocfs2_journal_access_di(handle,
1328 alloc_inode, 1422 alloc_inode,
1329 ac->ac_bh, 1423 ac->ac_bh,
1330 OCFS2_JOURNAL_ACCESS_WRITE); 1424 OCFS2_JOURNAL_ACCESS_WRITE);
1331 if (status < 0) { 1425 if (status < 0) {
1332 mlog_errno(status); 1426 mlog_errno(status);
1333 goto bail; 1427 goto bail;
@@ -1392,11 +1486,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1392 BUG_ON(!ac->ac_bh); 1486 BUG_ON(!ac->ac_bh);
1393 1487
1394 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1488 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1395 if (!OCFS2_IS_VALID_DINODE(fe)) { 1489
1396 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); 1490 /* The bh was validated by the inode read during
1397 status = -EIO; 1491 * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */
1398 goto bail; 1492 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1399 } 1493
1400 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1494 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1401 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1495 le32_to_cpu(fe->id1.bitmap1.i_total)) {
1402 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1496 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
@@ -1725,19 +1819,17 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1725 1819
1726 mlog_entry_void(); 1820 mlog_entry_void();
1727 1821
1728 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1822 /* The caller got this descriptor from
1729 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1823 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1730 status = -EIO; 1824 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1731 goto bail;
1732 }
1733 1825
1734 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1826 mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1735 1827
1736 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1828 if (ocfs2_is_cluster_bitmap(alloc_inode))
1737 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1829 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1738 1830
1739 status = ocfs2_journal_access(handle, alloc_inode, group_bh, 1831 status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh,
1740 journal_type); 1832 journal_type);
1741 if (status < 0) { 1833 if (status < 0) {
1742 mlog_errno(status); 1834 mlog_errno(status);
1743 goto bail; 1835 goto bail;
@@ -1782,29 +1874,26 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1782 1874
1783 mlog_entry_void(); 1875 mlog_entry_void();
1784 1876
1785 if (!OCFS2_IS_VALID_DINODE(fe)) { 1877 /* The alloc_bh comes from ocfs2_free_dinode() or
1786 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1878 * ocfs2_free_clusters(). The callers have all locked the
1787 status = -EIO; 1879 * allocator and gotten alloc_bh from the lock call. This
1788 goto bail; 1880 * validates the dinode buffer. Any corruption that has happended
1789 } 1881 * is a code bug. */
1882 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1790 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 1883 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1791 1884
1792 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", 1885 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1793 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 1886 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1794 (unsigned long long)bg_blkno, start_bit); 1887 (unsigned long long)bg_blkno, start_bit);
1795 1888
1796 status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); 1889 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
1890 &group_bh);
1797 if (status < 0) { 1891 if (status < 0) {
1798 mlog_errno(status); 1892 mlog_errno(status);
1799 goto bail; 1893 goto bail;
1800 } 1894 }
1801
1802 group = (struct ocfs2_group_desc *) group_bh->b_data; 1895 group = (struct ocfs2_group_desc *) group_bh->b_data;
1803 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); 1896
1804 if (status) {
1805 mlog_errno(status);
1806 goto bail;
1807 }
1808 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1897 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1809 1898
1810 status = ocfs2_block_group_clear_bits(handle, alloc_inode, 1899 status = ocfs2_block_group_clear_bits(handle, alloc_inode,
@@ -1815,8 +1904,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1815 goto bail; 1904 goto bail;
1816 } 1905 }
1817 1906
1818 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, 1907 status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh,
1819 OCFS2_JOURNAL_ACCESS_WRITE); 1908 OCFS2_JOURNAL_ACCESS_WRITE);
1820 if (status < 0) { 1909 if (status < 0) {
1821 mlog_errno(status); 1910 mlog_errno(status);
1822 goto bail; 1911 goto bail;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 4df159d8f450..e3c13c77f9e8 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -164,10 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
164 * and return that block offset. */ 164 * and return that block offset. */
165u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); 165u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
166 166
167/* somewhat more expensive than our other checks, so use sparingly. */ 167/*
168 * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
169 * finds a problem. A caller that wants to check a group descriptor
170 * without going readonly should read the block with ocfs2_read_block[s]()
171 * and then check it with this function. This is only resize, really.
172 * Everyone else should be using ocfs2_read_group_descriptor().
173 */
168int ocfs2_check_group_descriptor(struct super_block *sb, 174int ocfs2_check_group_descriptor(struct super_block *sb,
169 struct ocfs2_dinode *di, 175 struct ocfs2_dinode *di,
170 struct ocfs2_group_desc *gd); 176 struct buffer_head *bh);
177/*
178 * Read a group descriptor block into *bh. If *bh is NULL, a bh will be
179 * allocated. This is a cached read. The descriptor will be validated with
180 * ocfs2_validate_group_descriptor().
181 */
182int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
183 u64 gd_blkno, struct buffer_head **bh);
184
171int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, 185int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
172 u32 clusters_to_add, u32 extents_to_split, 186 u32 clusters_to_add, u32 extents_to_split,
173 struct ocfs2_alloc_context **data_ac, 187 struct ocfs2_alloc_context **data_ac,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 304b63ac78cf..43ed11345b59 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,6 +41,7 @@
41#include <linux/debugfs.h> 41#include <linux/debugfs.h>
42#include <linux/mount.h> 42#include <linux/mount.h>
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <linux/quotaops.h>
44 45
45#define MLOG_MASK_PREFIX ML_SUPER 46#define MLOG_MASK_PREFIX ML_SUPER
46#include <cluster/masklog.h> 47#include <cluster/masklog.h>
@@ -51,6 +52,7 @@
51#include "ocfs1_fs_compat.h" 52#include "ocfs1_fs_compat.h"
52 53
53#include "alloc.h" 54#include "alloc.h"
55#include "blockcheck.h"
54#include "dlmglue.h" 56#include "dlmglue.h"
55#include "export.h" 57#include "export.h"
56#include "extent_map.h" 58#include "extent_map.h"
@@ -65,10 +67,13 @@
65#include "uptodate.h" 67#include "uptodate.h"
66#include "ver.h" 68#include "ver.h"
67#include "xattr.h" 69#include "xattr.h"
70#include "quota.h"
68 71
69#include "buffer_head_io.h" 72#include "buffer_head_io.h"
70 73
71static struct kmem_cache *ocfs2_inode_cachep = NULL; 74static struct kmem_cache *ocfs2_inode_cachep = NULL;
75struct kmem_cache *ocfs2_dquot_cachep;
76struct kmem_cache *ocfs2_qf_chunk_cachep;
72 77
73/* OCFS2 needs to schedule several different types of work which 78/* OCFS2 needs to schedule several different types of work which
74 * require cluster locking, disk I/O, recovery waits, etc. Since these 79 * require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -124,6 +129,9 @@ static int ocfs2_get_sector(struct super_block *sb,
124static void ocfs2_write_super(struct super_block *sb); 129static void ocfs2_write_super(struct super_block *sb);
125static struct inode *ocfs2_alloc_inode(struct super_block *sb); 130static struct inode *ocfs2_alloc_inode(struct super_block *sb);
126static void ocfs2_destroy_inode(struct inode *inode); 131static void ocfs2_destroy_inode(struct inode *inode);
132static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
133static int ocfs2_enable_quotas(struct ocfs2_super *osb);
134static void ocfs2_disable_quotas(struct ocfs2_super *osb);
127 135
128static const struct super_operations ocfs2_sops = { 136static const struct super_operations ocfs2_sops = {
129 .statfs = ocfs2_statfs, 137 .statfs = ocfs2_statfs,
@@ -137,6 +145,8 @@ static const struct super_operations ocfs2_sops = {
137 .put_super = ocfs2_put_super, 145 .put_super = ocfs2_put_super,
138 .remount_fs = ocfs2_remount, 146 .remount_fs = ocfs2_remount,
139 .show_options = ocfs2_show_options, 147 .show_options = ocfs2_show_options,
148 .quota_read = ocfs2_quota_read,
149 .quota_write = ocfs2_quota_write,
140}; 150};
141 151
142enum { 152enum {
@@ -158,6 +168,10 @@ enum {
158 Opt_user_xattr, 168 Opt_user_xattr,
159 Opt_nouser_xattr, 169 Opt_nouser_xattr,
160 Opt_inode64, 170 Opt_inode64,
171 Opt_acl,
172 Opt_noacl,
173 Opt_usrquota,
174 Opt_grpquota,
161 Opt_err, 175 Opt_err,
162}; 176};
163 177
@@ -180,6 +194,10 @@ static const match_table_t tokens = {
180 {Opt_user_xattr, "user_xattr"}, 194 {Opt_user_xattr, "user_xattr"},
181 {Opt_nouser_xattr, "nouser_xattr"}, 195 {Opt_nouser_xattr, "nouser_xattr"},
182 {Opt_inode64, "inode64"}, 196 {Opt_inode64, "inode64"},
197 {Opt_acl, "acl"},
198 {Opt_noacl, "noacl"},
199 {Opt_usrquota, "usrquota"},
200 {Opt_grpquota, "grpquota"},
183 {Opt_err, NULL} 201 {Opt_err, NULL}
184}; 202};
185 203
@@ -221,6 +239,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
221 return 0; 239 return 0;
222} 240}
223 241
242static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
243{
244 if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
245 && (ino == USER_QUOTA_SYSTEM_INODE
246 || ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
247 return 0;
248 if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
249 && (ino == GROUP_QUOTA_SYSTEM_INODE
250 || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
251 return 0;
252 return 1;
253}
254
224static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) 255static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
225{ 256{
226 struct inode *new = NULL; 257 struct inode *new = NULL;
@@ -247,6 +278,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
247 278
248 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; 279 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
249 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { 280 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
281 if (!ocfs2_need_system_inode(osb, i))
282 continue;
250 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 283 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
251 if (!new) { 284 if (!new) {
252 ocfs2_release_system_inodes(osb); 285 ocfs2_release_system_inodes(osb);
@@ -277,6 +310,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
277 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 310 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
278 i < NUM_SYSTEM_INODES; 311 i < NUM_SYSTEM_INODES;
279 i++) { 312 i++) {
313 if (!ocfs2_need_system_inode(osb, i))
314 continue;
280 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 315 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
281 if (!new) { 316 if (!new) {
282 ocfs2_release_system_inodes(osb); 317 ocfs2_release_system_inodes(osb);
@@ -426,6 +461,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
426 461
427 /* We're going to/from readonly mode. */ 462 /* We're going to/from readonly mode. */
428 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 463 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
464 /* Disable quota accounting before remounting RO */
465 if (*flags & MS_RDONLY) {
466 ret = ocfs2_susp_quotas(osb, 0);
467 if (ret < 0)
468 goto out;
469 }
429 /* Lock here so the check of HARD_RO and the potential 470 /* Lock here so the check of HARD_RO and the potential
430 * setting of SOFT_RO is atomic. */ 471 * setting of SOFT_RO is atomic. */
431 spin_lock(&osb->osb_lock); 472 spin_lock(&osb->osb_lock);
@@ -461,11 +502,28 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
461 } 502 }
462unlock_osb: 503unlock_osb:
463 spin_unlock(&osb->osb_lock); 504 spin_unlock(&osb->osb_lock);
505 /* Enable quota accounting after remounting RW */
506 if (!ret && !(*flags & MS_RDONLY)) {
507 if (sb_any_quota_suspended(sb))
508 ret = ocfs2_susp_quotas(osb, 1);
509 else
510 ret = ocfs2_enable_quotas(osb);
511 if (ret < 0) {
512 /* Return back changes... */
513 spin_lock(&osb->osb_lock);
514 sb->s_flags |= MS_RDONLY;
515 osb->osb_flags |= OCFS2_OSB_SOFT_RO;
516 spin_unlock(&osb->osb_lock);
517 goto out;
518 }
519 }
464 } 520 }
465 521
466 if (!ret) { 522 if (!ret) {
467 /* Only save off the new mount options in case of a successful 523 /* Only save off the new mount options in case of a successful
468 * remount. */ 524 * remount. */
525 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
526 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
469 osb->s_mount_opt = parsed_options.mount_opt; 527 osb->s_mount_opt = parsed_options.mount_opt;
470 osb->s_atime_quantum = parsed_options.atime_quantum; 528 osb->s_atime_quantum = parsed_options.atime_quantum;
471 osb->preferred_slot = parsed_options.slot; 529 osb->preferred_slot = parsed_options.slot;
@@ -619,6 +677,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
619 return 0; 677 return 0;
620} 678}
621 679
680static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
681{
682 int type;
683 struct super_block *sb = osb->sb;
684 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
685 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
686 int status = 0;
687
688 for (type = 0; type < MAXQUOTAS; type++) {
689 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
690 continue;
691 if (unsuspend)
692 status = vfs_quota_enable(
693 sb_dqopt(sb)->files[type],
694 type, QFMT_OCFS2,
695 DQUOT_SUSPENDED);
696 else
697 status = vfs_quota_disable(sb, type,
698 DQUOT_SUSPENDED);
699 if (status < 0)
700 break;
701 }
702 if (status < 0)
703 mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
704 "remount (error = %d).\n", status);
705 return status;
706}
707
708static int ocfs2_enable_quotas(struct ocfs2_super *osb)
709{
710 struct inode *inode[MAXQUOTAS] = { NULL, NULL };
711 struct super_block *sb = osb->sb;
712 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
713 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
714 unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
715 LOCAL_GROUP_QUOTA_SYSTEM_INODE };
716 int status;
717 int type;
718
719 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
720 for (type = 0; type < MAXQUOTAS; type++) {
721 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
722 continue;
723 inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
724 osb->slot_num);
725 if (!inode[type]) {
726 status = -ENOENT;
727 goto out_quota_off;
728 }
729 status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
730 DQUOT_USAGE_ENABLED);
731 if (status < 0)
732 goto out_quota_off;
733 }
734
735 for (type = 0; type < MAXQUOTAS; type++)
736 iput(inode[type]);
737 return 0;
738out_quota_off:
739 ocfs2_disable_quotas(osb);
740 for (type = 0; type < MAXQUOTAS; type++)
741 iput(inode[type]);
742 mlog_errno(status);
743 return status;
744}
745
746static void ocfs2_disable_quotas(struct ocfs2_super *osb)
747{
748 int type;
749 struct inode *inode;
750 struct super_block *sb = osb->sb;
751
752 /* We mostly ignore errors in this function because there's not much
753 * we can do when we see them */
754 for (type = 0; type < MAXQUOTAS; type++) {
755 if (!sb_has_quota_loaded(sb, type))
756 continue;
757 inode = igrab(sb->s_dquot.files[type]);
758 /* Turn off quotas. This will remove all dquot structures from
759 * memory and so they will be automatically synced to global
760 * quota files */
761 vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
762 DQUOT_LIMITS_ENABLED);
763 if (!inode)
764 continue;
765 iput(inode);
766 }
767}
768
769/* Handle quota on quotactl */
770static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
771 char *path, int remount)
772{
773 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
774 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
775
776 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
777 return -EINVAL;
778
779 if (remount)
780 return 0; /* Just ignore it has been handled in
781 * ocfs2_remount() */
782 return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
783 format_id, DQUOT_LIMITS_ENABLED);
784}
785
786/* Handle quota off quotactl */
787static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
788{
789 if (remount)
790 return 0; /* Ignore now and handle later in
791 * ocfs2_remount() */
792 return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
793}
794
/*
 * Quota control operations installed as sb->s_qcop in
 * ocfs2_initialize_super(). Only quota on/off use ocfs2-specific
 * handlers; the remaining entries point at the vfs_* functions.
 */
795static struct quotactl_ops ocfs2_quotactl_ops = {
796 .quota_on = ocfs2_quota_on,
797 .quota_off = ocfs2_quota_off,
798 .quota_sync = vfs_quota_sync,
799 .get_info = vfs_get_dqinfo,
800 .set_info = vfs_set_dqinfo,
801 .get_dqblk = vfs_get_dqblk,
802 .set_dqblk = vfs_set_dqblk,
803};
804
622static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 805static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
623{ 806{
624 struct dentry *root; 807 struct dentry *root;
@@ -651,12 +834,32 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
651 } 834 }
652 brelse(bh); 835 brelse(bh);
653 bh = NULL; 836 bh = NULL;
837
838 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
839 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
840
654 osb->s_mount_opt = parsed_options.mount_opt; 841 osb->s_mount_opt = parsed_options.mount_opt;
655 osb->s_atime_quantum = parsed_options.atime_quantum; 842 osb->s_atime_quantum = parsed_options.atime_quantum;
656 osb->preferred_slot = parsed_options.slot; 843 osb->preferred_slot = parsed_options.slot;
657 osb->osb_commit_interval = parsed_options.commit_interval; 844 osb->osb_commit_interval = parsed_options.commit_interval;
658 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 845 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
659 osb->local_alloc_bits = osb->local_alloc_default_bits; 846 osb->local_alloc_bits = osb->local_alloc_default_bits;
847 if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
848 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
849 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
850 status = -EINVAL;
851 mlog(ML_ERROR, "User quotas were requested, but this "
852 "filesystem does not have the feature enabled.\n");
853 goto read_super_error;
854 }
855 if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
856 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
857 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
858 status = -EINVAL;
859 mlog(ML_ERROR, "Group quotas were requested, but this "
860 "filesystem does not have the feature enabled.\n");
861 goto read_super_error;
862 }
660 863
661 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 864 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
662 if (status) 865 if (status)
@@ -664,6 +867,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
664 867
665 sb->s_magic = OCFS2_SUPER_MAGIC; 868 sb->s_magic = OCFS2_SUPER_MAGIC;
666 869
870 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
871 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
872
667 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 873 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
668 * heartbeat=none */ 874 * heartbeat=none */
669 if (bdev_read_only(sb->s_bdev)) { 875 if (bdev_read_only(sb->s_bdev)) {
@@ -758,6 +964,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
758 atomic_set(&osb->vol_state, VOLUME_MOUNTED); 964 atomic_set(&osb->vol_state, VOLUME_MOUNTED);
759 wake_up(&osb->osb_mount_event); 965 wake_up(&osb->osb_mount_event);
760 966
967 /* Now we can initialize quotas because we can afford to wait
968 * for cluster locks recovery now. That also means that truncation
969 * log recovery can happen but that waits for proper quota setup */
970 if (!(sb->s_flags & MS_RDONLY)) {
971 status = ocfs2_enable_quotas(osb);
972 if (status < 0) {
973 /* We have to err-out specially here because
974 * s_root is already set */
975 mlog_errno(status);
976 atomic_set(&osb->vol_state, VOLUME_DISABLED);
977 wake_up(&osb->osb_mount_event);
978 mlog_exit(status);
979 return status;
980 }
981 }
982
983 ocfs2_complete_quota_recovery(osb);
984
985 /* Now we wake up again for processes waiting for quotas */
986 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
987 wake_up(&osb->osb_mount_event);
988
761 mlog_exit(status); 989 mlog_exit(status);
762 return status; 990 return status;
763 991
@@ -945,6 +1173,41 @@ static int ocfs2_parse_options(struct super_block *sb,
945 case Opt_inode64: 1173 case Opt_inode64:
946 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1174 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
947 break; 1175 break;
1176 case Opt_usrquota:
1177 /* We check only on remount, otherwise features
1178 * aren't yet initialized. */
1179 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1180 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1181 mlog(ML_ERROR, "User quota requested but "
1182 "filesystem feature is not set\n");
1183 status = 0;
1184 goto bail;
1185 }
1186 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
1187 break;
1188 case Opt_grpquota:
1189 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1190 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1191 mlog(ML_ERROR, "Group quota requested but "
1192 "filesystem feature is not set\n");
1193 status = 0;
1194 goto bail;
1195 }
1196 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1197 break;
1198#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1199 case Opt_acl:
1200 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1201 break;
1202 case Opt_noacl:
1203 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1204 break;
1205#else
1206 case Opt_acl:
1207 case Opt_noacl:
1208 printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
1209 break;
1210#endif
948 default: 1211 default:
949 mlog(ML_ERROR, 1212 mlog(ML_ERROR,
950 "Unrecognized mount option \"%s\" " 1213 "Unrecognized mount option \"%s\" "
@@ -1008,6 +1271,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1008 if (osb->osb_cluster_stack[0]) 1271 if (osb->osb_cluster_stack[0])
1009 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, 1272 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
1010 osb->osb_cluster_stack); 1273 osb->osb_cluster_stack);
1274 if (opts & OCFS2_MOUNT_USRQUOTA)
1275 seq_printf(s, ",usrquota");
1276 if (opts & OCFS2_MOUNT_GRPQUOTA)
1277 seq_printf(s, ",grpquota");
1011 1278
1012 if (opts & OCFS2_MOUNT_NOUSERXATTR) 1279 if (opts & OCFS2_MOUNT_NOUSERXATTR)
1013 seq_printf(s, ",nouser_xattr"); 1280 seq_printf(s, ",nouser_xattr");
@@ -1017,6 +1284,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1017 if (opts & OCFS2_MOUNT_INODE64) 1284 if (opts & OCFS2_MOUNT_INODE64)
1018 seq_printf(s, ",inode64"); 1285 seq_printf(s, ",inode64");
1019 1286
1287#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1288 if (opts & OCFS2_MOUNT_POSIX_ACL)
1289 seq_printf(s, ",acl");
1290 else
1291 seq_printf(s, ",noacl");
1292#endif
1293
1020 return 0; 1294 return 0;
1021} 1295}
1022 1296
@@ -1052,10 +1326,16 @@ static int __init ocfs2_init(void)
1052 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1326 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1053 } 1327 }
1054 1328
1329 status = ocfs2_quota_setup();
1330 if (status)
1331 goto leave;
1332
1055 ocfs2_set_locking_protocol(); 1333 ocfs2_set_locking_protocol();
1056 1334
1335 status = register_quota_format(&ocfs2_quota_format);
1057leave: 1336leave:
1058 if (status < 0) { 1337 if (status < 0) {
1338 ocfs2_quota_shutdown();
1059 ocfs2_free_mem_caches(); 1339 ocfs2_free_mem_caches();
1060 exit_ocfs2_uptodate_cache(); 1340 exit_ocfs2_uptodate_cache();
1061 } 1341 }
@@ -1072,11 +1352,15 @@ static void __exit ocfs2_exit(void)
1072{ 1352{
1073 mlog_entry_void(); 1353 mlog_entry_void();
1074 1354
1355 ocfs2_quota_shutdown();
1356
1075 if (ocfs2_wq) { 1357 if (ocfs2_wq) {
1076 flush_workqueue(ocfs2_wq); 1358 flush_workqueue(ocfs2_wq);
1077 destroy_workqueue(ocfs2_wq); 1359 destroy_workqueue(ocfs2_wq);
1078 } 1360 }
1079 1361
1362 unregister_quota_format(&ocfs2_quota_format);
1363
1080 debugfs_remove(ocfs2_debugfs_root); 1364 debugfs_remove(ocfs2_debugfs_root);
1081 1365
1082 ocfs2_free_mem_caches(); 1366 ocfs2_free_mem_caches();
@@ -1192,8 +1476,27 @@ static int ocfs2_initialize_mem_caches(void)
1192 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 1476 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
1193 SLAB_MEM_SPREAD), 1477 SLAB_MEM_SPREAD),
1194 ocfs2_inode_init_once); 1478 ocfs2_inode_init_once);
1195 if (!ocfs2_inode_cachep) 1479 ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
1480 sizeof(struct ocfs2_dquot),
1481 0,
1482 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
1483 SLAB_MEM_SPREAD),
1484 NULL);
1485 ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
1486 sizeof(struct ocfs2_quota_chunk),
1487 0,
1488 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
1489 NULL);
1490 if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
1491 !ocfs2_qf_chunk_cachep) {
1492 if (ocfs2_inode_cachep)
1493 kmem_cache_destroy(ocfs2_inode_cachep);
1494 if (ocfs2_dquot_cachep)
1495 kmem_cache_destroy(ocfs2_dquot_cachep);
1496 if (ocfs2_qf_chunk_cachep)
1497 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1196 return -ENOMEM; 1498 return -ENOMEM;
1499 }
1197 1500
1198 return 0; 1501 return 0;
1199} 1502}
@@ -1202,8 +1505,15 @@ static void ocfs2_free_mem_caches(void)
1202{ 1505{
1203 if (ocfs2_inode_cachep) 1506 if (ocfs2_inode_cachep)
1204 kmem_cache_destroy(ocfs2_inode_cachep); 1507 kmem_cache_destroy(ocfs2_inode_cachep);
1205
1206 ocfs2_inode_cachep = NULL; 1508 ocfs2_inode_cachep = NULL;
1509
1510 if (ocfs2_dquot_cachep)
1511 kmem_cache_destroy(ocfs2_dquot_cachep);
1512 ocfs2_dquot_cachep = NULL;
1513
1514 if (ocfs2_qf_chunk_cachep)
1515 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1516 ocfs2_qf_chunk_cachep = NULL;
1207} 1517}
1208 1518
1209static int ocfs2_get_sector(struct super_block *sb, 1519static int ocfs2_get_sector(struct super_block *sb,
@@ -1303,6 +1613,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1303 osb = OCFS2_SB(sb); 1613 osb = OCFS2_SB(sb);
1304 BUG_ON(!osb); 1614 BUG_ON(!osb);
1305 1615
1616 ocfs2_disable_quotas(osb);
1617
1306 ocfs2_shutdown_local_alloc(osb); 1618 ocfs2_shutdown_local_alloc(osb);
1307 1619
1308 ocfs2_truncate_log_shutdown(osb); 1620 ocfs2_truncate_log_shutdown(osb);
@@ -1413,6 +1725,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1413 sb->s_fs_info = osb; 1725 sb->s_fs_info = osb;
1414 sb->s_op = &ocfs2_sops; 1726 sb->s_op = &ocfs2_sops;
1415 sb->s_export_op = &ocfs2_export_ops; 1727 sb->s_export_op = &ocfs2_export_ops;
1728 sb->s_qcop = &ocfs2_quotactl_ops;
1729 sb->dq_op = &ocfs2_quota_operations;
1416 sb->s_xattr = ocfs2_xattr_handlers; 1730 sb->s_xattr = ocfs2_xattr_handlers;
1417 sb->s_time_gran = 1; 1731 sb->s_time_gran = 1;
1418 sb->s_flags |= MS_NOATIME; 1732 sb->s_flags |= MS_NOATIME;
@@ -1676,6 +1990,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1676 1990
1677 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 1991 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
1678 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 1992 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
1993 /* We have to do a raw check of the feature here */
1994 if (le32_to_cpu(di->id2.i_super.s_feature_incompat) &
1995 OCFS2_FEATURE_INCOMPAT_META_ECC) {
1996 status = ocfs2_block_check_validate(bh->b_data,
1997 bh->b_size,
1998 &di->i_check);
1999 if (status)
2000 goto out;
2001 }
1679 status = -EINVAL; 2002 status = -EINVAL;
1680 if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { 2003 if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
1681 mlog(ML_ERROR, "found superblock with incorrect block " 2004 mlog(ML_ERROR, "found superblock with incorrect block "
@@ -1717,6 +2040,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1717 } 2040 }
1718 } 2041 }
1719 2042
2043out:
1720 mlog_exit(status); 2044 mlog_exit(status);
1721 return status; 2045 return status;
1722} 2046}
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index cbd03dfdc7b9..ed0a0cfd68d2 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
84 84
85 mlog_entry_void(); 85 mlog_entry_void();
86 86
87 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); 87 status = ocfs2_read_inode_block(inode, bh);
88 if (status < 0) { 88 if (status < 0) {
89 mlog_errno(status); 89 mlog_errno(status);
90 link = ERR_PTR(status); 90 link = ERR_PTR(status);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 74d7367ade13..e1d638af6ac3 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -35,12 +35,14 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/module.h> 36#include <linux/module.h>
37#include <linux/string.h> 37#include <linux/string.h>
38#include <linux/security.h>
38 39
39#define MLOG_MASK_PREFIX ML_XATTR 40#define MLOG_MASK_PREFIX ML_XATTR
40#include <cluster/masklog.h> 41#include <cluster/masklog.h>
41 42
42#include "ocfs2.h" 43#include "ocfs2.h"
43#include "alloc.h" 44#include "alloc.h"
45#include "blockcheck.h"
44#include "dlmglue.h" 46#include "dlmglue.h"
45#include "file.h" 47#include "file.h"
46#include "symlink.h" 48#include "symlink.h"
@@ -61,12 +63,32 @@ struct ocfs2_xattr_def_value_root {
61}; 63};
62 64
63struct ocfs2_xattr_bucket { 65struct ocfs2_xattr_bucket {
64 struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 66 /* The inode these xattrs are associated with */
65 struct ocfs2_xattr_header *xh; 67 struct inode *bu_inode;
68
69 /* The actual buffers that make up the bucket */
70 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
71
72 /* How many blocks make up one bucket for this filesystem */
73 int bu_blocks;
74};
75
76struct ocfs2_xattr_set_ctxt {
77 handle_t *handle;
78 struct ocfs2_alloc_context *meta_ac;
79 struct ocfs2_alloc_context *data_ac;
80 struct ocfs2_cached_dealloc_ctxt dealloc;
66}; 81};
67 82
68#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 83#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
69#define OCFS2_XATTR_INLINE_SIZE 80 84#define OCFS2_XATTR_INLINE_SIZE 80
85#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
86 - sizeof(struct ocfs2_xattr_header) \
87 - sizeof(__u32))
88#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
89 - sizeof(struct ocfs2_xattr_block) \
90 - sizeof(struct ocfs2_xattr_header) \
91 - sizeof(__u32))
70 92
71static struct ocfs2_xattr_def_value_root def_xv = { 93static struct ocfs2_xattr_def_value_root def_xv = {
72 .xv.xr_list.l_count = cpu_to_le16(1), 94 .xv.xr_list.l_count = cpu_to_le16(1),
@@ -74,13 +96,25 @@ static struct ocfs2_xattr_def_value_root def_xv = {
74 96
75struct xattr_handler *ocfs2_xattr_handlers[] = { 97struct xattr_handler *ocfs2_xattr_handlers[] = {
76 &ocfs2_xattr_user_handler, 98 &ocfs2_xattr_user_handler,
99#ifdef CONFIG_OCFS2_FS_POSIX_ACL
100 &ocfs2_xattr_acl_access_handler,
101 &ocfs2_xattr_acl_default_handler,
102#endif
77 &ocfs2_xattr_trusted_handler, 103 &ocfs2_xattr_trusted_handler,
104 &ocfs2_xattr_security_handler,
78 NULL 105 NULL
79}; 106};
80 107
81static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
82 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
110#ifdef CONFIG_OCFS2_FS_POSIX_ACL
111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
112 = &ocfs2_xattr_acl_access_handler,
113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
114 = &ocfs2_xattr_acl_default_handler,
115#endif
83 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
117 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
84}; 118};
85 119
86struct ocfs2_xattr_info { 120struct ocfs2_xattr_info {
@@ -98,7 +132,7 @@ struct ocfs2_xattr_search {
98 */ 132 */
99 struct buffer_head *xattr_bh; 133 struct buffer_head *xattr_bh;
100 struct ocfs2_xattr_header *header; 134 struct ocfs2_xattr_header *header;
101 struct ocfs2_xattr_bucket bucket; 135 struct ocfs2_xattr_bucket *bucket;
102 void *base; 136 void *base;
103 void *end; 137 void *end;
104 struct ocfs2_xattr_entry *here; 138 struct ocfs2_xattr_entry *here;
@@ -127,14 +161,20 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
127 size_t buffer_size); 161 size_t buffer_size);
128 162
129static int ocfs2_xattr_create_index_block(struct inode *inode, 163static int ocfs2_xattr_create_index_block(struct inode *inode,
130 struct ocfs2_xattr_search *xs); 164 struct ocfs2_xattr_search *xs,
165 struct ocfs2_xattr_set_ctxt *ctxt);
131 166
132static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 167static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
133 struct ocfs2_xattr_info *xi, 168 struct ocfs2_xattr_info *xi,
134 struct ocfs2_xattr_search *xs); 169 struct ocfs2_xattr_search *xs,
170 struct ocfs2_xattr_set_ctxt *ctxt);
135 171
136static int ocfs2_delete_xattr_index_block(struct inode *inode, 172static int ocfs2_delete_xattr_index_block(struct inode *inode,
137 struct buffer_head *xb_bh); 173 struct buffer_head *xb_bh);
174static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
175 u64 src_blk, u64 last_blk, u64 to_blk,
176 unsigned int start_bucket,
177 u32 *first_hash);
138 178
139static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 179static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
140{ 180{
@@ -154,6 +194,216 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
154 return len / sizeof(struct ocfs2_xattr_entry); 194 return len / sizeof(struct ocfs2_xattr_entry);
155} 195}
156 196
/*
 * Accessors for the buffer_heads held in a bucket's bu_bhs[] array:
 *   bucket_blkno(_b)     - b_blocknr of the bucket's first buffer_head
 *   bucket_block(_b, _n) - b_data pointer of the bucket's _n'th buffer_head
 *   bucket_xh(_b)        - block 0's data viewed as a struct ocfs2_xattr_header
 * All three dereference bu_bhs[] directly, so the referenced slot must hold
 * a buffer_head when they are used.
 */
197#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
198#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
199#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
200
201static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
202{
203 struct ocfs2_xattr_bucket *bucket;
204 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
205
206 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
207
208 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
209 if (bucket) {
210 bucket->bu_inode = inode;
211 bucket->bu_blocks = blks;
212 }
213
214 return bucket;
215}
216
217static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
218{
219 int i;
220
221 for (i = 0; i < bucket->bu_blocks; i++) {
222 brelse(bucket->bu_bhs[i]);
223 bucket->bu_bhs[i] = NULL;
224 }
225}
226
227static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
228{
229 if (bucket) {
230 ocfs2_xattr_bucket_relse(bucket);
231 bucket->bu_inode = NULL;
232 kfree(bucket);
233 }
234}
235
236/*
237 * A bucket that has never been written to disk doesn't need to be
238 * read. We just need the buffer_heads. Don't call this for
239 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes
240 * them fully.
241 */
242static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
243 u64 xb_blkno)
244{
245 int i, rc = 0;
246
247 for (i = 0; i < bucket->bu_blocks; i++) {
248 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
249 xb_blkno + i);
250 if (!bucket->bu_bhs[i]) {
251 rc = -EIO;
252 mlog_errno(rc);
253 break;
254 }
255
256 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
257 bucket->bu_bhs[i]))
258 ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
259 bucket->bu_bhs[i]);
260 }
261
262 if (rc)
263 ocfs2_xattr_bucket_relse(bucket);
264 return rc;
265}
266
267/* Read the xattr bucket at xb_blkno */
268static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
269 u64 xb_blkno)
270{
271 int rc;
272
273 rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
274 bucket->bu_blocks, bucket->bu_bhs, 0,
275 NULL);
276 if (!rc) {
277 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
278 bucket->bu_bhs,
279 bucket->bu_blocks,
280 &bucket_xh(bucket)->xh_check);
281 if (rc)
282 mlog_errno(rc);
283 }
284
285 if (rc)
286 ocfs2_xattr_bucket_relse(bucket);
287 return rc;
288}
289
290static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
291 struct ocfs2_xattr_bucket *bucket,
292 int type)
293{
294 int i, rc = 0;
295
296 for (i = 0; i < bucket->bu_blocks; i++) {
297 rc = ocfs2_journal_access(handle, bucket->bu_inode,
298 bucket->bu_bhs[i], type);
299 if (rc) {
300 mlog_errno(rc);
301 break;
302 }
303 }
304
305 return rc;
306}
307
308static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
309 struct ocfs2_xattr_bucket *bucket)
310{
311 int i;
312
313 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
314 bucket->bu_bhs, bucket->bu_blocks,
315 &bucket_xh(bucket)->xh_check);
316
317 for (i = 0; i < bucket->bu_blocks; i++)
318 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
319}
320
321static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
322 struct ocfs2_xattr_bucket *src)
323{
324 int i;
325 int blocksize = src->bu_inode->i_sb->s_blocksize;
326
327 BUG_ON(dest->bu_blocks != src->bu_blocks);
328 BUG_ON(dest->bu_inode != src->bu_inode);
329
330 for (i = 0; i < src->bu_blocks; i++) {
331 memcpy(bucket_block(dest, i), bucket_block(src, i),
332 blocksize);
333 }
334}
335
336static int ocfs2_validate_xattr_block(struct super_block *sb,
337 struct buffer_head *bh)
338{
339 int rc;
340 struct ocfs2_xattr_block *xb =
341 (struct ocfs2_xattr_block *)bh->b_data;
342
343 mlog(0, "Validating xattr block %llu\n",
344 (unsigned long long)bh->b_blocknr);
345
346 BUG_ON(!buffer_uptodate(bh));
347
348 /*
349 * If the ecc fails, we return the error but otherwise
350 * leave the filesystem running. We know any error is
351 * local to this block.
352 */
353 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
354 if (rc)
355 return rc;
356
357 /*
358 * Errors after here are fatal
359 */
360
361 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
362 ocfs2_error(sb,
363 "Extended attribute block #%llu has bad "
364 "signature %.*s",
365 (unsigned long long)bh->b_blocknr, 7,
366 xb->xb_signature);
367 return -EINVAL;
368 }
369
370 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
371 ocfs2_error(sb,
372 "Extended attribute block #%llu has an "
373 "invalid xb_blkno of %llu",
374 (unsigned long long)bh->b_blocknr,
375 (unsigned long long)le64_to_cpu(xb->xb_blkno));
376 return -EINVAL;
377 }
378
379 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
380 ocfs2_error(sb,
381 "Extended attribute block #%llu has an invalid "
382 "xb_fs_generation of #%u",
383 (unsigned long long)bh->b_blocknr,
384 le32_to_cpu(xb->xb_fs_generation));
385 return -EINVAL;
386 }
387
388 return 0;
389}
390
391static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
392 struct buffer_head **bh)
393{
394 int rc;
395 struct buffer_head *tmp = *bh;
396
397 rc = ocfs2_read_block(inode, xb_blkno, &tmp,
398 ocfs2_validate_xattr_block);
399
400 /* If ocfs2_read_block() got us a new bh, pass it up. */
401 if (!rc && !*bh)
402 *bh = tmp;
403
404 return rc;
405}
406
157static inline const char *ocfs2_xattr_prefix(int name_index) 407static inline const char *ocfs2_xattr_prefix(int name_index)
158{ 408{
159 struct xattr_handler *handler = NULL; 409 struct xattr_handler *handler = NULL;
@@ -200,54 +450,163 @@ static void ocfs2_xattr_hash_entry(struct inode *inode,
200 return; 450 return;
201} 451}
202 452
453static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
454{
455 int size = 0;
456
457 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
458 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
459 else
460 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
461 size += sizeof(struct ocfs2_xattr_entry);
462
463 return size;
464}
465
466int ocfs2_calc_security_init(struct inode *dir,
467 struct ocfs2_security_xattr_info *si,
468 int *want_clusters,
469 int *xattr_credits,
470 struct ocfs2_alloc_context **xattr_ac)
471{
472 int ret = 0;
473 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
474 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
475 si->value_len);
476
477 /*
478 * The max space of security xattr taken inline is
479 * 256(name) + 80(value) + 16(entry) = 352 bytes,
480 * So reserve one metadata block for it is ok.
481 */
482 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
483 s_size > OCFS2_XATTR_FREE_IN_IBODY) {
484 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
485 if (ret) {
486 mlog_errno(ret);
487 return ret;
488 }
489 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
490 }
491
492 /* reserve clusters for xattr value which will be set in B tree*/
493 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
494 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
495 si->value_len);
496
497 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
498 new_clusters);
499 *want_clusters += new_clusters;
500 }
501 return ret;
502}
503
504int ocfs2_calc_xattr_init(struct inode *dir,
505 struct buffer_head *dir_bh,
506 int mode,
507 struct ocfs2_security_xattr_info *si,
508 int *want_clusters,
509 int *xattr_credits,
510 struct ocfs2_alloc_context **xattr_ac)
511{
512 int ret = 0;
513 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
514 int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
515
516 if (si->enable)
517 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
518 si->value_len);
519
520 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
521 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
522 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
523 "", NULL, 0);
524 if (acl_len > 0) {
525 a_size = ocfs2_xattr_entry_real_size(0, acl_len);
526 if (S_ISDIR(mode))
527 a_size <<= 1;
528 } else if (acl_len != 0 && acl_len != -ENODATA) {
529 mlog_errno(ret);
530 return ret;
531 }
532 }
533
534 if (!(s_size + a_size))
535 return ret;
536
537 /*
538 * The max space of security xattr taken inline is
539 * 256(name) + 80(value) + 16(entry) = 352 bytes,
540 * The max space of acl xattr taken inline is
541 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
542 * when blocksize = 512, may reserve one more cluser for
543 * xattr bucket, otherwise reserve one metadata block
544 * for them is ok.
545 */
546 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
547 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
548 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
549 if (ret) {
550 mlog_errno(ret);
551 return ret;
552 }
553 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
554 }
555
556 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
557 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
558 *want_clusters += 1;
559 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
560 }
561
562 /*
563 * reserve credits and clusters for xattrs which has large value
564 * and have to be set outside
565 */
566 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
567 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
568 si->value_len);
569 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
570 new_clusters);
571 *want_clusters += new_clusters;
572 }
573 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
574 acl_len > OCFS2_XATTR_INLINE_SIZE) {
575 /* for directory, it has DEFAULT and ACCESS two types of acls */
576 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
577 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
578 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
579 new_clusters);
580 *want_clusters += new_clusters;
581 }
582
583 return ret;
584}
585
203static int ocfs2_xattr_extend_allocation(struct inode *inode, 586static int ocfs2_xattr_extend_allocation(struct inode *inode,
204 u32 clusters_to_add, 587 u32 clusters_to_add,
205 struct buffer_head *xattr_bh, 588 struct ocfs2_xattr_value_buf *vb,
206 struct ocfs2_xattr_value_root *xv) 589 struct ocfs2_xattr_set_ctxt *ctxt)
207{ 590{
208 int status = 0; 591 int status = 0;
209 int restart_func = 0; 592 handle_t *handle = ctxt->handle;
210 int credits = 0;
211 handle_t *handle = NULL;
212 struct ocfs2_alloc_context *data_ac = NULL;
213 struct ocfs2_alloc_context *meta_ac = NULL;
214 enum ocfs2_alloc_restarted why; 593 enum ocfs2_alloc_restarted why;
215 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 594 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
216 u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); 595 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
217 struct ocfs2_extent_tree et; 596 struct ocfs2_extent_tree et;
218 597
219 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); 598 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
220 599
221 ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); 600 ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
222
223restart_all:
224
225 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
226 &data_ac, &meta_ac);
227 if (status) {
228 mlog_errno(status);
229 goto leave;
230 }
231
232 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
233 clusters_to_add);
234 handle = ocfs2_start_trans(osb, credits);
235 if (IS_ERR(handle)) {
236 status = PTR_ERR(handle);
237 handle = NULL;
238 mlog_errno(status);
239 goto leave;
240 }
241 601
242restarted_transaction: 602 status = vb->vb_access(handle, inode, vb->vb_bh,
243 status = ocfs2_journal_access(handle, inode, xattr_bh, 603 OCFS2_JOURNAL_ACCESS_WRITE);
244 OCFS2_JOURNAL_ACCESS_WRITE);
245 if (status < 0) { 604 if (status < 0) {
246 mlog_errno(status); 605 mlog_errno(status);
247 goto leave; 606 goto leave;
248 } 607 }
249 608
250 prev_clusters = le32_to_cpu(xv->xr_clusters); 609 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
251 status = ocfs2_add_clusters_in_btree(osb, 610 status = ocfs2_add_clusters_in_btree(osb,
252 inode, 611 inode,
253 &logical_start, 612 &logical_start,
@@ -255,157 +614,84 @@ restarted_transaction:
255 0, 614 0,
256 &et, 615 &et,
257 handle, 616 handle,
258 data_ac, 617 ctxt->data_ac,
259 meta_ac, 618 ctxt->meta_ac,
260 &why); 619 &why);
261 if ((status < 0) && (status != -EAGAIN)) { 620 if (status < 0) {
262 if (status != -ENOSPC) 621 mlog_errno(status);
263 mlog_errno(status);
264 goto leave; 622 goto leave;
265 } 623 }
266 624
267 status = ocfs2_journal_dirty(handle, xattr_bh); 625 status = ocfs2_journal_dirty(handle, vb->vb_bh);
268 if (status < 0) { 626 if (status < 0) {
269 mlog_errno(status); 627 mlog_errno(status);
270 goto leave; 628 goto leave;
271 } 629 }
272 630
273 clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; 631 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
274 632
275 if (why != RESTART_NONE && clusters_to_add) { 633 /*
276 if (why == RESTART_META) { 634 * We should have already allocated enough space before the transaction,
277 mlog(0, "restarting function.\n"); 635 * so no need to restart.
278 restart_func = 1; 636 */
279 } else { 637 BUG_ON(why != RESTART_NONE || clusters_to_add);
280 BUG_ON(why != RESTART_TRANS);
281
282 mlog(0, "restarting transaction.\n");
283 /* TODO: This can be more intelligent. */
284 credits = ocfs2_calc_extend_credits(osb->sb,
285 et.et_root_el,
286 clusters_to_add);
287 status = ocfs2_extend_trans(handle, credits);
288 if (status < 0) {
289 /* handle still has to be committed at
290 * this point. */
291 status = -ENOMEM;
292 mlog_errno(status);
293 goto leave;
294 }
295 goto restarted_transaction;
296 }
297 }
298 638
299leave: 639leave:
300 if (handle) {
301 ocfs2_commit_trans(osb, handle);
302 handle = NULL;
303 }
304 if (data_ac) {
305 ocfs2_free_alloc_context(data_ac);
306 data_ac = NULL;
307 }
308 if (meta_ac) {
309 ocfs2_free_alloc_context(meta_ac);
310 meta_ac = NULL;
311 }
312 if ((!status) && restart_func) {
313 restart_func = 0;
314 goto restart_all;
315 }
316 640
317 return status; 641 return status;
318} 642}
319 643
320static int __ocfs2_remove_xattr_range(struct inode *inode, 644static int __ocfs2_remove_xattr_range(struct inode *inode,
321 struct buffer_head *root_bh, 645 struct ocfs2_xattr_value_buf *vb,
322 struct ocfs2_xattr_value_root *xv,
323 u32 cpos, u32 phys_cpos, u32 len, 646 u32 cpos, u32 phys_cpos, u32 len,
324 struct ocfs2_cached_dealloc_ctxt *dealloc) 647 struct ocfs2_xattr_set_ctxt *ctxt)
325{ 648{
326 int ret; 649 int ret;
327 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 650 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
328 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 651 handle_t *handle = ctxt->handle;
329 struct inode *tl_inode = osb->osb_tl_inode;
330 handle_t *handle;
331 struct ocfs2_alloc_context *meta_ac = NULL;
332 struct ocfs2_extent_tree et; 652 struct ocfs2_extent_tree et;
333 653
334 ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); 654 ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
335 655
336 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 656 ret = vb->vb_access(handle, inode, vb->vb_bh,
657 OCFS2_JOURNAL_ACCESS_WRITE);
337 if (ret) { 658 if (ret) {
338 mlog_errno(ret); 659 mlog_errno(ret);
339 return ret;
340 }
341
342 mutex_lock(&tl_inode->i_mutex);
343
344 if (ocfs2_truncate_log_needs_flush(osb)) {
345 ret = __ocfs2_flush_truncate_log(osb);
346 if (ret < 0) {
347 mlog_errno(ret);
348 goto out;
349 }
350 }
351
352 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
353 if (IS_ERR(handle)) {
354 ret = PTR_ERR(handle);
355 mlog_errno(ret);
356 goto out; 660 goto out;
357 } 661 }
358 662
359 ret = ocfs2_journal_access(handle, inode, root_bh, 663 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
360 OCFS2_JOURNAL_ACCESS_WRITE); 664 &ctxt->dealloc);
361 if (ret) {
362 mlog_errno(ret);
363 goto out_commit;
364 }
365
366 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
367 dealloc);
368 if (ret) { 665 if (ret) {
369 mlog_errno(ret); 666 mlog_errno(ret);
370 goto out_commit; 667 goto out;
371 } 668 }
372 669
373 le32_add_cpu(&xv->xr_clusters, -len); 670 le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
374 671
375 ret = ocfs2_journal_dirty(handle, root_bh); 672 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
376 if (ret) { 673 if (ret) {
377 mlog_errno(ret); 674 mlog_errno(ret);
378 goto out_commit; 675 goto out;
379 } 676 }
380 677
381 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 678 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
382 if (ret) 679 if (ret)
383 mlog_errno(ret); 680 mlog_errno(ret);
384 681
385out_commit:
386 ocfs2_commit_trans(osb, handle);
387out: 682out:
388 mutex_unlock(&tl_inode->i_mutex);
389
390 if (meta_ac)
391 ocfs2_free_alloc_context(meta_ac);
392
393 return ret; 683 return ret;
394} 684}
395 685
396static int ocfs2_xattr_shrink_size(struct inode *inode, 686static int ocfs2_xattr_shrink_size(struct inode *inode,
397 u32 old_clusters, 687 u32 old_clusters,
398 u32 new_clusters, 688 u32 new_clusters,
399 struct buffer_head *root_bh, 689 struct ocfs2_xattr_value_buf *vb,
400 struct ocfs2_xattr_value_root *xv) 690 struct ocfs2_xattr_set_ctxt *ctxt)
401{ 691{
402 int ret = 0; 692 int ret = 0;
403 u32 trunc_len, cpos, phys_cpos, alloc_size; 693 u32 trunc_len, cpos, phys_cpos, alloc_size;
404 u64 block; 694 u64 block;
405 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
406 struct ocfs2_cached_dealloc_ctxt dealloc;
407
408 ocfs2_init_dealloc_ctxt(&dealloc);
409 695
410 if (old_clusters <= new_clusters) 696 if (old_clusters <= new_clusters)
411 return 0; 697 return 0;
@@ -414,7 +700,8 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
414 trunc_len = old_clusters - new_clusters; 700 trunc_len = old_clusters - new_clusters;
415 while (trunc_len) { 701 while (trunc_len) {
416 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, 702 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
417 &alloc_size, &xv->xr_list); 703 &alloc_size,
704 &vb->vb_xv->xr_list);
418 if (ret) { 705 if (ret) {
419 mlog_errno(ret); 706 mlog_errno(ret);
420 goto out; 707 goto out;
@@ -423,9 +710,9 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
423 if (alloc_size > trunc_len) 710 if (alloc_size > trunc_len)
424 alloc_size = trunc_len; 711 alloc_size = trunc_len;
425 712
426 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, 713 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
427 phys_cpos, alloc_size, 714 phys_cpos, alloc_size,
428 &dealloc); 715 ctxt);
429 if (ret) { 716 if (ret) {
430 mlog_errno(ret); 717 mlog_errno(ret);
431 goto out; 718 goto out;
@@ -439,20 +726,17 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
439 } 726 }
440 727
441out: 728out:
442 ocfs2_schedule_truncate_log_flush(osb, 1);
443 ocfs2_run_deallocs(osb, &dealloc);
444
445 return ret; 729 return ret;
446} 730}
447 731
448static int ocfs2_xattr_value_truncate(struct inode *inode, 732static int ocfs2_xattr_value_truncate(struct inode *inode,
449 struct buffer_head *root_bh, 733 struct ocfs2_xattr_value_buf *vb,
450 struct ocfs2_xattr_value_root *xv, 734 int len,
451 int len) 735 struct ocfs2_xattr_set_ctxt *ctxt)
452{ 736{
453 int ret; 737 int ret;
454 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 738 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
455 u32 old_clusters = le32_to_cpu(xv->xr_clusters); 739 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
456 740
457 if (new_clusters == old_clusters) 741 if (new_clusters == old_clusters)
458 return 0; 742 return 0;
@@ -460,11 +744,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
460 if (new_clusters > old_clusters) 744 if (new_clusters > old_clusters)
461 ret = ocfs2_xattr_extend_allocation(inode, 745 ret = ocfs2_xattr_extend_allocation(inode,
462 new_clusters - old_clusters, 746 new_clusters - old_clusters,
463 root_bh, xv); 747 vb, ctxt);
464 else 748 else
465 ret = ocfs2_xattr_shrink_size(inode, 749 ret = ocfs2_xattr_shrink_size(inode,
466 old_clusters, new_clusters, 750 old_clusters, new_clusters,
467 root_bh, xv); 751 vb, ctxt);
468 752
469 return ret; 753 return ret;
470} 754}
@@ -554,18 +838,14 @@ static int ocfs2_xattr_block_list(struct inode *inode,
554 if (!di->i_xattr_loc) 838 if (!di->i_xattr_loc)
555 return ret; 839 return ret;
556 840
557 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); 841 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
842 &blk_bh);
558 if (ret < 0) { 843 if (ret < 0) {
559 mlog_errno(ret); 844 mlog_errno(ret);
560 return ret; 845 return ret;
561 } 846 }
562 847
563 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 848 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
564 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
565 ret = -EIO;
566 goto cleanup;
567 }
568
569 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 849 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
570 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 850 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
571 ret = ocfs2_xattr_list_entries(inode, header, 851 ret = ocfs2_xattr_list_entries(inode, header,
@@ -575,7 +855,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
575 ret = ocfs2_xattr_tree_list_index_block(inode, xt, 855 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
576 buffer, buffer_size); 856 buffer, buffer_size);
577 } 857 }
578cleanup: 858
579 brelse(blk_bh); 859 brelse(blk_bh);
580 860
581 return ret; 861 return ret;
@@ -685,7 +965,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
685 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 965 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
686 /* Copy ocfs2_xattr_value */ 966 /* Copy ocfs2_xattr_value */
687 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 967 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
688 ret = ocfs2_read_block(inode, blkno, &bh); 968 ret = ocfs2_read_block(inode, blkno, &bh, NULL);
689 if (ret) { 969 if (ret) {
690 mlog_errno(ret); 970 mlog_errno(ret);
691 goto out; 971 goto out;
@@ -769,7 +1049,12 @@ static int ocfs2_xattr_block_get(struct inode *inode,
769 size_t size; 1049 size_t size;
770 int ret = -ENODATA, name_offset, name_len, block_off, i; 1050 int ret = -ENODATA, name_offset, name_len, block_off, i;
771 1051
772 memset(&xs->bucket, 0, sizeof(xs->bucket)); 1052 xs->bucket = ocfs2_xattr_bucket_new(inode);
1053 if (!xs->bucket) {
1054 ret = -ENOMEM;
1055 mlog_errno(ret);
1056 goto cleanup;
1057 }
773 1058
774 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1059 ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
775 if (ret) { 1060 if (ret) {
@@ -795,11 +1080,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
795 1080
796 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1081 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
797 ret = ocfs2_xattr_bucket_get_name_value(inode, 1082 ret = ocfs2_xattr_bucket_get_name_value(inode,
798 xs->bucket.xh, 1083 bucket_xh(xs->bucket),
799 i, 1084 i,
800 &block_off, 1085 &block_off,
801 &name_offset); 1086 &name_offset);
802 xs->base = xs->bucket.bhs[block_off]->b_data; 1087 xs->base = bucket_block(xs->bucket, block_off);
803 } 1088 }
804 if (ocfs2_xattr_is_local(xs->here)) { 1089 if (ocfs2_xattr_is_local(xs->here)) {
805 memcpy(buffer, (void *)xs->base + 1090 memcpy(buffer, (void *)xs->base +
@@ -817,21 +1102,15 @@ static int ocfs2_xattr_block_get(struct inode *inode,
817 } 1102 }
818 ret = size; 1103 ret = size;
819cleanup: 1104cleanup:
820 for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) 1105 ocfs2_xattr_bucket_free(xs->bucket);
821 brelse(xs->bucket.bhs[i]);
822 memset(&xs->bucket, 0, sizeof(xs->bucket));
823 1106
824 brelse(xs->xattr_bh); 1107 brelse(xs->xattr_bh);
825 xs->xattr_bh = NULL; 1108 xs->xattr_bh = NULL;
826 return ret; 1109 return ret;
827} 1110}
828 1111
829/* ocfs2_xattr_get() 1112int ocfs2_xattr_get_nolock(struct inode *inode,
830 * 1113 struct buffer_head *di_bh,
831 * Copy an extended attribute into the buffer provided.
832 * Buffer is NULL to compute the size of buffer required.
833 */
834static int ocfs2_xattr_get(struct inode *inode,
835 int name_index, 1114 int name_index,
836 const char *name, 1115 const char *name,
837 void *buffer, 1116 void *buffer,
@@ -839,7 +1118,6 @@ static int ocfs2_xattr_get(struct inode *inode,
839{ 1118{
840 int ret; 1119 int ret;
841 struct ocfs2_dinode *di = NULL; 1120 struct ocfs2_dinode *di = NULL;
842 struct buffer_head *di_bh = NULL;
843 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1121 struct ocfs2_inode_info *oi = OCFS2_I(inode);
844 struct ocfs2_xattr_search xis = { 1122 struct ocfs2_xattr_search xis = {
845 .not_found = -ENODATA, 1123 .not_found = -ENODATA,
@@ -854,11 +1132,6 @@ static int ocfs2_xattr_get(struct inode *inode,
854 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1132 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
855 ret = -ENODATA; 1133 ret = -ENODATA;
856 1134
857 ret = ocfs2_inode_lock(inode, &di_bh, 0);
858 if (ret < 0) {
859 mlog_errno(ret);
860 return ret;
861 }
862 xis.inode_bh = xbs.inode_bh = di_bh; 1135 xis.inode_bh = xbs.inode_bh = di_bh;
863 di = (struct ocfs2_dinode *)di_bh->b_data; 1136 di = (struct ocfs2_dinode *)di_bh->b_data;
864 1137
@@ -869,6 +1142,32 @@ static int ocfs2_xattr_get(struct inode *inode,
869 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1142 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
870 buffer_size, &xbs); 1143 buffer_size, &xbs);
871 up_read(&oi->ip_xattr_sem); 1144 up_read(&oi->ip_xattr_sem);
1145
1146 return ret;
1147}
1148
1149/* ocfs2_xattr_get()
1150 *
1151 * Copy an extended attribute into the buffer provided.
1152 * Buffer is NULL to compute the size of buffer required.
1153 */
1154static int ocfs2_xattr_get(struct inode *inode,
1155 int name_index,
1156 const char *name,
1157 void *buffer,
1158 size_t buffer_size)
1159{
1160 int ret;
1161 struct buffer_head *di_bh = NULL;
1162
1163 ret = ocfs2_inode_lock(inode, &di_bh, 0);
1164 if (ret < 0) {
1165 mlog_errno(ret);
1166 return ret;
1167 }
1168 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1169 name, buffer, buffer_size);
1170
872 ocfs2_inode_unlock(inode, 0); 1171 ocfs2_inode_unlock(inode, 0);
873 1172
874 brelse(di_bh); 1173 brelse(di_bh);
@@ -877,44 +1176,36 @@ static int ocfs2_xattr_get(struct inode *inode,
877} 1176}
878 1177
879static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1178static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1179 handle_t *handle,
880 struct ocfs2_xattr_value_root *xv, 1180 struct ocfs2_xattr_value_root *xv,
881 const void *value, 1181 const void *value,
882 int value_len) 1182 int value_len)
883{ 1183{
884 int ret = 0, i, cp_len, credits; 1184 int ret = 0, i, cp_len;
885 u16 blocksize = inode->i_sb->s_blocksize; 1185 u16 blocksize = inode->i_sb->s_blocksize;
886 u32 p_cluster, num_clusters; 1186 u32 p_cluster, num_clusters;
887 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1187 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
888 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1188 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
889 u64 blkno; 1189 u64 blkno;
890 struct buffer_head *bh = NULL; 1190 struct buffer_head *bh = NULL;
891 handle_t *handle;
892 1191
893 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1192 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
894 1193
895 credits = clusters * bpc;
896 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
897 if (IS_ERR(handle)) {
898 ret = PTR_ERR(handle);
899 mlog_errno(ret);
900 goto out;
901 }
902
903 while (cpos < clusters) { 1194 while (cpos < clusters) {
904 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1195 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
905 &num_clusters, &xv->xr_list); 1196 &num_clusters, &xv->xr_list);
906 if (ret) { 1197 if (ret) {
907 mlog_errno(ret); 1198 mlog_errno(ret);
908 goto out_commit; 1199 goto out;
909 } 1200 }
910 1201
911 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1202 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
912 1203
913 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1204 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
914 ret = ocfs2_read_block(inode, blkno, &bh); 1205 ret = ocfs2_read_block(inode, blkno, &bh, NULL);
915 if (ret) { 1206 if (ret) {
916 mlog_errno(ret); 1207 mlog_errno(ret);
917 goto out_commit; 1208 goto out;
918 } 1209 }
919 1210
920 ret = ocfs2_journal_access(handle, 1211 ret = ocfs2_journal_access(handle,
@@ -923,7 +1214,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
923 OCFS2_JOURNAL_ACCESS_WRITE); 1214 OCFS2_JOURNAL_ACCESS_WRITE);
924 if (ret < 0) { 1215 if (ret < 0) {
925 mlog_errno(ret); 1216 mlog_errno(ret);
926 goto out_commit; 1217 goto out;
927 } 1218 }
928 1219
929 cp_len = value_len > blocksize ? blocksize : value_len; 1220 cp_len = value_len > blocksize ? blocksize : value_len;
@@ -937,7 +1228,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
937 ret = ocfs2_journal_dirty(handle, bh); 1228 ret = ocfs2_journal_dirty(handle, bh);
938 if (ret < 0) { 1229 if (ret < 0) {
939 mlog_errno(ret); 1230 mlog_errno(ret);
940 goto out_commit; 1231 goto out;
941 } 1232 }
942 brelse(bh); 1233 brelse(bh);
943 bh = NULL; 1234 bh = NULL;
@@ -951,8 +1242,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
951 } 1242 }
952 cpos += num_clusters; 1243 cpos += num_clusters;
953 } 1244 }
954out_commit:
955 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
956out: 1245out:
957 brelse(bh); 1246 brelse(bh);
958 1247
@@ -960,28 +1249,22 @@ out:
960} 1249}
961 1250
962static int ocfs2_xattr_cleanup(struct inode *inode, 1251static int ocfs2_xattr_cleanup(struct inode *inode,
1252 handle_t *handle,
963 struct ocfs2_xattr_info *xi, 1253 struct ocfs2_xattr_info *xi,
964 struct ocfs2_xattr_search *xs, 1254 struct ocfs2_xattr_search *xs,
1255 struct ocfs2_xattr_value_buf *vb,
965 size_t offs) 1256 size_t offs)
966{ 1257{
967 handle_t *handle = NULL;
968 int ret = 0; 1258 int ret = 0;
969 size_t name_len = strlen(xi->name); 1259 size_t name_len = strlen(xi->name);
970 void *val = xs->base + offs; 1260 void *val = xs->base + offs;
971 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1261 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
972 1262
973 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1263 ret = vb->vb_access(handle, inode, vb->vb_bh,
974 OCFS2_XATTR_BLOCK_UPDATE_CREDITS); 1264 OCFS2_JOURNAL_ACCESS_WRITE);
975 if (IS_ERR(handle)) {
976 ret = PTR_ERR(handle);
977 mlog_errno(ret);
978 goto out;
979 }
980 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
981 OCFS2_JOURNAL_ACCESS_WRITE);
982 if (ret) { 1265 if (ret) {
983 mlog_errno(ret); 1266 mlog_errno(ret);
984 goto out_commit; 1267 goto out;
985 } 1268 }
986 /* Decrease xattr count */ 1269 /* Decrease xattr count */
987 le16_add_cpu(&xs->header->xh_count, -1); 1270 le16_add_cpu(&xs->header->xh_count, -1);
@@ -989,35 +1272,27 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
989 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); 1272 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
990 memset(val, 0, size); 1273 memset(val, 0, size);
991 1274
992 ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1275 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
993 if (ret < 0) 1276 if (ret < 0)
994 mlog_errno(ret); 1277 mlog_errno(ret);
995out_commit:
996 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
997out: 1278out:
998 return ret; 1279 return ret;
999} 1280}
1000 1281
1001static int ocfs2_xattr_update_entry(struct inode *inode, 1282static int ocfs2_xattr_update_entry(struct inode *inode,
1283 handle_t *handle,
1002 struct ocfs2_xattr_info *xi, 1284 struct ocfs2_xattr_info *xi,
1003 struct ocfs2_xattr_search *xs, 1285 struct ocfs2_xattr_search *xs,
1286 struct ocfs2_xattr_value_buf *vb,
1004 size_t offs) 1287 size_t offs)
1005{ 1288{
1006 handle_t *handle = NULL; 1289 int ret;
1007 int ret = 0;
1008 1290
1009 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1291 ret = vb->vb_access(handle, inode, vb->vb_bh,
1010 OCFS2_XATTR_BLOCK_UPDATE_CREDITS); 1292 OCFS2_JOURNAL_ACCESS_WRITE);
1011 if (IS_ERR(handle)) {
1012 ret = PTR_ERR(handle);
1013 mlog_errno(ret);
1014 goto out;
1015 }
1016 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1017 OCFS2_JOURNAL_ACCESS_WRITE);
1018 if (ret) { 1293 if (ret) {
1019 mlog_errno(ret); 1294 mlog_errno(ret);
1020 goto out_commit; 1295 goto out;
1021 } 1296 }
1022 1297
1023 xs->here->xe_name_offset = cpu_to_le16(offs); 1298 xs->here->xe_name_offset = cpu_to_le16(offs);
@@ -1028,11 +1303,9 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
1028 ocfs2_xattr_set_local(xs->here, 0); 1303 ocfs2_xattr_set_local(xs->here, 0);
1029 ocfs2_xattr_hash_entry(inode, xs->header, xs->here); 1304 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1030 1305
1031 ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1306 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1032 if (ret < 0) 1307 if (ret < 0)
1033 mlog_errno(ret); 1308 mlog_errno(ret);
1034out_commit:
1035 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1036out: 1309out:
1037 return ret; 1310 return ret;
1038} 1311}
@@ -1045,6 +1318,8 @@ out:
1045static int ocfs2_xattr_set_value_outside(struct inode *inode, 1318static int ocfs2_xattr_set_value_outside(struct inode *inode,
1046 struct ocfs2_xattr_info *xi, 1319 struct ocfs2_xattr_info *xi,
1047 struct ocfs2_xattr_search *xs, 1320 struct ocfs2_xattr_search *xs,
1321 struct ocfs2_xattr_set_ctxt *ctxt,
1322 struct ocfs2_xattr_value_buf *vb,
1048 size_t offs) 1323 size_t offs)
1049{ 1324{
1050 size_t name_len = strlen(xi->name); 1325 size_t name_len = strlen(xi->name);
@@ -1062,20 +1337,20 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
1062 xv->xr_list.l_tree_depth = 0; 1337 xv->xr_list.l_tree_depth = 0;
1063 xv->xr_list.l_count = cpu_to_le16(1); 1338 xv->xr_list.l_count = cpu_to_le16(1);
1064 xv->xr_list.l_next_free_rec = 0; 1339 xv->xr_list.l_next_free_rec = 0;
1340 vb->vb_xv = xv;
1065 1341
1066 ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, 1342 ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1067 xi->value_len);
1068 if (ret < 0) { 1343 if (ret < 0) {
1069 mlog_errno(ret); 1344 mlog_errno(ret);
1070 return ret; 1345 return ret;
1071 } 1346 }
1072 ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value, 1347 ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1073 xi->value_len);
1074 if (ret < 0) { 1348 if (ret < 0) {
1075 mlog_errno(ret); 1349 mlog_errno(ret);
1076 return ret; 1350 return ret;
1077 } 1351 }
1078 ret = ocfs2_xattr_update_entry(inode, xi, xs, offs); 1352 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
1353 xi->value, xi->value_len);
1079 if (ret < 0) 1354 if (ret < 0)
1080 mlog_errno(ret); 1355 mlog_errno(ret);
1081 1356
@@ -1195,6 +1470,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode,
1195static int ocfs2_xattr_set_entry(struct inode *inode, 1470static int ocfs2_xattr_set_entry(struct inode *inode,
1196 struct ocfs2_xattr_info *xi, 1471 struct ocfs2_xattr_info *xi,
1197 struct ocfs2_xattr_search *xs, 1472 struct ocfs2_xattr_search *xs,
1473 struct ocfs2_xattr_set_ctxt *ctxt,
1198 int flag) 1474 int flag)
1199{ 1475{
1200 struct ocfs2_xattr_entry *last; 1476 struct ocfs2_xattr_entry *last;
@@ -1202,7 +1478,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1202 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1478 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1203 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); 1479 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1204 size_t size_l = 0; 1480 size_t size_l = 0;
1205 handle_t *handle = NULL; 1481 handle_t *handle = ctxt->handle;
1206 int free, i, ret; 1482 int free, i, ret;
1207 struct ocfs2_xattr_info xi_l = { 1483 struct ocfs2_xattr_info xi_l = {
1208 .name_index = xi->name_index, 1484 .name_index = xi->name_index,
@@ -1210,6 +1486,16 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1210 .value = xi->value, 1486 .value = xi->value,
1211 .value_len = xi->value_len, 1487 .value_len = xi->value_len,
1212 }; 1488 };
1489 struct ocfs2_xattr_value_buf vb = {
1490 .vb_bh = xs->xattr_bh,
1491 .vb_access = ocfs2_journal_access_di,
1492 };
1493
1494 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1495 BUG_ON(xs->xattr_bh == xs->inode_bh);
1496 vb.vb_access = ocfs2_journal_access_xb;
1497 } else
1498 BUG_ON(xs->xattr_bh != xs->inode_bh);
1213 1499
1214 /* Compute min_offs, last and free space. */ 1500 /* Compute min_offs, last and free space. */
1215 last = xs->header->xh_entries; 1501 last = xs->header->xh_entries;
@@ -1265,15 +1551,14 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1265 if (ocfs2_xattr_is_local(xs->here) && size == size_l) { 1551 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1266 /* Replace existing local xattr with tree root */ 1552 /* Replace existing local xattr with tree root */
1267 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, 1553 ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1268 offs); 1554 ctxt, &vb, offs);
1269 if (ret < 0) 1555 if (ret < 0)
1270 mlog_errno(ret); 1556 mlog_errno(ret);
1271 goto out; 1557 goto out;
1272 } else if (!ocfs2_xattr_is_local(xs->here)) { 1558 } else if (!ocfs2_xattr_is_local(xs->here)) {
1273 /* For existing xattr which has value outside */ 1559 /* For existing xattr which has value outside */
1274 struct ocfs2_xattr_value_root *xv = NULL; 1560 vb.vb_xv = (struct ocfs2_xattr_value_root *)
1275 xv = (struct ocfs2_xattr_value_root *)(val + 1561 (val + OCFS2_XATTR_SIZE(name_len));
1276 OCFS2_XATTR_SIZE(name_len));
1277 1562
1278 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1563 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1279 /* 1564 /*
@@ -1282,27 +1567,30 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1282 * then set new value with set_value_outside(). 1567 * then set new value with set_value_outside().
1283 */ 1568 */
1284 ret = ocfs2_xattr_value_truncate(inode, 1569 ret = ocfs2_xattr_value_truncate(inode,
1285 xs->xattr_bh, 1570 &vb,
1286 xv, 1571 xi->value_len,
1287 xi->value_len); 1572 ctxt);
1288 if (ret < 0) { 1573 if (ret < 0) {
1289 mlog_errno(ret); 1574 mlog_errno(ret);
1290 goto out; 1575 goto out;
1291 } 1576 }
1292 1577
1293 ret = __ocfs2_xattr_set_value_outside(inode, 1578 ret = ocfs2_xattr_update_entry(inode,
1294 xv, 1579 handle,
1295 xi->value, 1580 xi,
1296 xi->value_len); 1581 xs,
1582 &vb,
1583 offs);
1297 if (ret < 0) { 1584 if (ret < 0) {
1298 mlog_errno(ret); 1585 mlog_errno(ret);
1299 goto out; 1586 goto out;
1300 } 1587 }
1301 1588
1302 ret = ocfs2_xattr_update_entry(inode, 1589 ret = __ocfs2_xattr_set_value_outside(inode,
1303 xi, 1590 handle,
1304 xs, 1591 vb.vb_xv,
1305 offs); 1592 xi->value,
1593 xi->value_len);
1306 if (ret < 0) 1594 if (ret < 0)
1307 mlog_errno(ret); 1595 mlog_errno(ret);
1308 goto out; 1596 goto out;
@@ -1312,44 +1600,28 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1312 * just trucate old value to zero. 1600 * just trucate old value to zero.
1313 */ 1601 */
1314 ret = ocfs2_xattr_value_truncate(inode, 1602 ret = ocfs2_xattr_value_truncate(inode,
1315 xs->xattr_bh, 1603 &vb,
1316 xv, 1604 0,
1317 0); 1605 ctxt);
1318 if (ret < 0) 1606 if (ret < 0)
1319 mlog_errno(ret); 1607 mlog_errno(ret);
1320 } 1608 }
1321 } 1609 }
1322 } 1610 }
1323 1611
1324 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1612 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
1325 OCFS2_INODE_UPDATE_CREDITS); 1613 OCFS2_JOURNAL_ACCESS_WRITE);
1326 if (IS_ERR(handle)) {
1327 ret = PTR_ERR(handle);
1328 mlog_errno(ret);
1329 goto out;
1330 }
1331
1332 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1333 OCFS2_JOURNAL_ACCESS_WRITE);
1334 if (ret) { 1614 if (ret) {
1335 mlog_errno(ret); 1615 mlog_errno(ret);
1336 goto out_commit; 1616 goto out;
1337 } 1617 }
1338 1618
1339 if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1619 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1340 /* set extended attribute in external block. */ 1620 ret = vb.vb_access(handle, inode, vb.vb_bh,
1341 ret = ocfs2_extend_trans(handle, 1621 OCFS2_JOURNAL_ACCESS_WRITE);
1342 OCFS2_INODE_UPDATE_CREDITS +
1343 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1344 if (ret) {
1345 mlog_errno(ret);
1346 goto out_commit;
1347 }
1348 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1349 OCFS2_JOURNAL_ACCESS_WRITE);
1350 if (ret) { 1622 if (ret) {
1351 mlog_errno(ret); 1623 mlog_errno(ret);
1352 goto out_commit; 1624 goto out;
1353 } 1625 }
1354 } 1626 }
1355 1627
@@ -1363,7 +1635,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1363 ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1635 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1364 if (ret < 0) { 1636 if (ret < 0) {
1365 mlog_errno(ret); 1637 mlog_errno(ret);
1366 goto out_commit; 1638 goto out;
1367 } 1639 }
1368 } 1640 }
1369 1641
@@ -1391,25 +1663,19 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1391 oi->ip_dyn_features |= flag; 1663 oi->ip_dyn_features |= flag;
1392 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 1664 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1393 spin_unlock(&oi->ip_lock); 1665 spin_unlock(&oi->ip_lock);
1394 /* Update inode ctime */
1395 inode->i_ctime = CURRENT_TIME;
1396 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1397 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1398 1666
1399 ret = ocfs2_journal_dirty(handle, xs->inode_bh); 1667 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1400 if (ret < 0) 1668 if (ret < 0)
1401 mlog_errno(ret); 1669 mlog_errno(ret);
1402 1670
1403out_commit:
1404 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1405
1406 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1671 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1407 /* 1672 /*
1408 * Set value outside in B tree. 1673 * Set value outside in B tree.
1409 * This is the second step for value size > INLINE_SIZE. 1674 * This is the second step for value size > INLINE_SIZE.
1410 */ 1675 */
1411 size_t offs = le16_to_cpu(xs->here->xe_name_offset); 1676 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1412 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs); 1677 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1678 &vb, offs);
1413 if (ret < 0) { 1679 if (ret < 0) {
1414 int ret2; 1680 int ret2;
1415 1681
@@ -1418,41 +1684,56 @@ out_commit:
1418 * If set value outside failed, we have to clean 1684 * If set value outside failed, we have to clean
1419 * the junk tree root we have already set in local. 1685 * the junk tree root we have already set in local.
1420 */ 1686 */
1421 ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs); 1687 ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1688 xi, xs, &vb, offs);
1422 if (ret2 < 0) 1689 if (ret2 < 0)
1423 mlog_errno(ret2); 1690 mlog_errno(ret2);
1424 } 1691 }
1425 } 1692 }
1426out: 1693out:
1427 return ret; 1694 return ret;
1428
1429} 1695}
1430 1696
1431static int ocfs2_remove_value_outside(struct inode*inode, 1697static int ocfs2_remove_value_outside(struct inode*inode,
1432 struct buffer_head *bh, 1698 struct ocfs2_xattr_value_buf *vb,
1433 struct ocfs2_xattr_header *header) 1699 struct ocfs2_xattr_header *header)
1434{ 1700{
1435 int ret = 0, i; 1701 int ret = 0, i;
1702 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1703 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1704
1705 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1706
1707 ctxt.handle = ocfs2_start_trans(osb,
1708 ocfs2_remove_extent_credits(osb->sb));
1709 if (IS_ERR(ctxt.handle)) {
1710 ret = PTR_ERR(ctxt.handle);
1711 mlog_errno(ret);
1712 goto out;
1713 }
1436 1714
1437 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 1715 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1438 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 1716 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1439 1717
1440 if (!ocfs2_xattr_is_local(entry)) { 1718 if (!ocfs2_xattr_is_local(entry)) {
1441 struct ocfs2_xattr_value_root *xv;
1442 void *val; 1719 void *val;
1443 1720
1444 val = (void *)header + 1721 val = (void *)header +
1445 le16_to_cpu(entry->xe_name_offset); 1722 le16_to_cpu(entry->xe_name_offset);
1446 xv = (struct ocfs2_xattr_value_root *) 1723 vb->vb_xv = (struct ocfs2_xattr_value_root *)
1447 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 1724 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1448 ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); 1725 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1449 if (ret < 0) { 1726 if (ret < 0) {
1450 mlog_errno(ret); 1727 mlog_errno(ret);
1451 return ret; 1728 break;
1452 } 1729 }
1453 } 1730 }
1454 } 1731 }
1455 1732
1733 ocfs2_commit_trans(osb, ctxt.handle);
1734 ocfs2_schedule_truncate_log_flush(osb, 1);
1735 ocfs2_run_deallocs(osb, &ctxt.dealloc);
1736out:
1456 return ret; 1737 return ret;
1457} 1738}
1458 1739
@@ -1463,12 +1744,16 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
1463 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1744 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1464 struct ocfs2_xattr_header *header; 1745 struct ocfs2_xattr_header *header;
1465 int ret; 1746 int ret;
1747 struct ocfs2_xattr_value_buf vb = {
1748 .vb_bh = di_bh,
1749 .vb_access = ocfs2_journal_access_di,
1750 };
1466 1751
1467 header = (struct ocfs2_xattr_header *) 1752 header = (struct ocfs2_xattr_header *)
1468 ((void *)di + inode->i_sb->s_blocksize - 1753 ((void *)di + inode->i_sb->s_blocksize -
1469 le16_to_cpu(di->i_xattr_inline_size)); 1754 le16_to_cpu(di->i_xattr_inline_size));
1470 1755
1471 ret = ocfs2_remove_value_outside(inode, di_bh, header); 1756 ret = ocfs2_remove_value_outside(inode, &vb, header);
1472 1757
1473 return ret; 1758 return ret;
1474} 1759}
@@ -1478,11 +1763,15 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
1478{ 1763{
1479 struct ocfs2_xattr_block *xb; 1764 struct ocfs2_xattr_block *xb;
1480 int ret = 0; 1765 int ret = 0;
1766 struct ocfs2_xattr_value_buf vb = {
1767 .vb_bh = blk_bh,
1768 .vb_access = ocfs2_journal_access_xb,
1769 };
1481 1770
1482 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1771 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1483 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1772 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1484 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 1773 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1485 ret = ocfs2_remove_value_outside(inode, blk_bh, header); 1774 ret = ocfs2_remove_value_outside(inode, &vb, header);
1486 } else 1775 } else
1487 ret = ocfs2_delete_xattr_index_block(inode, blk_bh); 1776 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1488 1777
@@ -1502,24 +1791,19 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1502 u64 blk, bg_blkno; 1791 u64 blk, bg_blkno;
1503 u16 bit; 1792 u16 bit;
1504 1793
1505 ret = ocfs2_read_block(inode, block, &blk_bh); 1794 ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1506 if (ret < 0) { 1795 if (ret < 0) {
1507 mlog_errno(ret); 1796 mlog_errno(ret);
1508 goto out; 1797 goto out;
1509 } 1798 }
1510 1799
1511 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1512 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1513 ret = -EIO;
1514 goto out;
1515 }
1516
1517 ret = ocfs2_xattr_block_remove(inode, blk_bh); 1800 ret = ocfs2_xattr_block_remove(inode, blk_bh);
1518 if (ret < 0) { 1801 if (ret < 0) {
1519 mlog_errno(ret); 1802 mlog_errno(ret);
1520 goto out; 1803 goto out;
1521 } 1804 }
1522 1805
1806 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1523 blk = le64_to_cpu(xb->xb_blkno); 1807 blk = le64_to_cpu(xb->xb_blkno);
1524 bit = le16_to_cpu(xb->xb_suballoc_bit); 1808 bit = le16_to_cpu(xb->xb_suballoc_bit);
1525 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1809 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1606,8 +1890,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1606 mlog_errno(ret); 1890 mlog_errno(ret);
1607 goto out; 1891 goto out;
1608 } 1892 }
1609 ret = ocfs2_journal_access(handle, inode, di_bh, 1893 ret = ocfs2_journal_access_di(handle, inode, di_bh,
1610 OCFS2_JOURNAL_ACCESS_WRITE); 1894 OCFS2_JOURNAL_ACCESS_WRITE);
1611 if (ret) { 1895 if (ret) {
1612 mlog_errno(ret); 1896 mlog_errno(ret);
1613 goto out_commit; 1897 goto out_commit;
@@ -1714,7 +1998,8 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
1714 */ 1998 */
1715static int ocfs2_xattr_ibody_set(struct inode *inode, 1999static int ocfs2_xattr_ibody_set(struct inode *inode,
1716 struct ocfs2_xattr_info *xi, 2000 struct ocfs2_xattr_info *xi,
1717 struct ocfs2_xattr_search *xs) 2001 struct ocfs2_xattr_search *xs,
2002 struct ocfs2_xattr_set_ctxt *ctxt)
1718{ 2003{
1719 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2004 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1720 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2005 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
@@ -1731,7 +2016,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
1731 } 2016 }
1732 } 2017 }
1733 2018
1734 ret = ocfs2_xattr_set_entry(inode, xi, xs, 2019 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1735 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); 2020 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1736out: 2021out:
1737 up_write(&oi->ip_alloc_sem); 2022 up_write(&oi->ip_alloc_sem);
@@ -1758,19 +2043,15 @@ static int ocfs2_xattr_block_find(struct inode *inode,
1758 if (!di->i_xattr_loc) 2043 if (!di->i_xattr_loc)
1759 return ret; 2044 return ret;
1760 2045
1761 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); 2046 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2047 &blk_bh);
1762 if (ret < 0) { 2048 if (ret < 0) {
1763 mlog_errno(ret); 2049 mlog_errno(ret);
1764 return ret; 2050 return ret;
1765 } 2051 }
1766 2052
1767 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1768 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1769 ret = -EIO;
1770 goto cleanup;
1771 }
1772
1773 xs->xattr_bh = blk_bh; 2053 xs->xattr_bh = blk_bh;
2054 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1774 2055
1775 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2056 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1776 xs->header = &xb->xb_attrs.xb_header; 2057 xs->header = &xb->xb_attrs.xb_header;
@@ -1804,13 +2085,13 @@ cleanup:
1804 */ 2085 */
1805static int ocfs2_xattr_block_set(struct inode *inode, 2086static int ocfs2_xattr_block_set(struct inode *inode,
1806 struct ocfs2_xattr_info *xi, 2087 struct ocfs2_xattr_info *xi,
1807 struct ocfs2_xattr_search *xs) 2088 struct ocfs2_xattr_search *xs,
2089 struct ocfs2_xattr_set_ctxt *ctxt)
1808{ 2090{
1809 struct buffer_head *new_bh = NULL; 2091 struct buffer_head *new_bh = NULL;
1810 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2092 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1811 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2093 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1812 struct ocfs2_alloc_context *meta_ac = NULL; 2094 handle_t *handle = ctxt->handle;
1813 handle_t *handle = NULL;
1814 struct ocfs2_xattr_block *xblk = NULL; 2095 struct ocfs2_xattr_block *xblk = NULL;
1815 u16 suballoc_bit_start; 2096 u16 suballoc_bit_start;
1816 u32 num_got; 2097 u32 num_got;
@@ -1818,45 +2099,29 @@ static int ocfs2_xattr_block_set(struct inode *inode,
1818 int ret; 2099 int ret;
1819 2100
1820 if (!xs->xattr_bh) { 2101 if (!xs->xattr_bh) {
1821 /* 2102 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
1822 * Alloc one external block for extended attribute 2103 OCFS2_JOURNAL_ACCESS_CREATE);
1823 * outside of inode.
1824 */
1825 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1826 if (ret < 0) { 2104 if (ret < 0) {
1827 mlog_errno(ret); 2105 mlog_errno(ret);
1828 goto out; 2106 goto end;
1829 }
1830 handle = ocfs2_start_trans(osb,
1831 OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1832 if (IS_ERR(handle)) {
1833 ret = PTR_ERR(handle);
1834 mlog_errno(ret);
1835 goto out;
1836 }
1837 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1838 OCFS2_JOURNAL_ACCESS_CREATE);
1839 if (ret < 0) {
1840 mlog_errno(ret);
1841 goto out_commit;
1842 } 2107 }
1843 2108
1844 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 2109 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
1845 &suballoc_bit_start, &num_got, 2110 &suballoc_bit_start, &num_got,
1846 &first_blkno); 2111 &first_blkno);
1847 if (ret < 0) { 2112 if (ret < 0) {
1848 mlog_errno(ret); 2113 mlog_errno(ret);
1849 goto out_commit; 2114 goto end;
1850 } 2115 }
1851 2116
1852 new_bh = sb_getblk(inode->i_sb, first_blkno); 2117 new_bh = sb_getblk(inode->i_sb, first_blkno);
1853 ocfs2_set_new_buffer_uptodate(inode, new_bh); 2118 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1854 2119
1855 ret = ocfs2_journal_access(handle, inode, new_bh, 2120 ret = ocfs2_journal_access_xb(handle, inode, new_bh,
1856 OCFS2_JOURNAL_ACCESS_CREATE); 2121 OCFS2_JOURNAL_ACCESS_CREATE);
1857 if (ret < 0) { 2122 if (ret < 0) {
1858 mlog_errno(ret); 2123 mlog_errno(ret);
1859 goto out_commit; 2124 goto end;
1860 } 2125 }
1861 2126
1862 /* Initialize ocfs2_xattr_block */ 2127 /* Initialize ocfs2_xattr_block */
@@ -1874,44 +2139,555 @@ static int ocfs2_xattr_block_set(struct inode *inode,
1874 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2139 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1875 xs->here = xs->header->xh_entries; 2140 xs->here = xs->header->xh_entries;
1876 2141
1877
1878 ret = ocfs2_journal_dirty(handle, new_bh); 2142 ret = ocfs2_journal_dirty(handle, new_bh);
1879 if (ret < 0) { 2143 if (ret < 0) {
1880 mlog_errno(ret); 2144 mlog_errno(ret);
1881 goto out_commit; 2145 goto end;
1882 } 2146 }
1883 di->i_xattr_loc = cpu_to_le64(first_blkno); 2147 di->i_xattr_loc = cpu_to_le64(first_blkno);
1884 ret = ocfs2_journal_dirty(handle, xs->inode_bh); 2148 ocfs2_journal_dirty(handle, xs->inode_bh);
1885 if (ret < 0)
1886 mlog_errno(ret);
1887out_commit:
1888 ocfs2_commit_trans(osb, handle);
1889out:
1890 if (meta_ac)
1891 ocfs2_free_alloc_context(meta_ac);
1892 if (ret < 0)
1893 return ret;
1894 } else 2149 } else
1895 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2150 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1896 2151
1897 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2152 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1898 /* Set extended attribute into external block */ 2153 /* Set extended attribute into external block */
1899 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); 2154 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2155 OCFS2_HAS_XATTR_FL);
1900 if (!ret || ret != -ENOSPC) 2156 if (!ret || ret != -ENOSPC)
1901 goto end; 2157 goto end;
1902 2158
1903 ret = ocfs2_xattr_create_index_block(inode, xs); 2159 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
1904 if (ret) 2160 if (ret)
1905 goto end; 2161 goto end;
1906 } 2162 }
1907 2163
1908 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); 2164 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
1909 2165
1910end: 2166end:
1911 2167
1912 return ret; 2168 return ret;
1913} 2169}
1914 2170
2171/* Check whether the new xattr can be inserted into the inode. */
2172static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2173 struct ocfs2_xattr_info *xi,
2174 struct ocfs2_xattr_search *xs)
2175{
2176 u64 value_size;
2177 struct ocfs2_xattr_entry *last;
2178 int free, i;
2179 size_t min_offs = xs->end - xs->base;
2180
2181 if (!xs->header)
2182 return 0;
2183
2184 last = xs->header->xh_entries;
2185
2186 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2187 size_t offs = le16_to_cpu(last->xe_name_offset);
2188 if (offs < min_offs)
2189 min_offs = offs;
2190 last += 1;
2191 }
2192
2193 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
2194 if (free < 0)
2195 return 0;
2196
2197 BUG_ON(!xs->not_found);
2198
2199 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2200 value_size = OCFS2_XATTR_ROOT_SIZE;
2201 else
2202 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2203
2204 if (free >= sizeof(struct ocfs2_xattr_entry) +
2205 OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2206 return 1;
2207
2208 return 0;
2209}
2210
2211static int ocfs2_calc_xattr_set_need(struct inode *inode,
2212 struct ocfs2_dinode *di,
2213 struct ocfs2_xattr_info *xi,
2214 struct ocfs2_xattr_search *xis,
2215 struct ocfs2_xattr_search *xbs,
2216 int *clusters_need,
2217 int *meta_need,
2218 int *credits_need)
2219{
2220 int ret = 0, old_in_xb = 0;
2221 int clusters_add = 0, meta_add = 0, credits = 0;
2222 struct buffer_head *bh = NULL;
2223 struct ocfs2_xattr_block *xb = NULL;
2224 struct ocfs2_xattr_entry *xe = NULL;
2225 struct ocfs2_xattr_value_root *xv = NULL;
2226 char *base = NULL;
2227 int name_offset, name_len = 0;
2228 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2229 xi->value_len);
2230 u64 value_size;
2231
2232 /*
2233 * Calculate the clusters we need to write.
2234 * No matter whether we replace an old one or add a new one,
2235 * we need this for writing.
2236 */
2237 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2238 credits += new_clusters *
2239 ocfs2_clusters_to_blocks(inode->i_sb, 1);
2240
2241 if (xis->not_found && xbs->not_found) {
2242 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2243
2244 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2245 clusters_add += new_clusters;
2246 credits += ocfs2_calc_extend_credits(inode->i_sb,
2247 &def_xv.xv.xr_list,
2248 new_clusters);
2249 }
2250
2251 goto meta_guess;
2252 }
2253
2254 if (!xis->not_found) {
2255 xe = xis->here;
2256 name_offset = le16_to_cpu(xe->xe_name_offset);
2257 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2258 base = xis->base;
2259 credits += OCFS2_INODE_UPDATE_CREDITS;
2260 } else {
2261 int i, block_off = 0;
2262 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2263 xe = xbs->here;
2264 name_offset = le16_to_cpu(xe->xe_name_offset);
2265 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2266 i = xbs->here - xbs->header->xh_entries;
2267 old_in_xb = 1;
2268
2269 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2270 ret = ocfs2_xattr_bucket_get_name_value(inode,
2271 bucket_xh(xbs->bucket),
2272 i, &block_off,
2273 &name_offset);
2274 base = bucket_block(xbs->bucket, block_off);
2275 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2276 } else {
2277 base = xbs->base;
2278 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2279 }
2280 }
2281
2282 /*
2283 * delete a xattr doesn't need metadata and cluster allocation.
2284 * so just calculate the credits and return.
2285 *
2286 * The credits for removing the value tree will be extended
2287 * by ocfs2_remove_extent itself.
2288 */
2289 if (!xi->value) {
2290 if (!ocfs2_xattr_is_local(xe))
2291 credits += ocfs2_remove_extent_credits(inode->i_sb);
2292
2293 goto out;
2294 }
2295
2296 /* do cluster allocation guess first. */
2297 value_size = le64_to_cpu(xe->xe_value_size);
2298
2299 if (old_in_xb) {
2300 /*
2301 * In xattr set, we always try to set the xe in inode first,
2302 * so if it can be inserted into inode successfully, the old
2303 * one will be removed from the xattr block, and this xattr
2304 * will be inserted into inode as a new xattr in inode.
2305 */
2306 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2307 clusters_add += new_clusters;
2308 credits += ocfs2_remove_extent_credits(inode->i_sb) +
2309 OCFS2_INODE_UPDATE_CREDITS;
2310 if (!ocfs2_xattr_is_local(xe))
2311 credits += ocfs2_calc_extend_credits(
2312 inode->i_sb,
2313 &def_xv.xv.xr_list,
2314 new_clusters);
2315 goto out;
2316 }
2317 }
2318
2319 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2320 /* the new values will be stored outside. */
2321 u32 old_clusters = 0;
2322
2323 if (!ocfs2_xattr_is_local(xe)) {
2324 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2325 value_size);
2326 xv = (struct ocfs2_xattr_value_root *)
2327 (base + name_offset + name_len);
2328 value_size = OCFS2_XATTR_ROOT_SIZE;
2329 } else
2330 xv = &def_xv.xv;
2331
2332 if (old_clusters >= new_clusters) {
2333 credits += ocfs2_remove_extent_credits(inode->i_sb);
2334 goto out;
2335 } else {
2336 meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2337 clusters_add += new_clusters - old_clusters;
2338 credits += ocfs2_calc_extend_credits(inode->i_sb,
2339 &xv->xr_list,
2340 new_clusters -
2341 old_clusters);
2342 if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2343 goto out;
2344 }
2345 } else {
2346 /*
2347 * Now the new value will be stored inside. So if the new
2348 * value is smaller than the size of value root or the old
2349 * value, we don't need any allocation, otherwise we have
2350 * to guess metadata allocation.
2351 */
2352 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2353 (!ocfs2_xattr_is_local(xe) &&
2354 OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2355 goto out;
2356 }
2357
2358meta_guess:
2359 /* calculate metadata allocation. */
2360 if (di->i_xattr_loc) {
2361 if (!xbs->xattr_bh) {
2362 ret = ocfs2_read_xattr_block(inode,
2363 le64_to_cpu(di->i_xattr_loc),
2364 &bh);
2365 if (ret) {
2366 mlog_errno(ret);
2367 goto out;
2368 }
2369
2370 xb = (struct ocfs2_xattr_block *)bh->b_data;
2371 } else
2372 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2373
2374 /*
2375 * If there is already an xattr tree, good, we can calculate
2376 * like other b-trees. Otherwise we may have the chance of
2377 * create a tree, the credit calculation is borrowed from
2378 * ocfs2_calc_extend_credits with root_el = NULL. And the
2379 * new tree will be cluster based, so no meta is needed.
2380 */
2381 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2382 struct ocfs2_extent_list *el =
2383 &xb->xb_attrs.xb_root.xt_list;
2384 meta_add += ocfs2_extend_meta_needed(el);
2385 credits += ocfs2_calc_extend_credits(inode->i_sb,
2386 el, 1);
2387 } else
2388 credits += OCFS2_SUBALLOC_ALLOC + 1;
2389
2390 /*
2391 * This cluster will be used either for new bucket or for
2392 * new xattr block.
2393 * If the cluster size is the same as the bucket size, one
2394 * more is needed since we may need to extend the bucket
2395 * also.
2396 */
2397 clusters_add += 1;
2398 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2399 if (OCFS2_XATTR_BUCKET_SIZE ==
2400 OCFS2_SB(inode->i_sb)->s_clustersize) {
2401 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2402 clusters_add += 1;
2403 }
2404 } else {
2405 meta_add += 1;
2406 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2407 }
2408out:
2409 if (clusters_need)
2410 *clusters_need = clusters_add;
2411 if (meta_need)
2412 *meta_need = meta_add;
2413 if (credits_need)
2414 *credits_need = credits;
2415 brelse(bh);
2416 return ret;
2417}
2418
2419static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2420 struct ocfs2_dinode *di,
2421 struct ocfs2_xattr_info *xi,
2422 struct ocfs2_xattr_search *xis,
2423 struct ocfs2_xattr_search *xbs,
2424 struct ocfs2_xattr_set_ctxt *ctxt,
2425 int *credits)
2426{
2427 int clusters_add, meta_add, ret;
2428 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2429
2430 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2431
2432 ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2433
2434 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2435 &clusters_add, &meta_add, credits);
2436 if (ret) {
2437 mlog_errno(ret);
2438 return ret;
2439 }
2440
2441 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2442 "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2443
2444 if (meta_add) {
2445 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2446 &ctxt->meta_ac);
2447 if (ret) {
2448 mlog_errno(ret);
2449 goto out;
2450 }
2451 }
2452
2453 if (clusters_add) {
2454 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2455 if (ret)
2456 mlog_errno(ret);
2457 }
2458out:
2459 if (ret) {
2460 if (ctxt->meta_ac) {
2461 ocfs2_free_alloc_context(ctxt->meta_ac);
2462 ctxt->meta_ac = NULL;
2463 }
2464
2465 /*
2466 * We cannot have an error and a non null ctxt->data_ac.
2467 */
2468 }
2469
2470 return ret;
2471}
2472
2473static int __ocfs2_xattr_set_handle(struct inode *inode,
2474 struct ocfs2_dinode *di,
2475 struct ocfs2_xattr_info *xi,
2476 struct ocfs2_xattr_search *xis,
2477 struct ocfs2_xattr_search *xbs,
2478 struct ocfs2_xattr_set_ctxt *ctxt)
2479{
2480 int ret = 0, credits, old_found;
2481
2482 if (!xi->value) {
2483 /* Remove existing extended attribute */
2484 if (!xis->not_found)
2485 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2486 else if (!xbs->not_found)
2487 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2488 } else {
2489 /* We always try to set extended attribute into inode first*/
2490 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2491 if (!ret && !xbs->not_found) {
2492 /*
2493 * If succeed and that extended attribute existing in
2494 * external block, then we will remove it.
2495 */
2496 xi->value = NULL;
2497 xi->value_len = 0;
2498
2499 old_found = xis->not_found;
2500 xis->not_found = -ENODATA;
2501 ret = ocfs2_calc_xattr_set_need(inode,
2502 di,
2503 xi,
2504 xis,
2505 xbs,
2506 NULL,
2507 NULL,
2508 &credits);
2509 xis->not_found = old_found;
2510 if (ret) {
2511 mlog_errno(ret);
2512 goto out;
2513 }
2514
2515 ret = ocfs2_extend_trans(ctxt->handle, credits +
2516 ctxt->handle->h_buffer_credits);
2517 if (ret) {
2518 mlog_errno(ret);
2519 goto out;
2520 }
2521 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2522 } else if (ret == -ENOSPC) {
2523 if (di->i_xattr_loc && !xbs->xattr_bh) {
2524 ret = ocfs2_xattr_block_find(inode,
2525 xi->name_index,
2526 xi->name, xbs);
2527 if (ret)
2528 goto out;
2529
2530 old_found = xis->not_found;
2531 xis->not_found = -ENODATA;
2532 ret = ocfs2_calc_xattr_set_need(inode,
2533 di,
2534 xi,
2535 xis,
2536 xbs,
2537 NULL,
2538 NULL,
2539 &credits);
2540 xis->not_found = old_found;
2541 if (ret) {
2542 mlog_errno(ret);
2543 goto out;
2544 }
2545
2546 ret = ocfs2_extend_trans(ctxt->handle, credits +
2547 ctxt->handle->h_buffer_credits);
2548 if (ret) {
2549 mlog_errno(ret);
2550 goto out;
2551 }
2552 }
2553 /*
2554 * If no space in inode, we will set extended attribute
2555 * into external block.
2556 */
2557 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2558 if (ret)
2559 goto out;
2560 if (!xis->not_found) {
2561 /*
2562 * If succeed and that extended attribute
2563 * existing in inode, we will remove it.
2564 */
2565 xi->value = NULL;
2566 xi->value_len = 0;
2567 xbs->not_found = -ENODATA;
2568 ret = ocfs2_calc_xattr_set_need(inode,
2569 di,
2570 xi,
2571 xis,
2572 xbs,
2573 NULL,
2574 NULL,
2575 &credits);
2576 if (ret) {
2577 mlog_errno(ret);
2578 goto out;
2579 }
2580
2581 ret = ocfs2_extend_trans(ctxt->handle, credits +
2582 ctxt->handle->h_buffer_credits);
2583 if (ret) {
2584 mlog_errno(ret);
2585 goto out;
2586 }
2587 ret = ocfs2_xattr_ibody_set(inode, xi,
2588 xis, ctxt);
2589 }
2590 }
2591 }
2592
2593 if (!ret) {
2594 /* Update inode ctime. */
2595 ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh,
2596 OCFS2_JOURNAL_ACCESS_WRITE);
2597 if (ret) {
2598 mlog_errno(ret);
2599 goto out;
2600 }
2601
2602 inode->i_ctime = CURRENT_TIME;
2603 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2604 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2605 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2606 }
2607out:
2608 return ret;
2609}
2610
2611/*
2612 * This function only called duing creating inode
2613 * for init security/acl xattrs of the new inode.
2614 * All transanction credits have been reserved in mknod.
2615 */
2616int ocfs2_xattr_set_handle(handle_t *handle,
2617 struct inode *inode,
2618 struct buffer_head *di_bh,
2619 int name_index,
2620 const char *name,
2621 const void *value,
2622 size_t value_len,
2623 int flags,
2624 struct ocfs2_alloc_context *meta_ac,
2625 struct ocfs2_alloc_context *data_ac)
2626{
2627 struct ocfs2_dinode *di;
2628 int ret;
2629
2630 struct ocfs2_xattr_info xi = {
2631 .name_index = name_index,
2632 .name = name,
2633 .value = value,
2634 .value_len = value_len,
2635 };
2636
2637 struct ocfs2_xattr_search xis = {
2638 .not_found = -ENODATA,
2639 };
2640
2641 struct ocfs2_xattr_search xbs = {
2642 .not_found = -ENODATA,
2643 };
2644
2645 struct ocfs2_xattr_set_ctxt ctxt = {
2646 .handle = handle,
2647 .meta_ac = meta_ac,
2648 .data_ac = data_ac,
2649 };
2650
2651 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2652 return -EOPNOTSUPP;
2653
2654 /*
2655 * In extreme situation, may need xattr bucket when
2656 * block size is too small. And we have already reserved
2657 * the credits for bucket in mknod.
2658 */
2659 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2660 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2661 if (!xbs.bucket) {
2662 mlog_errno(-ENOMEM);
2663 return -ENOMEM;
2664 }
2665 }
2666
2667 xis.inode_bh = xbs.inode_bh = di_bh;
2668 di = (struct ocfs2_dinode *)di_bh->b_data;
2669
2670 down_write(&OCFS2_I(inode)->ip_xattr_sem);
2671
2672 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2673 if (ret)
2674 goto cleanup;
2675 if (xis.not_found) {
2676 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2677 if (ret)
2678 goto cleanup;
2679 }
2680
2681 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2682
2683cleanup:
2684 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2685 brelse(xbs.xattr_bh);
2686 ocfs2_xattr_bucket_free(xbs.bucket);
2687
2688 return ret;
2689}
2690
1915/* 2691/*
1916 * ocfs2_xattr_set() 2692 * ocfs2_xattr_set()
1917 * 2693 *
@@ -1928,8 +2704,10 @@ int ocfs2_xattr_set(struct inode *inode,
1928{ 2704{
1929 struct buffer_head *di_bh = NULL; 2705 struct buffer_head *di_bh = NULL;
1930 struct ocfs2_dinode *di; 2706 struct ocfs2_dinode *di;
1931 int ret; 2707 int ret, credits;
1932 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 2708 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2709 struct inode *tl_inode = osb->osb_tl_inode;
2710 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1933 2711
1934 struct ocfs2_xattr_info xi = { 2712 struct ocfs2_xattr_info xi = {
1935 .name_index = name_index, 2713 .name_index = name_index,
@@ -1949,10 +2727,20 @@ int ocfs2_xattr_set(struct inode *inode,
1949 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2727 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1950 return -EOPNOTSUPP; 2728 return -EOPNOTSUPP;
1951 2729
2730 /*
2731 * Only xbs will be used on indexed trees. xis doesn't need a
2732 * bucket.
2733 */
2734 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2735 if (!xbs.bucket) {
2736 mlog_errno(-ENOMEM);
2737 return -ENOMEM;
2738 }
2739
1952 ret = ocfs2_inode_lock(inode, &di_bh, 1); 2740 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1953 if (ret < 0) { 2741 if (ret < 0) {
1954 mlog_errno(ret); 2742 mlog_errno(ret);
1955 return ret; 2743 goto cleanup_nolock;
1956 } 2744 }
1957 xis.inode_bh = xbs.inode_bh = di_bh; 2745 xis.inode_bh = xbs.inode_bh = di_bh;
1958 di = (struct ocfs2_dinode *)di_bh->b_data; 2746 di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1984,55 +2772,53 @@ int ocfs2_xattr_set(struct inode *inode,
1984 goto cleanup; 2772 goto cleanup;
1985 } 2773 }
1986 2774
1987 if (!value) { 2775
1988 /* Remove existing extended attribute */ 2776 mutex_lock(&tl_inode->i_mutex);
1989 if (!xis.not_found) 2777
1990 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); 2778 if (ocfs2_truncate_log_needs_flush(osb)) {
1991 else if (!xbs.not_found) 2779 ret = __ocfs2_flush_truncate_log(osb);
1992 ret = ocfs2_xattr_block_set(inode, &xi, &xbs); 2780 if (ret < 0) {
1993 } else { 2781 mutex_unlock(&tl_inode->i_mutex);
1994 /* We always try to set extended attribute into inode first*/ 2782 mlog_errno(ret);
1995 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); 2783 goto cleanup;
1996 if (!ret && !xbs.not_found) {
1997 /*
1998 * If succeed and that extended attribute existing in
1999 * external block, then we will remove it.
2000 */
2001 xi.value = NULL;
2002 xi.value_len = 0;
2003 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2004 } else if (ret == -ENOSPC) {
2005 if (di->i_xattr_loc && !xbs.xattr_bh) {
2006 ret = ocfs2_xattr_block_find(inode, name_index,
2007 name, &xbs);
2008 if (ret)
2009 goto cleanup;
2010 }
2011 /*
2012 * If no space in inode, we will set extended attribute
2013 * into external block.
2014 */
2015 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2016 if (ret)
2017 goto cleanup;
2018 if (!xis.not_found) {
2019 /*
2020 * If succeed and that extended attribute
2021 * existing in inode, we will remove it.
2022 */
2023 xi.value = NULL;
2024 xi.value_len = 0;
2025 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2026 }
2027 } 2784 }
2028 } 2785 }
2786 mutex_unlock(&tl_inode->i_mutex);
2787
2788 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2789 &xbs, &ctxt, &credits);
2790 if (ret) {
2791 mlog_errno(ret);
2792 goto cleanup;
2793 }
2794
2795 /* we need to update inode's ctime field, so add credit for it. */
2796 credits += OCFS2_INODE_UPDATE_CREDITS;
2797 ctxt.handle = ocfs2_start_trans(osb, credits);
2798 if (IS_ERR(ctxt.handle)) {
2799 ret = PTR_ERR(ctxt.handle);
2800 mlog_errno(ret);
2801 goto cleanup;
2802 }
2803
2804 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2805
2806 ocfs2_commit_trans(osb, ctxt.handle);
2807
2808 if (ctxt.data_ac)
2809 ocfs2_free_alloc_context(ctxt.data_ac);
2810 if (ctxt.meta_ac)
2811 ocfs2_free_alloc_context(ctxt.meta_ac);
2812 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2813 ocfs2_schedule_truncate_log_flush(osb, 1);
2814 ocfs2_run_deallocs(osb, &ctxt.dealloc);
2029cleanup: 2815cleanup:
2030 up_write(&OCFS2_I(inode)->ip_xattr_sem); 2816 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2031 ocfs2_inode_unlock(inode, 1); 2817 ocfs2_inode_unlock(inode, 1);
2818cleanup_nolock:
2032 brelse(di_bh); 2819 brelse(di_bh);
2033 brelse(xbs.xattr_bh); 2820 brelse(xbs.xattr_bh);
2034 for (i = 0; i < blk_per_bucket; i++) 2821 ocfs2_xattr_bucket_free(xbs.bucket);
2035 brelse(xbs.bucket.bhs[i]);
2036 2822
2037 return ret; 2823 return ret;
2038} 2824}
@@ -2107,7 +2893,7 @@ typedef int (xattr_bucket_func)(struct inode *inode,
2107 void *para); 2893 void *para);
2108 2894
2109static int ocfs2_find_xe_in_bucket(struct inode *inode, 2895static int ocfs2_find_xe_in_bucket(struct inode *inode,
2110 struct buffer_head *header_bh, 2896 struct ocfs2_xattr_bucket *bucket,
2111 int name_index, 2897 int name_index,
2112 const char *name, 2898 const char *name,
2113 u32 name_hash, 2899 u32 name_hash,
@@ -2115,11 +2901,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
2115 int *found) 2901 int *found)
2116{ 2902{
2117 int i, ret = 0, cmp = 1, block_off, new_offset; 2903 int i, ret = 0, cmp = 1, block_off, new_offset;
2118 struct ocfs2_xattr_header *xh = 2904 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2119 (struct ocfs2_xattr_header *)header_bh->b_data;
2120 size_t name_len = strlen(name); 2905 size_t name_len = strlen(name);
2121 struct ocfs2_xattr_entry *xe = NULL; 2906 struct ocfs2_xattr_entry *xe = NULL;
2122 struct buffer_head *name_bh = NULL;
2123 char *xe_name; 2907 char *xe_name;
2124 2908
2125 /* 2909 /*
@@ -2150,19 +2934,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
2150 break; 2934 break;
2151 } 2935 }
2152 2936
2153 ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
2154 &name_bh);
2155 if (ret) {
2156 mlog_errno(ret);
2157 break;
2158 }
2159 xe_name = name_bh->b_data + new_offset;
2160 2937
2161 cmp = memcmp(name, xe_name, name_len); 2938 xe_name = bucket_block(bucket, block_off) + new_offset;
2162 brelse(name_bh); 2939 if (!memcmp(name, xe_name, name_len)) {
2163 name_bh = NULL;
2164
2165 if (cmp == 0) {
2166 *xe_index = i; 2940 *xe_index = i;
2167 *found = 1; 2941 *found = 1;
2168 ret = 0; 2942 ret = 0;
@@ -2192,39 +2966,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2192 struct ocfs2_xattr_search *xs) 2966 struct ocfs2_xattr_search *xs)
2193{ 2967{
2194 int ret, found = 0; 2968 int ret, found = 0;
2195 struct buffer_head *bh = NULL;
2196 struct buffer_head *lower_bh = NULL;
2197 struct ocfs2_xattr_header *xh = NULL; 2969 struct ocfs2_xattr_header *xh = NULL;
2198 struct ocfs2_xattr_entry *xe = NULL; 2970 struct ocfs2_xattr_entry *xe = NULL;
2199 u16 index = 0; 2971 u16 index = 0;
2200 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 2972 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2201 int low_bucket = 0, bucket, high_bucket; 2973 int low_bucket = 0, bucket, high_bucket;
2974 struct ocfs2_xattr_bucket *search;
2202 u32 last_hash; 2975 u32 last_hash;
2203 u64 blkno; 2976 u64 blkno, lower_blkno = 0;
2204 2977
2205 ret = ocfs2_read_block(inode, p_blkno, &bh); 2978 search = ocfs2_xattr_bucket_new(inode);
2979 if (!search) {
2980 ret = -ENOMEM;
2981 mlog_errno(ret);
2982 goto out;
2983 }
2984
2985 ret = ocfs2_read_xattr_bucket(search, p_blkno);
2206 if (ret) { 2986 if (ret) {
2207 mlog_errno(ret); 2987 mlog_errno(ret);
2208 goto out; 2988 goto out;
2209 } 2989 }
2210 2990
2211 xh = (struct ocfs2_xattr_header *)bh->b_data; 2991 xh = bucket_xh(search);
2212 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 2992 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2213
2214 while (low_bucket <= high_bucket) { 2993 while (low_bucket <= high_bucket) {
2215 brelse(bh); 2994 ocfs2_xattr_bucket_relse(search);
2216 bh = NULL;
2217 bucket = (low_bucket + high_bucket) / 2;
2218 2995
2996 bucket = (low_bucket + high_bucket) / 2;
2219 blkno = p_blkno + bucket * blk_per_bucket; 2997 blkno = p_blkno + bucket * blk_per_bucket;
2220 2998 ret = ocfs2_read_xattr_bucket(search, blkno);
2221 ret = ocfs2_read_block(inode, blkno, &bh);
2222 if (ret) { 2999 if (ret) {
2223 mlog_errno(ret); 3000 mlog_errno(ret);
2224 goto out; 3001 goto out;
2225 } 3002 }
2226 3003
2227 xh = (struct ocfs2_xattr_header *)bh->b_data; 3004 xh = bucket_xh(search);
2228 xe = &xh->xh_entries[0]; 3005 xe = &xh->xh_entries[0];
2229 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3006 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2230 high_bucket = bucket - 1; 3007 high_bucket = bucket - 1;
@@ -2241,10 +3018,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2241 3018
2242 last_hash = le32_to_cpu(xe->xe_name_hash); 3019 last_hash = le32_to_cpu(xe->xe_name_hash);
2243 3020
2244 /* record lower_bh which may be the insert place. */ 3021 /* record lower_blkno which may be the insert place. */
2245 brelse(lower_bh); 3022 lower_blkno = blkno;
2246 lower_bh = bh;
2247 bh = NULL;
2248 3023
2249 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3024 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2250 low_bucket = bucket + 1; 3025 low_bucket = bucket + 1;
@@ -2252,7 +3027,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2252 } 3027 }
2253 3028
2254 /* the searched xattr should reside in this bucket if exists. */ 3029 /* the searched xattr should reside in this bucket if exists. */
2255 ret = ocfs2_find_xe_in_bucket(inode, lower_bh, 3030 ret = ocfs2_find_xe_in_bucket(inode, search,
2256 name_index, name, name_hash, 3031 name_index, name, name_hash,
2257 &index, &found); 3032 &index, &found);
2258 if (ret) { 3033 if (ret) {
@@ -2267,46 +3042,29 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2267 * When the xattr's hash value is in the gap of 2 buckets, we will 3042 * When the xattr's hash value is in the gap of 2 buckets, we will
2268 * always set it to the previous bucket. 3043 * always set it to the previous bucket.
2269 */ 3044 */
2270 if (!lower_bh) { 3045 if (!lower_blkno)
2271 /* 3046 lower_blkno = p_blkno;
2272 * We can't find any bucket whose first name_hash is less 3047
2273 * than the find name_hash. 3048 /* This should be in cache - we just read it during the search */
2274 */ 3049 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
2275 BUG_ON(bh->b_blocknr != p_blkno); 3050 if (ret) {
2276 lower_bh = bh; 3051 mlog_errno(ret);
2277 bh = NULL; 3052 goto out;
2278 } 3053 }
2279 xs->bucket.bhs[0] = lower_bh;
2280 xs->bucket.xh = (struct ocfs2_xattr_header *)
2281 xs->bucket.bhs[0]->b_data;
2282 lower_bh = NULL;
2283 3054
2284 xs->header = xs->bucket.xh; 3055 xs->header = bucket_xh(xs->bucket);
2285 xs->base = xs->bucket.bhs[0]->b_data; 3056 xs->base = bucket_block(xs->bucket, 0);
2286 xs->end = xs->base + inode->i_sb->s_blocksize; 3057 xs->end = xs->base + inode->i_sb->s_blocksize;
2287 3058
2288 if (found) { 3059 if (found) {
2289 /*
2290 * If we have found the xattr enty, read all the blocks in
2291 * this bucket.
2292 */
2293 ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
2294 blk_per_bucket - 1, &xs->bucket.bhs[1],
2295 0);
2296 if (ret) {
2297 mlog_errno(ret);
2298 goto out;
2299 }
2300
2301 xs->here = &xs->header->xh_entries[index]; 3060 xs->here = &xs->header->xh_entries[index];
2302 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, 3061 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2303 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index); 3062 (unsigned long long)bucket_blkno(xs->bucket), index);
2304 } else 3063 } else
2305 ret = -ENODATA; 3064 ret = -ENODATA;
2306 3065
2307out: 3066out:
2308 brelse(bh); 3067 ocfs2_xattr_bucket_free(search);
2309 brelse(lower_bh);
2310 return ret; 3068 return ret;
2311} 3069}
2312 3070
@@ -2357,53 +3115,50 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2357 xattr_bucket_func *func, 3115 xattr_bucket_func *func,
2358 void *para) 3116 void *para)
2359{ 3117{
2360 int i, j, ret = 0; 3118 int i, ret = 0;
2361 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2362 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3119 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2363 u32 num_buckets = clusters * bpc; 3120 u32 num_buckets = clusters * bpc;
2364 struct ocfs2_xattr_bucket bucket; 3121 struct ocfs2_xattr_bucket *bucket;
2365 3122
2366 memset(&bucket, 0, sizeof(bucket)); 3123 bucket = ocfs2_xattr_bucket_new(inode);
3124 if (!bucket) {
3125 mlog_errno(-ENOMEM);
3126 return -ENOMEM;
3127 }
2367 3128
2368 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 3129 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2369 clusters, (unsigned long long)blkno); 3130 clusters, (unsigned long long)blkno);
2370 3131
2371 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { 3132 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
2372 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, 3133 ret = ocfs2_read_xattr_bucket(bucket, blkno);
2373 bucket.bhs, 0);
2374 if (ret) { 3134 if (ret) {
2375 mlog_errno(ret); 3135 mlog_errno(ret);
2376 goto out; 3136 break;
2377 } 3137 }
2378 3138
2379 bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2380 /* 3139 /*
2381 * The real bucket num in this series of blocks is stored 3140 * The real bucket num in this series of blocks is stored
2382 * in the 1st bucket. 3141 * in the 1st bucket.
2383 */ 3142 */
2384 if (i == 0) 3143 if (i == 0)
2385 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); 3144 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
2386 3145
2387 mlog(0, "iterating xattr bucket %llu, first hash %u\n", 3146 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2388 (unsigned long long)blkno, 3147 (unsigned long long)blkno,
2389 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); 3148 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
2390 if (func) { 3149 if (func) {
2391 ret = func(inode, &bucket, para); 3150 ret = func(inode, bucket, para);
2392 if (ret) { 3151 if (ret)
2393 mlog_errno(ret); 3152 mlog_errno(ret);
2394 break; 3153 /* Fall through to bucket_relse() */
2395 }
2396 } 3154 }
2397 3155
2398 for (j = 0; j < blk_per_bucket; j++) 3156 ocfs2_xattr_bucket_relse(bucket);
2399 brelse(bucket.bhs[j]); 3157 if (ret)
2400 memset(&bucket, 0, sizeof(bucket)); 3158 break;
2401 } 3159 }
2402 3160
2403out: 3161 ocfs2_xattr_bucket_free(bucket);
2404 for (j = 0; j < blk_per_bucket; j++)
2405 brelse(bucket.bhs[j]);
2406
2407 return ret; 3162 return ret;
2408} 3163}
2409 3164
@@ -2441,21 +3196,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
2441 int i, block_off, new_offset; 3196 int i, block_off, new_offset;
2442 const char *prefix, *name; 3197 const char *prefix, *name;
2443 3198
2444 for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { 3199 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
2445 struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; 3200 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
2446 type = ocfs2_xattr_get_type(entry); 3201 type = ocfs2_xattr_get_type(entry);
2447 prefix = ocfs2_xattr_prefix(type); 3202 prefix = ocfs2_xattr_prefix(type);
2448 3203
2449 if (prefix) { 3204 if (prefix) {
2450 ret = ocfs2_xattr_bucket_get_name_value(inode, 3205 ret = ocfs2_xattr_bucket_get_name_value(inode,
2451 bucket->xh, 3206 bucket_xh(bucket),
2452 i, 3207 i,
2453 &block_off, 3208 &block_off,
2454 &new_offset); 3209 &new_offset);
2455 if (ret) 3210 if (ret)
2456 break; 3211 break;
2457 3212
2458 name = (const char *)bucket->bhs[block_off]->b_data + 3213 name = (const char *)bucket_block(bucket, block_off) +
2459 new_offset; 3214 new_offset;
2460 ret = ocfs2_xattr_list_entry(xl->buffer, 3215 ret = ocfs2_xattr_list_entry(xl->buffer,
2461 xl->buffer_size, 3216 xl->buffer_size,
@@ -2540,32 +3295,34 @@ static void swap_xe(void *a, void *b, int size)
2540/* 3295/*
2541 * When the ocfs2_xattr_block is filled up, new bucket will be created 3296 * When the ocfs2_xattr_block is filled up, new bucket will be created
2542 * and all the xattr entries will be moved to the new bucket. 3297 * and all the xattr entries will be moved to the new bucket.
3298 * The header goes at the start of the bucket, and the names+values are
3299 * filled from the end. This is why *target starts as the last buffer.
2543 * Note: we need to sort the entries since they are not saved in order 3300 * Note: we need to sort the entries since they are not saved in order
2544 * in the ocfs2_xattr_block. 3301 * in the ocfs2_xattr_block.
2545 */ 3302 */
2546static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 3303static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2547 struct buffer_head *xb_bh, 3304 struct buffer_head *xb_bh,
2548 struct buffer_head *xh_bh, 3305 struct ocfs2_xattr_bucket *bucket)
2549 struct buffer_head *data_bh)
2550{ 3306{
2551 int i, blocksize = inode->i_sb->s_blocksize; 3307 int i, blocksize = inode->i_sb->s_blocksize;
3308 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2552 u16 offset, size, off_change; 3309 u16 offset, size, off_change;
2553 struct ocfs2_xattr_entry *xe; 3310 struct ocfs2_xattr_entry *xe;
2554 struct ocfs2_xattr_block *xb = 3311 struct ocfs2_xattr_block *xb =
2555 (struct ocfs2_xattr_block *)xb_bh->b_data; 3312 (struct ocfs2_xattr_block *)xb_bh->b_data;
2556 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 3313 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2557 struct ocfs2_xattr_header *xh = 3314 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2558 (struct ocfs2_xattr_header *)xh_bh->b_data;
2559 u16 count = le16_to_cpu(xb_xh->xh_count); 3315 u16 count = le16_to_cpu(xb_xh->xh_count);
2560 char *target = xh_bh->b_data, *src = xb_bh->b_data; 3316 char *src = xb_bh->b_data;
3317 char *target = bucket_block(bucket, blks - 1);
2561 3318
2562 mlog(0, "cp xattr from block %llu to bucket %llu\n", 3319 mlog(0, "cp xattr from block %llu to bucket %llu\n",
2563 (unsigned long long)xb_bh->b_blocknr, 3320 (unsigned long long)xb_bh->b_blocknr,
2564 (unsigned long long)xh_bh->b_blocknr); 3321 (unsigned long long)bucket_blkno(bucket));
3322
3323 for (i = 0; i < blks; i++)
3324 memset(bucket_block(bucket, i), 0, blocksize);
2565 3325
2566 memset(xh_bh->b_data, 0, blocksize);
2567 if (data_bh)
2568 memset(data_bh->b_data, 0, blocksize);
2569 /* 3326 /*
2570 * Since the xe_name_offset is based on ocfs2_xattr_header, 3327 * Since the xe_name_offset is based on ocfs2_xattr_header,
2571 * there is a offset change corresponding to the change of 3328 * there is a offset change corresponding to the change of
@@ -2577,8 +3334,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2577 size = blocksize - offset; 3334 size = blocksize - offset;
2578 3335
2579 /* copy all the names and values. */ 3336 /* copy all the names and values. */
2580 if (data_bh)
2581 target = data_bh->b_data;
2582 memcpy(target + offset, src + offset, size); 3337 memcpy(target + offset, src + offset, size);
2583 3338
2584 /* Init new header now. */ 3339 /* Init new header now. */
@@ -2588,7 +3343,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2588 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 3343 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2589 3344
2590 /* copy all the entries. */ 3345 /* copy all the entries. */
2591 target = xh_bh->b_data; 3346 target = bucket_block(bucket, 0);
2592 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 3347 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2593 size = count * sizeof(struct ocfs2_xattr_entry); 3348 size = count * sizeof(struct ocfs2_xattr_entry);
2594 memcpy(target + offset, (char *)xb_xh + offset, size); 3349 memcpy(target + offset, (char *)xb_xh + offset, size);
@@ -2614,73 +3369,47 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2614 * While if the entry is in index b-tree, "bucket" indicates the 3369 * While if the entry is in index b-tree, "bucket" indicates the
2615 * real place of the xattr. 3370 * real place of the xattr.
2616 */ 3371 */
2617static int ocfs2_xattr_update_xattr_search(struct inode *inode, 3372static void ocfs2_xattr_update_xattr_search(struct inode *inode,
2618 struct ocfs2_xattr_search *xs, 3373 struct ocfs2_xattr_search *xs,
2619 struct buffer_head *old_bh, 3374 struct buffer_head *old_bh)
2620 struct buffer_head *new_bh)
2621{ 3375{
2622 int ret = 0;
2623 char *buf = old_bh->b_data; 3376 char *buf = old_bh->b_data;
2624 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 3377 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2625 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 3378 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2626 int i, blocksize = inode->i_sb->s_blocksize; 3379 int i;
2627 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2628
2629 xs->bucket.bhs[0] = new_bh;
2630 get_bh(new_bh);
2631 xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2632 xs->header = xs->bucket.xh;
2633 3380
2634 xs->base = new_bh->b_data; 3381 xs->header = bucket_xh(xs->bucket);
3382 xs->base = bucket_block(xs->bucket, 0);
2635 xs->end = xs->base + inode->i_sb->s_blocksize; 3383 xs->end = xs->base + inode->i_sb->s_blocksize;
2636 3384
2637 if (!xs->not_found) { 3385 if (xs->not_found)
2638 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { 3386 return;
2639 ret = ocfs2_read_blocks(inode,
2640 xs->bucket.bhs[0]->b_blocknr + 1,
2641 blk_per_bucket - 1, &xs->bucket.bhs[1],
2642 0);
2643 if (ret) {
2644 mlog_errno(ret);
2645 return ret;
2646 }
2647
2648 }
2649 i = xs->here - old_xh->xh_entries;
2650 xs->here = &xs->header->xh_entries[i];
2651 }
2652 3387
2653 return ret; 3388 i = xs->here - old_xh->xh_entries;
3389 xs->here = &xs->header->xh_entries[i];
2654} 3390}
2655 3391
2656static int ocfs2_xattr_create_index_block(struct inode *inode, 3392static int ocfs2_xattr_create_index_block(struct inode *inode,
2657 struct ocfs2_xattr_search *xs) 3393 struct ocfs2_xattr_search *xs,
3394 struct ocfs2_xattr_set_ctxt *ctxt)
2658{ 3395{
2659 int ret, credits = OCFS2_SUBALLOC_ALLOC; 3396 int ret;
2660 u32 bit_off, len; 3397 u32 bit_off, len;
2661 u64 blkno; 3398 u64 blkno;
2662 handle_t *handle; 3399 handle_t *handle = ctxt->handle;
2663 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3400 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2664 struct ocfs2_inode_info *oi = OCFS2_I(inode); 3401 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2665 struct ocfs2_alloc_context *data_ac;
2666 struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2667 struct buffer_head *xb_bh = xs->xattr_bh; 3402 struct buffer_head *xb_bh = xs->xattr_bh;
2668 struct ocfs2_xattr_block *xb = 3403 struct ocfs2_xattr_block *xb =
2669 (struct ocfs2_xattr_block *)xb_bh->b_data; 3404 (struct ocfs2_xattr_block *)xb_bh->b_data;
2670 struct ocfs2_xattr_tree_root *xr; 3405 struct ocfs2_xattr_tree_root *xr;
2671 u16 xb_flags = le16_to_cpu(xb->xb_flags); 3406 u16 xb_flags = le16_to_cpu(xb->xb_flags);
2672 u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2673 3407
2674 mlog(0, "create xattr index block for %llu\n", 3408 mlog(0, "create xattr index block for %llu\n",
2675 (unsigned long long)xb_bh->b_blocknr); 3409 (unsigned long long)xb_bh->b_blocknr);
2676 3410
2677 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 3411 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2678 3412 BUG_ON(!xs->bucket);
2679 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2680 if (ret) {
2681 mlog_errno(ret);
2682 goto out;
2683 }
2684 3413
2685 /* 3414 /*
2686 * XXX: 3415 * XXX:
@@ -2689,29 +3418,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2689 */ 3418 */
2690 down_write(&oi->ip_alloc_sem); 3419 down_write(&oi->ip_alloc_sem);
2691 3420
2692 /* 3421 ret = ocfs2_journal_access_xb(handle, inode, xb_bh,
2693 * 3 more credits, one for xattr block update, one for the 1st block 3422 OCFS2_JOURNAL_ACCESS_WRITE);
2694 * of the new xattr bucket and one for the value/data.
2695 */
2696 credits += 3;
2697 handle = ocfs2_start_trans(osb, credits);
2698 if (IS_ERR(handle)) {
2699 ret = PTR_ERR(handle);
2700 mlog_errno(ret);
2701 goto out_sem;
2702 }
2703
2704 ret = ocfs2_journal_access(handle, inode, xb_bh,
2705 OCFS2_JOURNAL_ACCESS_WRITE);
2706 if (ret) { 3423 if (ret) {
2707 mlog_errno(ret); 3424 mlog_errno(ret);
2708 goto out_commit; 3425 goto out;
2709 } 3426 }
2710 3427
2711 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); 3428 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3429 1, 1, &bit_off, &len);
2712 if (ret) { 3430 if (ret) {
2713 mlog_errno(ret); 3431 mlog_errno(ret);
2714 goto out_commit; 3432 goto out;
2715 } 3433 }
2716 3434
2717 /* 3435 /*
@@ -2724,51 +3442,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2724 mlog(0, "allocate 1 cluster from %llu to xattr block\n", 3442 mlog(0, "allocate 1 cluster from %llu to xattr block\n",
2725 (unsigned long long)blkno); 3443 (unsigned long long)blkno);
2726 3444
2727 xh_bh = sb_getblk(inode->i_sb, blkno); 3445 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
2728 if (!xh_bh) { 3446 if (ret) {
2729 ret = -EIO;
2730 mlog_errno(ret); 3447 mlog_errno(ret);
2731 goto out_commit; 3448 goto out;
2732 } 3449 }
2733 3450
2734 ocfs2_set_new_buffer_uptodate(inode, xh_bh); 3451 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
2735 3452 OCFS2_JOURNAL_ACCESS_CREATE);
2736 ret = ocfs2_journal_access(handle, inode, xh_bh,
2737 OCFS2_JOURNAL_ACCESS_CREATE);
2738 if (ret) { 3453 if (ret) {
2739 mlog_errno(ret); 3454 mlog_errno(ret);
2740 goto out_commit; 3455 goto out;
2741 }
2742
2743 if (bpb > 1) {
2744 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2745 if (!data_bh) {
2746 ret = -EIO;
2747 mlog_errno(ret);
2748 goto out_commit;
2749 }
2750
2751 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2752
2753 ret = ocfs2_journal_access(handle, inode, data_bh,
2754 OCFS2_JOURNAL_ACCESS_CREATE);
2755 if (ret) {
2756 mlog_errno(ret);
2757 goto out_commit;
2758 }
2759 } 3456 }
2760 3457
2761 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); 3458 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3459 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
2762 3460
2763 ocfs2_journal_dirty(handle, xh_bh); 3461 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
2764 if (data_bh)
2765 ocfs2_journal_dirty(handle, data_bh);
2766
2767 ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2768 if (ret) {
2769 mlog_errno(ret);
2770 goto out_commit;
2771 }
2772 3462
2773 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 3463 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2774 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 3464 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2787,24 +3477,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2787 3477
2788 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 3478 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2789 3479
2790 ret = ocfs2_journal_dirty(handle, xb_bh); 3480 ocfs2_journal_dirty(handle, xb_bh);
2791 if (ret) {
2792 mlog_errno(ret);
2793 goto out_commit;
2794 }
2795
2796out_commit:
2797 ocfs2_commit_trans(osb, handle);
2798
2799out_sem:
2800 up_write(&oi->ip_alloc_sem);
2801 3481
2802out: 3482out:
2803 if (data_ac) 3483 up_write(&oi->ip_alloc_sem);
2804 ocfs2_free_alloc_context(data_ac);
2805
2806 brelse(xh_bh);
2807 brelse(data_bh);
2808 3484
2809 return ret; 3485 return ret;
2810} 3486}
@@ -2829,29 +3505,18 @@ static int cmp_xe_offset(const void *a, const void *b)
2829 * so that we can spare some space for insertion. 3505 * so that we can spare some space for insertion.
2830 */ 3506 */
2831static int ocfs2_defrag_xattr_bucket(struct inode *inode, 3507static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3508 handle_t *handle,
2832 struct ocfs2_xattr_bucket *bucket) 3509 struct ocfs2_xattr_bucket *bucket)
2833{ 3510{
2834 int ret, i; 3511 int ret, i;
2835 size_t end, offset, len, value_len; 3512 size_t end, offset, len, value_len;
2836 struct ocfs2_xattr_header *xh; 3513 struct ocfs2_xattr_header *xh;
2837 char *entries, *buf, *bucket_buf = NULL; 3514 char *entries, *buf, *bucket_buf = NULL;
2838 u64 blkno = bucket->bhs[0]->b_blocknr; 3515 u64 blkno = bucket_blkno(bucket);
2839 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2840 u16 xh_free_start; 3516 u16 xh_free_start;
2841 size_t blocksize = inode->i_sb->s_blocksize; 3517 size_t blocksize = inode->i_sb->s_blocksize;
2842 handle_t *handle;
2843 struct buffer_head **bhs;
2844 struct ocfs2_xattr_entry *xe; 3518 struct ocfs2_xattr_entry *xe;
2845 3519
2846 bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2847 GFP_NOFS);
2848 if (!bhs)
2849 return -ENOMEM;
2850
2851 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
2852 if (ret)
2853 goto out;
2854
2855 /* 3520 /*
2856 * In order to make the operation more efficient and generic, 3521 * In order to make the operation more efficient and generic,
2857 * we copy all the blocks into a contiguous memory and do the 3522 * we copy all the blocks into a contiguous memory and do the
@@ -2865,26 +3530,16 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2865 } 3530 }
2866 3531
2867 buf = bucket_buf; 3532 buf = bucket_buf;
2868 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) 3533 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
2869 memcpy(buf, bhs[i]->b_data, blocksize); 3534 memcpy(buf, bucket_block(bucket, i), blocksize);
2870 3535
2871 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); 3536 ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
2872 if (IS_ERR(handle)) { 3537 OCFS2_JOURNAL_ACCESS_WRITE);
2873 ret = PTR_ERR(handle); 3538 if (ret < 0) {
2874 handle = NULL;
2875 mlog_errno(ret); 3539 mlog_errno(ret);
2876 goto out; 3540 goto out;
2877 } 3541 }
2878 3542
2879 for (i = 0; i < blk_per_bucket; i++) {
2880 ret = ocfs2_journal_access(handle, inode, bhs[i],
2881 OCFS2_JOURNAL_ACCESS_WRITE);
2882 if (ret < 0) {
2883 mlog_errno(ret);
2884 goto commit;
2885 }
2886 }
2887
2888 xh = (struct ocfs2_xattr_header *)bucket_buf; 3543 xh = (struct ocfs2_xattr_header *)bucket_buf;
2889 entries = (char *)xh->xh_entries; 3544 entries = (char *)xh->xh_entries;
2890 xh_free_start = le16_to_cpu(xh->xh_free_start); 3545 xh_free_start = le16_to_cpu(xh->xh_free_start);
@@ -2940,7 +3595,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2940 "bucket %llu\n", (unsigned long long)blkno); 3595 "bucket %llu\n", (unsigned long long)blkno);
2941 3596
2942 if (xh_free_start == end) 3597 if (xh_free_start == end)
2943 goto commit; 3598 goto out;
2944 3599
2945 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 3600 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2946 xh->xh_free_start = cpu_to_le16(end); 3601 xh->xh_free_start = cpu_to_le16(end);
@@ -2951,169 +3606,94 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2951 cmp_xe, swap_xe); 3606 cmp_xe, swap_xe);
2952 3607
2953 buf = bucket_buf; 3608 buf = bucket_buf;
2954 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { 3609 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
2955 memcpy(bhs[i]->b_data, buf, blocksize); 3610 memcpy(bucket_block(bucket, i), buf, blocksize);
2956 ocfs2_journal_dirty(handle, bhs[i]); 3611 ocfs2_xattr_bucket_journal_dirty(handle, bucket);
2957 }
2958 3612
2959commit:
2960 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2961out: 3613out:
2962
2963 if (bhs) {
2964 for (i = 0; i < blk_per_bucket; i++)
2965 brelse(bhs[i]);
2966 }
2967 kfree(bhs);
2968
2969 kfree(bucket_buf); 3614 kfree(bucket_buf);
2970 return ret; 3615 return ret;
2971} 3616}
2972 3617
2973/* 3618/*
2974 * Move half nums of the xattr bucket in the previous cluster to this new 3619 * prev_blkno points to the start of an existing extent. new_blkno
2975 * cluster. We only touch the last cluster of the previous extend record. 3620 * points to a newly allocated extent. Because we know each of our
3621 * clusters contains more than one bucket, we can easily split one cluster
3622 * at a bucket boundary. So we take the last cluster of the existing
3623 * extent and split it down the middle. We move the last half of the
3624 * buckets in the last cluster of the existing extent over to the new
3625 * extent.
3626 *
3627 * first_bh is the buffer at prev_blkno so we can update the existing
3628 * extent's bucket count. header_bh is the bucket where we were hoping
3629 * to insert our xattr. If the bucket move places the target in the new
3630 * extent, we'll update first_bh and header_bh after modifying the old
3631 * extent.
2976 * 3632 *
2977 * first_bh is the first buffer_head of a series of bucket in the same 3633 * first_hash will be set as the 1st xe's name_hash in the new extent.
2978 * extent rec and header_bh is the header of one bucket in this cluster.
2979 * They will be updated if we move the data header_bh contains to the new
2980 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
2981 */ 3634 */
2982static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 3635static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
2983 handle_t *handle, 3636 handle_t *handle,
2984 struct buffer_head **first_bh, 3637 struct ocfs2_xattr_bucket *first,
2985 struct buffer_head **header_bh, 3638 struct ocfs2_xattr_bucket *target,
2986 u64 new_blkno, 3639 u64 new_blkno,
2987 u64 prev_blkno,
2988 u32 num_clusters, 3640 u32 num_clusters,
2989 u32 *first_hash) 3641 u32 *first_hash)
2990{ 3642{
2991 int i, ret, credits; 3643 int ret;
2992 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3644 struct super_block *sb = inode->i_sb;
2993 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 3645 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
2994 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 3646 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
2995 int blocksize = inode->i_sb->s_blocksize; 3647 int to_move = num_buckets / 2;
2996 struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL; 3648 u64 src_blkno;
2997 struct ocfs2_xattr_header *new_xh; 3649 u64 last_cluster_blkno = bucket_blkno(first) +
2998 struct ocfs2_xattr_header *xh = 3650 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
2999 (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3000
3001 BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3002 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3003
3004 prev_bh = *first_bh;
3005 get_bh(prev_bh);
3006 xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3007 3651
3008 prev_blkno += (num_clusters - 1) * bpc + bpc / 2; 3652 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3653 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3009 3654
3010 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 3655 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3011 (unsigned long long)prev_blkno, (unsigned long long)new_blkno); 3656 (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3012 3657
3013 /* 3658 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3014 * We need to update the 1st half of the new cluster and 3659 last_cluster_blkno, new_blkno,
3015 * 1 more for the update of the 1st bucket of the previous 3660 to_move, first_hash);
3016 * extent record.
3017 */
3018 credits = bpc / 2 + 1;
3019 ret = ocfs2_extend_trans(handle, credits);
3020 if (ret) { 3661 if (ret) {
3021 mlog_errno(ret); 3662 mlog_errno(ret);
3022 goto out; 3663 goto out;
3023 } 3664 }
3024 3665
3025 ret = ocfs2_journal_access(handle, inode, prev_bh, 3666 /* This is the first bucket that got moved */
3026 OCFS2_JOURNAL_ACCESS_WRITE); 3667 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3027 if (ret) {
3028 mlog_errno(ret);
3029 goto out;
3030 }
3031 3668
3032 for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) { 3669 /*
3033 old_bh = new_bh = NULL; 3670 * If the target bucket was part of the moved buckets, we need to
3034 new_bh = sb_getblk(inode->i_sb, new_blkno); 3671 * update first and target.
3035 if (!new_bh) { 3672 */
3036 ret = -EIO; 3673 if (bucket_blkno(target) >= src_blkno) {
3037 mlog_errno(ret); 3674 /* Find the block for the new target bucket */
3038 goto out; 3675 src_blkno = new_blkno +
3039 } 3676 (bucket_blkno(target) - src_blkno);
3040 3677
3041 ocfs2_set_new_buffer_uptodate(inode, new_bh); 3678 ocfs2_xattr_bucket_relse(first);
3679 ocfs2_xattr_bucket_relse(target);
3042 3680
3043 ret = ocfs2_journal_access(handle, inode, new_bh, 3681 /*
3044 OCFS2_JOURNAL_ACCESS_CREATE); 3682 * These shouldn't fail - the buffers are in the
3045 if (ret < 0) { 3683 * journal from ocfs2_cp_xattr_bucket().
3684 */
3685 ret = ocfs2_read_xattr_bucket(first, new_blkno);
3686 if (ret) {
3046 mlog_errno(ret); 3687 mlog_errno(ret);
3047 brelse(new_bh);
3048 goto out; 3688 goto out;
3049 } 3689 }
3050 3690 ret = ocfs2_read_xattr_bucket(target, src_blkno);
3051 ret = ocfs2_read_block(inode, prev_blkno, &old_bh); 3691 if (ret)
3052 if (ret < 0) {
3053 mlog_errno(ret); 3692 mlog_errno(ret);
3054 brelse(new_bh);
3055 goto out;
3056 }
3057 3693
3058 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3059
3060 if (i == 0) {
3061 new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3062 new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3063
3064 if (first_hash)
3065 *first_hash = le32_to_cpu(
3066 new_xh->xh_entries[0].xe_name_hash);
3067 new_first_bh = new_bh;
3068 get_bh(new_first_bh);
3069 }
3070
3071 ocfs2_journal_dirty(handle, new_bh);
3072
3073 if (*header_bh == old_bh) {
3074 brelse(*header_bh);
3075 *header_bh = new_bh;
3076 get_bh(*header_bh);
3077
3078 brelse(*first_bh);
3079 *first_bh = new_first_bh;
3080 get_bh(*first_bh);
3081 }
3082 brelse(new_bh);
3083 brelse(old_bh);
3084 } 3694 }
3085 3695
3086 le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3087
3088 ocfs2_journal_dirty(handle, prev_bh);
3089out: 3696out:
3090 brelse(prev_bh);
3091 brelse(new_first_bh);
3092 return ret;
3093}
3094
3095static int ocfs2_read_xattr_bucket(struct inode *inode,
3096 u64 blkno,
3097 struct buffer_head **bhs,
3098 int new)
3099{
3100 int ret = 0;
3101 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3102
3103 if (!new)
3104 return ocfs2_read_blocks(inode, blkno,
3105 blk_per_bucket, bhs, 0);
3106
3107 for (i = 0; i < blk_per_bucket; i++) {
3108 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3109 if (bhs[i] == NULL) {
3110 ret = -EIO;
3111 mlog_errno(ret);
3112 break;
3113 }
3114 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3115 }
3116
3117 return ret; 3697 return ret;
3118} 3698}
3119 3699
@@ -3178,8 +3758,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3178{ 3758{
3179 int ret, i; 3759 int ret, i;
3180 int count, start, len, name_value_len = 0, xe_len, name_offset = 0; 3760 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3181 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3761 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3182 struct buffer_head **s_bhs, **t_bhs = NULL;
3183 struct ocfs2_xattr_header *xh; 3762 struct ocfs2_xattr_header *xh;
3184 struct ocfs2_xattr_entry *xe; 3763 struct ocfs2_xattr_entry *xe;
3185 int blocksize = inode->i_sb->s_blocksize; 3764 int blocksize = inode->i_sb->s_blocksize;
@@ -3187,47 +3766,52 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3187 mlog(0, "move some of xattrs from bucket %llu to %llu\n", 3766 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3188 (unsigned long long)blk, (unsigned long long)new_blk); 3767 (unsigned long long)blk, (unsigned long long)new_blk);
3189 3768
3190 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3769 s_bucket = ocfs2_xattr_bucket_new(inode);
3191 if (!s_bhs) 3770 t_bucket = ocfs2_xattr_bucket_new(inode);
3192 return -ENOMEM; 3771 if (!s_bucket || !t_bucket) {
3193 3772 ret = -ENOMEM;
3194 ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3195 if (ret) {
3196 mlog_errno(ret); 3773 mlog_errno(ret);
3197 goto out; 3774 goto out;
3198 } 3775 }
3199 3776
3200 ret = ocfs2_journal_access(handle, inode, s_bhs[0], 3777 ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3201 OCFS2_JOURNAL_ACCESS_WRITE);
3202 if (ret) { 3778 if (ret) {
3203 mlog_errno(ret); 3779 mlog_errno(ret);
3204 goto out; 3780 goto out;
3205 } 3781 }
3206 3782
3207 t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3783 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3208 if (!t_bhs) { 3784 OCFS2_JOURNAL_ACCESS_WRITE);
3209 ret = -ENOMEM; 3785 if (ret) {
3786 mlog_errno(ret);
3210 goto out; 3787 goto out;
3211 } 3788 }
3212 3789
3213 ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); 3790 /*
3791 * Even if !new_bucket_head, we're overwriting t_bucket. Thus,
3792 * there's no need to read it.
3793 */
3794 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3214 if (ret) { 3795 if (ret) {
3215 mlog_errno(ret); 3796 mlog_errno(ret);
3216 goto out; 3797 goto out;
3217 } 3798 }
3218 3799
3219 for (i = 0; i < blk_per_bucket; i++) { 3800 /*
3220 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3801 * Hey, if we're overwriting t_bucket, what difference does
3221 new_bucket_head ? 3802 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the
3222 OCFS2_JOURNAL_ACCESS_CREATE : 3803 * same part of ocfs2_cp_xattr_bucket().
3223 OCFS2_JOURNAL_ACCESS_WRITE); 3804 */
3224 if (ret) { 3805 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3225 mlog_errno(ret); 3806 new_bucket_head ?
3226 goto out; 3807 OCFS2_JOURNAL_ACCESS_CREATE :
3227 } 3808 OCFS2_JOURNAL_ACCESS_WRITE);
3809 if (ret) {
3810 mlog_errno(ret);
3811 goto out;
3228 } 3812 }
3229 3813
3230 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3814 xh = bucket_xh(s_bucket);
3231 count = le16_to_cpu(xh->xh_count); 3815 count = le16_to_cpu(xh->xh_count);
3232 start = ocfs2_xattr_find_divide_pos(xh); 3816 start = ocfs2_xattr_find_divide_pos(xh);
3233 3817
@@ -3239,10 +3823,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3239 * The hash value is set as one larger than 3823 * The hash value is set as one larger than
3240 * that of the last entry in the previous bucket. 3824 * that of the last entry in the previous bucket.
3241 */ 3825 */
3242 for (i = 0; i < blk_per_bucket; i++) 3826 for (i = 0; i < t_bucket->bu_blocks; i++)
3243 memset(t_bhs[i]->b_data, 0, blocksize); 3827 memset(bucket_block(t_bucket, i), 0, blocksize);
3244 3828
3245 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3829 xh = bucket_xh(t_bucket);
3246 xh->xh_free_start = cpu_to_le16(blocksize); 3830 xh->xh_free_start = cpu_to_le16(blocksize);
3247 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 3831 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3248 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 3832 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3251,11 +3835,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3251 } 3835 }
3252 3836
3253 /* copy the whole bucket to the new first. */ 3837 /* copy the whole bucket to the new first. */
3254 for (i = 0; i < blk_per_bucket; i++) 3838 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3255 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3256 3839
3257 /* update the new bucket. */ 3840 /* update the new bucket. */
3258 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3841 xh = bucket_xh(t_bucket);
3259 3842
3260 /* 3843 /*
3261 * Calculate the total name/value len and xh_free_start for 3844 * Calculate the total name/value len and xh_free_start for
@@ -3319,11 +3902,7 @@ set_num_buckets:
3319 else 3902 else
3320 xh->xh_num_buckets = 0; 3903 xh->xh_num_buckets = 0;
3321 3904
3322 for (i = 0; i < blk_per_bucket; i++) { 3905 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3323 ocfs2_journal_dirty(handle, t_bhs[i]);
3324 if (ret)
3325 mlog_errno(ret);
3326 }
3327 3906
3328 /* store the first_hash of the new bucket. */ 3907 /* store the first_hash of the new bucket. */
3329 if (first_hash) 3908 if (first_hash)
@@ -3337,29 +3916,18 @@ set_num_buckets:
3337 if (start == count) 3916 if (start == count)
3338 goto out; 3917 goto out;
3339 3918
3340 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3919 xh = bucket_xh(s_bucket);
3341 memset(&xh->xh_entries[start], 0, 3920 memset(&xh->xh_entries[start], 0,
3342 sizeof(struct ocfs2_xattr_entry) * (count - start)); 3921 sizeof(struct ocfs2_xattr_entry) * (count - start));
3343 xh->xh_count = cpu_to_le16(start); 3922 xh->xh_count = cpu_to_le16(start);
3344 xh->xh_free_start = cpu_to_le16(name_offset); 3923 xh->xh_free_start = cpu_to_le16(name_offset);
3345 xh->xh_name_value_len = cpu_to_le16(name_value_len); 3924 xh->xh_name_value_len = cpu_to_le16(name_value_len);
3346 3925
3347 ocfs2_journal_dirty(handle, s_bhs[0]); 3926 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3348 if (ret)
3349 mlog_errno(ret);
3350 3927
3351out: 3928out:
3352 if (s_bhs) { 3929 ocfs2_xattr_bucket_free(s_bucket);
3353 for (i = 0; i < blk_per_bucket; i++) 3930 ocfs2_xattr_bucket_free(t_bucket);
3354 brelse(s_bhs[i]);
3355 }
3356 kfree(s_bhs);
3357
3358 if (t_bhs) {
3359 for (i = 0; i < blk_per_bucket; i++)
3360 brelse(t_bhs[i]);
3361 }
3362 kfree(t_bhs);
3363 3931
3364 return ret; 3932 return ret;
3365} 3933}
@@ -3376,10 +3944,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3376 u64 t_blkno, 3944 u64 t_blkno,
3377 int t_is_new) 3945 int t_is_new)
3378{ 3946{
3379 int ret, i; 3947 int ret;
3380 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3948 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3381 int blocksize = inode->i_sb->s_blocksize;
3382 struct buffer_head **s_bhs, **t_bhs = NULL;
3383 3949
3384 BUG_ON(s_blkno == t_blkno); 3950 BUG_ON(s_blkno == t_blkno);
3385 3951
@@ -3387,92 +3953,115 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3387 (unsigned long long)s_blkno, (unsigned long long)t_blkno, 3953 (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3388 t_is_new); 3954 t_is_new);
3389 3955
3390 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, 3956 s_bucket = ocfs2_xattr_bucket_new(inode);
3391 GFP_NOFS); 3957 t_bucket = ocfs2_xattr_bucket_new(inode);
3392 if (!s_bhs) 3958 if (!s_bucket || !t_bucket) {
3393 return -ENOMEM; 3959 ret = -ENOMEM;
3960 mlog_errno(ret);
3961 goto out;
3962 }
3394 3963
3395 ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); 3964 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3396 if (ret) 3965 if (ret)
3397 goto out; 3966 goto out;
3398 3967
3399 t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, 3968 /*
3400 GFP_NOFS); 3969 * Even if !t_is_new, we're overwriting t_bucket. Thus,
3401 if (!t_bhs) { 3970 * there's no need to read it.
3402 ret = -ENOMEM; 3971 */
3972 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3973 if (ret)
3403 goto out; 3974 goto out;
3404 }
3405 3975
3406 ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); 3976 /*
3977 * Hey, if we're overwriting t_bucket, what difference does
3978 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new
3979 * cluster to fill, we came here from
3980 * ocfs2_mv_xattr_buckets(), and it is really new -
3981 * ACCESS_CREATE is required. But we also might have moved data
3982 * out of t_bucket before extending back into it.
3983 * ocfs2_add_new_xattr_bucket() can do this - its call to
3984 * ocfs2_add_new_xattr_cluster() may have created a new extent
3985 * and copied out the end of the old extent. Then it re-extends
3986 * the old extent back to create space for new xattrs. That's
3987 * how we get here, and the bucket isn't really new.
3988 */
3989 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3990 t_is_new ?
3991 OCFS2_JOURNAL_ACCESS_CREATE :
3992 OCFS2_JOURNAL_ACCESS_WRITE);
3407 if (ret) 3993 if (ret)
3408 goto out; 3994 goto out;
3409 3995
3410 for (i = 0; i < blk_per_bucket; i++) { 3996 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3411 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3997 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3412 t_is_new ?
3413 OCFS2_JOURNAL_ACCESS_CREATE :
3414 OCFS2_JOURNAL_ACCESS_WRITE);
3415 if (ret)
3416 goto out;
3417 }
3418
3419 for (i = 0; i < blk_per_bucket; i++) {
3420 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3421 ocfs2_journal_dirty(handle, t_bhs[i]);
3422 }
3423 3998
3424out: 3999out:
3425 if (s_bhs) { 4000 ocfs2_xattr_bucket_free(t_bucket);
3426 for (i = 0; i < blk_per_bucket; i++) 4001 ocfs2_xattr_bucket_free(s_bucket);
3427 brelse(s_bhs[i]);
3428 }
3429 kfree(s_bhs);
3430
3431 if (t_bhs) {
3432 for (i = 0; i < blk_per_bucket; i++)
3433 brelse(t_bhs[i]);
3434 }
3435 kfree(t_bhs);
3436 4002
3437 return ret; 4003 return ret;
3438} 4004}
3439 4005
3440/* 4006/*
3441 * Copy one xattr cluster from src_blk to to_blk. 4007 * src_blk points to the start of an existing extent. last_blk points to
3442 * The to_blk will become the first bucket header of the cluster, so its 4008 * last cluster in that extent. to_blk points to a newly allocated
3443 * xh_num_buckets will be initialized as the bucket num in the cluster. 4009 * extent. We copy the buckets from the cluster at last_blk to the new
4010 * extent. If start_bucket is non-zero, we skip that many buckets before
4011 * we start copying. The new extent's xh_num_buckets gets set to the
4012 * number of buckets we copied. The old extent's xh_num_buckets shrinks
4013 * by the same amount.
3444 */ 4014 */
3445static int ocfs2_cp_xattr_cluster(struct inode *inode, 4015static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
3446 handle_t *handle, 4016 u64 src_blk, u64 last_blk, u64 to_blk,
3447 struct buffer_head *first_bh, 4017 unsigned int start_bucket,
3448 u64 src_blk,
3449 u64 to_blk,
3450 u32 *first_hash) 4018 u32 *first_hash)
3451{ 4019{
3452 int i, ret, credits; 4020 int i, ret, credits;
3453 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4021 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3454 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 4022 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3455 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4023 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3456 struct buffer_head *bh = NULL; 4024 struct ocfs2_xattr_bucket *old_first, *new_first;
3457 struct ocfs2_xattr_header *xh; 4025
3458 u64 to_blk_start = to_blk; 4026 mlog(0, "mv xattrs from cluster %llu to %llu\n",
4027 (unsigned long long)last_blk, (unsigned long long)to_blk);
4028
4029 BUG_ON(start_bucket >= num_buckets);
4030 if (start_bucket) {
4031 num_buckets -= start_bucket;
4032 last_blk += (start_bucket * blks_per_bucket);
4033 }
4034
4035 /* The first bucket of the original extent */
4036 old_first = ocfs2_xattr_bucket_new(inode);
4037 /* The first bucket of the new extent */
4038 new_first = ocfs2_xattr_bucket_new(inode);
4039 if (!old_first || !new_first) {
4040 ret = -ENOMEM;
4041 mlog_errno(ret);
4042 goto out;
4043 }
3459 4044
3460 mlog(0, "cp xattrs from cluster %llu to %llu\n", 4045 ret = ocfs2_read_xattr_bucket(old_first, src_blk);
3461 (unsigned long long)src_blk, (unsigned long long)to_blk); 4046 if (ret) {
4047 mlog_errno(ret);
4048 goto out;
4049 }
3462 4050
3463 /* 4051 /*
3464 * We need to update the new cluster and 1 more for the update of 4052 * We need to update the first bucket of the old extent and all
3465 * the 1st bucket of the previous extent rec. 4053 * the buckets going to the new extent.
3466 */ 4054 */
3467 credits = bpc + 1; 4055 credits = ((num_buckets + 1) * blks_per_bucket) +
4056 handle->h_buffer_credits;
3468 ret = ocfs2_extend_trans(handle, credits); 4057 ret = ocfs2_extend_trans(handle, credits);
3469 if (ret) { 4058 if (ret) {
3470 mlog_errno(ret); 4059 mlog_errno(ret);
3471 goto out; 4060 goto out;
3472 } 4061 }
3473 4062
3474 ret = ocfs2_journal_access(handle, inode, first_bh, 4063 ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
3475 OCFS2_JOURNAL_ACCESS_WRITE); 4064 OCFS2_JOURNAL_ACCESS_WRITE);
3476 if (ret) { 4065 if (ret) {
3477 mlog_errno(ret); 4066 mlog_errno(ret);
3478 goto out; 4067 goto out;
@@ -3480,45 +4069,45 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
3480 4069
3481 for (i = 0; i < num_buckets; i++) { 4070 for (i = 0; i < num_buckets; i++) {
3482 ret = ocfs2_cp_xattr_bucket(inode, handle, 4071 ret = ocfs2_cp_xattr_bucket(inode, handle,
3483 src_blk, to_blk, 1); 4072 last_blk + (i * blks_per_bucket),
4073 to_blk + (i * blks_per_bucket),
4074 1);
3484 if (ret) { 4075 if (ret) {
3485 mlog_errno(ret); 4076 mlog_errno(ret);
3486 goto out; 4077 goto out;
3487 } 4078 }
3488
3489 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3490 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3491 } 4079 }
3492 4080
3493 /* update the old bucket header. */ 4081 /*
3494 xh = (struct ocfs2_xattr_header *)first_bh->b_data; 4082 * Get the new bucket ready before we dirty anything
3495 le16_add_cpu(&xh->xh_num_buckets, -num_buckets); 4083 * (This actually shouldn't fail, because we already dirtied
3496 4084 * it once in ocfs2_cp_xattr_bucket()).
3497 ocfs2_journal_dirty(handle, first_bh); 4085 */
3498 4086 ret = ocfs2_read_xattr_bucket(new_first, to_blk);
3499 /* update the new bucket header. */ 4087 if (ret) {
3500 ret = ocfs2_read_block(inode, to_blk_start, &bh);
3501 if (ret < 0) {
3502 mlog_errno(ret); 4088 mlog_errno(ret);
3503 goto out; 4089 goto out;
3504 } 4090 }
3505 4091 ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
3506 ret = ocfs2_journal_access(handle, inode, bh, 4092 OCFS2_JOURNAL_ACCESS_WRITE);
3507 OCFS2_JOURNAL_ACCESS_WRITE);
3508 if (ret) { 4093 if (ret) {
3509 mlog_errno(ret); 4094 mlog_errno(ret);
3510 goto out; 4095 goto out;
3511 } 4096 }
3512 4097
3513 xh = (struct ocfs2_xattr_header *)bh->b_data; 4098 /* Now update the headers */
3514 xh->xh_num_buckets = cpu_to_le16(num_buckets); 4099 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4100 ocfs2_xattr_bucket_journal_dirty(handle, old_first);
3515 4101
3516 ocfs2_journal_dirty(handle, bh); 4102 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4103 ocfs2_xattr_bucket_journal_dirty(handle, new_first);
3517 4104
3518 if (first_hash) 4105 if (first_hash)
3519 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4106 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4107
3520out: 4108out:
3521 brelse(bh); 4109 ocfs2_xattr_bucket_free(new_first);
4110 ocfs2_xattr_bucket_free(old_first);
3522 return ret; 4111 return ret;
3523} 4112}
3524 4113
@@ -3534,7 +4123,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
3534 u32 *first_hash) 4123 u32 *first_hash)
3535{ 4124{
3536 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4125 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3537 int ret, credits = 2 * blk_per_bucket; 4126 int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
3538 4127
3539 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4128 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3540 4129
@@ -3577,43 +4166,49 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
3577 */ 4166 */
3578static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 4167static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3579 handle_t *handle, 4168 handle_t *handle,
3580 struct buffer_head **first_bh, 4169 struct ocfs2_xattr_bucket *first,
3581 struct buffer_head **header_bh, 4170 struct ocfs2_xattr_bucket *target,
3582 u64 new_blk, 4171 u64 new_blk,
3583 u64 prev_blk,
3584 u32 prev_clusters, 4172 u32 prev_clusters,
3585 u32 *v_start, 4173 u32 *v_start,
3586 int *extend) 4174 int *extend)
3587{ 4175{
3588 int ret = 0; 4176 int ret;
3589 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3590 4177
3591 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 4178 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3592 (unsigned long long)prev_blk, prev_clusters, 4179 (unsigned long long)bucket_blkno(first), prev_clusters,
3593 (unsigned long long)new_blk); 4180 (unsigned long long)new_blk);
3594 4181
3595 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) 4182 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
3596 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 4183 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3597 handle, 4184 handle,
3598 first_bh, 4185 first, target,
3599 header_bh,
3600 new_blk, 4186 new_blk,
3601 prev_blk,
3602 prev_clusters, 4187 prev_clusters,
3603 v_start); 4188 v_start);
3604 else { 4189 if (ret)
3605 u64 last_blk = prev_blk + bpc * (prev_clusters - 1); 4190 mlog_errno(ret);
3606 4191 } else {
3607 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) 4192 /* The start of the last cluster in the first extent */
3608 ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh, 4193 u64 last_blk = bucket_blkno(first) +
3609 last_blk, new_blk, 4194 ((prev_clusters - 1) *
4195 ocfs2_clusters_to_blocks(inode->i_sb, 1));
4196
4197 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4198 ret = ocfs2_mv_xattr_buckets(inode, handle,
4199 bucket_blkno(first),
4200 last_blk, new_blk, 0,
3610 v_start); 4201 v_start);
3611 else { 4202 if (ret)
4203 mlog_errno(ret);
4204 } else {
3612 ret = ocfs2_divide_xattr_cluster(inode, handle, 4205 ret = ocfs2_divide_xattr_cluster(inode, handle,
3613 last_blk, new_blk, 4206 last_blk, new_blk,
3614 v_start); 4207 v_start);
4208 if (ret)
4209 mlog_errno(ret);
3615 4210
3616 if ((*header_bh)->b_blocknr == last_blk && extend) 4211 if ((bucket_blkno(target) == last_blk) && extend)
3617 *extend = 0; 4212 *extend = 0;
3618 } 4213 }
3619 } 4214 }
@@ -3639,56 +4234,37 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3639 */ 4234 */
3640static int ocfs2_add_new_xattr_cluster(struct inode *inode, 4235static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3641 struct buffer_head *root_bh, 4236 struct buffer_head *root_bh,
3642 struct buffer_head **first_bh, 4237 struct ocfs2_xattr_bucket *first,
3643 struct buffer_head **header_bh, 4238 struct ocfs2_xattr_bucket *target,
3644 u32 *num_clusters, 4239 u32 *num_clusters,
3645 u32 prev_cpos, 4240 u32 prev_cpos,
3646 u64 prev_blkno, 4241 int *extend,
3647 int *extend) 4242 struct ocfs2_xattr_set_ctxt *ctxt)
3648{ 4243{
3649 int ret, credits; 4244 int ret;
3650 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 4245 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3651 u32 prev_clusters = *num_clusters; 4246 u32 prev_clusters = *num_clusters;
3652 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 4247 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3653 u64 block; 4248 u64 block;
3654 handle_t *handle = NULL; 4249 handle_t *handle = ctxt->handle;
3655 struct ocfs2_alloc_context *data_ac = NULL;
3656 struct ocfs2_alloc_context *meta_ac = NULL;
3657 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3658 struct ocfs2_extent_tree et; 4251 struct ocfs2_extent_tree et;
3659 4252
3660 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 4253 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3661 "previous xattr blkno = %llu\n", 4254 "previous xattr blkno = %llu\n",
3662 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4255 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3663 prev_cpos, (unsigned long long)prev_blkno); 4256 prev_cpos, (unsigned long long)bucket_blkno(first));
3664 4257
3665 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 4258 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3666 4259
3667 ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, 4260 ret = ocfs2_journal_access_xb(handle, inode, root_bh,
3668 &data_ac, &meta_ac); 4261 OCFS2_JOURNAL_ACCESS_WRITE);
3669 if (ret) {
3670 mlog_errno(ret);
3671 goto leave;
3672 }
3673
3674 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
3675 clusters_to_add);
3676 handle = ocfs2_start_trans(osb, credits);
3677 if (IS_ERR(handle)) {
3678 ret = PTR_ERR(handle);
3679 handle = NULL;
3680 mlog_errno(ret);
3681 goto leave;
3682 }
3683
3684 ret = ocfs2_journal_access(handle, inode, root_bh,
3685 OCFS2_JOURNAL_ACCESS_WRITE);
3686 if (ret < 0) { 4262 if (ret < 0) {
3687 mlog_errno(ret); 4263 mlog_errno(ret);
3688 goto leave; 4264 goto leave;
3689 } 4265 }
3690 4266
3691 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 4267 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
3692 clusters_to_add, &bit_off, &num_bits); 4268 clusters_to_add, &bit_off, &num_bits);
3693 if (ret < 0) { 4269 if (ret < 0) {
3694 if (ret != -ENOSPC) 4270 if (ret != -ENOSPC)
@@ -3702,7 +4278,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3702 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", 4278 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3703 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4279 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3704 4280
3705 if (prev_blkno + prev_clusters * bpc == block && 4281 if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
3706 (prev_clusters + num_bits) << osb->s_clustersize_bits <= 4282 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3707 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 4283 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3708 /* 4284 /*
@@ -3721,10 +4297,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3721 } else { 4297 } else {
3722 ret = ocfs2_adjust_xattr_cross_cluster(inode, 4298 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3723 handle, 4299 handle,
3724 first_bh, 4300 first,
3725 header_bh, 4301 target,
3726 block, 4302 block,
3727 prev_blkno,
3728 prev_clusters, 4303 prev_clusters,
3729 &v_start, 4304 &v_start,
3730 extend); 4305 extend);
@@ -3734,149 +4309,137 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3734 } 4309 }
3735 } 4310 }
3736 4311
3737 if (handle->h_buffer_credits < credits) {
3738 /*
3739 * The journal has been restarted before, and don't
3740 * have enough space for the insertion, so extend it
3741 * here.
3742 */
3743 ret = ocfs2_extend_trans(handle, credits);
3744 if (ret) {
3745 mlog_errno(ret);
3746 goto leave;
3747 }
3748 }
3749 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 4312 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3750 num_bits, (unsigned long long)block, v_start); 4313 num_bits, (unsigned long long)block, v_start);
3751 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, 4314 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3752 num_bits, 0, meta_ac); 4315 num_bits, 0, ctxt->meta_ac);
3753 if (ret < 0) { 4316 if (ret < 0) {
3754 mlog_errno(ret); 4317 mlog_errno(ret);
3755 goto leave; 4318 goto leave;
3756 } 4319 }
3757 4320
3758 ret = ocfs2_journal_dirty(handle, root_bh); 4321 ret = ocfs2_journal_dirty(handle, root_bh);
3759 if (ret < 0) { 4322 if (ret < 0)
3760 mlog_errno(ret); 4323 mlog_errno(ret);
3761 goto leave;
3762 }
3763 4324
3764leave: 4325leave:
3765 if (handle)
3766 ocfs2_commit_trans(osb, handle);
3767 if (data_ac)
3768 ocfs2_free_alloc_context(data_ac);
3769 if (meta_ac)
3770 ocfs2_free_alloc_context(meta_ac);
3771
3772 return ret; 4326 return ret;
3773} 4327}
3774 4328
3775/* 4329/*
3776 * Extend a new xattr bucket and move xattrs to the end one by one until 4330 * We are given an extent. 'first' is the bucket at the very front of
3777 * We meet with start_bh. Only move half of the xattrs to the bucket after it. 4331 * the extent. The extent has space for an additional bucket past
4332 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number
4333 * of the target bucket. We wish to shift every bucket past the target
4334 * down one, filling in that additional space. When we get back to the
4335 * target, we split the target between itself and the now-empty bucket
4336 * at target+1 (aka, target_blkno + blks_per_bucket).
3778 */ 4337 */
3779static int ocfs2_extend_xattr_bucket(struct inode *inode, 4338static int ocfs2_extend_xattr_bucket(struct inode *inode,
3780 struct buffer_head *first_bh, 4339 handle_t *handle,
3781 struct buffer_head *start_bh, 4340 struct ocfs2_xattr_bucket *first,
4341 u64 target_blk,
3782 u32 num_clusters) 4342 u32 num_clusters)
3783{ 4343{
3784 int ret, credits; 4344 int ret, credits;
3785 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4345 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3786 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4346 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3787 u64 start_blk = start_bh->b_blocknr, end_blk; 4347 u64 end_blk;
3788 u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); 4348 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
3789 handle_t *handle;
3790 struct ocfs2_xattr_header *first_xh =
3791 (struct ocfs2_xattr_header *)first_bh->b_data;
3792 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3793 4349
3794 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 4350 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3795 "from %llu, len = %u\n", (unsigned long long)start_blk, 4351 "from %llu, len = %u\n", (unsigned long long)target_blk,
3796 (unsigned long long)first_bh->b_blocknr, num_clusters); 4352 (unsigned long long)bucket_blkno(first), num_clusters);
3797 4353
3798 BUG_ON(bucket >= num_buckets); 4354 /* The extent must have room for an additional bucket */
4355 BUG_ON(new_bucket >=
4356 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
3799 4357
3800 end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket; 4358 /* end_blk points to the last existing bucket */
4359 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
3801 4360
3802 /* 4361 /*
3803 * We will touch all the buckets after the start_bh(include it). 4362 * end_blk is the start of the last existing bucket.
3804 * Add one more bucket and modify the first_bh. 4363 * Thus, (end_blk - target_blk) covers the target bucket and
4364 * every bucket after it up to, but not including, the last
4365 * existing bucket. Then we add the last existing bucket, the
4366 * new bucket, and the first bucket (3 * blk_per_bucket).
3805 */ 4367 */
3806 credits = end_blk - start_blk + 2 * blk_per_bucket + 1; 4368 credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
3807 handle = ocfs2_start_trans(osb, credits); 4369 handle->h_buffer_credits;
3808 if (IS_ERR(handle)) { 4370 ret = ocfs2_extend_trans(handle, credits);
3809 ret = PTR_ERR(handle); 4371 if (ret) {
3810 handle = NULL;
3811 mlog_errno(ret); 4372 mlog_errno(ret);
3812 goto out; 4373 goto out;
3813 } 4374 }
3814 4375
3815 ret = ocfs2_journal_access(handle, inode, first_bh, 4376 ret = ocfs2_xattr_bucket_journal_access(handle, first,
3816 OCFS2_JOURNAL_ACCESS_WRITE); 4377 OCFS2_JOURNAL_ACCESS_WRITE);
3817 if (ret) { 4378 if (ret) {
3818 mlog_errno(ret); 4379 mlog_errno(ret);
3819 goto commit; 4380 goto out;
3820 } 4381 }
3821 4382
3822 while (end_blk != start_blk) { 4383 while (end_blk != target_blk) {
3823 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 4384 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3824 end_blk + blk_per_bucket, 0); 4385 end_blk + blk_per_bucket, 0);
3825 if (ret) 4386 if (ret)
3826 goto commit; 4387 goto out;
3827 end_blk -= blk_per_bucket; 4388 end_blk -= blk_per_bucket;
3828 } 4389 }
3829 4390
3830 /* Move half of the xattr in start_blk to the next bucket. */ 4391 /* Move half of the xattr in target_blkno to the next bucket. */
3831 ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk, 4392 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
3832 start_blk + blk_per_bucket, NULL, 0); 4393 target_blk + blk_per_bucket, NULL, 0);
3833 4394
3834 le16_add_cpu(&first_xh->xh_num_buckets, 1); 4395 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
3835 ocfs2_journal_dirty(handle, first_bh); 4396 ocfs2_xattr_bucket_journal_dirty(handle, first);
3836 4397
3837commit:
3838 ocfs2_commit_trans(osb, handle);
3839out: 4398out:
3840 return ret; 4399 return ret;
3841} 4400}
3842 4401
3843/* 4402/*
3844 * Add new xattr bucket in an extent record and adjust the buckets accordingly. 4403 * Add new xattr bucket in an extent record and adjust the buckets
3845 * xb_bh is the ocfs2_xattr_block. 4404 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the
3846 * We will move all the buckets starting from header_bh to the next place. As 4405 * bucket we want to insert into.
3847 * for this one, half num of its xattrs will be moved to the next one. 4406 *
4407 * In the easy case, we will move all the buckets after target down by
4408 * one. Half of target's xattrs will be moved to the next bucket.
3848 * 4409 *
3849 * We will allocate a new cluster if current cluster is full and adjust 4410 * If current cluster is full, we'll allocate a new one. This may not
3850 * header_bh and first_bh if the insert place is moved to the new cluster. 4411 * be contiguous. The underlying calls will make sure that there is
4412 * space for the insert, shifting buckets around if necessary.
4413 * 'target' may be moved by those calls.
3851 */ 4414 */
3852static int ocfs2_add_new_xattr_bucket(struct inode *inode, 4415static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3853 struct buffer_head *xb_bh, 4416 struct buffer_head *xb_bh,
3854 struct buffer_head *header_bh) 4417 struct ocfs2_xattr_bucket *target,
4418 struct ocfs2_xattr_set_ctxt *ctxt)
3855{ 4419{
3856 struct ocfs2_xattr_header *first_xh = NULL;
3857 struct buffer_head *first_bh = NULL;
3858 struct ocfs2_xattr_block *xb = 4420 struct ocfs2_xattr_block *xb =
3859 (struct ocfs2_xattr_block *)xb_bh->b_data; 4421 (struct ocfs2_xattr_block *)xb_bh->b_data;
3860 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 4422 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3861 struct ocfs2_extent_list *el = &xb_root->xt_list; 4423 struct ocfs2_extent_list *el = &xb_root->xt_list;
3862 struct ocfs2_xattr_header *xh = 4424 u32 name_hash =
3863 (struct ocfs2_xattr_header *)header_bh->b_data; 4425 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
3864 u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4426 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3865 struct super_block *sb = inode->i_sb;
3866 struct ocfs2_super *osb = OCFS2_SB(sb);
3867 int ret, num_buckets, extend = 1; 4427 int ret, num_buckets, extend = 1;
3868 u64 p_blkno; 4428 u64 p_blkno;
3869 u32 e_cpos, num_clusters; 4429 u32 e_cpos, num_clusters;
4430 /* The bucket at the front of the extent */
4431 struct ocfs2_xattr_bucket *first;
3870 4432
3871 mlog(0, "Add new xattr bucket starting form %llu\n", 4433 mlog(0, "Add new xattr bucket starting from %llu\n",
3872 (unsigned long long)header_bh->b_blocknr); 4434 (unsigned long long)bucket_blkno(target));
3873 4435
3874 /* 4436 /* The first bucket of the original extent */
3875 * Add refrence for header_bh here because it may be 4437 first = ocfs2_xattr_bucket_new(inode);
3876 * changed in ocfs2_add_new_xattr_cluster and we need 4438 if (!first) {
3877 * to free it in the end. 4439 ret = -ENOMEM;
3878 */ 4440 mlog_errno(ret);
3879 get_bh(header_bh); 4441 goto out;
4442 }
3880 4443
3881 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 4444 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3882 &num_clusters, el); 4445 &num_clusters, el);
@@ -3885,40 +4448,45 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3885 goto out; 4448 goto out;
3886 } 4449 }
3887 4450
3888 ret = ocfs2_read_block(inode, p_blkno, &first_bh); 4451 ret = ocfs2_read_xattr_bucket(first, p_blkno);
3889 if (ret) { 4452 if (ret) {
3890 mlog_errno(ret); 4453 mlog_errno(ret);
3891 goto out; 4454 goto out;
3892 } 4455 }
3893 4456
3894 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 4457 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3895 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; 4458 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
3896 4459 /*
3897 if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) { 4460 * This can move first+target if the target bucket moves
4461 * to the new extent.
4462 */
3898 ret = ocfs2_add_new_xattr_cluster(inode, 4463 ret = ocfs2_add_new_xattr_cluster(inode,
3899 xb_bh, 4464 xb_bh,
3900 &first_bh, 4465 first,
3901 &header_bh, 4466 target,
3902 &num_clusters, 4467 &num_clusters,
3903 e_cpos, 4468 e_cpos,
3904 p_blkno, 4469 &extend,
3905 &extend); 4470 ctxt);
3906 if (ret) { 4471 if (ret) {
3907 mlog_errno(ret); 4472 mlog_errno(ret);
3908 goto out; 4473 goto out;
3909 } 4474 }
3910 } 4475 }
3911 4476
3912 if (extend) 4477 if (extend) {
3913 ret = ocfs2_extend_xattr_bucket(inode, 4478 ret = ocfs2_extend_xattr_bucket(inode,
3914 first_bh, 4479 ctxt->handle,
3915 header_bh, 4480 first,
4481 bucket_blkno(target),
3916 num_clusters); 4482 num_clusters);
3917 if (ret) 4483 if (ret)
3918 mlog_errno(ret); 4484 mlog_errno(ret);
4485 }
4486
3919out: 4487out:
3920 brelse(first_bh); 4488 ocfs2_xattr_bucket_free(first);
3921 brelse(header_bh); 4489
3922 return ret; 4490 return ret;
3923} 4491}
3924 4492
@@ -3929,7 +4497,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3929 int block_off = offs >> inode->i_sb->s_blocksize_bits; 4497 int block_off = offs >> inode->i_sb->s_blocksize_bits;
3930 4498
3931 offs = offs % inode->i_sb->s_blocksize; 4499 offs = offs % inode->i_sb->s_blocksize;
3932 return bucket->bhs[block_off]->b_data + offs; 4500 return bucket_block(bucket, block_off) + offs;
3933} 4501}
3934 4502
3935/* 4503/*
@@ -3984,7 +4552,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3984 xe->xe_value_size = 0; 4552 xe->xe_value_size = 0;
3985 4553
3986 val = ocfs2_xattr_bucket_get_val(inode, 4554 val = ocfs2_xattr_bucket_get_val(inode,
3987 &xs->bucket, offs); 4555 xs->bucket, offs);
3988 memset(val + OCFS2_XATTR_SIZE(name_len), 0, 4556 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3989 size - OCFS2_XATTR_SIZE(name_len)); 4557 size - OCFS2_XATTR_SIZE(name_len));
3990 if (OCFS2_XATTR_SIZE(xi->value_len) > 0) 4558 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
@@ -4062,8 +4630,7 @@ set_new_name_value:
4062 xh->xh_free_start = cpu_to_le16(offs); 4630 xh->xh_free_start = cpu_to_le16(offs);
4063 } 4631 }
4064 4632
4065 val = ocfs2_xattr_bucket_get_val(inode, 4633 val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4066 &xs->bucket, offs - size);
4067 xe->xe_name_offset = cpu_to_le16(offs - size); 4634 xe->xe_name_offset = cpu_to_le16(offs - size);
4068 4635
4069 memset(val, 0, size); 4636 memset(val, 0, size);
@@ -4079,125 +4646,45 @@ set_new_name_value:
4079 return; 4646 return;
4080} 4647}
4081 4648
4082static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4083 handle_t *handle,
4084 struct ocfs2_xattr_search *xs,
4085 struct buffer_head **bhs,
4086 u16 bh_num)
4087{
4088 int ret = 0, off, block_off;
4089 struct ocfs2_xattr_entry *xe = xs->here;
4090
4091 /*
4092 * First calculate all the blocks we should journal_access
4093 * and journal_dirty. The first block should always be touched.
4094 */
4095 ret = ocfs2_journal_dirty(handle, bhs[0]);
4096 if (ret)
4097 mlog_errno(ret);
4098
4099 /* calc the data. */
4100 off = le16_to_cpu(xe->xe_name_offset);
4101 block_off = off >> inode->i_sb->s_blocksize_bits;
4102 ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4103 if (ret)
4104 mlog_errno(ret);
4105
4106 return ret;
4107}
4108
4109/* 4649/*
4110 * Set the xattr entry in the specified bucket. 4650 * Set the xattr entry in the specified bucket.
4111 * The bucket is indicated by xs->bucket and it should have the enough 4651 * The bucket is indicated by xs->bucket and it should have the enough
4112 * space for the xattr insertion. 4652 * space for the xattr insertion.
4113 */ 4653 */
4114static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, 4654static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4655 handle_t *handle,
4115 struct ocfs2_xattr_info *xi, 4656 struct ocfs2_xattr_info *xi,
4116 struct ocfs2_xattr_search *xs, 4657 struct ocfs2_xattr_search *xs,
4117 u32 name_hash, 4658 u32 name_hash,
4118 int local) 4659 int local)
4119{ 4660{
4120 int i, ret; 4661 int ret;
4121 handle_t *handle = NULL; 4662 u64 blkno;
4122 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4123 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4124 4663
4125 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", 4664 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4126 (unsigned long)xi->value_len, xi->name_index, 4665 (unsigned long)xi->value_len, xi->name_index,
4127 (unsigned long long)xs->bucket.bhs[0]->b_blocknr); 4666 (unsigned long long)bucket_blkno(xs->bucket));
4128 4667
4129 if (!xs->bucket.bhs[1]) { 4668 if (!xs->bucket->bu_bhs[1]) {
4130 ret = ocfs2_read_blocks(inode, 4669 blkno = bucket_blkno(xs->bucket);
4131 xs->bucket.bhs[0]->b_blocknr + 1, 4670 ocfs2_xattr_bucket_relse(xs->bucket);
4132 blk_per_bucket - 1, &xs->bucket.bhs[1], 4671 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4133 0);
4134 if (ret) { 4672 if (ret) {
4135 mlog_errno(ret); 4673 mlog_errno(ret);
4136 goto out; 4674 goto out;
4137 } 4675 }
4138 } 4676 }
4139 4677
4140 handle = ocfs2_start_trans(osb, blk_per_bucket); 4678 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4141 if (IS_ERR(handle)) { 4679 OCFS2_JOURNAL_ACCESS_WRITE);
4142 ret = PTR_ERR(handle); 4680 if (ret < 0) {
4143 handle = NULL;
4144 mlog_errno(ret); 4681 mlog_errno(ret);
4145 goto out; 4682 goto out;
4146 } 4683 }
4147 4684
4148 for (i = 0; i < blk_per_bucket; i++) {
4149 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4150 OCFS2_JOURNAL_ACCESS_WRITE);
4151 if (ret < 0) {
4152 mlog_errno(ret);
4153 goto out;
4154 }
4155 }
4156
4157 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); 4685 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4686 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4158 4687
4159 /*Only dirty the blocks we have touched in set xattr. */
4160 ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4161 xs->bucket.bhs, blk_per_bucket);
4162 if (ret)
4163 mlog_errno(ret);
4164out:
4165 ocfs2_commit_trans(osb, handle);
4166
4167 return ret;
4168}
4169
4170static int ocfs2_xattr_value_update_size(struct inode *inode,
4171 struct buffer_head *xe_bh,
4172 struct ocfs2_xattr_entry *xe,
4173 u64 new_size)
4174{
4175 int ret;
4176 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4177 handle_t *handle = NULL;
4178
4179 handle = ocfs2_start_trans(osb, 1);
4180 if (IS_ERR(handle)) {
4181 ret = -ENOMEM;
4182 mlog_errno(ret);
4183 goto out;
4184 }
4185
4186 ret = ocfs2_journal_access(handle, inode, xe_bh,
4187 OCFS2_JOURNAL_ACCESS_WRITE);
4188 if (ret < 0) {
4189 mlog_errno(ret);
4190 goto out_commit;
4191 }
4192
4193 xe->xe_value_size = cpu_to_le64(new_size);
4194
4195 ret = ocfs2_journal_dirty(handle, xe_bh);
4196 if (ret < 0)
4197 mlog_errno(ret);
4198
4199out_commit:
4200 ocfs2_commit_trans(osb, handle);
4201out: 4688out:
4202 return ret; 4689 return ret;
4203} 4690}
@@ -4210,18 +4697,19 @@ out:
4210 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 4697 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4211 */ 4698 */
4212static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 4699static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4213 struct buffer_head *header_bh, 4700 struct ocfs2_xattr_bucket *bucket,
4214 int xe_off, 4701 int xe_off,
4215 int len) 4702 int len,
4703 struct ocfs2_xattr_set_ctxt *ctxt)
4216{ 4704{
4217 int ret, offset; 4705 int ret, offset;
4218 u64 value_blk; 4706 u64 value_blk;
4219 struct buffer_head *value_bh = NULL;
4220 struct ocfs2_xattr_value_root *xv;
4221 struct ocfs2_xattr_entry *xe; 4707 struct ocfs2_xattr_entry *xe;
4222 struct ocfs2_xattr_header *xh = 4708 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4223 (struct ocfs2_xattr_header *)header_bh->b_data;
4224 size_t blocksize = inode->i_sb->s_blocksize; 4709 size_t blocksize = inode->i_sb->s_blocksize;
4710 struct ocfs2_xattr_value_buf vb = {
4711 .vb_access = ocfs2_journal_access,
4712 };
4225 4713
4226 xe = &xh->xh_entries[xe_off]; 4714 xe = &xh->xh_entries[xe_off];
4227 4715
@@ -4234,49 +4722,58 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4234 4722
4235 /* We don't allow ocfs2_xattr_value to be stored in different block. */ 4723 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4236 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 4724 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4237 value_blk += header_bh->b_blocknr;
4238 4725
4239 ret = ocfs2_read_block(inode, value_blk, &value_bh); 4726 vb.vb_bh = bucket->bu_bhs[value_blk];
4240 if (ret) { 4727 BUG_ON(!vb.vb_bh);
4241 mlog_errno(ret);
4242 goto out;
4243 }
4244 4728
4245 xv = (struct ocfs2_xattr_value_root *) 4729 vb.vb_xv = (struct ocfs2_xattr_value_root *)
4246 (value_bh->b_data + offset % blocksize); 4730 (vb.vb_bh->b_data + offset % blocksize);
4247 4731
4248 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", 4732 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4249 xe_off, (unsigned long long)header_bh->b_blocknr, len); 4733 OCFS2_JOURNAL_ACCESS_WRITE);
4250 ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4251 if (ret) { 4734 if (ret) {
4252 mlog_errno(ret); 4735 mlog_errno(ret);
4253 goto out; 4736 goto out;
4254 } 4737 }
4255 4738
4256 ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); 4739 /*
4740 * From here on out we have to dirty the bucket. The generic
4741 * value calls only modify one of the bucket's bhs, but we need
4742 * to send the bucket at once. So if they error, they *could* have
4743 * modified something. We have to assume they did, and dirty
4744 * the whole bucket. This leaves us in a consistent state.
4745 */
4746 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4747 xe_off, (unsigned long long)bucket_blkno(bucket), len);
4748 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4257 if (ret) { 4749 if (ret) {
4258 mlog_errno(ret); 4750 mlog_errno(ret);
4259 goto out; 4751 goto out_dirty;
4260 } 4752 }
4261 4753
4754 xe->xe_value_size = cpu_to_le64(len);
4755
4756out_dirty:
4757 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4758
4262out: 4759out:
4263 brelse(value_bh);
4264 return ret; 4760 return ret;
4265} 4761}
4266 4762
4267static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, 4763static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4268 struct ocfs2_xattr_search *xs, 4764 struct ocfs2_xattr_search *xs,
4269 int len) 4765 int len,
4766 struct ocfs2_xattr_set_ctxt *ctxt)
4270{ 4767{
4271 int ret, offset; 4768 int ret, offset;
4272 struct ocfs2_xattr_entry *xe = xs->here; 4769 struct ocfs2_xattr_entry *xe = xs->here;
4273 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; 4770 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4274 4771
4275 BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); 4772 BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4276 4773
4277 offset = xe - xh->xh_entries; 4774 offset = xe - xh->xh_entries;
4278 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], 4775 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4279 offset, len); 4776 offset, len, ctxt);
4280 if (ret) 4777 if (ret)
4281 mlog_errno(ret); 4778 mlog_errno(ret);
4282 4779
@@ -4284,6 +4781,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4284} 4781}
4285 4782
4286static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, 4783static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4784 handle_t *handle,
4287 struct ocfs2_xattr_search *xs, 4785 struct ocfs2_xattr_search *xs,
4288 char *val, 4786 char *val,
4289 int value_len) 4787 int value_len)
@@ -4299,7 +4797,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4299 4797
4300 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); 4798 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4301 4799
4302 return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); 4800 return __ocfs2_xattr_set_value_outside(inode, handle,
4801 xv, val, value_len);
4303} 4802}
4304 4803
4305static int ocfs2_rm_xattr_cluster(struct inode *inode, 4804static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -4343,15 +4842,15 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4343 } 4842 }
4344 } 4843 }
4345 4844
4346 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 4845 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
4347 if (IS_ERR(handle)) { 4846 if (IS_ERR(handle)) {
4348 ret = -ENOMEM; 4847 ret = -ENOMEM;
4349 mlog_errno(ret); 4848 mlog_errno(ret);
4350 goto out; 4849 goto out;
4351 } 4850 }
4352 4851
4353 ret = ocfs2_journal_access(handle, inode, root_bh, 4852 ret = ocfs2_journal_access_xb(handle, inode, root_bh,
4354 OCFS2_JOURNAL_ACCESS_WRITE); 4853 OCFS2_JOURNAL_ACCESS_WRITE);
4355 if (ret) { 4854 if (ret) {
4356 mlog_errno(ret); 4855 mlog_errno(ret);
4357 goto out_commit; 4856 goto out_commit;
@@ -4392,26 +4891,19 @@ out:
4392} 4891}
4393 4892
4394static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, 4893static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4894 handle_t *handle,
4395 struct ocfs2_xattr_search *xs) 4895 struct ocfs2_xattr_search *xs)
4396{ 4896{
4397 handle_t *handle = NULL; 4897 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4398 struct ocfs2_xattr_header *xh = xs->bucket.xh;
4399 struct ocfs2_xattr_entry *last = &xh->xh_entries[ 4898 struct ocfs2_xattr_entry *last = &xh->xh_entries[
4400 le16_to_cpu(xh->xh_count) - 1]; 4899 le16_to_cpu(xh->xh_count) - 1];
4401 int ret = 0; 4900 int ret = 0;
4402 4901
4403 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); 4902 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4404 if (IS_ERR(handle)) { 4903 OCFS2_JOURNAL_ACCESS_WRITE);
4405 ret = PTR_ERR(handle);
4406 mlog_errno(ret);
4407 return;
4408 }
4409
4410 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4411 OCFS2_JOURNAL_ACCESS_WRITE);
4412 if (ret) { 4904 if (ret) {
4413 mlog_errno(ret); 4905 mlog_errno(ret);
4414 goto out_commit; 4906 return;
4415 } 4907 }
4416 4908
4417 /* Remove the old entry. */ 4909 /* Remove the old entry. */
@@ -4420,11 +4912,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4420 memset(last, 0, sizeof(struct ocfs2_xattr_entry)); 4912 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4421 le16_add_cpu(&xh->xh_count, -1); 4913 le16_add_cpu(&xh->xh_count, -1);
4422 4914
4423 ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); 4915 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4424 if (ret < 0)
4425 mlog_errno(ret);
4426out_commit:
4427 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4428} 4916}
4429 4917
4430/* 4918/*
@@ -4440,7 +4928,8 @@ out_commit:
4440 */ 4928 */
4441static int ocfs2_xattr_set_in_bucket(struct inode *inode, 4929static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4442 struct ocfs2_xattr_info *xi, 4930 struct ocfs2_xattr_info *xi,
4443 struct ocfs2_xattr_search *xs) 4931 struct ocfs2_xattr_search *xs,
4932 struct ocfs2_xattr_set_ctxt *ctxt)
4444{ 4933{
4445 int ret, local = 1; 4934 int ret, local = 1;
4446 size_t value_len; 4935 size_t value_len;
@@ -4468,7 +4957,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4468 value_len = 0; 4957 value_len = 0;
4469 4958
4470 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, 4959 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4471 value_len); 4960 value_len,
4961 ctxt);
4472 if (ret) 4962 if (ret)
4473 goto out; 4963 goto out;
4474 4964
@@ -4488,7 +4978,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4488 xi->value_len = OCFS2_XATTR_ROOT_SIZE; 4978 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4489 } 4979 }
4490 4980
4491 ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); 4981 ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
4982 name_hash, local);
4492 if (ret) { 4983 if (ret) {
4493 mlog_errno(ret); 4984 mlog_errno(ret);
4494 goto out; 4985 goto out;
@@ -4499,7 +4990,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4499 4990
4500 /* allocate the space now for the outside block storage. */ 4991 /* allocate the space now for the outside block storage. */
4501 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, 4992 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4502 value_len); 4993 value_len, ctxt);
4503 if (ret) { 4994 if (ret) {
4504 mlog_errno(ret); 4995 mlog_errno(ret);
4505 4996
@@ -4509,13 +5000,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4509 * storage and we have allocated xattr already, 5000 * storage and we have allocated xattr already,
4510 * so need to remove it. 5001 * so need to remove it.
4511 */ 5002 */
4512 ocfs2_xattr_bucket_remove_xs(inode, xs); 5003 ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
4513 } 5004 }
4514 goto out; 5005 goto out;
4515 } 5006 }
4516 5007
4517set_value_outside: 5008set_value_outside:
4518 ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); 5009 ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5010 xs, val, value_len);
4519out: 5011out:
4520 return ret; 5012 return ret;
4521} 5013}
@@ -4530,7 +5022,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4530 struct ocfs2_xattr_bucket *bucket, 5022 struct ocfs2_xattr_bucket *bucket,
4531 const char *name) 5023 const char *name)
4532{ 5024{
4533 struct ocfs2_xattr_header *xh = bucket->xh; 5025 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4534 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5026 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4535 5027
4536 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5028 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
@@ -4540,7 +5032,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4540 xh->xh_entries[0].xe_name_hash) { 5032 xh->xh_entries[0].xe_name_hash) {
4541 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5033 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4542 "hash = %u\n", 5034 "hash = %u\n",
4543 (unsigned long long)bucket->bhs[0]->b_blocknr, 5035 (unsigned long long)bucket_blkno(bucket),
4544 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5036 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4545 return -ENOSPC; 5037 return -ENOSPC;
4546 } 5038 }
@@ -4550,16 +5042,16 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4550 5042
4551static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5043static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4552 struct ocfs2_xattr_info *xi, 5044 struct ocfs2_xattr_info *xi,
4553 struct ocfs2_xattr_search *xs) 5045 struct ocfs2_xattr_search *xs,
5046 struct ocfs2_xattr_set_ctxt *ctxt)
4554{ 5047{
4555 struct ocfs2_xattr_header *xh; 5048 struct ocfs2_xattr_header *xh;
4556 struct ocfs2_xattr_entry *xe; 5049 struct ocfs2_xattr_entry *xe;
4557 u16 count, header_size, xh_free_start; 5050 u16 count, header_size, xh_free_start;
4558 int i, free, max_free, need, old; 5051 int free, max_free, need, old;
4559 size_t value_size = 0, name_len = strlen(xi->name); 5052 size_t value_size = 0, name_len = strlen(xi->name);
4560 size_t blocksize = inode->i_sb->s_blocksize; 5053 size_t blocksize = inode->i_sb->s_blocksize;
4561 int ret, allocation = 0; 5054 int ret, allocation = 0;
4562 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4563 5055
4564 mlog_entry("Set xattr %s in xattr index block\n", xi->name); 5056 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4565 5057
@@ -4574,7 +5066,7 @@ try_again:
4574 5066
4575 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " 5067 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4576 "of %u which exceed block size\n", 5068 "of %u which exceed block size\n",
4577 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, 5069 (unsigned long long)bucket_blkno(xs->bucket),
4578 header_size); 5070 header_size);
4579 5071
4580 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) 5072 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4614,11 +5106,13 @@ try_again:
4614 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " 5106 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4615 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" 5107 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4616 " %u\n", xs->not_found, 5108 " %u\n", xs->not_found,
4617 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, 5109 (unsigned long long)bucket_blkno(xs->bucket),
4618 free, need, max_free, le16_to_cpu(xh->xh_free_start), 5110 free, need, max_free, le16_to_cpu(xh->xh_free_start),
4619 le16_to_cpu(xh->xh_name_value_len)); 5111 le16_to_cpu(xh->xh_name_value_len));
4620 5112
4621 if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { 5113 if (free < need ||
5114 (xs->not_found &&
5115 count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
4622 if (need <= max_free && 5116 if (need <= max_free &&
4623 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { 5117 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4624 /* 5118 /*
@@ -4626,7 +5120,8 @@ try_again:
4626 * name/value will be moved, the xe shouldn't be changed 5120 * name/value will be moved, the xe shouldn't be changed
4627 * in xs. 5121 * in xs.
4628 */ 5122 */
4629 ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); 5123 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5124 xs->bucket);
4630 if (ret) { 5125 if (ret) {
4631 mlog_errno(ret); 5126 mlog_errno(ret);
4632 goto out; 5127 goto out;
@@ -4658,7 +5153,7 @@ try_again:
4658 * add a new bucket for the insert. 5153 * add a new bucket for the insert.
4659 */ 5154 */
4660 ret = ocfs2_check_xattr_bucket_collision(inode, 5155 ret = ocfs2_check_xattr_bucket_collision(inode,
4661 &xs->bucket, 5156 xs->bucket,
4662 xi->name); 5157 xi->name);
4663 if (ret) { 5158 if (ret) {
4664 mlog_errno(ret); 5159 mlog_errno(ret);
@@ -4667,17 +5162,21 @@ try_again:
4667 5162
4668 ret = ocfs2_add_new_xattr_bucket(inode, 5163 ret = ocfs2_add_new_xattr_bucket(inode,
4669 xs->xattr_bh, 5164 xs->xattr_bh,
4670 xs->bucket.bhs[0]); 5165 xs->bucket,
5166 ctxt);
4671 if (ret) { 5167 if (ret) {
4672 mlog_errno(ret); 5168 mlog_errno(ret);
4673 goto out; 5169 goto out;
4674 } 5170 }
4675 5171
4676 for (i = 0; i < blk_per_bucket; i++) 5172 /*
4677 brelse(xs->bucket.bhs[i]); 5173 * ocfs2_add_new_xattr_bucket() will have updated
4678 5174 * xs->bucket if it moved, but it will not have updated
4679 memset(&xs->bucket, 0, sizeof(xs->bucket)); 5175 * any of the other search fields. Thus, we drop it and
4680 5176 * re-search. Everything should be cached, so it'll be
5177 * quick.
5178 */
5179 ocfs2_xattr_bucket_relse(xs->bucket);
4681 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5180 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4682 xi->name_index, 5181 xi->name_index,
4683 xi->name, xs); 5182 xi->name, xs);
@@ -4689,7 +5188,7 @@ try_again:
4689 } 5188 }
4690 5189
4691xattr_set: 5190xattr_set:
4692 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); 5191 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
4693out: 5192out:
4694 mlog_exit(ret); 5193 mlog_exit(ret);
4695 return ret; 5194 return ret;
@@ -4700,24 +5199,41 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4700 void *para) 5199 void *para)
4701{ 5200{
4702 int ret = 0; 5201 int ret = 0;
4703 struct ocfs2_xattr_header *xh = bucket->xh; 5202 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4704 u16 i; 5203 u16 i;
4705 struct ocfs2_xattr_entry *xe; 5204 struct ocfs2_xattr_entry *xe;
5205 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5206 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5207 int credits = ocfs2_remove_extent_credits(osb->sb) +
5208 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5209
5210
5211 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
4706 5212
4707 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5213 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4708 xe = &xh->xh_entries[i]; 5214 xe = &xh->xh_entries[i];
4709 if (ocfs2_xattr_is_local(xe)) 5215 if (ocfs2_xattr_is_local(xe))
4710 continue; 5216 continue;
4711 5217
4712 ret = ocfs2_xattr_bucket_value_truncate(inode, 5218 ctxt.handle = ocfs2_start_trans(osb, credits);
4713 bucket->bhs[0], 5219 if (IS_ERR(ctxt.handle)) {
4714 i, 0); 5220 ret = PTR_ERR(ctxt.handle);
5221 mlog_errno(ret);
5222 break;
5223 }
5224
5225 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5226 i, 0, &ctxt);
5227
5228 ocfs2_commit_trans(osb, ctxt.handle);
4715 if (ret) { 5229 if (ret) {
4716 mlog_errno(ret); 5230 mlog_errno(ret);
4717 break; 5231 break;
4718 } 5232 }
4719 } 5233 }
4720 5234
5235 ocfs2_schedule_truncate_log_flush(osb, 1);
5236 ocfs2_run_deallocs(osb, &ctxt.dealloc);
4721 return ret; 5237 return ret;
4722} 5238}
4723 5239
@@ -4768,6 +5284,74 @@ out:
4768} 5284}
4769 5285
4770/* 5286/*
5287 * 'security' attributes support
5288 */
5289static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5290 size_t list_size, const char *name,
5291 size_t name_len)
5292{
5293 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5294 const size_t total_len = prefix_len + name_len + 1;
5295
5296 if (list && total_len <= list_size) {
5297 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5298 memcpy(list + prefix_len, name, name_len);
5299 list[prefix_len + name_len] = '\0';
5300 }
5301 return total_len;
5302}
5303
5304static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5305 void *buffer, size_t size)
5306{
5307 if (strcmp(name, "") == 0)
5308 return -EINVAL;
5309 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5310 buffer, size);
5311}
5312
5313static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5314 const void *value, size_t size, int flags)
5315{
5316 if (strcmp(name, "") == 0)
5317 return -EINVAL;
5318
5319 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5320 size, flags);
5321}
5322
5323int ocfs2_init_security_get(struct inode *inode,
5324 struct inode *dir,
5325 struct ocfs2_security_xattr_info *si)
5326{
5327 /* check whether ocfs2 support feature xattr */
5328 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
5329 return -EOPNOTSUPP;
5330 return security_inode_init_security(inode, dir, &si->name, &si->value,
5331 &si->value_len);
5332}
5333
5334int ocfs2_init_security_set(handle_t *handle,
5335 struct inode *inode,
5336 struct buffer_head *di_bh,
5337 struct ocfs2_security_xattr_info *si,
5338 struct ocfs2_alloc_context *xattr_ac,
5339 struct ocfs2_alloc_context *data_ac)
5340{
5341 return ocfs2_xattr_set_handle(handle, inode, di_bh,
5342 OCFS2_XATTR_INDEX_SECURITY,
5343 si->name, si->value, si->value_len, 0,
5344 xattr_ac, data_ac);
5345}
5346
5347struct xattr_handler ocfs2_xattr_security_handler = {
5348 .prefix = XATTR_SECURITY_PREFIX,
5349 .list = ocfs2_xattr_security_list,
5350 .get = ocfs2_xattr_security_get,
5351 .set = ocfs2_xattr_security_set,
5352};
5353
5354/*
4771 * 'trusted' attributes support 5355 * 'trusted' attributes support
4772 */ 5356 */
4773static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 5357static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1d8314c7656d..5a1ebc789f7e 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -30,13 +30,58 @@ enum ocfs2_xattr_type {
30 OCFS2_XATTR_MAX 30 OCFS2_XATTR_MAX
31}; 31};
32 32
33struct ocfs2_security_xattr_info {
34 int enable;
35 char *name;
36 void *value;
37 size_t value_len;
38};
39
33extern struct xattr_handler ocfs2_xattr_user_handler; 40extern struct xattr_handler ocfs2_xattr_user_handler;
34extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler;
43#ifdef CONFIG_OCFS2_FS_POSIX_ACL
44extern struct xattr_handler ocfs2_xattr_acl_access_handler;
45extern struct xattr_handler ocfs2_xattr_acl_default_handler;
46#endif
35extern struct xattr_handler *ocfs2_xattr_handlers[]; 47extern struct xattr_handler *ocfs2_xattr_handlers[];
36 48
37ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 49ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
50int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
51 const char *, void *, size_t);
38int ocfs2_xattr_set(struct inode *, int, const char *, const void *, 52int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
39 size_t, int); 53 size_t, int);
54int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
55 int, const char *, const void *, size_t, int,
56 struct ocfs2_alloc_context *,
57 struct ocfs2_alloc_context *);
40int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 58int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *,
60 struct ocfs2_security_xattr_info *);
61int ocfs2_init_security_set(handle_t *, struct inode *,
62 struct buffer_head *,
63 struct ocfs2_security_xattr_info *,
64 struct ocfs2_alloc_context *,
65 struct ocfs2_alloc_context *);
66int ocfs2_calc_security_init(struct inode *,
67 struct ocfs2_security_xattr_info *,
68 int *, int *, struct ocfs2_alloc_context **);
69int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
70 int, struct ocfs2_security_xattr_info *,
71 int *, int *, struct ocfs2_alloc_context **);
72
73/*
74 * xattrs can live inside an inode, as part of an external xattr block,
75 * or inside an xattr bucket, which is the leaf of a tree rooted in an
76 * xattr block. Some of the xattr calls, especially the value setting
77 * functions, want to treat each of these locations as equal. Let's wrap
78 * them in a structure that we can pass around instead of raw buffer_heads.
79 */
80struct ocfs2_xattr_value_buf {
81 struct buffer_head *vb_bh;
82 ocfs2_journal_access_func vb_access;
83 struct ocfs2_xattr_value_root *vb_xv;
84};
85
41 86
42#endif /* OCFS2_XATTR_H */ 87#endif /* OCFS2_XATTR_H */