path: root/fs/ocfs2
author     Linus Torvalds <torvalds@linux-foundation.org>  2009-01-05 21:32:43 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2009-01-05 21:32:43 -0500
commit     10cc04f5a01041ffff068b3f9b195bfdc5290c45 (patch)
tree       5c53027ce5299075759b70e1447ce811ba1afdf0 /fs/ocfs2
parent     520c85346666d4d9a6fcaaa8450542302dc28b91 (diff)
parent     9047beabb8a396f0b18de1e4a9ab920cf92054af (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (138 commits)
  ocfs2: Access the right buffer_head in ocfs2_merge_rec_left.
  ocfs2: use min_t in ocfs2_quota_read()
  ocfs2: remove unneeded lvb casts
  ocfs2: Add xattr support checking in init_security
  ocfs2: alloc xattr bucket in ocfs2_xattr_set_handle
  ocfs2: calculate and reserve credits for xattr value in mknod
  ocfs2/xattr: fix credits calculation during index create
  ocfs2/xattr: Always updating ctime during xattr set.
  ocfs2/xattr: Remove extend_trans call and add its credits from the beginning
  ocfs2/dlm: Fix race during lockres mastery
  ocfs2/dlm: Fix race in adding/removing lockres' to/from the tracking list
  ocfs2/dlm: Hold off sending lockres drop ref message while lockres is migrating
  ocfs2/dlm: Clean up errors in dlm_proxy_ast_handler()
  ocfs2/dlm: Fix a race between migrate request and exit domain
  ocfs2: One more hamming code optimization.
  ocfs2: Another hamming code optimization.
  ocfs2: Don't hand-code xor in ocfs2_hamming_encode().
  ocfs2: Enable metadata checksums.
  ocfs2: Validate superblock with checksum and ecc.
  ocfs2: Checksum and ECC for directory blocks.
  ...
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--  fs/ocfs2/Makefile              |    7
-rw-r--r--  fs/ocfs2/acl.c                 |  479
-rw-r--r--  fs/ocfs2/acl.h                 |   58
-rw-r--r--  fs/ocfs2/alloc.c               |  710
-rw-r--r--  fs/ocfs2/alloc.h               |   30
-rw-r--r--  fs/ocfs2/aops.c                |   59
-rw-r--r--  fs/ocfs2/blockcheck.c          |  477
-rw-r--r--  fs/ocfs2/blockcheck.h          |   82
-rw-r--r--  fs/ocfs2/buffer_head_io.c      |   32
-rw-r--r--  fs/ocfs2/buffer_head_io.h      |   27
-rw-r--r--  fs/ocfs2/cluster/masklog.c     |    1
-rw-r--r--  fs/ocfs2/cluster/masklog.h     |    1
-rw-r--r--  fs/ocfs2/dir.c                 |  399
-rw-r--r--  fs/ocfs2/dir.h                 |    2
-rw-r--r--  fs/ocfs2/dlm/dlmast.c          |   52
-rw-r--r--  fs/ocfs2/dlm/dlmcommon.h       |    3
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c        |   53
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c       |    1
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c       |   42
-rw-r--r--  fs/ocfs2/dlm/dlmthread.c       |    3
-rw-r--r--  fs/ocfs2/dlmglue.c             |  168
-rw-r--r--  fs/ocfs2/dlmglue.h             |   19
-rw-r--r--  fs/ocfs2/extent_map.c          |   96
-rw-r--r--  fs/ocfs2/extent_map.h          |   24
-rw-r--r--  fs/ocfs2/file.c                |  209
-rw-r--r--  fs/ocfs2/file.h                |    3
-rw-r--r--  fs/ocfs2/inode.c               |  175
-rw-r--r--  fs/ocfs2/inode.h               |   18
-rw-r--r--  fs/ocfs2/journal.c             |  364
-rw-r--r--  fs/ocfs2/journal.h             |  128
-rw-r--r--  fs/ocfs2/localalloc.c          |   26
-rw-r--r--  fs/ocfs2/namei.c               |  318
-rw-r--r--  fs/ocfs2/ocfs2.h               |   46
-rw-r--r--  fs/ocfs2/ocfs2_fs.h            |  213
-rw-r--r--  fs/ocfs2/ocfs2_jbd_compat.h    |   82
-rw-r--r--  fs/ocfs2/ocfs2_lockid.h        |    5
-rw-r--r--  fs/ocfs2/quota.h               |  119
-rw-r--r--  fs/ocfs2/quota_global.c        | 1025
-rw-r--r--  fs/ocfs2/quota_local.c         | 1253
-rw-r--r--  fs/ocfs2/resize.c              |   76
-rw-r--r--  fs/ocfs2/slot_map.c            |    4
-rw-r--r--  fs/ocfs2/suballoc.c            |  363
-rw-r--r--  fs/ocfs2/suballoc.h            |   18
-rw-r--r--  fs/ocfs2/super.c               |  328
-rw-r--r--  fs/ocfs2/symlink.c             |    2
-rw-r--r--  fs/ocfs2/xattr.c               | 2984
-rw-r--r--  fs/ocfs2/xattr.h               |   45
47 files changed, 8302 insertions, 2327 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 589dcdfdfe3c..01596079dd63 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
 ocfs2-objs := \
 	alloc.o \
 	aops.o \
+	blockcheck.o \
 	buffer_head_io.o \
 	dcache.o \
 	dir.o \
@@ -35,8 +36,14 @@ ocfs2-objs := \
 	sysfile.o \
 	uptodate.o \
 	ver.o \
+	quota_local.o \
+	quota_global.o \
 	xattr.o
 
+ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
+ocfs2-objs += acl.o
+endif
+
 ocfs2_stackglue-objs := stackglue.o
 ocfs2_stack_o2cb-objs := stack_o2cb.o
 ocfs2_stack_user-objs := stack_user.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
new file mode 100644
index 000000000000..12dfb44c22e5
--- /dev/null
+++ b/fs/ocfs2/acl.c
@@ -0,0 +1,479 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * acl.c
5 *
6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 *
8 * CREDITS:
9 * Lots of code in this file is copy from linux/fs/ext3/acl.c.
10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public
14 * License version 2 as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 */
21
22#include <linux/init.h>
23#include <linux/module.h>
24#include <linux/string.h>
25
26#define MLOG_MASK_PREFIX ML_INODE
27#include <cluster/masklog.h>
28
29#include "ocfs2.h"
30#include "alloc.h"
31#include "dlmglue.h"
32#include "file.h"
33#include "ocfs2_fs.h"
34
35#include "xattr.h"
36#include "acl.h"
37
38/*
39 * Convert from xattr value to acl struct.
40 */
41static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
42{
43 int n, count;
44 struct posix_acl *acl;
45
46 if (!value)
47 return NULL;
48 if (size < sizeof(struct posix_acl_entry))
49 return ERR_PTR(-EINVAL);
50
51 count = size / sizeof(struct posix_acl_entry);
52 if (count < 0)
53 return ERR_PTR(-EINVAL);
54 if (count == 0)
55 return NULL;
56
57 acl = posix_acl_alloc(count, GFP_NOFS);
58 if (!acl)
59 return ERR_PTR(-ENOMEM);
60 for (n = 0; n < count; n++) {
61 struct ocfs2_acl_entry *entry =
62 (struct ocfs2_acl_entry *)value;
63
64 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
65 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
66 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
67 value += sizeof(struct posix_acl_entry);
68
69 }
70 return acl;
71}
72
73/*
74 * Convert acl struct to xattr value.
75 */
76static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
77{
78 struct ocfs2_acl_entry *entry = NULL;
79 char *ocfs2_acl;
80 size_t n;
81
82 *size = acl->a_count * sizeof(struct posix_acl_entry);
83
84 ocfs2_acl = kmalloc(*size, GFP_NOFS);
85 if (!ocfs2_acl)
86 return ERR_PTR(-ENOMEM);
87
88 entry = (struct ocfs2_acl_entry *)ocfs2_acl;
89 for (n = 0; n < acl->a_count; n++, entry++) {
90 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
91 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
92 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
93 }
94 return ocfs2_acl;
95}
96
97static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
98 int type,
99 struct buffer_head *di_bh)
100{
101 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
102 int name_index;
103 char *value = NULL;
104 struct posix_acl *acl;
105 int retval;
106
107 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
108 return NULL;
109
110 switch (type) {
111 case ACL_TYPE_ACCESS:
112 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
113 break;
114 case ACL_TYPE_DEFAULT:
115 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
116 break;
117 default:
118 return ERR_PTR(-EINVAL);
119 }
120
121 retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
122 if (retval > 0) {
123 value = kmalloc(retval, GFP_NOFS);
124 if (!value)
125 return ERR_PTR(-ENOMEM);
126 retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
127 "", value, retval);
128 }
129
130 if (retval > 0)
131 acl = ocfs2_acl_from_xattr(value, retval);
132 else if (retval == -ENODATA || retval == 0)
133 acl = NULL;
134 else
135 acl = ERR_PTR(retval);
136
137 kfree(value);
138
139 return acl;
140}
141
142
143/*
144 * Get posix acl.
145 */
146static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
147{
148 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
149 struct buffer_head *di_bh = NULL;
150 struct posix_acl *acl;
151 int ret;
152
153 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
154 return NULL;
155
156 ret = ocfs2_inode_lock(inode, &di_bh, 0);
157 if (ret < 0) {
158 mlog_errno(ret);
159 acl = ERR_PTR(ret);
160 return acl;
161 }
162
163 acl = ocfs2_get_acl_nolock(inode, type, di_bh);
164
165 ocfs2_inode_unlock(inode, 0);
166
167 brelse(di_bh);
168
169 return acl;
170}
171
172/*
173 * Set the access or default ACL of an inode.
174 */
175static int ocfs2_set_acl(handle_t *handle,
176 struct inode *inode,
177 struct buffer_head *di_bh,
178 int type,
179 struct posix_acl *acl,
180 struct ocfs2_alloc_context *meta_ac,
181 struct ocfs2_alloc_context *data_ac)
182{
183 int name_index;
184 void *value = NULL;
185 size_t size = 0;
186 int ret;
187
188 if (S_ISLNK(inode->i_mode))
189 return -EOPNOTSUPP;
190
191 switch (type) {
192 case ACL_TYPE_ACCESS:
193 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
194 if (acl) {
195 mode_t mode = inode->i_mode;
196 ret = posix_acl_equiv_mode(acl, &mode);
197 if (ret < 0)
198 return ret;
199 else {
200 inode->i_mode = mode;
201 if (ret == 0)
202 acl = NULL;
203 }
204 }
205 break;
206 case ACL_TYPE_DEFAULT:
207 name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
208 if (!S_ISDIR(inode->i_mode))
209 return acl ? -EACCES : 0;
210 break;
211 default:
212 return -EINVAL;
213 }
214
215 if (acl) {
216 value = ocfs2_acl_to_xattr(acl, &size);
217 if (IS_ERR(value))
218 return (int)PTR_ERR(value);
219 }
220
221 if (handle)
222 ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
223 "", value, size, 0,
224 meta_ac, data_ac);
225 else
226 ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
227
228 kfree(value);
229
230 return ret;
231}
232
233int ocfs2_check_acl(struct inode *inode, int mask)
234{
235 struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
236
237 if (IS_ERR(acl))
238 return PTR_ERR(acl);
239 if (acl) {
240 int ret = posix_acl_permission(inode, acl, mask);
241 posix_acl_release(acl);
242 return ret;
243 }
244
245 return -EAGAIN;
246}
247
248int ocfs2_acl_chmod(struct inode *inode)
249{
250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
251 struct posix_acl *acl, *clone;
252 int ret;
253
254 if (S_ISLNK(inode->i_mode))
255 return -EOPNOTSUPP;
256
257 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
258 return 0;
259
260 acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
261 if (IS_ERR(acl) || !acl)
262 return PTR_ERR(acl);
263 clone = posix_acl_clone(acl, GFP_KERNEL);
264 posix_acl_release(acl);
265 if (!clone)
266 return -ENOMEM;
267 ret = posix_acl_chmod_masq(clone, inode->i_mode);
268 if (!ret)
269 ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
270 clone, NULL, NULL);
271 posix_acl_release(clone);
272 return ret;
273}
274
275/*
276 * Initialize the ACLs of a new inode. If parent directory has default ACL,
277 * then clone to new inode. Called from ocfs2_mknod.
278 */
279int ocfs2_init_acl(handle_t *handle,
280 struct inode *inode,
281 struct inode *dir,
282 struct buffer_head *di_bh,
283 struct buffer_head *dir_bh,
284 struct ocfs2_alloc_context *meta_ac,
285 struct ocfs2_alloc_context *data_ac)
286{
287 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
288 struct posix_acl *acl = NULL;
289 int ret = 0;
290
291 if (!S_ISLNK(inode->i_mode)) {
292 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
293 acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
294 dir_bh);
295 if (IS_ERR(acl))
296 return PTR_ERR(acl);
297 }
298 if (!acl)
299 inode->i_mode &= ~current->fs->umask;
300 }
301 if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
302 struct posix_acl *clone;
303 mode_t mode;
304
305 if (S_ISDIR(inode->i_mode)) {
306 ret = ocfs2_set_acl(handle, inode, di_bh,
307 ACL_TYPE_DEFAULT, acl,
308 meta_ac, data_ac);
309 if (ret)
310 goto cleanup;
311 }
312 clone = posix_acl_clone(acl, GFP_NOFS);
313 ret = -ENOMEM;
314 if (!clone)
315 goto cleanup;
316
317 mode = inode->i_mode;
318 ret = posix_acl_create_masq(clone, &mode);
319 if (ret >= 0) {
320 inode->i_mode = mode;
321 if (ret > 0) {
322 ret = ocfs2_set_acl(handle, inode,
323 di_bh, ACL_TYPE_ACCESS,
324 clone, meta_ac, data_ac);
325 }
326 }
327 posix_acl_release(clone);
328 }
329cleanup:
330 posix_acl_release(acl);
331 return ret;
332}
333
334static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
335 char *list,
336 size_t list_len,
337 const char *name,
338 size_t name_len)
339{
340 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
341 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
342
343 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
344 return 0;
345
346 if (list && size <= list_len)
347 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
348 return size;
349}
350
351static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
352 char *list,
353 size_t list_len,
354 const char *name,
355 size_t name_len)
356{
357 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
358 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
359
360 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
361 return 0;
362
363 if (list && size <= list_len)
364 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
365 return size;
366}
367
368static int ocfs2_xattr_get_acl(struct inode *inode,
369 int type,
370 void *buffer,
371 size_t size)
372{
373 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
374 struct posix_acl *acl;
375 int ret;
376
377 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
378 return -EOPNOTSUPP;
379
380 acl = ocfs2_get_acl(inode, type);
381 if (IS_ERR(acl))
382 return PTR_ERR(acl);
383 if (acl == NULL)
384 return -ENODATA;
385 ret = posix_acl_to_xattr(acl, buffer, size);
386 posix_acl_release(acl);
387
388 return ret;
389}
390
391static int ocfs2_xattr_get_acl_access(struct inode *inode,
392 const char *name,
393 void *buffer,
394 size_t size)
395{
396 if (strcmp(name, "") != 0)
397 return -EINVAL;
398 return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
399}
400
401static int ocfs2_xattr_get_acl_default(struct inode *inode,
402 const char *name,
403 void *buffer,
404 size_t size)
405{
406 if (strcmp(name, "") != 0)
407 return -EINVAL;
408 return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
409}
410
411static int ocfs2_xattr_set_acl(struct inode *inode,
412 int type,
413 const void *value,
414 size_t size)
415{
416 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
417 struct posix_acl *acl;
418 int ret = 0;
419
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
421 return -EOPNOTSUPP;
422
423 if (!is_owner_or_cap(inode))
424 return -EPERM;
425
426 if (value) {
427 acl = posix_acl_from_xattr(value, size);
428 if (IS_ERR(acl))
429 return PTR_ERR(acl);
430 else if (acl) {
431 ret = posix_acl_valid(acl);
432 if (ret)
433 goto cleanup;
434 }
435 } else
436 acl = NULL;
437
438 ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
439
440cleanup:
441 posix_acl_release(acl);
442 return ret;
443}
444
445static int ocfs2_xattr_set_acl_access(struct inode *inode,
446 const char *name,
447 const void *value,
448 size_t size,
449 int flags)
450{
451 if (strcmp(name, "") != 0)
452 return -EINVAL;
453 return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
454}
455
456static int ocfs2_xattr_set_acl_default(struct inode *inode,
457 const char *name,
458 const void *value,
459 size_t size,
460 int flags)
461{
462 if (strcmp(name, "") != 0)
463 return -EINVAL;
464 return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
465}
466
467struct xattr_handler ocfs2_xattr_acl_access_handler = {
468 .prefix = POSIX_ACL_XATTR_ACCESS,
469 .list = ocfs2_xattr_list_acl_access,
470 .get = ocfs2_xattr_get_acl_access,
471 .set = ocfs2_xattr_set_acl_access,
472};
473
474struct xattr_handler ocfs2_xattr_acl_default_handler = {
475 .prefix = POSIX_ACL_XATTR_DEFAULT,
476 .list = ocfs2_xattr_list_acl_default,
477 .get = ocfs2_xattr_get_acl_default,
478 .set = ocfs2_xattr_set_acl_default,
479};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
new file mode 100644
index 000000000000..8f6389ed4da5
--- /dev/null
+++ b/fs/ocfs2/acl.h
@@ -0,0 +1,58 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * acl.h
5 *
6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17
18#ifndef OCFS2_ACL_H
19#define OCFS2_ACL_H
20
21#include <linux/posix_acl_xattr.h>
22
23struct ocfs2_acl_entry {
24 __le16 e_tag;
25 __le16 e_perm;
26 __le32 e_id;
27};
28
29#ifdef CONFIG_OCFS2_FS_POSIX_ACL
30
31extern int ocfs2_check_acl(struct inode *, int);
32extern int ocfs2_acl_chmod(struct inode *);
33extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
34 struct buffer_head *, struct buffer_head *,
35 struct ocfs2_alloc_context *,
36 struct ocfs2_alloc_context *);
37
38#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
39
40#define ocfs2_check_acl NULL
41static inline int ocfs2_acl_chmod(struct inode *inode)
42{
43 return 0;
44}
45static inline int ocfs2_init_acl(handle_t *handle,
46 struct inode *inode,
47 struct inode *dir,
48 struct buffer_head *di_bh,
49 struct buffer_head *dir_bh,
50 struct ocfs2_alloc_context *meta_ac,
51 struct ocfs2_alloc_context *data_ac)
52{
53 return 0;
54}
55
56#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
57
58#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0cc2deb9394c..54ff4c77aaa3 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
+#include <linux/quotaops.h>
 
 #define MLOG_MASK_PREFIX ML_DISK_ALLOC
 #include <cluster/masklog.h>
@@ -36,6 +37,7 @@
 
 #include "alloc.h"
 #include "aops.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "inode.h"
@@ -46,6 +48,7 @@
 #include "file.h"
 #include "super.h"
 #include "uptodate.h"
+#include "xattr.h"
 
 #include "buffer_head_io.h"
 
@@ -187,20 +190,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
 static int ocfs2_dinode_sanity_check(struct inode *inode,
                                      struct ocfs2_extent_tree *et)
 {
-        int ret = 0;
-        struct ocfs2_dinode *di;
+        struct ocfs2_dinode *di = et->et_object;
 
         BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+        BUG_ON(!OCFS2_IS_VALID_DINODE(di));
 
-        di = et->et_object;
-        if (!OCFS2_IS_VALID_DINODE(di)) {
-                ret = -EIO;
-                ocfs2_error(inode->i_sb,
-                            "Inode %llu has invalid path root",
-                            (unsigned long long)OCFS2_I(inode)->ip_blkno);
-        }
-
-        return ret;
+        return 0;
 }
 
 static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
@@ -213,36 +208,33 @@ static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
 
 static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
 {
-        struct ocfs2_xattr_value_root *xv = et->et_object;
+        struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-        et->et_root_el = &xv->xr_list;
+        et->et_root_el = &vb->vb_xv->xr_list;
 }
 
 static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
                                               u64 blkno)
 {
-        struct ocfs2_xattr_value_root *xv =
-                (struct ocfs2_xattr_value_root *)et->et_object;
+        struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-        xv->xr_last_eb_blk = cpu_to_le64(blkno);
+        vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno);
 }
 
 static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
 {
-        struct ocfs2_xattr_value_root *xv =
-                (struct ocfs2_xattr_value_root *) et->et_object;
+        struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-        return le64_to_cpu(xv->xr_last_eb_blk);
+        return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
 }
 
 static void ocfs2_xattr_value_update_clusters(struct inode *inode,
                                               struct ocfs2_extent_tree *et,
                                               u32 clusters)
 {
-        struct ocfs2_xattr_value_root *xv =
-                (struct ocfs2_xattr_value_root *)et->et_object;
+        struct ocfs2_xattr_value_buf *vb = et->et_object;
 
-        le32_add_cpu(&xv->xr_clusters, clusters);
+        le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
 }
 
 static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
@@ -304,11 +296,13 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
 static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
                                      struct inode *inode,
                                      struct buffer_head *bh,
+                                     ocfs2_journal_access_func access,
                                      void *obj,
                                      struct ocfs2_extent_tree_operations *ops)
 {
         et->et_ops = ops;
         et->et_root_bh = bh;
+        et->et_root_journal_access = access;
         if (!obj)
                 obj = (void *)bh->b_data;
         et->et_object = obj;
@@ -324,23 +318,23 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
                                    struct inode *inode,
                                    struct buffer_head *bh)
 {
-        __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops);
+        __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di,
+                                 NULL, &ocfs2_dinode_et_ops);
 }
 
 void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
                                        struct inode *inode,
                                        struct buffer_head *bh)
 {
-        __ocfs2_init_extent_tree(et, inode, bh, NULL,
-                                 &ocfs2_xattr_tree_et_ops);
+        __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb,
+                                 NULL, &ocfs2_xattr_tree_et_ops);
 }
 
 void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
                                         struct inode *inode,
-                                        struct buffer_head *bh,
-                                        struct ocfs2_xattr_value_root *xv)
+                                        struct ocfs2_xattr_value_buf *vb)
 {
-        __ocfs2_init_extent_tree(et, inode, bh, xv,
+        __ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb,
                                  &ocfs2_xattr_value_et_ops);
 }
 
@@ -362,6 +356,15 @@ static inline void ocfs2_et_update_clusters(struct inode *inode,
         et->et_ops->eo_update_clusters(inode, et, clusters);
 }
 
+static inline int ocfs2_et_root_journal_access(handle_t *handle,
+                                               struct inode *inode,
+                                               struct ocfs2_extent_tree *et,
+                                               int type)
+{
+        return et->et_root_journal_access(handle, inode, et->et_root_bh,
+                                          type);
+}
+
 static inline int ocfs2_et_insert_check(struct inode *inode,
                                         struct ocfs2_extent_tree *et,
                                         struct ocfs2_extent_rec *rec)
@@ -402,12 +405,14 @@ struct ocfs2_path_item {
 #define OCFS2_MAX_PATH_DEPTH    5
 
 struct ocfs2_path {
         int                        p_tree_depth;
-        struct ocfs2_path_item     p_node[OCFS2_MAX_PATH_DEPTH];
+        ocfs2_journal_access_func  p_root_access;
+        struct ocfs2_path_item     p_node[OCFS2_MAX_PATH_DEPTH];
 };
 
 #define path_root_bh(_path) ((_path)->p_node[0].bh)
 #define path_root_el(_path) ((_path)->p_node[0].el)
+#define path_root_access(_path)((_path)->p_root_access)
 #define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
 #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
 #define path_num_items(_path) ((_path)->p_tree_depth + 1)
@@ -440,6 +445,8 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
          */
         if (keep_root)
                 depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
+        else
+                path_root_access(path) = NULL;
 
         path->p_tree_depth = depth;
 }
@@ -465,6 +472,7 @@ static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
 
         BUG_ON(path_root_bh(dest) != path_root_bh(src));
         BUG_ON(path_root_el(dest) != path_root_el(src));
+        BUG_ON(path_root_access(dest) != path_root_access(src));
 
         ocfs2_reinit_path(dest, 1);
 
@@ -486,6 +494,7 @@ static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
         int i;
 
         BUG_ON(path_root_bh(dest) != path_root_bh(src));
+        BUG_ON(path_root_access(dest) != path_root_access(src));
 
         for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
                 brelse(dest->p_node[i].bh);
@@ -521,7 +530,8 @@ static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
 }
 
 static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
-                                         struct ocfs2_extent_list *root_el)
+                                         struct ocfs2_extent_list *root_el,
+                                         ocfs2_journal_access_func access)
 {
         struct ocfs2_path *path;
 
@@ -533,11 +543,48 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
                 get_bh(root_bh);
                 path_root_bh(path) = root_bh;
                 path_root_el(path) = root_el;
+                path_root_access(path) = access;
         }
 
         return path;
 }
 
+static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
+{
+        return ocfs2_new_path(path_root_bh(path), path_root_el(path),
+                              path_root_access(path));
+}
+
+static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
+{
+        return ocfs2_new_path(et->et_root_bh, et->et_root_el,
+                              et->et_root_journal_access);
+}
+
+/*
+ * Journal the buffer at depth idx.  All idx>0 are extent_blocks,
+ * otherwise it's the root_access function.
+ *
+ * I don't like the way this function's name looks next to
+ * ocfs2_journal_access_path(), but I don't have a better one.
+ */
+static int ocfs2_path_bh_journal_access(handle_t *handle,
+                                        struct inode *inode,
+                                        struct ocfs2_path *path,
+                                        int idx)
+{
+        ocfs2_journal_access_func access = path_root_access(path);
+
+        if (!access)
+                access = ocfs2_journal_access;
+
+        if (idx)
+                access = ocfs2_journal_access_eb;
+
+        return access(handle, inode, path->p_node[idx].bh,
+                      OCFS2_JOURNAL_ACCESS_WRITE);
+}
+
 /*
  * Convenience function to journal all components in a path.
  */
@@ -550,8 +597,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
                 goto out;
 
         for(i = 0; i < path_num_items(path); i++) {
-                ret = ocfs2_journal_access(handle, inode, path->p_node[i].bh,
-                                           OCFS2_JOURNAL_ACCESS_WRITE);
+                ret = ocfs2_path_bh_journal_access(handle, inode, path, i);
                 if (ret < 0) {
                         mlog_errno(ret);
                         goto out;
@@ -686,6 +732,80 @@ struct ocfs2_merge_ctxt {
686 int c_split_covers_rec; 732 int c_split_covers_rec;
687}; 733};
688 734
735static int ocfs2_validate_extent_block(struct super_block *sb,
736 struct buffer_head *bh)
737{
738 int rc;
739 struct ocfs2_extent_block *eb =
740 (struct ocfs2_extent_block *)bh->b_data;
741
742 mlog(0, "Validating extent block %llu\n",
743 (unsigned long long)bh->b_blocknr);
744
745 BUG_ON(!buffer_uptodate(bh));
746
747 /*
748 * If the ecc fails, we return the error but otherwise
749 * leave the filesystem running. We know any error is
750 * local to this block.
751 */
752 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
753 if (rc) {
754 mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
755 (unsigned long long)bh->b_blocknr);
756 return rc;
757 }
758
759 /*
760 * Errors after here are fatal.
761 */
762
763 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
764 ocfs2_error(sb,
765 "Extent block #%llu has bad signature %.*s",
766 (unsigned long long)bh->b_blocknr, 7,
767 eb->h_signature);
768 return -EINVAL;
769 }
770
771 if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
772 ocfs2_error(sb,
773 "Extent block #%llu has an invalid h_blkno "
774 "of %llu",
775 (unsigned long long)bh->b_blocknr,
776 (unsigned long long)le64_to_cpu(eb->h_blkno));
777 return -EINVAL;
778 }
779
780 if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
781 ocfs2_error(sb,
782 "Extent block #%llu has an invalid "
783 "h_fs_generation of #%u",
784 (unsigned long long)bh->b_blocknr,
785 le32_to_cpu(eb->h_fs_generation));
786 return -EINVAL;
787 }
788
789 return 0;
790}
791
792int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
793 struct buffer_head **bh)
794{
795 int rc;
796 struct buffer_head *tmp = *bh;
797
798 rc = ocfs2_read_block(inode, eb_blkno, &tmp,
799 ocfs2_validate_extent_block);
800
801 /* If ocfs2_read_block() got us a new bh, pass it up. */
802 if (!rc && !*bh)
803 *bh = tmp;
804
805 return rc;
806}
807
808
689/* 809/*
690 * How many free extents have we got before we need more meta data? 810 * How many free extents have we got before we need more meta data?
691 */ 811 */
@@ -705,8 +825,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
         last_eb_blk = ocfs2_et_get_last_eb_blk(et);
 
         if (last_eb_blk) {
-                retval = ocfs2_read_block(inode, last_eb_blk,
-                                          &eb_bh);
+                retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
                 if (retval < 0) {
                         mlog_errno(retval);
                         goto bail;
@@ -768,8 +887,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
                 }
                 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
 
-                status = ocfs2_journal_access(handle, inode, bhs[i],
-                                              OCFS2_JOURNAL_ACCESS_CREATE);
+                status = ocfs2_journal_access_eb(handle, inode, bhs[i],
+                                                 OCFS2_JOURNAL_ACCESS_CREATE);
                 if (status < 0) {
                         mlog_errno(status);
                         goto bail;
@@ -908,15 +1027,12 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
         for(i = 0; i < new_blocks; i++) {
                 bh = new_eb_bhs[i];
                 eb = (struct ocfs2_extent_block *) bh->b_data;
-                if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-                        OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-                        status = -EIO;
-                        goto bail;
-                }
+                /* ocfs2_create_new_meta_bhs() should create it right! */
+                BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
                 eb_el = &eb->h_list;
 
-                status = ocfs2_journal_access(handle, inode, bh,
-                                              OCFS2_JOURNAL_ACCESS_CREATE);
+                status = ocfs2_journal_access_eb(handle, inode, bh,
+                                                 OCFS2_JOURNAL_ACCESS_CREATE);
                 if (status < 0) {
                         mlog_errno(status);
                         goto bail;
@@ -955,21 +1071,21 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
          * journal_dirty erroring as it won't unless we've aborted the
          * handle (in which case we would never be here) so reserving
          * the write with journal_access is all we need to do. */
-        status = ocfs2_journal_access(handle, inode, *last_eb_bh,
-                                      OCFS2_JOURNAL_ACCESS_WRITE);
+        status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh,
+                                         OCFS2_JOURNAL_ACCESS_WRITE);
         if (status < 0) {
                 mlog_errno(status);
                 goto bail;
         }
-        status = ocfs2_journal_access(handle, inode, et->et_root_bh,
-                                      OCFS2_JOURNAL_ACCESS_WRITE);
+        status = ocfs2_et_root_journal_access(handle, inode, et,
+                                              OCFS2_JOURNAL_ACCESS_WRITE);
         if (status < 0) {
                 mlog_errno(status);
                 goto bail;
         }
         if (eb_bh) {
-                status = ocfs2_journal_access(handle, inode, eb_bh,
-                                              OCFS2_JOURNAL_ACCESS_WRITE);
+                status = ocfs2_journal_access_eb(handle, inode, eb_bh,
+                                                 OCFS2_JOURNAL_ACCESS_WRITE);
                 if (status < 0) {
                         mlog_errno(status);
                         goto bail;
@@ -1052,17 +1168,14 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
         }
 
         eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
-        if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-                OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-                status = -EIO;
-                goto bail;
-        }
+        /* ocfs2_create_new_meta_bhs() should create it right! */
+        BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
 
         eb_el = &eb->h_list;
         root_el = et->et_root_el;
 
-        status = ocfs2_journal_access(handle, inode, new_eb_bh,
-                                      OCFS2_JOURNAL_ACCESS_CREATE);
+        status = ocfs2_journal_access_eb(handle, inode, new_eb_bh,
+                                         OCFS2_JOURNAL_ACCESS_CREATE);
         if (status < 0) {
                 mlog_errno(status);
                 goto bail;
@@ -1080,8 +1193,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
                 goto bail;
         }
 
-        status = ocfs2_journal_access(handle, inode, et->et_root_bh,
-                                      OCFS2_JOURNAL_ACCESS_WRITE);
+        status = ocfs2_et_root_journal_access(handle, inode, et,
+                                              OCFS2_JOURNAL_ACCESS_WRITE);
         if (status < 0) {
                 mlog_errno(status);
                 goto bail;
@@ -1176,18 +1289,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
                 brelse(bh);
                 bh = NULL;
 
-                status = ocfs2_read_block(inode, blkno, &bh);
+                status = ocfs2_read_extent_block(inode, blkno, &bh);
                 if (status < 0) {
                         mlog_errno(status);
                         goto bail;
                 }
 
                 eb = (struct ocfs2_extent_block *) bh->b_data;
-                if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
-                        OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-                        status = -EIO;
-                        goto bail;
-                }
                 el = &eb->h_list;
 
                 if (le16_to_cpu(el->l_next_free_rec) <
@@ -1540,7 +1648,7 @@ static int __ocfs2_find_path(struct inode *inode,
1540 1648
1541 brelse(bh); 1649 brelse(bh);
1542 bh = NULL; 1650 bh = NULL;
1543 ret = ocfs2_read_block(inode, blkno, &bh); 1651 ret = ocfs2_read_extent_block(inode, blkno, &bh);
1544 if (ret) { 1652 if (ret) {
1545 mlog_errno(ret); 1653 mlog_errno(ret);
1546 goto out; 1654 goto out;
@@ -1548,11 +1656,6 @@ static int __ocfs2_find_path(struct inode *inode,
1548 1656
1549 eb = (struct ocfs2_extent_block *) bh->b_data; 1657 eb = (struct ocfs2_extent_block *) bh->b_data;
1550 el = &eb->h_list; 1658 el = &eb->h_list;
1551 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
1552 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
1553 ret = -EIO;
1554 goto out;
1555 }
1556 1659
1557 if (le16_to_cpu(el->l_next_free_rec) > 1660 if (le16_to_cpu(el->l_next_free_rec) >
1558 le16_to_cpu(el->l_count)) { 1661 le16_to_cpu(el->l_count)) {
@@ -1860,25 +1963,23 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
1860 root_bh = left_path->p_node[subtree_index].bh; 1963 root_bh = left_path->p_node[subtree_index].bh;
1861 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 1964 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
1862 1965
1863 ret = ocfs2_journal_access(handle, inode, root_bh, 1966 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
1864 OCFS2_JOURNAL_ACCESS_WRITE); 1967 subtree_index);
1865 if (ret) { 1968 if (ret) {
1866 mlog_errno(ret); 1969 mlog_errno(ret);
1867 goto out; 1970 goto out;
1868 } 1971 }
1869 1972
1870 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 1973 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
1871 ret = ocfs2_journal_access(handle, inode, 1974 ret = ocfs2_path_bh_journal_access(handle, inode,
1872 right_path->p_node[i].bh, 1975 right_path, i);
1873 OCFS2_JOURNAL_ACCESS_WRITE);
1874 if (ret) { 1976 if (ret) {
1875 mlog_errno(ret); 1977 mlog_errno(ret);
1876 goto out; 1978 goto out;
1877 } 1979 }
1878 1980
1879 ret = ocfs2_journal_access(handle, inode, 1981 ret = ocfs2_path_bh_journal_access(handle, inode,
1880 left_path->p_node[i].bh, 1982 left_path, i);
1881 OCFS2_JOURNAL_ACCESS_WRITE);
1882 if (ret) { 1983 if (ret) {
1883 mlog_errno(ret); 1984 mlog_errno(ret);
1884 goto out; 1985 goto out;
@@ -2102,8 +2203,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2102 2203
2103 *ret_left_path = NULL; 2204 *ret_left_path = NULL;
2104 2205
2105 left_path = ocfs2_new_path(path_root_bh(right_path), 2206 left_path = ocfs2_new_path_from_path(right_path);
2106 path_root_el(right_path));
2107 if (!left_path) { 2207 if (!left_path) {
2108 ret = -ENOMEM; 2208 ret = -ENOMEM;
2109 mlog_errno(ret); 2209 mlog_errno(ret);
@@ -2398,9 +2498,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2398 return -EAGAIN; 2498 return -EAGAIN;
2399 2499
2400 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { 2500 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
2401 ret = ocfs2_journal_access(handle, inode, 2501 ret = ocfs2_journal_access_eb(handle, inode,
2402 path_leaf_bh(right_path), 2502 path_leaf_bh(right_path),
2403 OCFS2_JOURNAL_ACCESS_WRITE); 2503 OCFS2_JOURNAL_ACCESS_WRITE);
2404 if (ret) { 2504 if (ret) {
2405 mlog_errno(ret); 2505 mlog_errno(ret);
2406 goto out; 2506 goto out;
@@ -2417,8 +2517,8 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2417 * We have to update i_last_eb_blk during the meta 2517 * We have to update i_last_eb_blk during the meta
2418 * data delete. 2518 * data delete.
2419 */ 2519 */
2420 ret = ocfs2_journal_access(handle, inode, et_root_bh, 2520 ret = ocfs2_et_root_journal_access(handle, inode, et,
2421 OCFS2_JOURNAL_ACCESS_WRITE); 2521 OCFS2_JOURNAL_ACCESS_WRITE);
2422 if (ret) { 2522 if (ret) {
2423 mlog_errno(ret); 2523 mlog_errno(ret);
2424 goto out; 2524 goto out;
@@ -2433,25 +2533,23 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2433 */ 2533 */
2434 BUG_ON(right_has_empty && !del_right_subtree); 2534 BUG_ON(right_has_empty && !del_right_subtree);
2435 2535
2436 ret = ocfs2_journal_access(handle, inode, root_bh, 2536 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
2437 OCFS2_JOURNAL_ACCESS_WRITE); 2537 subtree_index);
2438 if (ret) { 2538 if (ret) {
2439 mlog_errno(ret); 2539 mlog_errno(ret);
2440 goto out; 2540 goto out;
2441 } 2541 }
2442 2542
2443 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2543 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2444 ret = ocfs2_journal_access(handle, inode, 2544 ret = ocfs2_path_bh_journal_access(handle, inode,
2445 right_path->p_node[i].bh, 2545 right_path, i);
2446 OCFS2_JOURNAL_ACCESS_WRITE);
2447 if (ret) { 2546 if (ret) {
2448 mlog_errno(ret); 2547 mlog_errno(ret);
2449 goto out; 2548 goto out;
2450 } 2549 }
2451 2550
2452 ret = ocfs2_journal_access(handle, inode, 2551 ret = ocfs2_path_bh_journal_access(handle, inode,
2453 left_path->p_node[i].bh, 2552 left_path, i);
2454 OCFS2_JOURNAL_ACCESS_WRITE);
2455 if (ret) { 2553 if (ret) {
2456 mlog_errno(ret); 2554 mlog_errno(ret);
2457 goto out; 2555 goto out;
@@ -2596,16 +2694,17 @@ out:
2596 2694
2597static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, 2695static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
2598 handle_t *handle, 2696 handle_t *handle,
2599 struct buffer_head *bh, 2697 struct ocfs2_path *path)
2600 struct ocfs2_extent_list *el)
2601{ 2698{
2602 int ret; 2699 int ret;
2700 struct buffer_head *bh = path_leaf_bh(path);
2701 struct ocfs2_extent_list *el = path_leaf_el(path);
2603 2702
2604 if (!ocfs2_is_empty_extent(&el->l_recs[0])) 2703 if (!ocfs2_is_empty_extent(&el->l_recs[0]))
2605 return 0; 2704 return 0;
2606 2705
2607 ret = ocfs2_journal_access(handle, inode, bh, 2706 ret = ocfs2_path_bh_journal_access(handle, inode, path,
2608 OCFS2_JOURNAL_ACCESS_WRITE); 2707 path_num_items(path) - 1);
2609 if (ret) { 2708 if (ret) {
2610 mlog_errno(ret); 2709 mlog_errno(ret);
2611 goto out; 2710 goto out;
@@ -2644,8 +2743,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2644 goto out; 2743 goto out;
2645 } 2744 }
2646 2745
2647 left_path = ocfs2_new_path(path_root_bh(path), 2746 left_path = ocfs2_new_path_from_path(path);
2648 path_root_el(path));
2649 if (!left_path) { 2747 if (!left_path) {
2650 ret = -ENOMEM; 2748 ret = -ENOMEM;
2651 mlog_errno(ret); 2749 mlog_errno(ret);
@@ -2654,8 +2752,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2654 2752
2655 ocfs2_cp_path(left_path, path); 2753 ocfs2_cp_path(left_path, path);
2656 2754
2657 right_path = ocfs2_new_path(path_root_bh(path), 2755 right_path = ocfs2_new_path_from_path(path);
2658 path_root_el(path));
2659 if (!right_path) { 2756 if (!right_path) {
2660 ret = -ENOMEM; 2757 ret = -ENOMEM;
2661 mlog_errno(ret); 2758 mlog_errno(ret);
@@ -2689,9 +2786,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2689 * Caller might still want to make changes to the 2786 * Caller might still want to make changes to the
2690 * tree root, so re-add it to the journal here. 2787 * tree root, so re-add it to the journal here.
2691 */ 2788 */
2692 ret = ocfs2_journal_access(handle, inode, 2789 ret = ocfs2_path_bh_journal_access(handle, inode,
2693 path_root_bh(left_path), 2790 left_path, 0);
2694 OCFS2_JOURNAL_ACCESS_WRITE);
2695 if (ret) { 2791 if (ret) {
2696 mlog_errno(ret); 2792 mlog_errno(ret);
2697 goto out; 2793 goto out;
@@ -2785,8 +2881,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2785 * We have a path to the left of this one - it needs 2881 * We have a path to the left of this one - it needs
2786 * an update too. 2882 * an update too.
2787 */ 2883 */
2788 left_path = ocfs2_new_path(path_root_bh(path), 2884 left_path = ocfs2_new_path_from_path(path);
2789 path_root_el(path));
2790 if (!left_path) { 2885 if (!left_path) {
2791 ret = -ENOMEM; 2886 ret = -ENOMEM;
2792 mlog_errno(ret); 2887 mlog_errno(ret);
@@ -2875,8 +2970,7 @@ rightmost_no_delete:
2875 * it up front. 2970 * it up front.
2876 */ 2971 */
2877 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 2972 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
2878 path_leaf_bh(path), 2973 path);
2879 path_leaf_el(path));
2880 if (ret) 2974 if (ret)
2881 mlog_errno(ret); 2975 mlog_errno(ret);
2882 goto out; 2976 goto out;
@@ -3027,8 +3121,7 @@ static int ocfs2_get_right_path(struct inode *inode,
3027 /* This function shouldn't be called for the rightmost leaf. */ 3121 /* This function shouldn't be called for the rightmost leaf. */
3028 BUG_ON(right_cpos == 0); 3122 BUG_ON(right_cpos == 0);
3029 3123
3030 right_path = ocfs2_new_path(path_root_bh(left_path), 3124 right_path = ocfs2_new_path_from_path(left_path);
3031 path_root_el(left_path));
3032 if (!right_path) { 3125 if (!right_path) {
3033 ret = -ENOMEM; 3126 ret = -ENOMEM;
3034 mlog_errno(ret); 3127 mlog_errno(ret);
@@ -3111,8 +3204,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3111 root_bh = left_path->p_node[subtree_index].bh; 3204 root_bh = left_path->p_node[subtree_index].bh;
3112 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3205 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3113 3206
3114 ret = ocfs2_journal_access(handle, inode, root_bh, 3207 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
3115 OCFS2_JOURNAL_ACCESS_WRITE); 3208 subtree_index);
3116 if (ret) { 3209 if (ret) {
3117 mlog_errno(ret); 3210 mlog_errno(ret);
3118 goto out; 3211 goto out;
@@ -3120,17 +3213,15 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3120 3213
3121 for (i = subtree_index + 1; 3214 for (i = subtree_index + 1;
3122 i < path_num_items(right_path); i++) { 3215 i < path_num_items(right_path); i++) {
3123 ret = ocfs2_journal_access(handle, inode, 3216 ret = ocfs2_path_bh_journal_access(handle, inode,
3124 right_path->p_node[i].bh, 3217 right_path, i);
3125 OCFS2_JOURNAL_ACCESS_WRITE);
3126 if (ret) { 3218 if (ret) {
3127 mlog_errno(ret); 3219 mlog_errno(ret);
3128 goto out; 3220 goto out;
3129 } 3221 }
3130 3222
3131 ret = ocfs2_journal_access(handle, inode, 3223 ret = ocfs2_path_bh_journal_access(handle, inode,
3132 left_path->p_node[i].bh, 3224 left_path, i);
3133 OCFS2_JOURNAL_ACCESS_WRITE);
3134 if (ret) { 3225 if (ret) {
3135 mlog_errno(ret); 3226 mlog_errno(ret);
3136 goto out; 3227 goto out;
@@ -3142,8 +3233,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3142 right_rec = &el->l_recs[index + 1]; 3233 right_rec = &el->l_recs[index + 1];
3143 } 3234 }
3144 3235
3145 ret = ocfs2_journal_access(handle, inode, bh, 3236 ret = ocfs2_path_bh_journal_access(handle, inode, left_path,
3146 OCFS2_JOURNAL_ACCESS_WRITE); 3237 path_num_items(left_path) - 1);
3147 if (ret) { 3238 if (ret) {
3148 mlog_errno(ret); 3239 mlog_errno(ret);
3149 goto out; 3240 goto out;
@@ -3199,8 +3290,7 @@ static int ocfs2_get_left_path(struct inode *inode,
3199 /* This function shouldn't be called for the leftmost leaf. */ 3290 /* This function shouldn't be called for the leftmost leaf. */
3200 BUG_ON(left_cpos == 0); 3291 BUG_ON(left_cpos == 0);
3201 3292
3202 left_path = ocfs2_new_path(path_root_bh(right_path), 3293 left_path = ocfs2_new_path_from_path(right_path);
3203 path_root_el(right_path));
3204 if (!left_path) { 3294 if (!left_path) {
3205 ret = -ENOMEM; 3295 ret = -ENOMEM;
3206 mlog_errno(ret); 3296 mlog_errno(ret);
@@ -3283,8 +3373,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3283 root_bh = left_path->p_node[subtree_index].bh; 3373 root_bh = left_path->p_node[subtree_index].bh;
3284 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3374 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3285 3375
3286 ret = ocfs2_journal_access(handle, inode, root_bh, 3376 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
3287 OCFS2_JOURNAL_ACCESS_WRITE); 3377 subtree_index);
3288 if (ret) { 3378 if (ret) {
3289 mlog_errno(ret); 3379 mlog_errno(ret);
3290 goto out; 3380 goto out;
@@ -3292,17 +3382,15 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3292 3382
3293 for (i = subtree_index + 1; 3383 for (i = subtree_index + 1;
3294 i < path_num_items(right_path); i++) { 3384 i < path_num_items(right_path); i++) {
3295 ret = ocfs2_journal_access(handle, inode, 3385 ret = ocfs2_path_bh_journal_access(handle, inode,
3296 right_path->p_node[i].bh, 3386 right_path, i);
3297 OCFS2_JOURNAL_ACCESS_WRITE);
3298 if (ret) { 3387 if (ret) {
3299 mlog_errno(ret); 3388 mlog_errno(ret);
3300 goto out; 3389 goto out;
3301 } 3390 }
3302 3391
3303 ret = ocfs2_journal_access(handle, inode, 3392 ret = ocfs2_path_bh_journal_access(handle, inode,
3304 left_path->p_node[i].bh, 3393 left_path, i);
3305 OCFS2_JOURNAL_ACCESS_WRITE);
3306 if (ret) { 3394 if (ret) {
3307 mlog_errno(ret); 3395 mlog_errno(ret);
3308 goto out; 3396 goto out;
@@ -3314,8 +3402,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3314 has_empty_extent = 1; 3402 has_empty_extent = 1;
3315 } 3403 }
3316 3404
3317 ret = ocfs2_journal_access(handle, inode, bh, 3405 ret = ocfs2_path_bh_journal_access(handle, inode, right_path,
3318 OCFS2_JOURNAL_ACCESS_WRITE); 3406 path_num_items(right_path) - 1);
3319 if (ret) { 3407 if (ret) {
3320 mlog_errno(ret); 3408 mlog_errno(ret);
3321 goto out; 3409 goto out;
@@ -3732,8 +3820,7 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
3732 * leftmost leaf. 3820 * leftmost leaf.
3733 */ 3821 */
3734 if (left_cpos) { 3822 if (left_cpos) {
3735 left_path = ocfs2_new_path(path_root_bh(right_path), 3823 left_path = ocfs2_new_path_from_path(right_path);
3736 path_root_el(right_path));
3737 if (!left_path) { 3824 if (!left_path) {
3738 ret = -ENOMEM; 3825 ret = -ENOMEM;
3739 mlog_errno(ret); 3826 mlog_errno(ret);
@@ -3958,8 +4045,8 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3958 4045
3959 el = et->et_root_el; 4046 el = et->et_root_el;
3960 4047
3961 ret = ocfs2_journal_access(handle, inode, et->et_root_bh, 4048 ret = ocfs2_et_root_journal_access(handle, inode, et,
3962 OCFS2_JOURNAL_ACCESS_WRITE); 4049 OCFS2_JOURNAL_ACCESS_WRITE);
3963 if (ret) { 4050 if (ret) {
3964 mlog_errno(ret); 4051 mlog_errno(ret);
3965 goto out; 4052 goto out;
@@ -3970,7 +4057,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3970 goto out_update_clusters; 4057 goto out_update_clusters;
3971 } 4058 }
3972 4059
3973 right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 4060 right_path = ocfs2_new_path_from_et(et);
3974 if (!right_path) { 4061 if (!right_path) {
3975 ret = -ENOMEM; 4062 ret = -ENOMEM;
3976 mlog_errno(ret); 4063 mlog_errno(ret);
@@ -4020,8 +4107,8 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4020 * ocfs2_rotate_tree_right() might have extended the 4107 * ocfs2_rotate_tree_right() might have extended the
4021 * transaction without re-journaling our tree root. 4108 * transaction without re-journaling our tree root.
4022 */ 4109 */
4023 ret = ocfs2_journal_access(handle, inode, et->et_root_bh, 4110 ret = ocfs2_et_root_journal_access(handle, inode, et,
4024 OCFS2_JOURNAL_ACCESS_WRITE); 4111 OCFS2_JOURNAL_ACCESS_WRITE);
4025 if (ret) { 4112 if (ret) {
4026 mlog_errno(ret); 4113 mlog_errno(ret);
4027 goto out; 4114 goto out;
@@ -4082,8 +4169,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4082 goto out; 4169 goto out;
4083 4170
4084 if (left_cpos != 0) { 4171 if (left_cpos != 0) {
4085 left_path = ocfs2_new_path(path_root_bh(path), 4172 left_path = ocfs2_new_path_from_path(path);
4086 path_root_el(path));
4087 if (!left_path) 4173 if (!left_path)
4088 goto out; 4174 goto out;
4089 4175
@@ -4097,8 +4183,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4097 le16_to_cpu(new_el->l_count)) { 4183 le16_to_cpu(new_el->l_count)) {
4098 bh = path_leaf_bh(left_path); 4184 bh = path_leaf_bh(left_path);
4099 eb = (struct ocfs2_extent_block *)bh->b_data; 4185 eb = (struct ocfs2_extent_block *)bh->b_data;
4100 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 4186 ocfs2_error(inode->i_sb,
4101 eb); 4187 "Extent block #%llu has an "
4188 "invalid l_next_free_rec of "
4189 "%d. It should have "
4190 "matched the l_count of %d",
4191 (unsigned long long)le64_to_cpu(eb->h_blkno),
4192 le16_to_cpu(new_el->l_next_free_rec),
4193 le16_to_cpu(new_el->l_count));
4194 status = -EINVAL;
4102 goto out; 4195 goto out;
4103 } 4196 }
4104 rec = &new_el->l_recs[ 4197 rec = &new_el->l_recs[
@@ -4132,8 +4225,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4132 if (right_cpos == 0) 4225 if (right_cpos == 0)
4133 goto out; 4226 goto out;
4134 4227
4135 right_path = ocfs2_new_path(path_root_bh(path), 4228 right_path = ocfs2_new_path_from_path(path);
4136 path_root_el(path));
4137 if (!right_path) 4229 if (!right_path)
4138 goto out; 4230 goto out;
4139 4231
@@ -4147,8 +4239,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4147 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { 4239 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
4148 bh = path_leaf_bh(right_path); 4240 bh = path_leaf_bh(right_path);
4149 eb = (struct ocfs2_extent_block *)bh->b_data; 4241 eb = (struct ocfs2_extent_block *)bh->b_data;
4150 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, 4242 ocfs2_error(inode->i_sb,
4151 eb); 4243 "Extent block #%llu has an "
4244 "invalid l_next_free_rec of %d",
4245 (unsigned long long)le64_to_cpu(eb->h_blkno),
4246 le16_to_cpu(new_el->l_next_free_rec));
4247 status = -EINVAL;
4152 goto out; 4248 goto out;
4153 } 4249 }
4154 rec = &new_el->l_recs[1]; 4250 rec = &new_el->l_recs[1];
@@ -4294,7 +4390,9 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4294 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4390 * ocfs2_figure_insert_type() and ocfs2_add_branch()
4295 * may want it later. 4391 * may want it later.
4296 */ 4392 */
4297 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); 4393 ret = ocfs2_read_extent_block(inode,
4394 ocfs2_et_get_last_eb_blk(et),
4395 &bh);
4298 if (ret) { 4396 if (ret) {
4299 mlog_exit(ret); 4397 mlog_exit(ret);
4300 goto out; 4398 goto out;
@@ -4320,7 +4418,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4320 return 0; 4418 return 0;
4321 } 4419 }
4322 4420
4323 path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 4421 path = ocfs2_new_path_from_et(et);
4324 if (!path) { 4422 if (!path) {
4325 ret = -ENOMEM; 4423 ret = -ENOMEM;
4326 mlog_errno(ret); 4424 mlog_errno(ret);
@@ -4531,9 +4629,9 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4531 4629
4532 BUG_ON(num_bits > clusters_to_add); 4630 BUG_ON(num_bits > clusters_to_add);
4533 4631
4534 /* reserve our write early -- insert_extent may update the inode */ 4632 /* reserve our write early -- insert_extent may update the tree root */
4535 status = ocfs2_journal_access(handle, inode, et->et_root_bh, 4633 status = ocfs2_et_root_journal_access(handle, inode, et,
4536 OCFS2_JOURNAL_ACCESS_WRITE); 4634 OCFS2_JOURNAL_ACCESS_WRITE);
4537 if (status < 0) { 4635 if (status < 0) {
4538 mlog_errno(status); 4636 mlog_errno(status);
4539 goto leave; 4637 goto leave;
@@ -4760,20 +4858,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4760 if (path->p_tree_depth) { 4858 if (path->p_tree_depth) {
4761 struct ocfs2_extent_block *eb; 4859 struct ocfs2_extent_block *eb;
4762 4860
4763 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), 4861 ret = ocfs2_read_extent_block(inode,
4764 &last_eb_bh); 4862 ocfs2_et_get_last_eb_blk(et),
4863 &last_eb_bh);
4765 if (ret) { 4864 if (ret) {
4766 mlog_exit(ret); 4865 mlog_exit(ret);
4767 goto out; 4866 goto out;
4768 } 4867 }
4769 4868
4770 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 4869 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
4771 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
4772 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
4773 ret = -EROFS;
4774 goto out;
4775 }
4776
4777 rightmost_el = &eb->h_list; 4870 rightmost_el = &eb->h_list;
4778 } else 4871 } else
4779 rightmost_el = path_root_el(path); 4872 rightmost_el = path_root_el(path);
@@ -4854,7 +4947,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
4854 if (et->et_ops == &ocfs2_dinode_et_ops) 4947 if (et->et_ops == &ocfs2_dinode_et_ops)
4855 ocfs2_extent_map_trunc(inode, 0); 4948 ocfs2_extent_map_trunc(inode, 0);
4856 4949
4857 left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 4950 left_path = ocfs2_new_path_from_et(et);
4858 if (!left_path) { 4951 if (!left_path) {
4859 ret = -ENOMEM; 4952 ret = -ENOMEM;
4860 mlog_errno(ret); 4953 mlog_errno(ret);
@@ -4918,8 +5011,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
4918 5011
4919 depth = path->p_tree_depth; 5012 depth = path->p_tree_depth;
4920 if (depth > 0) { 5013 if (depth > 0) {
4921 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), 5014 ret = ocfs2_read_extent_block(inode,
4922 &last_eb_bh); 5015 ocfs2_et_get_last_eb_blk(et),
5016 &last_eb_bh);
4923 if (ret < 0) { 5017 if (ret < 0) {
4924 mlog_errno(ret); 5018 mlog_errno(ret);
4925 goto out; 5019 goto out;
@@ -5025,8 +5119,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5025 } 5119 }
5026 5120
5027 if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) { 5121 if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
5028 left_path = ocfs2_new_path(path_root_bh(path), 5122 left_path = ocfs2_new_path_from_path(path);
5029 path_root_el(path));
5030 if (!left_path) { 5123 if (!left_path) {
5031 ret = -ENOMEM; 5124 ret = -ENOMEM;
5032 mlog_errno(ret); 5125 mlog_errno(ret);
@@ -5135,7 +5228,7 @@ int ocfs2_remove_extent(struct inode *inode,
5135 5228
5136 ocfs2_extent_map_trunc(inode, 0); 5229 ocfs2_extent_map_trunc(inode, 0);
5137 5230
5138 path = ocfs2_new_path(et->et_root_bh, et->et_root_el); 5231 path = ocfs2_new_path_from_et(et);
5139 if (!path) { 5232 if (!path) {
5140 ret = -ENOMEM; 5233 ret = -ENOMEM;
5141 mlog_errno(ret); 5234 mlog_errno(ret);
@@ -5255,6 +5348,78 @@ out:
5255 return ret; 5348 return ret;
5256} 5349}
5257 5350
5351int ocfs2_remove_btree_range(struct inode *inode,
5352 struct ocfs2_extent_tree *et,
5353 u32 cpos, u32 phys_cpos, u32 len,
5354 struct ocfs2_cached_dealloc_ctxt *dealloc)
5355{
5356 int ret;
5357 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5358 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5359 struct inode *tl_inode = osb->osb_tl_inode;
5360 handle_t *handle;
5361 struct ocfs2_alloc_context *meta_ac = NULL;
5362
5363 ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac);
5364 if (ret) {
5365 mlog_errno(ret);
5366 return ret;
5367 }
5368
5369 mutex_lock(&tl_inode->i_mutex);
5370
5371 if (ocfs2_truncate_log_needs_flush(osb)) {
5372 ret = __ocfs2_flush_truncate_log(osb);
5373 if (ret < 0) {
5374 mlog_errno(ret);
5375 goto out;
5376 }
5377 }
5378
5379 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5380 if (IS_ERR(handle)) {
5381 ret = PTR_ERR(handle);
5382 mlog_errno(ret);
5383 goto out;
5384 }
5385
5386 ret = ocfs2_et_root_journal_access(handle, inode, et,
5387 OCFS2_JOURNAL_ACCESS_WRITE);
5388 if (ret) {
5389 mlog_errno(ret);
5390 goto out;
5391 }
5392
5393 ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
5394 dealloc);
5395 if (ret) {
5396 mlog_errno(ret);
5397 goto out_commit;
5398 }
5399
5400 ocfs2_et_update_clusters(inode, et, -len);
5401
5402 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
5403 if (ret) {
5404 mlog_errno(ret);
5405 goto out_commit;
5406 }
5407
5408 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
5409 if (ret)
5410 mlog_errno(ret);
5411
5412out_commit:
5413 ocfs2_commit_trans(osb, handle);
5414out:
5415 mutex_unlock(&tl_inode->i_mutex);
5416
5417 if (meta_ac)
5418 ocfs2_free_alloc_context(meta_ac);
5419
5420 return ret;
5421}
5422
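The new ocfs2_remove_btree_range() above bundles the whole remove-a-range sequence -- reserve metadata, flush the truncate log if needed, start a transaction, journal the tree root, remove the extent, shrink the cluster count and log the freed clusters -- so callers no longer open-code those steps. A rough caller-side sketch, assuming a dinode-backed extent tree; di_bh, cpos, phys_cpos and len are illustrative caller state, not part of this patch:

        struct ocfs2_extent_tree et;
        struct ocfs2_cached_dealloc_ctxt dealloc;
        int ret;

        ocfs2_init_dealloc_ctxt(&dealloc);
        ocfs2_init_dinode_extent_tree(&et, inode, di_bh);

        /* Drop 'len' clusters of the file starting at cpos; phys_cpos is
         * where that range currently sits on disk. */
        ret = ocfs2_remove_btree_range(inode, &et, cpos, phys_cpos, len,
                                       &dealloc);
        if (ret)
                mlog_errno(ret);

        /* Give back anything that was only cached in 'dealloc'
         * during the removal. */
        ocfs2_run_deallocs(osb, &dealloc);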
5258int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) 5423int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
5259{ 5424{
5260 struct buffer_head *tl_bh = osb->osb_tl_bh; 5425 struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -5308,13 +5473,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5308 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); 5473 start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
5309 5474
5310 di = (struct ocfs2_dinode *) tl_bh->b_data; 5475 di = (struct ocfs2_dinode *) tl_bh->b_data;
5311 tl = &di->id2.i_dealloc;
5312 if (!OCFS2_IS_VALID_DINODE(di)) {
5313 OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
5314 status = -EIO;
5315 goto bail;
5316 }
5317 5476
5477 /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
5478 * by the underlying call to ocfs2_read_inode_block(), so any
5479 * corruption is a code bug */
5480 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
5481
5482 tl = &di->id2.i_dealloc;
5318 tl_count = le16_to_cpu(tl->tl_count); 5483 tl_count = le16_to_cpu(tl->tl_count);
5319 mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || 5484 mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
5320 tl_count == 0, 5485 tl_count == 0,
@@ -5332,8 +5497,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5332 goto bail; 5497 goto bail;
5333 } 5498 }
5334 5499
5335 status = ocfs2_journal_access(handle, tl_inode, tl_bh, 5500 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
5336 OCFS2_JOURNAL_ACCESS_WRITE); 5501 OCFS2_JOURNAL_ACCESS_WRITE);
5337 if (status < 0) { 5502 if (status < 0) {
5338 mlog_errno(status); 5503 mlog_errno(status);
5339 goto bail; 5504 goto bail;
@@ -5394,8 +5559,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5394 while (i >= 0) { 5559 while (i >= 0) {
5395 /* Caller has given us at least enough credits to 5560 /* Caller has given us at least enough credits to
5396 * update the truncate log dinode */ 5561 * update the truncate log dinode */
5397 status = ocfs2_journal_access(handle, tl_inode, tl_bh, 5562 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh,
5398 OCFS2_JOURNAL_ACCESS_WRITE); 5563 OCFS2_JOURNAL_ACCESS_WRITE);
5399 if (status < 0) { 5564 if (status < 0) {
5400 mlog_errno(status); 5565 mlog_errno(status);
5401 goto bail; 5566 goto bail;
@@ -5464,13 +5629,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
5464 BUG_ON(mutex_trylock(&tl_inode->i_mutex)); 5629 BUG_ON(mutex_trylock(&tl_inode->i_mutex));
5465 5630
5466 di = (struct ocfs2_dinode *) tl_bh->b_data; 5631 di = (struct ocfs2_dinode *) tl_bh->b_data;
5467 tl = &di->id2.i_dealloc;
5468 if (!OCFS2_IS_VALID_DINODE(di)) {
5469 OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
5470 status = -EIO;
5471 goto out;
5472 }
5473 5632
5633 /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
5634 * by the underlying call to ocfs2_read_inode_block(), so any
5635 * corruption is a code bug */
5636 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
5637
5638 tl = &di->id2.i_dealloc;
5474 num_to_flush = le16_to_cpu(tl->tl_used); 5639 num_to_flush = le16_to_cpu(tl->tl_used);
5475 mlog(0, "Flush %u records from truncate log #%llu\n", 5640 mlog(0, "Flush %u records from truncate log #%llu\n",
5476 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); 5641 num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
@@ -5586,7 +5751,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
5586 goto bail; 5751 goto bail;
5587 } 5752 }
5588 5753
5589 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); 5754 status = ocfs2_read_inode_block(inode, &bh);
5590 if (status < 0) { 5755 if (status < 0) {
5591 iput(inode); 5756 iput(inode);
5592 mlog_errno(status); 5757 mlog_errno(status);
@@ -5625,13 +5790,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
5625 } 5790 }
5626 5791
5627 di = (struct ocfs2_dinode *) tl_bh->b_data; 5792 di = (struct ocfs2_dinode *) tl_bh->b_data;
5628 tl = &di->id2.i_dealloc;
5629 if (!OCFS2_IS_VALID_DINODE(di)) {
5630 OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
5631 status = -EIO;
5632 goto bail;
5633 }
5634 5793
5794 /* tl_bh is loaded from ocfs2_get_truncate_log_info(). It's
5795 * validated by the underlying call to ocfs2_read_inode_block(),
5796 * so any corruption is a code bug */
5797 BUG_ON(!OCFS2_IS_VALID_DINODE(di));
5798
5799 tl = &di->id2.i_dealloc;
5635 if (le16_to_cpu(tl->tl_used)) { 5800 if (le16_to_cpu(tl->tl_used)) {
5636 mlog(0, "We'll have %u logs to recover\n", 5801 mlog(0, "We'll have %u logs to recover\n",
5637 le16_to_cpu(tl->tl_used)); 5802 le16_to_cpu(tl->tl_used));
@@ -5651,6 +5816,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
5651 * tl_used. */ 5816 * tl_used. */
5652 tl->tl_used = 0; 5817 tl->tl_used = 0;
5653 5818
5819 ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
5654 status = ocfs2_write_block(osb, tl_bh, tl_inode); 5820 status = ocfs2_write_block(osb, tl_bh, tl_inode);
5655 if (status < 0) { 5821 if (status < 0) {
5656 mlog_errno(status); 5822 mlog_errno(status);
@@ -5800,7 +5966,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
5800 */ 5966 */
5801 5967
5802/* 5968/*
5803 * Describes a single block free from a suballocator 5969 * Describe a single bit freed from a suballocator. For the block
5970 * suballocators, it represents one block. For the global cluster
5971 * allocator, it represents some clusters and free_bit indicates
 5972 * the number of clusters.
5804 */ 5973 */
5805struct ocfs2_cached_block_free { 5974struct ocfs2_cached_block_free {
5806 struct ocfs2_cached_block_free *free_next; 5975 struct ocfs2_cached_block_free *free_next;
@@ -5815,10 +5984,10 @@ struct ocfs2_per_slot_free_list {
5815 struct ocfs2_cached_block_free *f_first; 5984 struct ocfs2_cached_block_free *f_first;
5816}; 5985};
5817 5986
5818static int ocfs2_free_cached_items(struct ocfs2_super *osb, 5987static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
5819 int sysfile_type, 5988 int sysfile_type,
5820 int slot, 5989 int slot,
5821 struct ocfs2_cached_block_free *head) 5990 struct ocfs2_cached_block_free *head)
5822{ 5991{
5823 int ret; 5992 int ret;
5824 u64 bg_blkno; 5993 u64 bg_blkno;
@@ -5893,6 +6062,82 @@ out:
5893 return ret; 6062 return ret;
5894} 6063}
5895 6064
6065int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6066 u64 blkno, unsigned int bit)
6067{
6068 int ret = 0;
6069 struct ocfs2_cached_block_free *item;
6070
6071 item = kmalloc(sizeof(*item), GFP_NOFS);
6072 if (item == NULL) {
6073 ret = -ENOMEM;
6074 mlog_errno(ret);
6075 return ret;
6076 }
6077
6078 mlog(0, "Insert clusters: (bit %u, blk %llu)\n",
6079 bit, (unsigned long long)blkno);
6080
6081 item->free_blk = blkno;
6082 item->free_bit = bit;
6083 item->free_next = ctxt->c_global_allocator;
6084
6085 ctxt->c_global_allocator = item;
6086 return ret;
6087}
6088
6089static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
6090 struct ocfs2_cached_block_free *head)
6091{
6092 struct ocfs2_cached_block_free *tmp;
6093 struct inode *tl_inode = osb->osb_tl_inode;
6094 handle_t *handle;
6095 int ret = 0;
6096
6097 mutex_lock(&tl_inode->i_mutex);
6098
6099 while (head) {
6100 if (ocfs2_truncate_log_needs_flush(osb)) {
6101 ret = __ocfs2_flush_truncate_log(osb);
6102 if (ret < 0) {
6103 mlog_errno(ret);
6104 break;
6105 }
6106 }
6107
6108 handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
6109 if (IS_ERR(handle)) {
6110 ret = PTR_ERR(handle);
6111 mlog_errno(ret);
6112 break;
6113 }
6114
6115 ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
6116 head->free_bit);
6117
6118 ocfs2_commit_trans(osb, handle);
6119 tmp = head;
6120 head = head->free_next;
6121 kfree(tmp);
6122
6123 if (ret < 0) {
6124 mlog_errno(ret);
6125 break;
6126 }
6127 }
6128
6129 mutex_unlock(&tl_inode->i_mutex);
6130
6131 while (head) {
6132 /* Premature exit may have left some dangling items. */
6133 tmp = head;
6134 head = head->free_next;
6135 kfree(tmp);
6136 }
6137
6138 return ret;
6139}
6140
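The dealloc context now carries a second list for clusters owned by the global bitmap: ocfs2_cache_cluster_dealloc() queues one entry, and ocfs2_run_deallocs() later pushes each entry into the truncate log under its own small transaction (ocfs2_free_cached_clusters() above). A minimal producer-side sketch, with blkno and bit standing in for values a real caller would get back from the cluster allocator:

        struct ocfs2_cached_dealloc_ctxt dealloc;
        int ret;

        ocfs2_init_dealloc_ctxt(&dealloc);

        /* Remember a cluster run to free later: blkno is the start block,
         * bit the number of clusters (see the comment above
         * struct ocfs2_cached_block_free). */
        ret = ocfs2_cache_cluster_dealloc(&dealloc, blkno, bit);
        if (ret)
                mlog_errno(ret);

        /* ...once the current transaction is finished... */
        ocfs2_run_deallocs(osb, &dealloc);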
5896int ocfs2_run_deallocs(struct ocfs2_super *osb, 6141int ocfs2_run_deallocs(struct ocfs2_super *osb,
5897 struct ocfs2_cached_dealloc_ctxt *ctxt) 6142 struct ocfs2_cached_dealloc_ctxt *ctxt)
5898{ 6143{
@@ -5908,8 +6153,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
5908 if (fl->f_first) { 6153 if (fl->f_first) {
5909 mlog(0, "Free items: (type %u, slot %d)\n", 6154 mlog(0, "Free items: (type %u, slot %d)\n",
5910 fl->f_inode_type, fl->f_slot); 6155 fl->f_inode_type, fl->f_slot);
5911 ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, 6156 ret2 = ocfs2_free_cached_blocks(osb,
5912 fl->f_slot, fl->f_first); 6157 fl->f_inode_type,
6158 fl->f_slot,
6159 fl->f_first);
5913 if (ret2) 6160 if (ret2)
5914 mlog_errno(ret2); 6161 mlog_errno(ret2);
5915 if (!ret) 6162 if (!ret)
@@ -5920,6 +6167,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
5920 kfree(fl); 6167 kfree(fl);
5921 } 6168 }
5922 6169
6170 if (ctxt->c_global_allocator) {
6171 ret2 = ocfs2_free_cached_clusters(osb,
6172 ctxt->c_global_allocator);
6173 if (ret2)
6174 mlog_errno(ret2);
6175 if (!ret)
6176 ret = ret2;
6177
6178 ctxt->c_global_allocator = NULL;
6179 }
6180
5923 return ret; 6181 return ret;
5924} 6182}
5925 6183
@@ -6075,11 +6333,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6075 6333
6076 eb = (struct ocfs2_extent_block *) bh->b_data; 6334 eb = (struct ocfs2_extent_block *) bh->b_data;
6077 el = &eb->h_list; 6335 el = &eb->h_list;
6078 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { 6336
6079 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); 6337 /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6080 ret = -EROFS; 6338 * Any corruption is a code bug. */
6081 goto out; 6339 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6082 }
6083 6340
6084 *new_last_eb = bh; 6341 *new_last_eb = bh;
6085 get_bh(*new_last_eb); 6342 get_bh(*new_last_eb);
@@ -6326,8 +6583,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6326 } 6583 }
6327 6584
6328 if (last_eb_bh) { 6585 if (last_eb_bh) {
6329 status = ocfs2_journal_access(handle, inode, last_eb_bh, 6586 status = ocfs2_journal_access_eb(handle, inode, last_eb_bh,
6330 OCFS2_JOURNAL_ACCESS_WRITE); 6587 OCFS2_JOURNAL_ACCESS_WRITE);
6331 if (status < 0) { 6588 if (status < 0) {
6332 mlog_errno(status); 6589 mlog_errno(status);
6333 goto bail; 6590 goto bail;
@@ -6350,6 +6607,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6350 goto bail; 6607 goto bail;
6351 } 6608 }
6352 6609
6610 vfs_dq_free_space_nodirty(inode,
6611 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6353 spin_lock(&OCFS2_I(inode)->ip_lock); 6612 spin_lock(&OCFS2_I(inode)->ip_lock);
6354 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - 6613 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6355 clusters_to_del; 6614 clusters_to_del;
@@ -6436,11 +6695,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6436 mlog_errno(ret); 6695 mlog_errno(ret);
6437 else if (ocfs2_should_order_data(inode)) { 6696 else if (ocfs2_should_order_data(inode)) {
6438 ret = ocfs2_jbd2_file_inode(handle, inode); 6697 ret = ocfs2_jbd2_file_inode(handle, inode);
6439#ifdef CONFIG_OCFS2_COMPAT_JBD
6440 ret = walk_page_buffers(handle, page_buffers(page),
6441 from, to, &partial,
6442 ocfs2_journal_dirty_data);
6443#endif
6444 if (ret < 0) 6698 if (ret < 0)
6445 mlog_errno(ret); 6699 mlog_errno(ret);
6446 } 6700 }
@@ -6663,6 +6917,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6663 struct page **pages = NULL; 6917 struct page **pages = NULL;
6664 loff_t end = osb->s_clustersize; 6918 loff_t end = osb->s_clustersize;
6665 struct ocfs2_extent_tree et; 6919 struct ocfs2_extent_tree et;
6920 int did_quota = 0;
6666 6921
6667 has_data = i_size_read(inode) ? 1 : 0; 6922 has_data = i_size_read(inode) ? 1 : 0;
6668 6923
@@ -6682,15 +6937,16 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6682 } 6937 }
6683 } 6938 }
6684 6939
6685 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS); 6940 handle = ocfs2_start_trans(osb,
6941 ocfs2_inline_to_extents_credits(osb->sb));
6686 if (IS_ERR(handle)) { 6942 if (IS_ERR(handle)) {
6687 ret = PTR_ERR(handle); 6943 ret = PTR_ERR(handle);
6688 mlog_errno(ret); 6944 mlog_errno(ret);
6689 goto out_unlock; 6945 goto out_unlock;
6690 } 6946 }
6691 6947
6692 ret = ocfs2_journal_access(handle, inode, di_bh, 6948 ret = ocfs2_journal_access_di(handle, inode, di_bh,
6693 OCFS2_JOURNAL_ACCESS_WRITE); 6949 OCFS2_JOURNAL_ACCESS_WRITE);
6694 if (ret) { 6950 if (ret) {
6695 mlog_errno(ret); 6951 mlog_errno(ret);
6696 goto out_commit; 6952 goto out_commit;
@@ -6701,6 +6957,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6701 unsigned int page_end; 6957 unsigned int page_end;
6702 u64 phys; 6958 u64 phys;
6703 6959
6960 if (vfs_dq_alloc_space_nodirty(inode,
6961 ocfs2_clusters_to_bytes(osb->sb, 1))) {
6962 ret = -EDQUOT;
6963 goto out_commit;
6964 }
6965 did_quota = 1;
6966
6704 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 6967 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
6705 &num); 6968 &num);
6706 if (ret) { 6969 if (ret) {
@@ -6774,6 +7037,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6774 } 7037 }
6775 7038
6776out_commit: 7039out_commit:
7040 if (ret < 0 && did_quota)
7041 vfs_dq_free_space_nodirty(inode,
7042 ocfs2_clusters_to_bytes(osb->sb, 1));
7043
6777 ocfs2_commit_trans(osb, handle); 7044 ocfs2_commit_trans(osb, handle);
6778 7045
6779out_unlock: 7046out_unlock:
@@ -6813,7 +7080,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
6813 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 7080 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
6814 i_size_read(inode)); 7081 i_size_read(inode));
6815 7082
6816 path = ocfs2_new_path(fe_bh, &di->id2.i_list); 7083 path = ocfs2_new_path(fe_bh, &di->id2.i_list,
7084 ocfs2_journal_access_di);
6817 if (!path) { 7085 if (!path) {
6818 status = -ENOMEM; 7086 status = -ENOMEM;
6819 mlog_errno(status); 7087 mlog_errno(status);
@@ -6984,20 +7252,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
6984 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 7252 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
6985 7253
6986 if (fe->id2.i_list.l_tree_depth) { 7254 if (fe->id2.i_list.l_tree_depth) {
6987 status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), 7255 status = ocfs2_read_extent_block(inode,
6988 &last_eb_bh); 7256 le64_to_cpu(fe->i_last_eb_blk),
7257 &last_eb_bh);
6989 if (status < 0) { 7258 if (status < 0) {
6990 mlog_errno(status); 7259 mlog_errno(status);
6991 goto bail; 7260 goto bail;
6992 } 7261 }
6993 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; 7262 eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6994 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
6995 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
6996
6997 brelse(last_eb_bh);
6998 status = -EIO;
6999 goto bail;
7000 }
7001 } 7263 }
7002 7264
7003 (*tc)->tc_last_eb_bh = last_eb_bh; 7265 (*tc)->tc_last_eb_bh = last_eb_bh;
@@ -7052,8 +7314,8 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
7052 goto out; 7314 goto out;
7053 } 7315 }
7054 7316
7055 ret = ocfs2_journal_access(handle, inode, di_bh, 7317 ret = ocfs2_journal_access_di(handle, inode, di_bh,
7056 OCFS2_JOURNAL_ACCESS_WRITE); 7318 OCFS2_JOURNAL_ACCESS_WRITE);
7057 if (ret) { 7319 if (ret) {
7058 mlog_errno(ret); 7320 mlog_errno(ret);
7059 goto out_commit; 7321 goto out_commit;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 70257c84cfbe..cceff5c37f47 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,9 @@
45 * 45 *
46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a 46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree 47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
48 * functions. 48 * functions. With metadata ecc, we now call different journal_access
49 * functions for each type of metadata, so it must have the
50 * root_journal_access function.
49 * ocfs2_extent_tree_operations abstract the normal operations we do for 51 * ocfs2_extent_tree_operations abstract the normal operations we do for
50 * the root of extent b-tree. 52 * the root of extent b-tree.
51 */ 53 */
@@ -54,6 +56,7 @@ struct ocfs2_extent_tree {
54 struct ocfs2_extent_tree_operations *et_ops; 56 struct ocfs2_extent_tree_operations *et_ops;
55 struct buffer_head *et_root_bh; 57 struct buffer_head *et_root_bh;
56 struct ocfs2_extent_list *et_root_el; 58 struct ocfs2_extent_list *et_root_el;
59 ocfs2_journal_access_func et_root_journal_access;
57 void *et_object; 60 void *et_object;
58 unsigned int et_max_leaf_clusters; 61 unsigned int et_max_leaf_clusters;
59}; 62};
@@ -68,10 +71,18 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
68void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 71void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
69 struct inode *inode, 72 struct inode *inode,
70 struct buffer_head *bh); 73 struct buffer_head *bh);
74struct ocfs2_xattr_value_buf;
71void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 75void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
72 struct inode *inode, 76 struct inode *inode,
73 struct buffer_head *bh, 77 struct ocfs2_xattr_value_buf *vb);
74 struct ocfs2_xattr_value_root *xv); 78
79/*
80 * Read an extent block into *bh. If *bh is NULL, a bh will be
81 * allocated. This is a cached read. The extent block will be validated
82 * with ocfs2_validate_extent_block().
83 */
84int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
85 struct buffer_head **bh);
75 86
76struct ocfs2_alloc_context; 87struct ocfs2_alloc_context;
77int ocfs2_insert_extent(struct ocfs2_super *osb, 88int ocfs2_insert_extent(struct ocfs2_super *osb,
@@ -110,6 +121,11 @@ int ocfs2_remove_extent(struct inode *inode,
110 u32 cpos, u32 len, handle_t *handle, 121 u32 cpos, u32 len, handle_t *handle,
111 struct ocfs2_alloc_context *meta_ac, 122 struct ocfs2_alloc_context *meta_ac,
112 struct ocfs2_cached_dealloc_ctxt *dealloc); 123 struct ocfs2_cached_dealloc_ctxt *dealloc);
124int ocfs2_remove_btree_range(struct inode *inode,
125 struct ocfs2_extent_tree *et,
126 u32 cpos, u32 phys_cpos, u32 len,
127 struct ocfs2_cached_dealloc_ctxt *dealloc);
128
113int ocfs2_num_free_extents(struct ocfs2_super *osb, 129int ocfs2_num_free_extents(struct ocfs2_super *osb,
114 struct inode *inode, 130 struct inode *inode,
115 struct ocfs2_extent_tree *et); 131 struct ocfs2_extent_tree *et);
@@ -167,10 +183,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
167 */ 183 */
168struct ocfs2_cached_dealloc_ctxt { 184struct ocfs2_cached_dealloc_ctxt {
169 struct ocfs2_per_slot_free_list *c_first_suballocator; 185 struct ocfs2_per_slot_free_list *c_first_suballocator;
186 struct ocfs2_cached_block_free *c_global_allocator;
170}; 187};
171static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) 188static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
172{ 189{
173 c->c_first_suballocator = NULL; 190 c->c_first_suballocator = NULL;
191 c->c_global_allocator = NULL;
192}
193int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
194 u64 blkno, unsigned int bit);
195static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
196{
197 return c->c_global_allocator != NULL;
174} 198}
175int ocfs2_run_deallocs(struct ocfs2_super *osb, 199int ocfs2_run_deallocs(struct ocfs2_super *osb,
176 struct ocfs2_cached_dealloc_ctxt *ctxt); 200 struct ocfs2_cached_dealloc_ctxt *ctxt);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c22543b33420..a067a6cffb01 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -27,6 +27,7 @@
27#include <linux/swap.h> 27#include <linux/swap.h>
28#include <linux/pipe_fs_i.h> 28#include <linux/pipe_fs_i.h>
29#include <linux/mpage.h> 29#include <linux/mpage.h>
30#include <linux/quotaops.h>
30 31
31#define MLOG_MASK_PREFIX ML_FILE_IO 32#define MLOG_MASK_PREFIX ML_FILE_IO
32#include <cluster/masklog.h> 33#include <cluster/masklog.h>
@@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
68 goto bail; 69 goto bail;
69 } 70 }
70 71
71 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); 72 status = ocfs2_read_inode_block(inode, &bh);
72 if (status < 0) { 73 if (status < 0) {
73 mlog_errno(status); 74 mlog_errno(status);
74 goto bail; 75 goto bail;
75 } 76 }
76 fe = (struct ocfs2_dinode *) bh->b_data; 77 fe = (struct ocfs2_dinode *) bh->b_data;
77 78
78 if (!OCFS2_IS_VALID_DINODE(fe)) {
79 mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
80 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
81 fe->i_signature);
82 goto bail;
83 }
84
85 if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, 79 if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
86 le32_to_cpu(fe->i_clusters))) { 80 le32_to_cpu(fe->i_clusters))) {
87 mlog(ML_ERROR, "block offset is outside the allocated size: " 81 mlog(ML_ERROR, "block offset is outside the allocated size: "
@@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
262 BUG_ON(!PageLocked(page)); 256 BUG_ON(!PageLocked(page));
263 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); 257 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
264 258
265 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 259 ret = ocfs2_read_inode_block(inode, &di_bh);
266 if (ret) { 260 if (ret) {
267 mlog_errno(ret); 261 mlog_errno(ret);
268 goto out; 262 goto out;
@@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
481 475
482 if (ocfs2_should_order_data(inode)) { 476 if (ocfs2_should_order_data(inode)) {
483 ret = ocfs2_jbd2_file_inode(handle, inode); 477 ret = ocfs2_jbd2_file_inode(handle, inode);
484#ifdef CONFIG_OCFS2_COMPAT_JBD
485 ret = walk_page_buffers(handle,
486 page_buffers(page),
487 from, to, NULL,
488 ocfs2_journal_dirty_data);
489#endif
490 if (ret < 0) 478 if (ret < 0)
491 mlog_errno(ret); 479 mlog_errno(ret);
492 } 480 }
@@ -1072,15 +1060,8 @@ static void ocfs2_write_failure(struct inode *inode,
1072 tmppage = wc->w_pages[i]; 1060 tmppage = wc->w_pages[i];
1073 1061
1074 if (page_has_buffers(tmppage)) { 1062 if (page_has_buffers(tmppage)) {
1075 if (ocfs2_should_order_data(inode)) { 1063 if (ocfs2_should_order_data(inode))
1076 ocfs2_jbd2_file_inode(wc->w_handle, inode); 1064 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1077#ifdef CONFIG_OCFS2_COMPAT_JBD
1078 walk_page_buffers(wc->w_handle,
1079 page_buffers(tmppage),
1080 from, to, NULL,
1081 ocfs2_journal_dirty_data);
1082#endif
1083 }
1084 1065
1085 block_commit_write(tmppage, from, to); 1066 block_commit_write(tmppage, from, to);
1086 } 1067 }
@@ -1531,8 +1512,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
1531 goto out; 1512 goto out;
1532 } 1513 }
1533 1514
1534 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, 1515 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
1535 OCFS2_JOURNAL_ACCESS_WRITE); 1516 OCFS2_JOURNAL_ACCESS_WRITE);
1536 if (ret) { 1517 if (ret) {
1537 ocfs2_commit_trans(osb, handle); 1518 ocfs2_commit_trans(osb, handle);
1538 1519
@@ -1750,15 +1731,20 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1750 1731
1751 wc->w_handle = handle; 1732 wc->w_handle = handle;
1752 1733
1734 if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
1735 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
1736 ret = -EDQUOT;
1737 goto out_commit;
1738 }
1753 /* 1739 /*
1754 * We don't want this to fail in ocfs2_write_end(), so do it 1740 * We don't want this to fail in ocfs2_write_end(), so do it
1755 * here. 1741 * here.
1756 */ 1742 */
1757 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, 1743 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
1758 OCFS2_JOURNAL_ACCESS_WRITE); 1744 OCFS2_JOURNAL_ACCESS_WRITE);
1759 if (ret) { 1745 if (ret) {
1760 mlog_errno(ret); 1746 mlog_errno(ret);
1761 goto out_commit; 1747 goto out_quota;
1762 } 1748 }
1763 1749
1764 /* 1750 /*
@@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1771 mmap_page); 1757 mmap_page);
1772 if (ret) { 1758 if (ret) {
1773 mlog_errno(ret); 1759 mlog_errno(ret);
1774 goto out_commit; 1760 goto out_quota;
1775 } 1761 }
1776 1762
1777 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, 1763 ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
1778 len); 1764 len);
1779 if (ret) { 1765 if (ret) {
1780 mlog_errno(ret); 1766 mlog_errno(ret);
1781 goto out_commit; 1767 goto out_quota;
1782 } 1768 }
1783 1769
1784 if (data_ac) 1770 if (data_ac)
@@ -1790,6 +1776,10 @@ success:
1790 *pagep = wc->w_target_page; 1776 *pagep = wc->w_target_page;
1791 *fsdata = wc; 1777 *fsdata = wc;
1792 return 0; 1778 return 0;
1779out_quota:
1780 if (clusters_to_alloc)
1781 vfs_dq_free_space(inode,
1782 ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
1793out_commit: 1783out_commit:
1794 ocfs2_commit_trans(osb, handle); 1784 ocfs2_commit_trans(osb, handle);
1795 1785
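The quota hooks added to ocfs2_write_begin_nolock() follow a charge-then-roll-back pattern: space is charged with vfs_dq_alloc_space_nodirty() before any clusters are claimed, and every failure after that point branches to out_quota so the charge is returned with vfs_dq_free_space(). A condensed sketch of the shape only, where write_the_clusters() is a made-up stand-in for the real grab-pages/write-cluster steps:

        if (clusters_to_alloc &&
            vfs_dq_alloc_space_nodirty(inode,
                        ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
                ret = -EDQUOT;
                goto out_commit;        /* nothing was charged */
        }

        ret = write_the_clusters();     /* placeholder for the real work */
        if (ret)
                goto out_quota;

        return 0;

out_quota:
        if (clusters_to_alloc)
                vfs_dq_free_space(inode,
                        ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
out_commit:
        ocfs2_commit_trans(osb, handle);
        return ret;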
@@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1919 } 1909 }
1920 1910
1921 if (page_has_buffers(tmppage)) { 1911 if (page_has_buffers(tmppage)) {
1922 if (ocfs2_should_order_data(inode)) { 1912 if (ocfs2_should_order_data(inode))
1923 ocfs2_jbd2_file_inode(wc->w_handle, inode); 1913 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1924#ifdef CONFIG_OCFS2_COMPAT_JBD
1925 walk_page_buffers(wc->w_handle,
1926 page_buffers(tmppage),
1927 from, to, NULL,
1928 ocfs2_journal_dirty_data);
1929#endif
1930 }
1931 block_commit_write(tmppage, from, to); 1914 block_commit_write(tmppage, from, to);
1932 } 1915 }
1933 } 1916 }
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
new file mode 100644
index 000000000000..2a947c44e594
--- /dev/null
+++ b/fs/ocfs2/blockcheck.c
@@ -0,0 +1,477 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * blockcheck.c
5 *
6 * Checksum and ECC codes for the OCFS2 userspace library.
7 *
8 * Copyright (C) 2006, 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License, version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#include <linux/kernel.h>
21#include <linux/types.h>
22#include <linux/crc32.h>
23#include <linux/buffer_head.h>
24#include <linux/bitops.h>
25#include <asm/byteorder.h>
26
27#include <cluster/masklog.h>
28
29#include "ocfs2.h"
30
31#include "blockcheck.h"
32
33
34/*
35 * We use the following conventions:
36 *
37 * d = # data bits
38 * p = # parity bits
39 * c = # total code bits (d + p)
40 */
41
42
43/*
44 * Calculate the bit offset in the hamming code buffer based on the bit's
45 * offset in the data buffer. Since the hamming code reserves all
46 * power-of-two bits for parity, the data bit number and the code bit
 47 * number are offset by all the parity bits beforehand.
48 *
49 * Recall that bit numbers in hamming code are 1-based. This function
50 * takes the 0-based data bit from the caller.
51 *
52 * An example. Take bit 1 of the data buffer. 1 is a power of two (2^0),
53 * so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit.
54 * 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3
55 * in the code buffer.
56 *
57 * The caller can pass in *p if it wants to keep track of the most recent
58 * number of parity bits added. This allows the function to start the
59 * calculation at the last place.
60 */
61static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
62{
63 unsigned int b, p = 0;
64
65 /*
66 * Data bits are 0-based, but we're talking code bits, which
67 * are 1-based.
68 */
69 b = i + 1;
70
71 /* Use the cache if it is there */
72 if (p_cache)
73 p = *p_cache;
74 b += p;
75
76 /*
77 * For every power of two below our bit number, bump our bit.
78 *
79 * We compare with (b + 1) because we have to compare with what b
80 * would be _if_ it were bumped up by the parity bit. Capice?
81 *
82 * p is set above.
83 */
84 for (; (1 << p) < (b + 1); p++)
85 b++;
86
87 if (p_cache)
88 *p_cache = p;
89
90 return b;
91}
92
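A standalone illustration of the data-bit to code-bit mapping described above; this is a user-space model for reading the code, not part of the patch. With 0-based data bits (as passed to calc_code_bit()), the first few map to code bits 3, 5, 6, 7, 9, ... because positions 1, 2, 4, 8, ... are reserved for parity:

#include <stdio.h>

/* Same idea as calc_code_bit(), without the parity cache. */
static unsigned int code_bit(unsigned int i)
{
        unsigned int b = i + 1; /* code bits are 1-based */
        unsigned int p;

        /* Bump past every parity (power-of-two) position below us. */
        for (p = 0; (1u << p) < (b + 1); p++)
                b++;

        return b;
}

int main(void)
{
        unsigned int i;

        for (i = 0; i < 8; i++)
                printf("data bit %u -> code bit %u\n", i, code_bit(i));
        return 0;
}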
93/*
94 * This is the low level encoder function. It can be called across
95 * multiple hunks just like the crc32 code. 'd' is the number of bits
96 * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
97 * two 512B buffers, you would do it like so:
98 *
99 * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
100 * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
101 *
102 * If you just have one buffer, use ocfs2_hamming_encode_block().
103 */
104u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
105{
106 unsigned int i, b, p = 0;
107
108 BUG_ON(!d);
109
110 /*
111 * b is the hamming code bit number. Hamming code specifies a
112 * 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is
113 * for the algorithm.
114 *
115 * The i++ in the for loop is so that the start offset passed
116 * to ocfs2_find_next_bit_set() is one greater than the previously
117 * found bit.
118 */
119 for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
120 {
121 /*
122 * i is the offset in this hunk, nr + i is the total bit
123 * offset.
124 */
125 b = calc_code_bit(nr + i, &p);
126
127 /*
128 * Data bits in the resultant code are checked by
129 * parity bits that are part of the bit number
130 * representation. Huh?
131 *
132 * <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
133 * In other words, the parity bit at position 2^k
134 * checks bits in positions having bit k set in
135 * their binary representation. Conversely, for
136 * instance, bit 13, i.e. 1101(2), is checked by
137 * bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
138 * </wikipedia>
139 *
140 * Note that 'k' is the _code_ bit number. 'b' in
141 * our loop.
142 */
143 parity ^= b;
144 }
145
146 /* While the data buffer was treated as little endian, the
147 * return value is in host endian. */
148 return parity;
149}
150
151u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
152{
153 return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
154}
155
156/*
157 * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
158 * offset of the current hunk. If bit to be fixed is not part of the
159 * current hunk, this does nothing.
160 *
161 * If you only have one hunk, use ocfs2_hamming_fix_block().
162 */
163void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
164 unsigned int fix)
165{
166 unsigned int i, b;
167
168 BUG_ON(!d);
169
170 /*
171 * If the bit to fix has an hweight of 1, it's a parity bit. One
172 * busted parity bit is its own error. Nothing to do here.
173 */
174 if (hweight32(fix) == 1)
175 return;
176
177 /*
178 * nr + d is the bit right past the data hunk we're looking at.
179 * If fix after that, nothing to do
180 */
181 if (fix >= calc_code_bit(nr + d, NULL))
182 return;
183
184 /*
185 * nr is the offset in the data hunk we're starting at. Let's
186 * start b at the offset in the code buffer. See hamming_encode()
187 * for a more detailed description of 'b'.
188 */
189 b = calc_code_bit(nr, NULL);
190 /* If the fix is before this hunk, nothing to do */
191 if (fix < b)
192 return;
193
194 for (i = 0; i < d; i++, b++)
195 {
196 /* Skip past parity bits */
197 while (hweight32(b) == 1)
198 b++;
199
200 /*
201 * i is the offset in this data hunk.
202 * nr + i is the offset in the total data buffer.
203 * b is the offset in the total code buffer.
204 *
205 * Thus, when b == fix, bit i in the current hunk needs
206 * fixing.
207 */
208 if (b == fix)
209 {
210 if (ocfs2_test_bit(i, data))
211 ocfs2_clear_bit(i, data);
212 else
213 ocfs2_set_bit(i, data);
214 break;
215 }
216 }
217}
218
219void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
220 unsigned int fix)
221{
222 ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
223}
224
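To see how encode and fix cooperate, here is a small user-space model (again not part of the patch, and only mimicking the scheme on an 8-bit buffer): the parity word is the XOR of the code-bit numbers of all set data bits, so flipping a single data bit changes the parity by exactly that bit's code-bit number, which is the value ocfs2_hamming_fix() hunts for.

#include <stdio.h>

static unsigned int code_bit(unsigned int i)
{
        unsigned int b = i + 1, p;

        for (p = 0; (1u << p) < (b + 1); p++)
                b++;
        return b;
}

/* XOR together the code-bit numbers of all set bits in one byte. */
static unsigned int parity8(unsigned char data)
{
        unsigned int i, parity = 0;

        for (i = 0; i < 8; i++)
                if (data & (1u << i))
                        parity ^= code_bit(i);
        return parity;
}

int main(void)
{
        unsigned char data = 0x5a;
        unsigned int stored = parity8(data);
        unsigned int fix;

        data ^= 1u << 3;                /* corrupt data bit 3 */

        fix = stored ^ parity8(data);
        printf("fix = %u (code bit of data bit 3 is %u)\n",
               fix, code_bit(3));
        /* ocfs2_hamming_fix() walks the buffer until it reaches the data
         * bit whose code-bit number equals 'fix' and flips it back. */
        return 0;
}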
225/*
226 * This function generates check information for a block.
227 * data is the block to be checked. bc is a pointer to the
228 * ocfs2_block_check structure describing the crc32 and the ecc.
229 *
230 * bc should be a pointer inside data, as the function will
231 * take care of zeroing it before calculating the check information. If
232 * bc does not point inside data, the caller must make sure any inline
233 * ocfs2_block_check structures are zeroed.
234 *
235 * The data buffer must be in on-disk endian (little endian for ocfs2).
236 * bc will be filled with little-endian values and will be ready to go to
237 * disk.
238 */
239void ocfs2_block_check_compute(void *data, size_t blocksize,
240 struct ocfs2_block_check *bc)
241{
242 u32 crc;
243 u32 ecc;
244
245 memset(bc, 0, sizeof(struct ocfs2_block_check));
246
247 crc = crc32_le(~0, data, blocksize);
248 ecc = ocfs2_hamming_encode_block(data, blocksize);
249
250 /*
251 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
252 * larger than 16 bits.
253 */
254 BUG_ON(ecc > USHORT_MAX);
255
256 bc->bc_crc32e = cpu_to_le32(crc);
257 bc->bc_ecc = cpu_to_le16((u16)ecc);
258}
259
260/*
261 * This function validates existing check information. Like _compute,
262 * the function will take care of zeroing bc before calculating check codes.
263 * If bc is not a pointer inside data, the caller must have zeroed any
264 * inline ocfs2_block_check structures.
265 *
266 * Again, the data passed in should be the on-disk endian.
267 */
268int ocfs2_block_check_validate(void *data, size_t blocksize,
269 struct ocfs2_block_check *bc)
270{
271 int rc = 0;
272 struct ocfs2_block_check check;
273 u32 crc, ecc;
274
275 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
276 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
277
278 memset(bc, 0, sizeof(struct ocfs2_block_check));
279
280 /* Fast path - if the crc32 validates, we're good to go */
281 crc = crc32_le(~0, data, blocksize);
282 if (crc == check.bc_crc32e)
283 goto out;
284
285 mlog(ML_ERROR,
286 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
287 (unsigned int)check.bc_crc32e, (unsigned int)crc);
288
289 /* Ok, try ECC fixups */
290 ecc = ocfs2_hamming_encode_block(data, blocksize);
291 ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
292
293 /* And check the crc32 again */
294 crc = crc32_le(~0, data, blocksize);
295 if (crc == check.bc_crc32e)
296 goto out;
297
298 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
299 (unsigned int)check.bc_crc32e, (unsigned int)crc);
300
301 rc = -EIO;
302
303out:
304 bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
305 bc->bc_ecc = cpu_to_le16(check.bc_ecc);
306
307 return rc;
308}
309
310/*
311 * This function generates check information for a list of buffer_heads.
312 * bhs is the blocks to be checked. bc is a pointer to the
313 * ocfs2_block_check structure describing the crc32 and the ecc.
314 *
315 * bc should be a pointer inside data, as the function will
316 * take care of zeroing it before calculating the check information. If
317 * bc does not point inside data, the caller must make sure any inline
318 * ocfs2_block_check structures are zeroed.
319 *
320 * The data buffer must be in on-disk endian (little endian for ocfs2).
321 * bc will be filled with little-endian values and will be ready to go to
322 * disk.
323 */
324void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
325 struct ocfs2_block_check *bc)
326{
327 int i;
328 u32 crc, ecc;
329
330 BUG_ON(nr < 0);
331
332 if (!nr)
333 return;
334
335 memset(bc, 0, sizeof(struct ocfs2_block_check));
336
337 for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
338 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
339 /*
340 * The number of bits in a buffer is obviously b_size*8.
341 * The offset of this buffer is b_size*i, so the bit offset
342 * of this buffer is b_size*8*i.
343 */
344 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
345 bhs[i]->b_size * 8,
346 bhs[i]->b_size * 8 * i);
347 }
348
349 /*
350 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
351 * larger than 16 bits.
352 */
353 BUG_ON(ecc > USHORT_MAX);
354
355 bc->bc_crc32e = cpu_to_le32(crc);
356 bc->bc_ecc = cpu_to_le16((u16)ecc);
357}
358
359/*
360 * This function validates existing check information on a list of
361 * buffer_heads. Like _compute_bhs, the function will take care of
362 * zeroing bc before calculating check codes. If bc is not a pointer
363 * inside data, the caller must have zeroed any inline
364 * ocfs2_block_check structures.
365 *
366 * Again, the data passed in should be the on-disk endian.
367 */
368int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
369 struct ocfs2_block_check *bc)
370{
371 int i, rc = 0;
372 struct ocfs2_block_check check;
373 u32 crc, ecc, fix;
374
375 BUG_ON(nr < 0);
376
377 if (!nr)
378 return 0;
379
380 check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
381 check.bc_ecc = le16_to_cpu(bc->bc_ecc);
382
383 memset(bc, 0, sizeof(struct ocfs2_block_check));
384
385 /* Fast path - if the crc32 validates, we're good to go */
386 for (i = 0, crc = ~0; i < nr; i++)
387 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
388 if (crc == check.bc_crc32e)
389 goto out;
390
391 mlog(ML_ERROR,
392 "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
393 (unsigned int)check.bc_crc32e, (unsigned int)crc);
394
395 /* Ok, try ECC fixups */
396 for (i = 0, ecc = 0; i < nr; i++) {
397 /*
398 * The number of bits in a buffer is obviously b_size*8.
399 * The offset of this buffer is b_size*i, so the bit offset
400 * of this buffer is b_size*8*i.
401 */
402 ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
403 bhs[i]->b_size * 8,
404 bhs[i]->b_size * 8 * i);
405 }
406 fix = ecc ^ check.bc_ecc;
407 for (i = 0; i < nr; i++) {
408 /*
409 * Try the fix against each buffer. It will only affect
410 * one of them.
411 */
412 ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
413 bhs[i]->b_size * 8 * i, fix);
414 }
415
416 /* And check the crc32 again */
417 for (i = 0, crc = ~0; i < nr; i++)
418 crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
419 if (crc == check.bc_crc32e)
420 goto out;
421
422 mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
423 (unsigned int)check.bc_crc32e, (unsigned int)crc);
424
425 rc = -EIO;
426
427out:
428 bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
429 bc->bc_ecc = cpu_to_le16(check.bc_ecc);
430
431 return rc;
432}
433
434/*
435 * These are the main API. They check the superblock flag before
436 * calling the underlying operations.
437 *
438 * They expect the buffer(s) to be in disk format.
439 */
440void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
441 struct ocfs2_block_check *bc)
442{
443 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
444 ocfs2_block_check_compute(data, sb->s_blocksize, bc);
445}
446
447int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
448 struct ocfs2_block_check *bc)
449{
450 int rc = 0;
451
452 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
453 rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
454
455 return rc;
456}
457
458void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
459 struct buffer_head **bhs, int nr,
460 struct ocfs2_block_check *bc)
461{
462 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
463 ocfs2_block_check_compute_bhs(bhs, nr, bc);
464}
465
466int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
467 struct buffer_head **bhs, int nr,
468 struct ocfs2_block_check *bc)
469{
470 int rc = 0;
471
472 if (ocfs2_meta_ecc(OCFS2_SB(sb)))
473 rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
474
475 return rc;
476}
477
diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
new file mode 100644
index 000000000000..70ec3feda32f
--- /dev/null
+++ b/fs/ocfs2/blockcheck.h
@@ -0,0 +1,82 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * blockcheck.h
5 *
6 * Checksum and ECC codes for the OCFS2 userspace library.
7 *
8 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License, version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_BLOCKCHECK_H
21#define OCFS2_BLOCKCHECK_H
22
23
24/* High level block API */
25void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
26 struct ocfs2_block_check *bc);
27int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
28 struct ocfs2_block_check *bc);
29void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
30 struct buffer_head **bhs, int nr,
31 struct ocfs2_block_check *bc);
32int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
33 struct buffer_head **bhs, int nr,
34 struct ocfs2_block_check *bc);
35
36/* Lower level API */
37void ocfs2_block_check_compute(void *data, size_t blocksize,
38 struct ocfs2_block_check *bc);
39int ocfs2_block_check_validate(void *data, size_t blocksize,
40 struct ocfs2_block_check *bc);
41void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
42 struct ocfs2_block_check *bc);
43int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
44 struct ocfs2_block_check *bc);
45
46/*
47 * Hamming code functions
48 */
49
50/*
51 * Encoding hamming code parity bits for a buffer.
52 *
53 * This is the low level encoder function. It can be called across
54 * multiple hunks just like the crc32 code. 'd' is the number of bits
55 * _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
56 * two 512B buffers, you would do it like so:
57 *
58 * parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
59 * parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
60 *
61 * If you just have one buffer, use ocfs2_hamming_encode_block().
62 */
63u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d,
64 unsigned int nr);
65/*
66 * Fix a buffer with a bit error. The 'fix' is the original parity
67 * xor'd with the parity calculated now.
68 *
69 * Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
70 * offset of the current hunk. If bit to be fixed is not part of the
71 * current hunk, this does nothing.
72 *
73 * If you only have one buffer, use ocfs2_hamming_fix_block().
74 */
75void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
76 unsigned int fix);
77
78/* Convenience wrappers for a single buffer of data */
79extern u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize);
80extern void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
81 unsigned int fix);
82#endif
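The high-level calls are the ones the rest of the filesystem uses: compute the check just before a block goes to disk, validate it right after a fresh read, and let both be no-ops when the metaecc feature is not enabled. A hedged fragment for a metadata block that embeds a struct ocfs2_block_check; 'blk' and its b_check field are illustrative names for the on-disk structure living in bh->b_data:

        /* Before writing: seal the block. This is a no-op when the
         * metaecc feature bit is not set on this filesystem. */
        ocfs2_compute_meta_ecc(sb, bh->b_data, &blk->b_check);
        /* ...submit bh... */

        /* After reading: verify, letting the ECC repair one flipped
         * bit if the crc32 does not match. */
        rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &blk->b_check);
        if (rc)
                return rc;      /* -EIO: more damage than one bit */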
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 3a178ec48d7c..15c8e6deee2e 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -39,6 +39,18 @@
39 39
40#include "buffer_head_io.h" 40#include "buffer_head_io.h"
41 41
42/*
43 * Bits on bh->b_state used by ocfs2.
44 *
45 * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
46 */
47enum ocfs2_state_bits {
48 BH_NeedsValidate = BH_JBDPrivateStart,
49};
50
51/* Expand the magic b_state functions */
52BUFFER_FNS(NeedsValidate, needs_validate);
53
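BUFFER_FNS() is the generic buffer_head helper generator from <linux/buffer_head.h>; for this bit it should expand to roughly the three inlines below, which are the helpers used later in this file (shown from memory, so treat it as a sketch rather than the exact kernel text):

static inline void set_buffer_needs_validate(struct buffer_head *bh)
{
        set_bit(BH_NeedsValidate, &bh->b_state);
}

static inline void clear_buffer_needs_validate(struct buffer_head *bh)
{
        clear_bit(BH_NeedsValidate, &bh->b_state);
}

static inline int buffer_needs_validate(const struct buffer_head *bh)
{
        return test_bit(BH_NeedsValidate, &bh->b_state);
}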
42int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, 54int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
43 struct inode *inode) 55 struct inode *inode)
44{ 56{
@@ -166,7 +178,9 @@ bail:
166} 178}
167 179
168int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, 180int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
169 struct buffer_head *bhs[], int flags) 181 struct buffer_head *bhs[], int flags,
182 int (*validate)(struct super_block *sb,
183 struct buffer_head *bh))
170{ 184{
171 int status = 0; 185 int status = 0;
172 int i, ignore_cache = 0; 186 int i, ignore_cache = 0;
@@ -298,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
298 312
299 clear_buffer_uptodate(bh); 313 clear_buffer_uptodate(bh);
300 get_bh(bh); /* for end_buffer_read_sync() */ 314 get_bh(bh); /* for end_buffer_read_sync() */
315 if (validate)
316 set_buffer_needs_validate(bh);
301 bh->b_end_io = end_buffer_read_sync; 317 bh->b_end_io = end_buffer_read_sync;
302 submit_bh(READ, bh); 318 submit_bh(READ, bh);
303 continue; 319 continue;
@@ -328,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
328 bhs[i] = NULL; 344 bhs[i] = NULL;
329 continue; 345 continue;
330 } 346 }
347
348 if (buffer_needs_validate(bh)) {
349 /* We never set NeedsValidate if the
350 * buffer was held by the journal, so
351 * that better not have changed */
352 BUG_ON(buffer_jbd(bh));
353 clear_buffer_needs_validate(bh);
354 status = validate(inode->i_sb, bh);
355 if (status) {
356 put_bh(bh);
357 bhs[i] = NULL;
358 continue;
359 }
360 }
331 } 361 }
332 362
333 /* Always set the buffer in the cache, even if it was 363 /* Always set the buffer in the cache, even if it was
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 75e1dcb1ade7..c75d682dadd8 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,21 +31,24 @@
31void ocfs2_end_buffer_io_sync(struct buffer_head *bh, 31void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
32 int uptodate); 32 int uptodate);
33 33
34static inline int ocfs2_read_block(struct inode *inode,
35 u64 off,
36 struct buffer_head **bh);
37
38int ocfs2_write_block(struct ocfs2_super *osb, 34int ocfs2_write_block(struct ocfs2_super *osb,
39 struct buffer_head *bh, 35 struct buffer_head *bh,
40 struct inode *inode); 36 struct inode *inode);
41int ocfs2_read_blocks(struct inode *inode,
42 u64 block,
43 int nr,
44 struct buffer_head *bhs[],
45 int flags);
46int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, 37int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
47 unsigned int nr, struct buffer_head *bhs[]); 38 unsigned int nr, struct buffer_head *bhs[]);
48 39
40/*
41 * If not NULL, validate() will be called on a buffer that is freshly
42 * read from disk. It will not be called if the buffer was in cache.
43 * Note that if validate() is being used for this buffer, it needs to
44 * be set even for a READAHEAD call, as it marks the buffer for later
45 * validation.
46 */
47int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
48 struct buffer_head *bhs[], int flags,
49 int (*validate)(struct super_block *sb,
50 struct buffer_head *bh));
51
49int ocfs2_write_super_or_backup(struct ocfs2_super *osb, 52int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
50 struct buffer_head *bh); 53 struct buffer_head *bh);
51 54
@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
53#define OCFS2_BH_READAHEAD 8 56#define OCFS2_BH_READAHEAD 8
54 57
55static inline int ocfs2_read_block(struct inode *inode, u64 off, 58static inline int ocfs2_read_block(struct inode *inode, u64 off,
56 struct buffer_head **bh) 59 struct buffer_head **bh,
60 int (*validate)(struct super_block *sb,
61 struct buffer_head *bh))
57{ 62{
58 int status = 0; 63 int status = 0;
59 64
@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
63 goto bail; 68 goto bail;
64 } 69 }
65 70
66 status = ocfs2_read_blocks(inode, off, 1, bh, 0); 71 status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
67 72
68bail: 73bail:
69 return status; 74 return status;
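Putting the new interface together: each metadata type supplies a validator matching the int (*validate)(struct super_block *, struct buffer_head *) signature and hands it to ocfs2_read_block()/ocfs2_read_blocks(); it runs exactly once, on a freshly read buffer, before the block lands in the uptodate cache. A sketch for a hypothetical block type "foo" -- the struct ocfs2_foo_block layout, its f_check field and OCFS2_IS_VALID_FOO_BLOCK() are made up for illustration:

static int ocfs2_validate_foo_block(struct super_block *sb,
                                    struct buffer_head *bh)
{
        struct ocfs2_foo_block *fb = (struct ocfs2_foo_block *)bh->b_data;
        int rc;

        /* Checksum/ECC first, so a flipped bit can be repaired before
         * any fields are examined. */
        rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &fb->f_check);
        if (rc)
                return rc;

        if (!OCFS2_IS_VALID_FOO_BLOCK(fb))
                return -EINVAL;

        return 0;
}

static int ocfs2_read_foo_block(struct inode *inode, u64 blkno,
                                struct buffer_head **bh)
{
        return ocfs2_read_block(inode, blkno, bh,
                                ocfs2_validate_foo_block);
}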
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index d8a0cb92cef6..96df5416993e 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -110,6 +110,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
110 define_mask(QUORUM), 110 define_mask(QUORUM),
111 define_mask(EXPORT), 111 define_mask(EXPORT),
112 define_mask(XATTR), 112 define_mask(XATTR),
113 define_mask(QUOTA),
113 define_mask(ERROR), 114 define_mask(ERROR),
114 define_mask(NOTICE), 115 define_mask(NOTICE),
115 define_mask(KTHREAD), 116 define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 57670c680471..7e72a81bc2d4 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ 113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ 114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
115#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 115#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
116#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116/* bits that are infrequently given and frequently matched in the high word */ 117/* bits that are infrequently given and frequently matched in the high word */
117#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 118#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
118#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */ 119#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */
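ML_QUOTA gives the new quota code its own mask, used the same way as the existing masks elsewhere in the diff: a file picks its prefix and then routine messages go through mlog(). A minimal sketch of the convention; the function, the slot parameter and the message text are only examples:

#define MLOG_MASK_PREFIX ML_QUOTA
#include <cluster/masklog.h>

static void example(int slot)
{
        /* Visible only when the QUOTA log mask is switched on. */
        mlog(0, "syncing quota info for slot %d\n", slot);

        /* ML_ERROR output is always printed. */
        mlog(ML_ERROR, "failed to sync quota for slot %d\n", slot);
}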
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 026e6eb85187..f2c4098cf337 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -40,6 +40,7 @@
40#include <linux/types.h> 40#include <linux/types.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/quotaops.h>
43 44
44#define MLOG_MASK_PREFIX ML_NAMEI 45#define MLOG_MASK_PREFIX ML_NAMEI
45#include <cluster/masklog.h> 46#include <cluster/masklog.h>
@@ -47,6 +48,7 @@
47#include "ocfs2.h" 48#include "ocfs2.h"
48 49
49#include "alloc.h" 50#include "alloc.h"
51#include "blockcheck.h"
50#include "dir.h" 52#include "dir.h"
51#include "dlmglue.h" 53#include "dlmglue.h"
52#include "extent_map.h" 54#include "extent_map.h"
@@ -82,47 +84,72 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
82 struct ocfs2_alloc_context *meta_ac, 84 struct ocfs2_alloc_context *meta_ac,
83 struct buffer_head **new_bh); 85 struct buffer_head **new_bh);
84 86
85static struct buffer_head *ocfs2_bread(struct inode *inode, 87/*
86 int block, int *err, int reada) 88 * These are distinct checks because future versions of the file system will
89 * want to have a trailing dirent structure independent of indexing.
90 */
91static int ocfs2_dir_has_trailer(struct inode *dir)
87{ 92{
88 struct buffer_head *bh = NULL; 93 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
89 int tmperr; 94 return 0;
90 u64 p_blkno;
91 int readflags = 0;
92 95
93 if (reada) 96 return ocfs2_meta_ecc(OCFS2_SB(dir->i_sb));
94 readflags |= OCFS2_BH_READAHEAD; 97}
95 98
96 if (((u64)block << inode->i_sb->s_blocksize_bits) >= 99static int ocfs2_supports_dir_trailer(struct ocfs2_super *osb)
97 i_size_read(inode)) { 100{
98 BUG_ON(!reada); 101 return ocfs2_meta_ecc(osb);
99 return NULL; 102}
100 }
101 103
102 down_read(&OCFS2_I(inode)->ip_alloc_sem); 104static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
103 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, 105{
104 NULL); 106 return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
105 up_read(&OCFS2_I(inode)->ip_alloc_sem); 107}
106 if (tmperr < 0) {
107 mlog_errno(tmperr);
108 goto fail;
109 }
110 108
111 tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); 109#define ocfs2_trailer_from_bh(_bh, _sb) ((struct ocfs2_dir_block_trailer *) ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
112 if (tmperr < 0)
113 goto fail;
114 110
115 tmperr = 0; 111/* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
112 * them more consistent? */
113struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
114 void *data)
115{
116 char *p = data;
116 117
117 *err = 0; 118 p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
118 return bh; 119 return (struct ocfs2_dir_block_trailer *)p;
120}
119 121
120fail: 122/*
121 brelse(bh); 123 * XXX: This is executed once on every dirent. We should consider optimizing
122 bh = NULL; 124 * it.
125 */
126static int ocfs2_skip_dir_trailer(struct inode *dir,
127 struct ocfs2_dir_entry *de,
128 unsigned long offset,
129 unsigned long blklen)
130{
131 unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
123 132
124 *err = -EIO; 133 if (!ocfs2_dir_has_trailer(dir))
125 return NULL; 134 return 0;
135
136 if (offset != toff)
137 return 0;
138
139 return 1;
140}
141
142static void ocfs2_init_dir_trailer(struct inode *inode,
143 struct buffer_head *bh)
144{
145 struct ocfs2_dir_block_trailer *trailer;
146
147 trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
148 strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
149 trailer->db_compat_rec_len =
150 cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
151 trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
152 trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
126} 153}
127 154
128/* 155/*
@@ -231,7 +258,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
231 struct ocfs2_dinode *di; 258 struct ocfs2_dinode *di;
232 struct ocfs2_inline_data *data; 259 struct ocfs2_inline_data *data;
233 260
234 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); 261 ret = ocfs2_read_inode_block(dir, &di_bh);
235 if (ret) { 262 if (ret) {
236 mlog_errno(ret); 263 mlog_errno(ret);
237 goto out; 264 goto out;
@@ -250,6 +277,108 @@ out:
250 return NULL; 277 return NULL;
251} 278}
252 279
280static int ocfs2_validate_dir_block(struct super_block *sb,
281 struct buffer_head *bh)
282{
283 int rc;
284 struct ocfs2_dir_block_trailer *trailer =
285 ocfs2_trailer_from_bh(bh, sb);
286
287
288 /*
289 * We don't validate dirents here, that's handled
290 * in-place when the code walks them.
291 */
292 mlog(0, "Validating dirblock %llu\n",
293 (unsigned long long)bh->b_blocknr);
294
295 BUG_ON(!buffer_uptodate(bh));
296
297 /*
298 * If the ecc fails, we return the error but otherwise
299 * leave the filesystem running. We know any error is
300 * local to this block.
301 *
302 * Note that we are safe to call this even if the directory
303 * doesn't have a trailer. Filesystems without metaecc will do
304 * nothing, and filesystems with it will have one.
305 */
306 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
307 if (rc)
 308 mlog(ML_ERROR, "Checksum failed for directory block %llu\n",
309 (unsigned long long)bh->b_blocknr);
310
311 return rc;
312}
313
314/*
315 * This function forces all errors to -EIO for consistency with its
316 * predecessor, ocfs2_bread(). We haven't audited what returning the
317 * real error codes would do to callers. We log the real codes with
318 * mlog_errno() before we squash them.
319 */
320static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
321 struct buffer_head **bh, int flags)
322{
323 int rc = 0;
324 struct buffer_head *tmp = *bh;
325 struct ocfs2_dir_block_trailer *trailer;
326
327 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
328 ocfs2_validate_dir_block);
329 if (rc) {
330 mlog_errno(rc);
331 goto out;
332 }
333
334 /*
335 * We check the trailer here rather than in
336 * ocfs2_validate_dir_block() because that function doesn't have
337 * the inode to test.
338 */
339 if (!(flags & OCFS2_BH_READAHEAD) &&
340 ocfs2_dir_has_trailer(inode)) {
341 trailer = ocfs2_trailer_from_bh(tmp, inode->i_sb);
342 if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
343 rc = -EINVAL;
344 ocfs2_error(inode->i_sb,
345 "Invalid dirblock #%llu: "
346 "signature = %.*s\n",
347 (unsigned long long)tmp->b_blocknr, 7,
348 trailer->db_signature);
349 goto out;
350 }
351 if (le64_to_cpu(trailer->db_blkno) != tmp->b_blocknr) {
352 rc = -EINVAL;
353 ocfs2_error(inode->i_sb,
354 "Directory block #%llu has an invalid "
355 "db_blkno of %llu",
356 (unsigned long long)tmp->b_blocknr,
357 (unsigned long long)le64_to_cpu(trailer->db_blkno));
358 goto out;
359 }
360 if (le64_to_cpu(trailer->db_parent_dinode) !=
361 OCFS2_I(inode)->ip_blkno) {
362 rc = -EINVAL;
363 ocfs2_error(inode->i_sb,
364 "Directory block #%llu on dinode "
365 "#%llu has an invalid parent_dinode "
366 "of %llu",
367 (unsigned long long)tmp->b_blocknr,
368 (unsigned long long)OCFS2_I(inode)->ip_blkno,
 369 (unsigned long long)le64_to_cpu(trailer->db_parent_dinode));
370 goto out;
371 }
372 }
373
374 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
375 if (!*bh)
376 *bh = tmp;
377
378out:
379 return rc ? -EIO : 0;
380}
381
253static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, 382static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
254 struct inode *dir, 383 struct inode *dir,
255 struct ocfs2_dir_entry **res_dir) 384 struct ocfs2_dir_entry **res_dir)
@@ -296,15 +425,17 @@ restart:
296 } 425 }
297 num++; 426 num++;
298 427
299 bh = ocfs2_bread(dir, b++, &err, 1); 428 bh = NULL;
429 err = ocfs2_read_dir_block(dir, b++, &bh,
430 OCFS2_BH_READAHEAD);
300 bh_use[ra_max] = bh; 431 bh_use[ra_max] = bh;
301 } 432 }
302 } 433 }
303 if ((bh = bh_use[ra_ptr++]) == NULL) 434 if ((bh = bh_use[ra_ptr++]) == NULL)
304 goto next; 435 goto next;
305 if (ocfs2_read_block(dir, block, &bh)) { 436 if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
306 /* read error, skip block & hope for the best. 437 /* read error, skip block & hope for the best.
307 * ocfs2_read_block() has released the bh. */ 438 * ocfs2_read_dir_block() has released the bh. */
308 ocfs2_error(dir->i_sb, "reading directory %llu, " 439 ocfs2_error(dir->i_sb, "reading directory %llu, "
309 "offset %lu\n", 440 "offset %lu\n",
310 (unsigned long long)OCFS2_I(dir)->ip_blkno, 441 (unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -381,14 +512,18 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
381 struct inode *new_entry_inode) 512 struct inode *new_entry_inode)
382{ 513{
383 int ret; 514 int ret;
515 ocfs2_journal_access_func access = ocfs2_journal_access_db;
384 516
385 /* 517 /*
386 * The same code works fine for both inline-data and extent 518 * The same code works fine for both inline-data and extent
387 * based directories, so no need to split this up. 519 * based directories, so no need to split this up. The only
520 * difference is the journal_access function.
388 */ 521 */
389 522
390 ret = ocfs2_journal_access(handle, dir, de_bh, 523 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
391 OCFS2_JOURNAL_ACCESS_WRITE); 524 access = ocfs2_journal_access_di;
525
526 ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE);
392 if (ret) { 527 if (ret) {
393 mlog_errno(ret); 528 mlog_errno(ret);
394 goto out; 529 goto out;
@@ -410,9 +545,13 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
410{ 545{
411 struct ocfs2_dir_entry *de, *pde; 546 struct ocfs2_dir_entry *de, *pde;
412 int i, status = -ENOENT; 547 int i, status = -ENOENT;
548 ocfs2_journal_access_func access = ocfs2_journal_access_db;
413 549
414 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh); 550 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
415 551
552 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
553 access = ocfs2_journal_access_di;
554
416 i = 0; 555 i = 0;
417 pde = NULL; 556 pde = NULL;
418 de = (struct ocfs2_dir_entry *) first_de; 557 de = (struct ocfs2_dir_entry *) first_de;
@@ -423,8 +562,8 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
423 goto bail; 562 goto bail;
424 } 563 }
425 if (de == de_del) { 564 if (de == de_del) {
426 status = ocfs2_journal_access(handle, dir, bh, 565 status = access(handle, dir, bh,
427 OCFS2_JOURNAL_ACCESS_WRITE); 566 OCFS2_JOURNAL_ACCESS_WRITE);
428 if (status < 0) { 567 if (status < 0) {
429 status = -EIO; 568 status = -EIO;
430 mlog_errno(status); 569 mlog_errno(status);
@@ -458,7 +597,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
458 struct ocfs2_dinode *di; 597 struct ocfs2_dinode *di;
459 struct ocfs2_inline_data *data; 598 struct ocfs2_inline_data *data;
460 599
461 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); 600 ret = ocfs2_read_inode_block(dir, &di_bh);
462 if (ret) { 601 if (ret) {
463 mlog_errno(ret); 602 mlog_errno(ret);
464 goto out; 603 goto out;
@@ -576,6 +715,16 @@ int __ocfs2_add_entry(handle_t *handle,
576 goto bail; 715 goto bail;
577 } 716 }
578 717
718 /* We're guaranteed that we should have space, so we
719 * can't possibly have hit the trailer...right? */
720 mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
721 "Hit dir trailer trying to insert %.*s "
722 "(namelen %d) into directory %llu. "
723 "offset is %lu, trailer offset is %d\n",
724 namelen, name, namelen,
725 (unsigned long long)parent_fe_bh->b_blocknr,
726 offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
727
579 if (ocfs2_dirent_would_fit(de, rec_len)) { 728 if (ocfs2_dirent_would_fit(de, rec_len)) {
580 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 729 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
581 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh); 730 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
@@ -584,8 +733,14 @@ int __ocfs2_add_entry(handle_t *handle,
584 goto bail; 733 goto bail;
585 } 734 }
586 735
587 status = ocfs2_journal_access(handle, dir, insert_bh, 736 if (insert_bh == parent_fe_bh)
588 OCFS2_JOURNAL_ACCESS_WRITE); 737 status = ocfs2_journal_access_di(handle, dir,
738 insert_bh,
739 OCFS2_JOURNAL_ACCESS_WRITE);
740 else
741 status = ocfs2_journal_access_db(handle, dir,
742 insert_bh,
743 OCFS2_JOURNAL_ACCESS_WRITE);
589 /* By now the buffer is marked for journaling */ 744 /* By now the buffer is marked for journaling */
590 offset += le16_to_cpu(de->rec_len); 745 offset += le16_to_cpu(de->rec_len);
591 if (le64_to_cpu(de->inode)) { 746 if (le64_to_cpu(de->inode)) {
@@ -611,6 +766,7 @@ int __ocfs2_add_entry(handle_t *handle,
611 retval = 0; 766 retval = 0;
612 goto bail; 767 goto bail;
613 } 768 }
769
614 offset += le16_to_cpu(de->rec_len); 770 offset += le16_to_cpu(de->rec_len);
615 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len)); 771 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
616 } 772 }
@@ -636,7 +792,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
636 struct ocfs2_inline_data *data; 792 struct ocfs2_inline_data *data;
637 struct ocfs2_dir_entry *de; 793 struct ocfs2_dir_entry *de;
638 794
639 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 795 ret = ocfs2_read_inode_block(inode, &di_bh);
640 if (ret) { 796 if (ret) {
641 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", 797 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
642 (unsigned long long)OCFS2_I(inode)->ip_blkno); 798 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -724,7 +880,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
724 int i, stored; 880 int i, stored;
725 struct buffer_head * bh, * tmp; 881 struct buffer_head * bh, * tmp;
726 struct ocfs2_dir_entry * de; 882 struct ocfs2_dir_entry * de;
727 int err;
728 struct super_block * sb = inode->i_sb; 883 struct super_block * sb = inode->i_sb;
729 unsigned int ra_sectors = 16; 884 unsigned int ra_sectors = 16;
730 885
@@ -735,12 +890,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
735 890
736 while (!error && !stored && *f_pos < i_size_read(inode)) { 891 while (!error && !stored && *f_pos < i_size_read(inode)) {
737 blk = (*f_pos) >> sb->s_blocksize_bits; 892 blk = (*f_pos) >> sb->s_blocksize_bits;
738 bh = ocfs2_bread(inode, blk, &err, 0); 893 if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
739 if (!bh) { 894 /* Skip the corrupt dirblock and keep trying */
740 mlog(ML_ERROR,
741 "directory #%llu contains a hole at offset %lld\n",
742 (unsigned long long)OCFS2_I(inode)->ip_blkno,
743 *f_pos);
744 *f_pos += sb->s_blocksize - offset; 895 *f_pos += sb->s_blocksize - offset;
745 continue; 896 continue;
746 } 897 }
@@ -754,8 +905,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
754 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { 905 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
755 for (i = ra_sectors >> (sb->s_blocksize_bits - 9); 906 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
756 i > 0; i--) { 907 i > 0; i--) {
757 tmp = ocfs2_bread(inode, ++blk, &err, 1); 908 tmp = NULL;
758 brelse(tmp); 909 if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
910 OCFS2_BH_READAHEAD))
911 brelse(tmp);
759 } 912 }
760 last_ra_blk = blk; 913 last_ra_blk = blk;
761 ra_sectors = 8; 914 ra_sectors = 8;
@@ -828,6 +981,7 @@ revalidate:
828 } 981 }
829 offset = 0; 982 offset = 0;
830 brelse(bh); 983 brelse(bh);
984 bh = NULL;
831 } 985 }
832 986
833 stored = 0; 987 stored = 0;
@@ -1050,9 +1204,15 @@ int ocfs2_empty_dir(struct inode *inode)
1050 return !priv.seen_other; 1204 return !priv.seen_other;
1051} 1205}
1052 1206
1053static void ocfs2_fill_initial_dirents(struct inode *inode, 1207/*
1054 struct inode *parent, 1208 * Fills "." and ".." dirents in a new directory block. Returns dirent for
1055 char *start, unsigned int size) 1209 * "..", which might be used during creation of a directory with a trailing
1210 * header. It is otherwise safe to ignore the return code.
1211 */
1212static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
1213 struct inode *parent,
1214 char *start,
1215 unsigned int size)
1056{ 1216{
1057 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start; 1217 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
1058 1218
@@ -1069,6 +1229,8 @@ static void ocfs2_fill_initial_dirents(struct inode *inode,
1069 de->name_len = 2; 1229 de->name_len = 2;
1070 strcpy(de->name, ".."); 1230 strcpy(de->name, "..");
1071 ocfs2_set_de_type(de, S_IFDIR); 1231 ocfs2_set_de_type(de, S_IFDIR);
1232
1233 return de;
1072} 1234}
1073 1235
1074/* 1236/*
@@ -1086,8 +1248,8 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
1086 struct ocfs2_inline_data *data = &di->id2.i_data; 1248 struct ocfs2_inline_data *data = &di->id2.i_data;
1087 unsigned int size = le16_to_cpu(data->id_count); 1249 unsigned int size = le16_to_cpu(data->id_count);
1088 1250
1089 ret = ocfs2_journal_access(handle, inode, di_bh, 1251 ret = ocfs2_journal_access_di(handle, inode, di_bh,
1090 OCFS2_JOURNAL_ACCESS_WRITE); 1252 OCFS2_JOURNAL_ACCESS_WRITE);
1091 if (ret) { 1253 if (ret) {
1092 mlog_errno(ret); 1254 mlog_errno(ret);
1093 goto out; 1255 goto out;
@@ -1121,10 +1283,15 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1121 struct ocfs2_alloc_context *data_ac) 1283 struct ocfs2_alloc_context *data_ac)
1122{ 1284{
1123 int status; 1285 int status;
1286 unsigned int size = osb->sb->s_blocksize;
1124 struct buffer_head *new_bh = NULL; 1287 struct buffer_head *new_bh = NULL;
1288 struct ocfs2_dir_entry *de;
1125 1289
1126 mlog_entry_void(); 1290 mlog_entry_void();
1127 1291
1292 if (ocfs2_supports_dir_trailer(osb))
1293 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
1294
1128 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh, 1295 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
1129 data_ac, NULL, &new_bh); 1296 data_ac, NULL, &new_bh);
1130 if (status < 0) { 1297 if (status < 0) {
@@ -1134,16 +1301,17 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1134 1301
1135 ocfs2_set_new_buffer_uptodate(inode, new_bh); 1302 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1136 1303
1137 status = ocfs2_journal_access(handle, inode, new_bh, 1304 status = ocfs2_journal_access_db(handle, inode, new_bh,
1138 OCFS2_JOURNAL_ACCESS_CREATE); 1305 OCFS2_JOURNAL_ACCESS_CREATE);
1139 if (status < 0) { 1306 if (status < 0) {
1140 mlog_errno(status); 1307 mlog_errno(status);
1141 goto bail; 1308 goto bail;
1142 } 1309 }
1143 memset(new_bh->b_data, 0, osb->sb->s_blocksize); 1310 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
1144 1311
1145 ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, 1312 de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
1146 osb->sb->s_blocksize); 1313 if (ocfs2_supports_dir_trailer(osb))
1314 ocfs2_init_dir_trailer(inode, new_bh);
1147 1315
1148 status = ocfs2_journal_dirty(handle, new_bh); 1316 status = ocfs2_journal_dirty(handle, new_bh);
1149 if (status < 0) { 1317 if (status < 0) {
@@ -1184,13 +1352,27 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
1184 data_ac); 1352 data_ac);
1185} 1353}
1186 1354
1355/*
1356 * Expand rec_len of the rightmost dirent in a directory block so that it
1357 * contains the end of our valid space for dirents. We do this during
1358 * expansion from an inline directory to one with extents. The first dir block
1359 * in that case is taken from the inline data portion of the inode block.
1360 *
1361 * We add the dir trailer if this filesystem wants it.
1362 */
1187static void ocfs2_expand_last_dirent(char *start, unsigned int old_size, 1363static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
1188 unsigned int new_size) 1364 struct super_block *sb)
1189{ 1365{
1190 struct ocfs2_dir_entry *de; 1366 struct ocfs2_dir_entry *de;
1191 struct ocfs2_dir_entry *prev_de; 1367 struct ocfs2_dir_entry *prev_de;
1192 char *de_buf, *limit; 1368 char *de_buf, *limit;
1193 unsigned int bytes = new_size - old_size; 1369 unsigned int new_size = sb->s_blocksize;
1370 unsigned int bytes;
1371
1372 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
1373 new_size = ocfs2_dir_trailer_blk_off(sb);
1374
1375 bytes = new_size - old_size;
1194 1376
1195 limit = start + old_size; 1377 limit = start + old_size;
1196 de_buf = start; 1378 de_buf = start;
@@ -1216,9 +1398,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1216 unsigned int blocks_wanted, 1398 unsigned int blocks_wanted,
1217 struct buffer_head **first_block_bh) 1399 struct buffer_head **first_block_bh)
1218{ 1400{
1219 int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
1220 u32 alloc, bit_off, len; 1401 u32 alloc, bit_off, len;
1221 struct super_block *sb = dir->i_sb; 1402 struct super_block *sb = dir->i_sb;
1403 int ret, credits = ocfs2_inline_to_extents_credits(sb);
1222 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; 1404 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
1223 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 1405 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
1224 struct ocfs2_inode_info *oi = OCFS2_I(dir); 1406 struct ocfs2_inode_info *oi = OCFS2_I(dir);
@@ -1227,6 +1409,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1227 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1228 handle_t *handle; 1410 handle_t *handle;
1229 struct ocfs2_extent_tree et; 1411 struct ocfs2_extent_tree et;
1412 int did_quota = 0;
1230 1413
1231 ocfs2_init_dinode_extent_tree(&et, dir, di_bh); 1414 ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
1232 1415
@@ -1264,6 +1447,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1264 goto out_sem; 1447 goto out_sem;
1265 } 1448 }
1266 1449
1450 if (vfs_dq_alloc_space_nodirty(dir,
1451 ocfs2_clusters_to_bytes(osb->sb, alloc))) {
1452 ret = -EDQUOT;
1453 goto out_commit;
1454 }
1455 did_quota = 1;
1267 /* 1456 /*
1268 * Try to claim as many clusters as the bitmap can give though 1457 * Try to claim as many clusters as the bitmap can give though
1269 * if we only get one now, that's enough to continue. The rest 1458 * if we only get one now, that's enough to continue. The rest
@@ -1290,8 +1479,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1290 1479
1291 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); 1480 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
1292 1481
1293 ret = ocfs2_journal_access(handle, dir, dirdata_bh, 1482 ret = ocfs2_journal_access_db(handle, dir, dirdata_bh,
1294 OCFS2_JOURNAL_ACCESS_CREATE); 1483 OCFS2_JOURNAL_ACCESS_CREATE);
1295 if (ret) { 1484 if (ret) {
1296 mlog_errno(ret); 1485 mlog_errno(ret);
1297 goto out_commit; 1486 goto out_commit;
@@ -1300,8 +1489,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1300 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir)); 1489 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
1301 memset(dirdata_bh->b_data + i_size_read(dir), 0, 1490 memset(dirdata_bh->b_data + i_size_read(dir), 0,
1302 sb->s_blocksize - i_size_read(dir)); 1491 sb->s_blocksize - i_size_read(dir));
1303 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), 1492 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), sb);
1304 sb->s_blocksize); 1493 if (ocfs2_supports_dir_trailer(osb))
1494 ocfs2_init_dir_trailer(dir, dirdata_bh);
1305 1495
1306 ret = ocfs2_journal_dirty(handle, dirdata_bh); 1496 ret = ocfs2_journal_dirty(handle, dirdata_bh);
1307 if (ret) { 1497 if (ret) {
@@ -1317,8 +1507,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1317 * We let the later dirent insert modify c/mtime - to the user 1507 * We let the later dirent insert modify c/mtime - to the user
1318 * the data hasn't changed. 1508 * the data hasn't changed.
1319 */ 1509 */
1320 ret = ocfs2_journal_access(handle, dir, di_bh, 1510 ret = ocfs2_journal_access_di(handle, dir, di_bh,
1321 OCFS2_JOURNAL_ACCESS_CREATE); 1511 OCFS2_JOURNAL_ACCESS_CREATE);
1322 if (ret) { 1512 if (ret) {
1323 mlog_errno(ret); 1513 mlog_errno(ret);
1324 goto out_commit; 1514 goto out_commit;
@@ -1386,6 +1576,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1386 dirdata_bh = NULL; 1576 dirdata_bh = NULL;
1387 1577
1388out_commit: 1578out_commit:
1579 if (ret < 0 && did_quota)
1580 vfs_dq_free_space_nodirty(dir,
1581 ocfs2_clusters_to_bytes(osb->sb, 2));
1389 ocfs2_commit_trans(osb, handle); 1582 ocfs2_commit_trans(osb, handle);
1390 1583
1391out_sem: 1584out_sem:
@@ -1410,7 +1603,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1410 struct buffer_head **new_bh) 1603 struct buffer_head **new_bh)
1411{ 1604{
1412 int status; 1605 int status;
1413 int extend; 1606 int extend, did_quota = 0;
1414 u64 p_blkno, v_blkno; 1607 u64 p_blkno, v_blkno;
1415 1608
1416 spin_lock(&OCFS2_I(dir)->ip_lock); 1609 spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -1420,6 +1613,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1420 if (extend) { 1613 if (extend) {
1421 u32 offset = OCFS2_I(dir)->ip_clusters; 1614 u32 offset = OCFS2_I(dir)->ip_clusters;
1422 1615
1616 if (vfs_dq_alloc_space_nodirty(dir,
1617 ocfs2_clusters_to_bytes(sb, 1))) {
1618 status = -EDQUOT;
1619 goto bail;
1620 }
1621 did_quota = 1;
1622
1423 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, 1623 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1424 1, 0, parent_fe_bh, handle, 1624 1, 0, parent_fe_bh, handle,
1425 data_ac, meta_ac, NULL); 1625 data_ac, meta_ac, NULL);
@@ -1445,6 +1645,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1445 } 1645 }
1446 status = 0; 1646 status = 0;
1447bail: 1647bail:
1648 if (did_quota && status < 0)
1649 vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
1448 mlog_exit(status); 1650 mlog_exit(status);
1449 return status; 1651 return status;
1450} 1652}
@@ -1569,16 +1771,22 @@ do_extend:
1569 1771
1570 ocfs2_set_new_buffer_uptodate(dir, new_bh); 1772 ocfs2_set_new_buffer_uptodate(dir, new_bh);
1571 1773
1572 status = ocfs2_journal_access(handle, dir, new_bh, 1774 status = ocfs2_journal_access_db(handle, dir, new_bh,
1573 OCFS2_JOURNAL_ACCESS_CREATE); 1775 OCFS2_JOURNAL_ACCESS_CREATE);
1574 if (status < 0) { 1776 if (status < 0) {
1575 mlog_errno(status); 1777 mlog_errno(status);
1576 goto bail; 1778 goto bail;
1577 } 1779 }
1578 memset(new_bh->b_data, 0, sb->s_blocksize); 1780 memset(new_bh->b_data, 0, sb->s_blocksize);
1781
1579 de = (struct ocfs2_dir_entry *) new_bh->b_data; 1782 de = (struct ocfs2_dir_entry *) new_bh->b_data;
1580 de->inode = 0; 1783 de->inode = 0;
1581 de->rec_len = cpu_to_le16(sb->s_blocksize); 1784 if (ocfs2_dir_has_trailer(dir)) {
1785 de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
1786 ocfs2_init_dir_trailer(dir, new_bh);
1787 } else {
1788 de->rec_len = cpu_to_le16(sb->s_blocksize);
1789 }
1582 status = ocfs2_journal_dirty(handle, new_bh); 1790 status = ocfs2_journal_dirty(handle, new_bh);
1583 if (status < 0) { 1791 if (status < 0) {
1584 mlog_errno(status); 1792 mlog_errno(status);
@@ -1620,11 +1828,21 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1620 unsigned int *blocks_wanted) 1828 unsigned int *blocks_wanted)
1621{ 1829{
1622 int ret; 1830 int ret;
1831 struct super_block *sb = dir->i_sb;
1623 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1832 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1624 struct ocfs2_dir_entry *de, *last_de = NULL; 1833 struct ocfs2_dir_entry *de, *last_de = NULL;
1625 char *de_buf, *limit; 1834 char *de_buf, *limit;
1626 unsigned long offset = 0; 1835 unsigned long offset = 0;
1627 unsigned int rec_len, new_rec_len; 1836 unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
1837
1838 /*
1839 * This calculates how many free bytes we'd have in block zero, should
1840 * this function force expansion to an extent tree.
1841 */
1842 if (ocfs2_supports_dir_trailer(OCFS2_SB(sb)))
1843 free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
1844 else
1845 free_space = dir->i_sb->s_blocksize - i_size_read(dir);
1628 1846
1629 de_buf = di->id2.i_data.id_data; 1847 de_buf = di->id2.i_data.id_data;
1630 limit = de_buf + i_size_read(dir); 1848 limit = de_buf + i_size_read(dir);
@@ -1641,6 +1859,11 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1641 ret = -EEXIST; 1859 ret = -EEXIST;
1642 goto out; 1860 goto out;
1643 } 1861 }
1862 /*
1863 * No need to check for a trailing dirent record here as
1864 * they're not used for inline dirs.
1865 */
1866
1644 if (ocfs2_dirent_would_fit(de, rec_len)) { 1867 if (ocfs2_dirent_would_fit(de, rec_len)) {
1645 /* Ok, we found a spot. Return this bh and let 1868 /* Ok, we found a spot. Return this bh and let
1646 * the caller actually fill it in. */ 1869 * the caller actually fill it in. */
@@ -1661,7 +1884,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
1661 * dirent can be found. 1884 * dirent can be found.
1662 */ 1885 */
1663 *blocks_wanted = 1; 1886 *blocks_wanted = 1;
1664 new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir)); 1887 new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
1665 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len))) 1888 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
1666 *blocks_wanted = 2; 1889 *blocks_wanted = 2;
1667 1890
@@ -1679,9 +1902,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1679 struct ocfs2_dir_entry *de; 1902 struct ocfs2_dir_entry *de;
1680 struct super_block *sb = dir->i_sb; 1903 struct super_block *sb = dir->i_sb;
1681 int status; 1904 int status;
1905 int blocksize = dir->i_sb->s_blocksize;
1682 1906
1683 bh = ocfs2_bread(dir, 0, &status, 0); 1907 status = ocfs2_read_dir_block(dir, 0, &bh, 0);
1684 if (!bh) { 1908 if (status) {
1685 mlog_errno(status); 1909 mlog_errno(status);
1686 goto bail; 1910 goto bail;
1687 } 1911 }
@@ -1702,11 +1926,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1702 status = -ENOSPC; 1926 status = -ENOSPC;
1703 goto bail; 1927 goto bail;
1704 } 1928 }
1705 bh = ocfs2_bread(dir, 1929 status = ocfs2_read_dir_block(dir,
1706 offset >> sb->s_blocksize_bits, 1930 offset >> sb->s_blocksize_bits,
1707 &status, 1931 &bh, 0);
1708 0); 1932 if (status) {
1709 if (!bh) {
1710 mlog_errno(status); 1933 mlog_errno(status);
1711 goto bail; 1934 goto bail;
1712 } 1935 }
@@ -1721,6 +1944,11 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1721 status = -EEXIST; 1944 status = -EEXIST;
1722 goto bail; 1945 goto bail;
1723 } 1946 }
1947
1948 if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
1949 blocksize))
1950 goto next;
1951
1724 if (ocfs2_dirent_would_fit(de, rec_len)) { 1952 if (ocfs2_dirent_would_fit(de, rec_len)) {
1725 /* Ok, we found a spot. Return this bh and let 1953 /* Ok, we found a spot. Return this bh and let
1726 * the caller actually fill it in. */ 1954 * the caller actually fill it in. */
@@ -1729,6 +1957,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1729 status = 0; 1957 status = 0;
1730 goto bail; 1958 goto bail;
1731 } 1959 }
1960next:
1732 offset += le16_to_cpu(de->rec_len); 1961 offset += le16_to_cpu(de->rec_len);
1733 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); 1962 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
1734 } 1963 }
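
The dir.c changes above reserve the tail of each non-inline directory block for a struct ocfs2_dir_block_trailer and have ocfs2_read_dir_block() verify it after the read. A minimal sketch of those structural sanity checks, pulled out as a standalone helper for clarity (the helper name is hypothetical; it reuses the macros defined in the hunks above):

static int example_check_dir_trailer(struct super_block *sb,
				     struct buffer_head *bh,
				     u64 parent_dinode_blkno)
{
	struct ocfs2_dir_block_trailer *trailer = ocfs2_trailer_from_bh(bh, sb);

	/* Signature written by ocfs2_init_dir_trailer() at format time. */
	if (!OCFS2_IS_VALID_DIR_TRAILER(trailer))
		return -EINVAL;

	/* The trailer records where the block lives on disk... */
	if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr)
		return -EINVAL;

	/* ...and which directory inode owns it. */
	if (le64_to_cpu(trailer->db_parent_dinode) != parent_dinode_blkno)
		return -EINVAL;

	return 0;
}

The checksum/ecc in db_check is handled separately by ocfs2_validate_dir_block(), which runs as the read-validation callback before these checks.
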
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index ce48b9080d87..c511e2e18e9f 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -83,4 +83,6 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
83 struct buffer_head *fe_bh, 83 struct buffer_head *fe_bh,
84 struct ocfs2_alloc_context *data_ac); 84 struct ocfs2_alloc_context *data_ac);
85 85
86struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
87 void *data);
86#endif /* OCFS2_DIR_H */ 88#endif /* OCFS2_DIR_H */
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 644bee55d8ba..d07ddbe4b283 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
275 struct list_head *iter, *head=NULL; 275 struct list_head *iter, *head=NULL;
276 u64 cookie; 276 u64 cookie;
277 u32 flags; 277 u32 flags;
278 u8 node;
278 279
279 if (!dlm_grab(dlm)) { 280 if (!dlm_grab(dlm)) {
280 dlm_error(DLM_REJECTED); 281 dlm_error(DLM_REJECTED);
@@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
286 287
287 name = past->name; 288 name = past->name;
288 locklen = past->namelen; 289 locklen = past->namelen;
289 cookie = be64_to_cpu(past->cookie); 290 cookie = past->cookie;
290 flags = be32_to_cpu(past->flags); 291 flags = be32_to_cpu(past->flags);
292 node = past->node_idx;
291 293
292 if (locklen > DLM_LOCKID_NAME_MAX) { 294 if (locklen > DLM_LOCKID_NAME_MAX) {
293 ret = DLM_IVBUFLEN; 295 ret = DLM_IVBUFLEN;
294 mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n"); 296 mlog(ML_ERROR, "Invalid name length (%d) in proxy ast "
297 "handler!\n", locklen);
295 goto leave; 298 goto leave;
296 } 299 }
297 300
298 if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) == 301 if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
299 (LKM_PUT_LVB|LKM_GET_LVB)) { 302 (LKM_PUT_LVB|LKM_GET_LVB)) {
300 mlog(ML_ERROR, "both PUT and GET lvb specified\n"); 303 mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n",
304 flags);
301 ret = DLM_BADARGS; 305 ret = DLM_BADARGS;
302 goto leave; 306 goto leave;
303 } 307 }
@@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
310 if (past->type != DLM_AST && 314 if (past->type != DLM_AST &&
311 past->type != DLM_BAST) { 315 past->type != DLM_BAST) {
312 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu" 316 mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
313 "name=%.*s\n", past->type, 317 "name=%.*s, node=%u\n", past->type,
314 dlm_get_lock_cookie_node(cookie), 318 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
315 dlm_get_lock_cookie_seq(cookie), 319 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
316 locklen, name); 320 locklen, name, node);
317 ret = DLM_IVLOCKID; 321 ret = DLM_IVLOCKID;
318 goto leave; 322 goto leave;
319 } 323 }
320 324
321 res = dlm_lookup_lockres(dlm, name, locklen); 325 res = dlm_lookup_lockres(dlm, name, locklen);
322 if (!res) { 326 if (!res) {
323 mlog(0, "got %sast for unknown lockres! " 327 mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, "
324 "cookie=%u:%llu, name=%.*s, namelen=%u\n", 328 "name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"),
325 past->type == DLM_AST ? "" : "b", 329 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
326 dlm_get_lock_cookie_node(cookie), 330 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
327 dlm_get_lock_cookie_seq(cookie), 331 locklen, name, node);
328 locklen, name, locklen);
329 ret = DLM_IVLOCKID; 332 ret = DLM_IVLOCKID;
330 goto leave; 333 goto leave;
331 } 334 }
@@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
337 340
338 spin_lock(&res->spinlock); 341 spin_lock(&res->spinlock);
339 if (res->state & DLM_LOCK_RES_RECOVERING) { 342 if (res->state & DLM_LOCK_RES_RECOVERING) {
340 mlog(0, "responding with DLM_RECOVERING!\n"); 343 mlog(0, "Responding with DLM_RECOVERING!\n");
341 ret = DLM_RECOVERING; 344 ret = DLM_RECOVERING;
342 goto unlock_out; 345 goto unlock_out;
343 } 346 }
344 if (res->state & DLM_LOCK_RES_MIGRATING) { 347 if (res->state & DLM_LOCK_RES_MIGRATING) {
345 mlog(0, "responding with DLM_MIGRATING!\n"); 348 mlog(0, "Responding with DLM_MIGRATING!\n");
346 ret = DLM_MIGRATING; 349 ret = DLM_MIGRATING;
347 goto unlock_out; 350 goto unlock_out;
348 } 351 }
@@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
351 lock = NULL; 354 lock = NULL;
352 list_for_each(iter, head) { 355 list_for_each(iter, head) {
353 lock = list_entry (iter, struct dlm_lock, list); 356 lock = list_entry (iter, struct dlm_lock, list);
354 if (be64_to_cpu(lock->ml.cookie) == cookie) 357 if (lock->ml.cookie == cookie)
355 goto do_ast; 358 goto do_ast;
356 } 359 }
357 360
@@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
363 366
364 list_for_each(iter, head) { 367 list_for_each(iter, head) {
365 lock = list_entry (iter, struct dlm_lock, list); 368 lock = list_entry (iter, struct dlm_lock, list);
366 if (be64_to_cpu(lock->ml.cookie) == cookie) 369 if (lock->ml.cookie == cookie)
367 goto do_ast; 370 goto do_ast;
368 } 371 }
369 372
370 mlog(0, "got %sast for unknown lock! cookie=%u:%llu, " 373 mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
371 "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 374 "node=%u\n", past->type == DLM_AST ? "" : "b",
372 dlm_get_lock_cookie_node(cookie), 375 dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
373 dlm_get_lock_cookie_seq(cookie), 376 dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
374 locklen, name, locklen); 377 locklen, name, node);
375 378
376 ret = DLM_NORMAL; 379 ret = DLM_NORMAL;
377unlock_out: 380unlock_out:
@@ -383,8 +386,8 @@ do_ast:
383 if (past->type == DLM_AST) { 386 if (past->type == DLM_AST) {
384 /* do not alter lock refcount. switching lists. */ 387 /* do not alter lock refcount. switching lists. */
385 list_move_tail(&lock->list, &res->granted); 388 list_move_tail(&lock->list, &res->granted);
386 mlog(0, "ast: adding to granted list... type=%d, " 389 mlog(0, "ast: Adding to granted list... type=%d, "
387 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type); 390 "convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
388 if (lock->ml.convert_type != LKM_IVMODE) { 391 if (lock->ml.convert_type != LKM_IVMODE) {
389 lock->ml.type = lock->ml.convert_type; 392 lock->ml.type = lock->ml.convert_type;
390 lock->ml.convert_type = LKM_IVMODE; 393 lock->ml.convert_type = LKM_IVMODE;
@@ -408,7 +411,6 @@ do_ast:
408 dlm_do_local_bast(dlm, res, lock, past->blocked_type); 411 dlm_do_local_bast(dlm, res, lock, past->blocked_type);
409 412
410leave: 413leave:
411
412 if (res) 414 if (res)
413 dlm_lockres_put(res); 415 dlm_lockres_put(res);
414 416
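
The dlmast.c hunk above stops byte-swapping the cookie up front: it is kept in its on-wire big-endian form, compared raw against lock->ml.cookie, and only converted with be64_to_cpu() when it is decomposed for logging. A hedged one-liner showing the comparison side of that convention (helper name made up for illustration; assumes the cookie is carried in its __be64 wire form as in the handler above):

/* Both values are big-endian, so no conversion is needed for equality. */
static inline int example_cookie_matches(struct dlm_lock *lock,
					 __be64 wire_cookie)
{
	return lock->ml.cookie == wire_cookie;
}
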
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index d5a86fb81a49..bb53714813ab 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
140 unsigned int purge_count; 140 unsigned int purge_count;
141 spinlock_t spinlock; 141 spinlock_t spinlock;
142 spinlock_t ast_lock; 142 spinlock_t ast_lock;
143 spinlock_t track_lock;
143 char *name; 144 char *name;
144 u8 node_num; 145 u8 node_num;
145 u32 key; 146 u32 key;
@@ -316,6 +317,8 @@ struct dlm_lock_resource
316 * put on a list for the dlm thread to run. */ 317 * put on a list for the dlm thread to run. */
317 unsigned long last_used; 318 unsigned long last_used;
318 319
320 struct dlm_ctxt *dlm;
321
319 unsigned migration_pending:1; 322 unsigned migration_pending:1;
320 atomic_t asts_reserved; 323 atomic_t asts_reserved;
321 spinlock_t spinlock; 324 spinlock_t spinlock;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 1b81dcba175d..b32f60a5acfb 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
630{ 630{
631 struct debug_lockres *dl = m->private; 631 struct debug_lockres *dl = m->private;
632 struct dlm_ctxt *dlm = dl->dl_ctxt; 632 struct dlm_ctxt *dlm = dl->dl_ctxt;
633 struct dlm_lock_resource *oldres = dl->dl_res;
633 struct dlm_lock_resource *res = NULL; 634 struct dlm_lock_resource *res = NULL;
635 struct list_head *track_list;
634 636
635 spin_lock(&dlm->spinlock); 637 spin_lock(&dlm->track_lock);
638 if (oldres)
639 track_list = &oldres->tracking;
640 else
641 track_list = &dlm->tracking_list;
636 642
637 if (dl->dl_res) { 643 list_for_each_entry(res, track_list, tracking) {
638 list_for_each_entry(res, &dl->dl_res->tracking, tracking) { 644 if (&res->tracking == &dlm->tracking_list)
639 if (dl->dl_res) { 645 res = NULL;
640 dlm_lockres_put(dl->dl_res); 646 else
641 dl->dl_res = NULL;
642 }
643 if (&res->tracking == &dlm->tracking_list) {
644 mlog(0, "End of list found, %p\n", res);
645 dl = NULL;
646 break;
647 }
648 dlm_lockres_get(res); 647 dlm_lockres_get(res);
649 dl->dl_res = res; 648 break;
650 break;
651 }
652 } else {
653 if (!list_empty(&dlm->tracking_list)) {
654 list_for_each_entry(res, &dlm->tracking_list, tracking)
655 break;
656 dlm_lockres_get(res);
657 dl->dl_res = res;
658 } else
659 dl = NULL;
660 } 649 }
650 spin_unlock(&dlm->track_lock);
661 651
662 if (dl) { 652 if (oldres)
663 spin_lock(&dl->dl_res->spinlock); 653 dlm_lockres_put(oldres);
664 dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
665 spin_unlock(&dl->dl_res->spinlock);
666 }
667 654
668 spin_unlock(&dlm->spinlock); 655 dl->dl_res = res;
656
657 if (res) {
658 spin_lock(&res->spinlock);
659 dump_lockres(res, dl->dl_buf, dl->dl_len - 1);
660 spin_unlock(&res->spinlock);
661 } else
662 dl = NULL;
669 663
664 /* passed to seq_show */
670 return dl; 665 return dl;
671} 666}
672 667
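
The rewritten lockres_seq_start() above walks dlm->tracking_list under the new track_lock, pinning the next lockres with dlm_lockres_get() before the lock is dropped, and only then dumps it under res->spinlock. A simplified sketch of that cursor pattern (assumptions: same locking rules as the hunk above; the helper name is hypothetical):

static struct dlm_lock_resource *
example_next_tracked(struct dlm_ctxt *dlm, struct dlm_lock_resource *prev)
{
	struct dlm_lock_resource *res = NULL;
	struct dlm_lock_resource *iter;
	struct list_head *start = prev ? &prev->tracking : &dlm->tracking_list;

	spin_lock(&dlm->track_lock);
	list_for_each_entry(iter, start, tracking) {
		/* Wrapped back to the list head: nothing left to dump. */
		if (&iter->tracking == &dlm->tracking_list)
			break;
		dlm_lockres_get(iter);	/* pin before dropping track_lock */
		res = iter;
		break;
	}
	spin_unlock(&dlm->track_lock);

	if (prev)
		dlm_lockres_put(prev);	/* drop the previous cursor position */
	return res;
}
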
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 63f8125824e8..d8d578f45613 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1550,6 +1550,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1550 spin_lock_init(&dlm->spinlock); 1550 spin_lock_init(&dlm->spinlock);
1551 spin_lock_init(&dlm->master_lock); 1551 spin_lock_init(&dlm->master_lock);
1552 spin_lock_init(&dlm->ast_lock); 1552 spin_lock_init(&dlm->ast_lock);
1553 spin_lock_init(&dlm->track_lock);
1553 INIT_LIST_HEAD(&dlm->list); 1554 INIT_LIST_HEAD(&dlm->list);
1554 INIT_LIST_HEAD(&dlm->dirty_list); 1555 INIT_LIST_HEAD(&dlm->dirty_list);
1555 INIT_LIST_HEAD(&dlm->reco.resources); 1556 INIT_LIST_HEAD(&dlm->reco.resources);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 44f87caf3683..54e182a27caf 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
505static void dlm_lockres_release(struct kref *kref) 505static void dlm_lockres_release(struct kref *kref)
506{ 506{
507 struct dlm_lock_resource *res; 507 struct dlm_lock_resource *res;
508 struct dlm_ctxt *dlm;
508 509
509 res = container_of(kref, struct dlm_lock_resource, refs); 510 res = container_of(kref, struct dlm_lock_resource, refs);
511 dlm = res->dlm;
510 512
511 /* This should not happen -- all lockres' have a name 513 /* This should not happen -- all lockres' have a name
512 * associated with them at init time. */ 514 * associated with them at init time. */
@@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref)
515 mlog(0, "destroying lockres %.*s\n", res->lockname.len, 517 mlog(0, "destroying lockres %.*s\n", res->lockname.len,
516 res->lockname.name); 518 res->lockname.name);
517 519
520 spin_lock(&dlm->track_lock);
518 if (!list_empty(&res->tracking)) 521 if (!list_empty(&res->tracking))
519 list_del_init(&res->tracking); 522 list_del_init(&res->tracking);
520 else { 523 else {
@@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref)
522 res->lockname.len, res->lockname.name); 525 res->lockname.len, res->lockname.name);
523 dlm_print_one_lock_resource(res); 526 dlm_print_one_lock_resource(res);
524 } 527 }
528 spin_unlock(&dlm->track_lock);
529
530 dlm_put(dlm);
525 531
526 if (!hlist_unhashed(&res->hash_node) || 532 if (!hlist_unhashed(&res->hash_node) ||
527 !list_empty(&res->granted) || 533 !list_empty(&res->granted) ||
@@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
595 res->migration_pending = 0; 601 res->migration_pending = 0;
596 res->inflight_locks = 0; 602 res->inflight_locks = 0;
597 603
604 /* put in dlm_lockres_release */
605 dlm_grab(dlm);
606 res->dlm = dlm;
607
598 kref_init(&res->refs); 608 kref_init(&res->refs);
599 609
600 /* just for consistency */ 610 /* just for consistency */
@@ -722,14 +732,21 @@ lookup:
722 if (tmpres) { 732 if (tmpres) {
723 int dropping_ref = 0; 733 int dropping_ref = 0;
724 734
735 spin_unlock(&dlm->spinlock);
736
725 spin_lock(&tmpres->spinlock); 737 spin_lock(&tmpres->spinlock);
738 /* We wait for the other thread that is mastering the resource */
739 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
740 __dlm_wait_on_lockres(tmpres);
741 BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
742 }
743
726 if (tmpres->owner == dlm->node_num) { 744 if (tmpres->owner == dlm->node_num) {
727 BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); 745 BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
728 dlm_lockres_grab_inflight_ref(dlm, tmpres); 746 dlm_lockres_grab_inflight_ref(dlm, tmpres);
729 } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) 747 } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
730 dropping_ref = 1; 748 dropping_ref = 1;
731 spin_unlock(&tmpres->spinlock); 749 spin_unlock(&tmpres->spinlock);
732 spin_unlock(&dlm->spinlock);
733 750
734 /* wait until done messaging the master, drop our ref to allow 751 /* wait until done messaging the master, drop our ref to allow
735 * the lockres to be purged, start over. */ 752 * the lockres to be purged, start over. */
@@ -2949,7 +2966,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2949 struct dlm_node_iter *iter) 2966 struct dlm_node_iter *iter)
2950{ 2967{
2951 struct dlm_migrate_request migrate; 2968 struct dlm_migrate_request migrate;
2952 int ret, status = 0; 2969 int ret, skip, status = 0;
2953 int nodenum; 2970 int nodenum;
2954 2971
2955 memset(&migrate, 0, sizeof(migrate)); 2972 memset(&migrate, 0, sizeof(migrate));
@@ -2966,12 +2983,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2966 nodenum == new_master) 2983 nodenum == new_master)
2967 continue; 2984 continue;
2968 2985
2986 /* We could race exit domain. If exited, skip. */
2987 spin_lock(&dlm->spinlock);
2988 skip = (!test_bit(nodenum, dlm->domain_map));
2989 spin_unlock(&dlm->spinlock);
2990 if (skip) {
2991 clear_bit(nodenum, iter->node_map);
2992 continue;
2993 }
2994
2969 ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key, 2995 ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
2970 &migrate, sizeof(migrate), nodenum, 2996 &migrate, sizeof(migrate), nodenum,
2971 &status); 2997 &status);
2972 if (ret < 0) 2998 if (ret < 0) {
2973 mlog_errno(ret); 2999 mlog(0, "migrate_request returned %d!\n", ret);
2974 else if (status < 0) { 3000 if (!dlm_is_host_down(ret)) {
3001 mlog(ML_ERROR, "unhandled error=%d!\n", ret);
3002 BUG();
3003 }
3004 clear_bit(nodenum, iter->node_map);
3005 ret = 0;
3006 } else if (status < 0) {
2975 mlog(0, "migrate request (node %u) returned %d!\n", 3007 mlog(0, "migrate request (node %u) returned %d!\n",
2976 nodenum, status); 3008 nodenum, status);
2977 ret = status; 3009 ret = status;
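
Two of the dlmmaster.c changes above pair up: dlm_init_lockres() now takes a reference on the dlm context and stores a backpointer, and dlm_lockres_release() uses that backpointer to take dlm->track_lock before dropping the reference with dlm_put(). A minimal sketch of why the pin has to happen at init time (illustration only, condensed from the hunks above):

static void example_lockres_pin_dlm(struct dlm_ctxt *dlm,
				    struct dlm_lock_resource *res)
{
	/*
	 * Without this reference the dlm_ctxt could go away while a
	 * lockres still exists, and the kref release callback would
	 * then take dlm->track_lock on freed memory.
	 */
	dlm_grab(dlm);		/* dropped via dlm_put() in dlm_lockres_release() */
	res->dlm = dlm;
}
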
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 4060bb328bc8..d1295203029f 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
181 181
182 spin_lock(&res->spinlock); 182 spin_lock(&res->spinlock);
183 /* This ensures that clear refmap is sent after the set */ 183 /* This ensures that clear refmap is sent after the set */
184 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); 184 __dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
185 DLM_LOCK_RES_MIGRATING));
185 spin_unlock(&res->spinlock); 186 spin_unlock(&res->spinlock);
186 187
187 /* clear our bit from the master's refmap, ignore errors */ 188 /* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6e6cc0a2e5f7..f731ab491795 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -32,6 +32,7 @@
32#include <linux/debugfs.h> 32#include <linux/debugfs.h>
33#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/quotaops.h>
35 36
36#define MLOG_MASK_PREFIX ML_DLM_GLUE 37#define MLOG_MASK_PREFIX ML_DLM_GLUE
37#include <cluster/masklog.h> 38#include <cluster/masklog.h>
@@ -51,6 +52,7 @@
51#include "slot_map.h" 52#include "slot_map.h"
52#include "super.h" 53#include "super.h"
53#include "uptodate.h" 54#include "uptodate.h"
55#include "quota.h"
54 56
55#include "buffer_head_io.h" 57#include "buffer_head_io.h"
56 58
@@ -68,6 +70,7 @@ struct ocfs2_mask_waiter {
68static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); 70static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
69static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); 71static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
70static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); 72static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
73static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
71 74
72/* 75/*
73 * Return value from ->downconvert_worker functions. 76 * Return value from ->downconvert_worker functions.
@@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
102static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, 105static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
103 struct ocfs2_lock_res *lockres); 106 struct ocfs2_lock_res *lockres);
104 107
108static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
105 109
106#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 110#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
107 111
@@ -111,8 +115,7 @@ static void ocfs2_dump_meta_lvb_info(u64 level,
111 unsigned int line, 115 unsigned int line,
112 struct ocfs2_lock_res *lockres) 116 struct ocfs2_lock_res *lockres)
113{ 117{
114 struct ocfs2_meta_lvb *lvb = 118 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
115 (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
116 119
117 mlog(level, "LVB information for %s (called from %s:%u):\n", 120 mlog(level, "LVB information for %s (called from %s:%u):\n",
118 lockres->l_name, function, line); 121 lockres->l_name, function, line);
@@ -258,6 +261,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
258 .flags = 0, 261 .flags = 0,
259}; 262};
260 263
264static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
265 .set_lvb = ocfs2_set_qinfo_lvb,
266 .get_osb = ocfs2_get_qinfo_osb,
267 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
268};
269
261static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 270static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
262{ 271{
263 return lockres->l_type == OCFS2_LOCK_TYPE_META || 272 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -279,6 +288,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
279 return (struct ocfs2_dentry_lock *)lockres->l_priv; 288 return (struct ocfs2_dentry_lock *)lockres->l_priv;
280} 289}
281 290
291static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
292{
293 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
294
295 return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
296}
297
282static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 298static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
283{ 299{
284 if (lockres->l_ops->get_osb) 300 if (lockres->l_ops->get_osb)
@@ -507,6 +523,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
507 return OCFS2_SB(inode->i_sb); 523 return OCFS2_SB(inode->i_sb);
508} 524}
509 525
526static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
527{
528 struct ocfs2_mem_dqinfo *info = lockres->l_priv;
529
530 return OCFS2_SB(info->dqi_gi.dqi_sb);
531}
532
510static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) 533static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
511{ 534{
512 struct ocfs2_file_private *fp = lockres->l_priv; 535 struct ocfs2_file_private *fp = lockres->l_priv;
@@ -609,6 +632,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
609 lockres->l_flags |= OCFS2_LOCK_NOCACHE; 632 lockres->l_flags |= OCFS2_LOCK_NOCACHE;
610} 633}
611 634
635void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
636 struct ocfs2_mem_dqinfo *info)
637{
638 ocfs2_lock_res_init_once(lockres);
639 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
640 0, lockres->l_name);
641 ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
642 OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
643 info);
644}
645
612void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 646void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
613{ 647{
614 mlog_entry_void(); 648 mlog_entry_void();
@@ -1829,7 +1863,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1829 1863
1830 mlog_entry_void(); 1864 mlog_entry_void();
1831 1865
1832 lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1866 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1833 1867
1834 /* 1868 /*
1835 * Invalidate the LVB of a deleted inode - this way other 1869 * Invalidate the LVB of a deleted inode - this way other
@@ -1881,7 +1915,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1881 1915
1882 mlog_meta_lvb(0, lockres); 1916 mlog_meta_lvb(0, lockres);
1883 1917
1884 lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); 1918 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1885 1919
1886 /* We're safe here without the lockres lock... */ 1920 /* We're safe here without the lockres lock... */
1887 spin_lock(&oi->ip_lock); 1921 spin_lock(&oi->ip_lock);
@@ -1916,8 +1950,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1916static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, 1950static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1917 struct ocfs2_lock_res *lockres) 1951 struct ocfs2_lock_res *lockres)
1918{ 1952{
1919 struct ocfs2_meta_lvb *lvb = 1953 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1920 (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
1921 1954
1922 if (lvb->lvb_version == OCFS2_LVB_VERSION 1955 if (lvb->lvb_version == OCFS2_LVB_VERSION
1923 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 1956 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
@@ -2024,7 +2057,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2024 } else { 2057 } else {
2025 /* Boo, we have to go to disk. */ 2058 /* Boo, we have to go to disk. */
2026 /* read bh, cast, ocfs2_refresh_inode */ 2059 /* read bh, cast, ocfs2_refresh_inode */
2027 status = ocfs2_read_block(inode, oi->ip_blkno, bh); 2060 status = ocfs2_read_inode_block(inode, bh);
2028 if (status < 0) { 2061 if (status < 0) {
2029 mlog_errno(status); 2062 mlog_errno(status);
2030 goto bail_refresh; 2063 goto bail_refresh;
@@ -2032,18 +2065,14 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2032 fe = (struct ocfs2_dinode *) (*bh)->b_data; 2065 fe = (struct ocfs2_dinode *) (*bh)->b_data;
2033 2066
2034 /* This is a good chance to make sure we're not 2067 /* This is a good chance to make sure we're not
2035 * locking an invalid object. 2068 * locking an invalid object. ocfs2_read_inode_block()
2069 * already checked that the inode block is sane.
2036 * 2070 *
2037 * We bug on a stale inode here because we checked 2071 * We bug on a stale inode here because we checked
2038 * above whether it was wiped from disk. The wiping 2072 * above whether it was wiped from disk. The wiping
2039 * node provides a guarantee that we receive that 2073 * node provides a guarantee that we receive that
2040 * message and can mark the inode before dropping any 2074 * message and can mark the inode before dropping any
2041 * locks associated with it. */ 2075 * locks associated with it. */
2042 if (!OCFS2_IS_VALID_DINODE(fe)) {
2043 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
2044 status = -EIO;
2045 goto bail_refresh;
2046 }
2047 mlog_bug_on_msg(inode->i_generation != 2076 mlog_bug_on_msg(inode->i_generation !=
2048 le32_to_cpu(fe->i_generation), 2077 le32_to_cpu(fe->i_generation),
2049 "Invalid dinode %llu disk generation: %u " 2078 "Invalid dinode %llu disk generation: %u "
@@ -2085,7 +2114,7 @@ static int ocfs2_assign_bh(struct inode *inode,
2085 return 0; 2114 return 0;
2086 } 2115 }
2087 2116
2088 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); 2117 status = ocfs2_read_inode_block(inode, ret_bh);
2089 if (status < 0) 2118 if (status < 0)
2090 mlog_errno(status); 2119 mlog_errno(status);
2091 2120
@@ -3449,6 +3478,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3449 return UNBLOCK_CONTINUE_POST; 3478 return UNBLOCK_CONTINUE_POST;
3450} 3479}
3451 3480
3481static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3482{
3483 struct ocfs2_qinfo_lvb *lvb;
3484 struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
3485 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3486 oinfo->dqi_gi.dqi_type);
3487
3488 mlog_entry_void();
3489
3490 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3491 lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
3492 lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
3493 lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
3494 lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
3495 lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
3496 lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
3497 lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
3498
3499 mlog_exit_void();
3500}
3501
3502void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3503{
3504 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3505 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3506 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3507
3508 mlog_entry_void();
3509 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
3510 ocfs2_cluster_unlock(osb, lockres, level);
3511 mlog_exit_void();
3512}
3513
3514static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3515{
3516 struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
3517 oinfo->dqi_gi.dqi_type);
3518 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3519 struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
3520 struct buffer_head *bh = NULL;
3521 struct ocfs2_global_disk_dqinfo *gdinfo;
3522 int status = 0;
3523
3524 if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3525 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3526 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3527 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
3528 oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
3529 oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
3530 oinfo->dqi_gi.dqi_free_entry =
3531 be32_to_cpu(lvb->lvb_free_entry);
3532 } else {
3533 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
3534 if (status) {
3535 mlog_errno(status);
3536 goto bail;
3537 }
3538 gdinfo = (struct ocfs2_global_disk_dqinfo *)
3539 (bh->b_data + OCFS2_GLOBAL_INFO_OFF);
3540 info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
3541 info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
3542 oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
3543 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
3544 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
3545 oinfo->dqi_gi.dqi_free_entry =
3546 le32_to_cpu(gdinfo->dqi_free_entry);
3547 brelse(bh);
3548 ocfs2_track_lock_refresh(lockres);
3549 }
3550
3551bail:
3552 return status;
3553}
3554
3555/* Lock quota info. This function expects at least a shared lock on the quota file
3556 * so that we can safely refresh the quota info from disk. */
3557int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
3558{
3559 struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
3560 struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
3561 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3562 int status = 0;
3563
3564 mlog_entry_void();
3565
3566 /* On RO devices, locking really isn't needed... */
3567 if (ocfs2_is_hard_readonly(osb)) {
3568 if (ex)
3569 status = -EROFS;
3570 goto bail;
3571 }
3572 if (ocfs2_mount_local(osb))
3573 goto bail;
3574
3575 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3576 if (status < 0) {
3577 mlog_errno(status);
3578 goto bail;
3579 }
3580 if (!ocfs2_should_refresh_lock_res(lockres))
3581 goto bail;
3582 /* OK, we have the lock but we need to refresh the quota info */
3583 status = ocfs2_refresh_qinfo(oinfo);
3584 if (status)
3585 ocfs2_qinfo_unlock(oinfo, ex);
3586 ocfs2_complete_lock_res_refresh(lockres, status);
3587bail:
3588 mlog_exit(status);
3589 return status;
3590}
3591
3452/* 3592/*
3453 * This is the filesystem locking protocol. It provides the lock handling 3593 * This is the filesystem locking protocol. It provides the lock handling
3454 * hooks for the underlying DLM. It has a maximum version number. 3594 * hooks for the underlying DLM. It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 2bb01f09c1b1..3f8d9986b8e0 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
49 __be32 lvb_reserved2; 49 __be32 lvb_reserved2;
50}; 50};
51 51
52#define OCFS2_QINFO_LVB_VERSION 1
53
54struct ocfs2_qinfo_lvb {
55 __u8 lvb_version;
56 __u8 lvb_reserved[3];
57 __be32 lvb_bgrace;
58 __be32 lvb_igrace;
59 __be32 lvb_syncms;
60 __be32 lvb_blocks;
61 __be32 lvb_free_blk;
62 __be32 lvb_free_entry;
63};
64
52/* ocfs2_inode_lock_full() 'arg_flags' flags */ 65/* ocfs2_inode_lock_full() 'arg_flags' flags */
53/* don't wait on recovery. */ 66/* don't wait on recovery. */
54#define OCFS2_META_LOCK_RECOVERY (0x01) 67#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
69struct ocfs2_file_private; 82struct ocfs2_file_private;
70void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 83void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
71 struct ocfs2_file_private *fp); 84 struct ocfs2_file_private *fp);
85struct ocfs2_mem_dqinfo;
86void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
87 struct ocfs2_mem_dqinfo *info);
72void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 88void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
73int ocfs2_create_new_inode_locks(struct inode *inode); 89int ocfs2_create_new_inode_locks(struct inode *inode);
74int ocfs2_drop_inode_locks(struct inode *inode); 90int ocfs2_drop_inode_locks(struct inode *inode);
@@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
103void ocfs2_dentry_unlock(struct dentry *dentry, int ex); 119void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
104int ocfs2_file_lock(struct file *file, int ex, int trylock); 120int ocfs2_file_lock(struct file *file, int ex, int trylock);
105void ocfs2_file_unlock(struct file *file); 121void ocfs2_file_unlock(struct file *file);
122int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
123void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
124
106 125
107void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 126void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
108void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, 127void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
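
For illustration only (not part of the patch): a caller of the newly exported quota-info
cluster lock would look roughly like the sketch below. The function name is hypothetical;
the real callers live in the quota code added elsewhere in this series.

/* Illustrative sketch only -- not in the patch. */
static int example_sync_quota_info(struct ocfs2_mem_dqinfo *oinfo)
{
	int status;

	/* Taking the lock also refreshes the cached mem_dqinfo, either
	 * from the LVB or, failing that, from the global quota file. */
	status = ocfs2_qinfo_lock(oinfo, 1);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	/* ... update grace times, sync interval, etc. here ... */

	ocfs2_qinfo_unlock(oinfo, 1);
	return 0;
}
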
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2baedac58234..f2bb1a04d253 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
293 struct ocfs2_extent_block *eb; 293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el; 294 struct ocfs2_extent_list *el;
295 295
296 ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); 296 ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
297 if (ret) { 297 if (ret) {
298 mlog_errno(ret); 298 mlog_errno(ret);
299 goto out; 299 goto out;
@@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
302 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 302 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
303 el = &eb->h_list; 303 el = &eb->h_list;
304 304
305 if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
306 ret = -EROFS;
307 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
308 goto out;
309 }
310
311 if (el->l_tree_depth) { 305 if (el->l_tree_depth) {
312 ocfs2_error(inode->i_sb, 306 ocfs2_error(inode->i_sb,
313 "Inode %lu has non zero tree depth in " 307 "Inode %lu has non zero tree depth in "
@@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
381 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 375 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
382 goto no_more_extents; 376 goto no_more_extents;
383 377
384 ret = ocfs2_read_block(inode, 378 ret = ocfs2_read_extent_block(inode,
385 le64_to_cpu(eb->h_next_leaf_blk), 379 le64_to_cpu(eb->h_next_leaf_blk),
386 &next_eb_bh); 380 &next_eb_bh);
387 if (ret) { 381 if (ret) {
388 mlog_errno(ret); 382 mlog_errno(ret);
389 goto out; 383 goto out;
390 } 384 }
391 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
392
393 if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
394 ret = -EROFS;
395 OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
396 goto out;
397 }
398 385
386 next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
399 el = &next_eb->h_list; 387 el = &next_eb->h_list;
400
401 i = ocfs2_search_for_hole_index(el, v_cluster); 388 i = ocfs2_search_for_hole_index(el, v_cluster);
402 } 389 }
403 390
@@ -630,7 +617,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
630 if (ret == 0) 617 if (ret == 0)
631 goto out; 618 goto out;
632 619
633 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); 620 ret = ocfs2_read_inode_block(inode, &di_bh);
634 if (ret) { 621 if (ret) {
635 mlog_errno(ret); 622 mlog_errno(ret);
636 goto out; 623 goto out;
@@ -819,3 +806,74 @@ out:
819 806
820 return ret; 807 return ret;
821} 808}
809
810int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
811 struct buffer_head *bhs[], int flags,
812 int (*validate)(struct super_block *sb,
813 struct buffer_head *bh))
814{
815 int rc = 0;
816 u64 p_block, p_count;
817 int i, count, done = 0;
818
819 mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
820 "flags = %x, validate = %p)\n",
821 inode, (unsigned long long)v_block, nr, bhs, flags,
822 validate);
823
824 if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
825 i_size_read(inode)) {
826 BUG_ON(!(flags & OCFS2_BH_READAHEAD));
827 goto out;
828 }
829
830 while (done < nr) {
831 down_read(&OCFS2_I(inode)->ip_alloc_sem);
832 rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
833 &p_block, &p_count, NULL);
834 up_read(&OCFS2_I(inode)->ip_alloc_sem);
835 if (rc) {
836 mlog_errno(rc);
837 break;
838 }
839
840 if (!p_block) {
841 rc = -EIO;
842 mlog(ML_ERROR,
843 "Inode #%llu contains a hole at offset %llu\n",
844 (unsigned long long)OCFS2_I(inode)->ip_blkno,
845 (unsigned long long)(v_block + done) <<
846 inode->i_sb->s_blocksize_bits);
847 break;
848 }
849
850 count = nr - done;
851 if (p_count < count)
852 count = p_count;
853
854 /*
855 * If the caller passed us bhs, they should have come
856 * from a previous readahead call to this function. Thus,
857 * they should have the right b_blocknr.
858 */
859 for (i = 0; i < count; i++) {
860 if (!bhs[done + i])
861 continue;
862 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
863 }
864
865 rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
866 flags, validate);
867 if (rc) {
868 mlog_errno(rc);
869 break;
870 }
871 done += count;
872 }
873
874out:
875 mlog_exit(rc);
876 return rc;
877}
878
879
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1c4aa8b06f34..b7dd9731b462 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
57 u32 *p_cluster, u32 *num_clusters, 57 u32 *p_cluster, u32 *num_clusters,
58 struct ocfs2_extent_list *el); 58 struct ocfs2_extent_list *el);
59 59
60int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
61 struct buffer_head *bhs[], int flags,
62 int (*validate)(struct super_block *sb,
63 struct buffer_head *bh));
64static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
65 struct buffer_head **bh,
66 int (*validate)(struct super_block *sb,
67 struct buffer_head *bh))
68{
69 int status = 0;
70
71 if (bh == NULL) {
72 printk("ocfs2: bh == NULL\n");
73 status = -EINVAL;
74 goto bail;
75 }
76
77 status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
78
79bail:
80 return status;
81}
82
83
60#endif /* _EXTENT_MAP_H */ 84#endif /* _EXTENT_MAP_H */
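
For illustration only (not part of the patch): the new virtual-block readers take a
per-block-type validate callback. A hypothetical caller might look like the sketch
below; both function names are made up for the example.

/* Illustrative sketch only -- not in the patch. */
static int example_validate_block(struct super_block *sb,
				  struct buffer_head *bh)
{
	/* A real validator checks the signature and metaecc data for
	 * its specific block type. */
	return 0;
}

static int example_read_first_virt_block(struct inode *inode,
					 struct buffer_head **bh)
{
	int rc;

	/* *bh may be NULL; ocfs2_read_virt_block() allocates one then. */
	rc = ocfs2_read_virt_block(inode, 0, bh, example_validate_block);
	if (rc)
		mlog_errno(rc);
	return rc;
}
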
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e2570a3bc2b2..e8f795f978aa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -35,6 +35,7 @@
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/writeback.h> 36#include <linux/writeback.h>
37#include <linux/falloc.h> 37#include <linux/falloc.h>
38#include <linux/quotaops.h>
38 39
39#define MLOG_MASK_PREFIX ML_INODE 40#define MLOG_MASK_PREFIX ML_INODE
40#include <cluster/masklog.h> 41#include <cluster/masklog.h>
@@ -56,6 +57,8 @@
56#include "suballoc.h" 57#include "suballoc.h"
57#include "super.h" 58#include "super.h"
58#include "xattr.h" 59#include "xattr.h"
60#include "acl.h"
61#include "quota.h"
59 62
60#include "buffer_head_io.h" 63#include "buffer_head_io.h"
61 64
@@ -253,8 +256,8 @@ int ocfs2_update_inode_atime(struct inode *inode,
253 goto out; 256 goto out;
254 } 257 }
255 258
256 ret = ocfs2_journal_access(handle, inode, bh, 259 ret = ocfs2_journal_access_di(handle, inode, bh,
257 OCFS2_JOURNAL_ACCESS_WRITE); 260 OCFS2_JOURNAL_ACCESS_WRITE);
258 if (ret) { 261 if (ret) {
259 mlog_errno(ret); 262 mlog_errno(ret);
260 goto out_commit; 263 goto out_commit;
@@ -303,9 +306,9 @@ bail:
303 return status; 306 return status;
304} 307}
305 308
306static int ocfs2_simple_size_update(struct inode *inode, 309int ocfs2_simple_size_update(struct inode *inode,
307 struct buffer_head *di_bh, 310 struct buffer_head *di_bh,
308 u64 new_i_size) 311 u64 new_i_size)
309{ 312{
310 int ret; 313 int ret;
311 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 314 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -350,8 +353,8 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
350 goto out; 353 goto out;
351 } 354 }
352 355
353 status = ocfs2_journal_access(handle, inode, fe_bh, 356 status = ocfs2_journal_access_di(handle, inode, fe_bh,
354 OCFS2_JOURNAL_ACCESS_WRITE); 357 OCFS2_JOURNAL_ACCESS_WRITE);
355 if (status < 0) { 358 if (status < 0) {
356 mlog_errno(status); 359 mlog_errno(status);
357 goto out_commit; 360 goto out_commit;
@@ -401,12 +404,9 @@ static int ocfs2_truncate_file(struct inode *inode,
401 (unsigned long long)OCFS2_I(inode)->ip_blkno, 404 (unsigned long long)OCFS2_I(inode)->ip_blkno,
402 (unsigned long long)new_i_size); 405 (unsigned long long)new_i_size);
403 406
407 /* We trust di_bh because it comes from ocfs2_inode_lock(), which
408 * already validated it */
404 fe = (struct ocfs2_dinode *) di_bh->b_data; 409 fe = (struct ocfs2_dinode *) di_bh->b_data;
405 if (!OCFS2_IS_VALID_DINODE(fe)) {
406 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
407 status = -EIO;
408 goto bail;
409 }
410 410
411 mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), 411 mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
412 "Inode %llu, inode i_size = %lld != di " 412 "Inode %llu, inode i_size = %lld != di "
@@ -536,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
536 enum ocfs2_alloc_restarted why; 536 enum ocfs2_alloc_restarted why;
537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
538 struct ocfs2_extent_tree et; 538 struct ocfs2_extent_tree et;
539 int did_quota = 0;
539 540
540 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); 541 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
541 542
@@ -545,18 +546,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
545 */ 546 */
546 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); 547 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
547 548
548 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); 549 status = ocfs2_read_inode_block(inode, &bh);
549 if (status < 0) { 550 if (status < 0) {
550 mlog_errno(status); 551 mlog_errno(status);
551 goto leave; 552 goto leave;
552 } 553 }
553
554 fe = (struct ocfs2_dinode *) bh->b_data; 554 fe = (struct ocfs2_dinode *) bh->b_data;
555 if (!OCFS2_IS_VALID_DINODE(fe)) {
556 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
557 status = -EIO;
558 goto leave;
559 }
560 555
561restart_all: 556restart_all:
562 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); 557 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
@@ -585,11 +580,18 @@ restart_all:
585 } 580 }
586 581
587restarted_transaction: 582restarted_transaction:
583 if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
584 clusters_to_add))) {
585 status = -EDQUOT;
586 goto leave;
587 }
588 did_quota = 1;
589
588 /* reserve a write to the file entry early on - that way if we 590 /* reserve a write to the file entry early on - that way if we
589 * run out of credits in the allocation path, we can still 591 * run out of credits in the allocation path, we can still
590 * update i_size. */ 592 * update i_size. */
591 status = ocfs2_journal_access(handle, inode, bh, 593 status = ocfs2_journal_access_di(handle, inode, bh,
592 OCFS2_JOURNAL_ACCESS_WRITE); 594 OCFS2_JOURNAL_ACCESS_WRITE);
593 if (status < 0) { 595 if (status < 0) {
594 mlog_errno(status); 596 mlog_errno(status);
595 goto leave; 597 goto leave;
@@ -622,6 +624,10 @@ restarted_transaction:
622 spin_lock(&OCFS2_I(inode)->ip_lock); 624 spin_lock(&OCFS2_I(inode)->ip_lock);
623 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 625 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
624 spin_unlock(&OCFS2_I(inode)->ip_lock); 626 spin_unlock(&OCFS2_I(inode)->ip_lock);
627 /* Release unused quota reservation */
628 vfs_dq_free_space(inode,
629 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
630 did_quota = 0;
625 631
626 if (why != RESTART_NONE && clusters_to_add) { 632 if (why != RESTART_NONE && clusters_to_add) {
627 if (why == RESTART_META) { 633 if (why == RESTART_META) {
@@ -654,6 +660,9 @@ restarted_transaction:
654 OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); 660 OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
655 661
656leave: 662leave:
663 if (status < 0 && did_quota)
664 vfs_dq_free_space(inode,
665 ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
657 if (handle) { 666 if (handle) {
658 ocfs2_commit_trans(osb, handle); 667 ocfs2_commit_trans(osb, handle);
659 handle = NULL; 668 handle = NULL;
@@ -885,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
885 struct ocfs2_super *osb = OCFS2_SB(sb); 894 struct ocfs2_super *osb = OCFS2_SB(sb);
886 struct buffer_head *bh = NULL; 895 struct buffer_head *bh = NULL;
887 handle_t *handle = NULL; 896 handle_t *handle = NULL;
897 int locked[MAXQUOTAS] = {0, 0};
898 int credits, qtype;
899 struct ocfs2_mem_dqinfo *oinfo;
888 900
889 mlog_entry("(0x%p, '%.*s')\n", dentry, 901 mlog_entry("(0x%p, '%.*s')\n", dentry,
890 dentry->d_name.len, dentry->d_name.name); 902 dentry->d_name.len, dentry->d_name.name);
@@ -955,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
955 } 967 }
956 } 968 }
957 969
958 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 970 if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
959 if (IS_ERR(handle)) { 971 (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
960 status = PTR_ERR(handle); 972 credits = OCFS2_INODE_UPDATE_CREDITS;
961 mlog_errno(status); 973 if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
962 goto bail_unlock; 974 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
975 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
976 oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
977 status = ocfs2_lock_global_qf(oinfo, 1);
978 if (status < 0)
979 goto bail_unlock;
980 credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
981 ocfs2_calc_qdel_credits(sb, USRQUOTA);
982 locked[USRQUOTA] = 1;
983 }
984 if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
985 && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
986 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
987 oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
988 status = ocfs2_lock_global_qf(oinfo, 1);
989 if (status < 0)
990 goto bail_unlock;
991 credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
992 ocfs2_calc_qdel_credits(sb, GRPQUOTA);
993 locked[GRPQUOTA] = 1;
994 }
995 handle = ocfs2_start_trans(osb, credits);
996 if (IS_ERR(handle)) {
997 status = PTR_ERR(handle);
998 mlog_errno(status);
999 goto bail_unlock;
1000 }
1001 status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
1002 if (status < 0)
1003 goto bail_commit;
1004 } else {
1005 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1006 if (IS_ERR(handle)) {
1007 status = PTR_ERR(handle);
1008 mlog_errno(status);
1009 goto bail_unlock;
1010 }
963 } 1011 }
964 1012
965 /* 1013 /*
@@ -982,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
982bail_commit: 1030bail_commit:
983 ocfs2_commit_trans(osb, handle); 1031 ocfs2_commit_trans(osb, handle);
984bail_unlock: 1032bail_unlock:
1033 for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
1034 if (!locked[qtype])
1035 continue;
1036 oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
1037 ocfs2_unlock_global_qf(oinfo, 1);
1038 }
985 ocfs2_inode_unlock(inode, 1); 1039 ocfs2_inode_unlock(inode, 1);
986bail_unlock_rw: 1040bail_unlock_rw:
987 if (size_change) 1041 if (size_change)
@@ -989,6 +1043,12 @@ bail_unlock_rw:
989bail: 1043bail:
990 brelse(bh); 1044 brelse(bh);
991 1045
1046 if (!status && attr->ia_valid & ATTR_MODE) {
1047 status = ocfs2_acl_chmod(inode);
1048 if (status < 0)
1049 mlog_errno(status);
1050 }
1051
992 mlog_exit(status); 1052 mlog_exit(status);
993 return status; 1053 return status;
994} 1054}
@@ -1035,7 +1095,7 @@ int ocfs2_permission(struct inode *inode, int mask)
1035 goto out; 1095 goto out;
1036 } 1096 }
1037 1097
1038 ret = generic_permission(inode, mask, NULL); 1098 ret = generic_permission(inode, mask, ocfs2_check_acl);
1039 1099
1040 ocfs2_inode_unlock(inode, 0); 1100 ocfs2_inode_unlock(inode, 0);
1041out: 1101out:
@@ -1061,8 +1121,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1061 goto out; 1121 goto out;
1062 } 1122 }
1063 1123
1064 ret = ocfs2_journal_access(handle, inode, bh, 1124 ret = ocfs2_journal_access_di(handle, inode, bh,
1065 OCFS2_JOURNAL_ACCESS_WRITE); 1125 OCFS2_JOURNAL_ACCESS_WRITE);
1066 if (ret < 0) { 1126 if (ret < 0) {
1067 mlog_errno(ret); 1127 mlog_errno(ret);
1068 goto out_trans; 1128 goto out_trans;
@@ -1128,9 +1188,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
1128{ 1188{
1129 int ret; 1189 int ret;
1130 struct buffer_head *bh = NULL; 1190 struct buffer_head *bh = NULL;
1131 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1132 1191
1133 ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); 1192 ret = ocfs2_read_inode_block(inode, &bh);
1134 if (ret < 0) { 1193 if (ret < 0) {
1135 mlog_errno(ret); 1194 mlog_errno(ret);
1136 goto out; 1195 goto out;
@@ -1156,8 +1215,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1156 struct buffer_head *di_bh = NULL; 1215 struct buffer_head *di_bh = NULL;
1157 1216
1158 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1217 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1159 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, 1218 ret = ocfs2_read_inode_block(inode, &di_bh);
1160 &di_bh);
1161 if (ret) { 1219 if (ret) {
1162 mlog_errno(ret); 1220 mlog_errno(ret);
1163 goto out; 1221 goto out;
@@ -1226,83 +1284,6 @@ out:
1226 return ret; 1284 return ret;
1227} 1285}
1228 1286
1229static int __ocfs2_remove_inode_range(struct inode *inode,
1230 struct buffer_head *di_bh,
1231 u32 cpos, u32 phys_cpos, u32 len,
1232 struct ocfs2_cached_dealloc_ctxt *dealloc)
1233{
1234 int ret;
1235 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
1236 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1237 struct inode *tl_inode = osb->osb_tl_inode;
1238 handle_t *handle;
1239 struct ocfs2_alloc_context *meta_ac = NULL;
1240 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1241 struct ocfs2_extent_tree et;
1242
1243 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
1244
1245 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
1246 if (ret) {
1247 mlog_errno(ret);
1248 return ret;
1249 }
1250
1251 mutex_lock(&tl_inode->i_mutex);
1252
1253 if (ocfs2_truncate_log_needs_flush(osb)) {
1254 ret = __ocfs2_flush_truncate_log(osb);
1255 if (ret < 0) {
1256 mlog_errno(ret);
1257 goto out;
1258 }
1259 }
1260
1261 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1262 if (IS_ERR(handle)) {
1263 ret = PTR_ERR(handle);
1264 mlog_errno(ret);
1265 goto out;
1266 }
1267
1268 ret = ocfs2_journal_access(handle, inode, di_bh,
1269 OCFS2_JOURNAL_ACCESS_WRITE);
1270 if (ret) {
1271 mlog_errno(ret);
1272 goto out;
1273 }
1274
1275 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
1276 dealloc);
1277 if (ret) {
1278 mlog_errno(ret);
1279 goto out_commit;
1280 }
1281
1282 OCFS2_I(inode)->ip_clusters -= len;
1283 di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1284
1285 ret = ocfs2_journal_dirty(handle, di_bh);
1286 if (ret) {
1287 mlog_errno(ret);
1288 goto out_commit;
1289 }
1290
1291 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
1292 if (ret)
1293 mlog_errno(ret);
1294
1295out_commit:
1296 ocfs2_commit_trans(osb, handle);
1297out:
1298 mutex_unlock(&tl_inode->i_mutex);
1299
1300 if (meta_ac)
1301 ocfs2_free_alloc_context(meta_ac);
1302
1303 return ret;
1304}
1305
1306/* 1287/*
1307 * Truncate a byte range, avoiding pages within partial clusters. This 1288 * Truncate a byte range, avoiding pages within partial clusters. This
1308 * preserves those pages for the zeroing code to write to. 1289 * preserves those pages for the zeroing code to write to.
@@ -1402,7 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1402 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1383 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1403 struct ocfs2_cached_dealloc_ctxt dealloc; 1384 struct ocfs2_cached_dealloc_ctxt dealloc;
1404 struct address_space *mapping = inode->i_mapping; 1385 struct address_space *mapping = inode->i_mapping;
1386 struct ocfs2_extent_tree et;
1405 1387
1388 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
1406 ocfs2_init_dealloc_ctxt(&dealloc); 1389 ocfs2_init_dealloc_ctxt(&dealloc);
1407 1390
1408 if (byte_len == 0) 1391 if (byte_len == 0)
@@ -1458,9 +1441,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1458 1441
1459 /* Only do work for non-holes */ 1442 /* Only do work for non-holes */
1460 if (phys_cpos != 0) { 1443 if (phys_cpos != 0) {
1461 ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, 1444 ret = ocfs2_remove_btree_range(inode, &et, cpos,
1462 phys_cpos, alloc_size, 1445 phys_cpos, alloc_size,
1463 &dealloc); 1446 &dealloc);
1464 if (ret) { 1447 if (ret) {
1465 mlog_errno(ret); 1448 mlog_errno(ret);
1466 goto out; 1449 goto out;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e92382cbca5f..172f9fbc9fc7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
51 struct ocfs2_alloc_context *data_ac, 51 struct ocfs2_alloc_context *data_ac,
52 struct ocfs2_alloc_context *meta_ac, 52 struct ocfs2_alloc_context *meta_ac,
53 enum ocfs2_alloc_restarted *reason_ret); 53 enum ocfs2_alloc_restarted *reason_ret);
54int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh,
56 u64 new_i_size);
54int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 57int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
55 u64 zero_to); 58 u64 zero_to);
56int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 59int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
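
For illustration only (not part of the patch): the allocation paths above now bracket
cluster allocation with a quota reservation. A minimal sketch of that pattern follows,
with a hypothetical function name and the actual allocation step elided.

/* Illustrative sketch only -- not in the patch. */
static int example_alloc_with_quota(struct inode *inode, u32 clusters)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u64 bytes = ocfs2_clusters_to_bytes(osb->sb, clusters);
	int status;

	/* Reserve quota before touching the allocators. */
	if (vfs_dq_alloc_space_nodirty(inode, bytes))
		return -EDQUOT;

	status = 0; /* ... extend the allocation here (elided) ... */
	if (status < 0) {
		/* On failure the reservation must be given back; the real
		 * code also frees the part covering unused clusters. */
		vfs_dq_free_space(inode, bytes);
	}

	return status;
}
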
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7aa00d511874..229e707bc050 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,6 +28,7 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/pagemap.h> 30#include <linux/pagemap.h>
31#include <linux/quotaops.h>
31 32
32#include <asm/byteorder.h> 33#include <asm/byteorder.h>
33 34
@@ -37,6 +38,7 @@
37#include "ocfs2.h" 38#include "ocfs2.h"
38 39
39#include "alloc.h" 40#include "alloc.h"
41#include "blockcheck.h"
40#include "dlmglue.h" 42#include "dlmglue.h"
41#include "extent_map.h" 43#include "extent_map.h"
42#include "file.h" 44#include "file.h"
@@ -214,12 +216,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
214 return 0; 216 return 0;
215} 217}
216 218
217int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, 219void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
218 int create_ino) 220 int create_ino)
219{ 221{
220 struct super_block *sb; 222 struct super_block *sb;
221 struct ocfs2_super *osb; 223 struct ocfs2_super *osb;
222 int status = -EINVAL;
223 int use_plocks = 1; 224 int use_plocks = 1;
224 225
225 mlog_entry("(0x%p, size:%llu)\n", inode, 226 mlog_entry("(0x%p, size:%llu)\n", inode,
@@ -232,25 +233,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
232 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) 233 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
233 use_plocks = 0; 234 use_plocks = 0;
234 235
235 /* this means that read_inode cannot create a superblock inode 236 /*
236 * today. change if needed. */ 237 * These have all been checked by ocfs2_read_inode_block() or set
237 if (!OCFS2_IS_VALID_DINODE(fe) || 238 * by ocfs2_mknod_locked(), so a failure is a code bug.
238 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { 239 */
239 mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " 240 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode
240 "signature = %.*s, flags = 0x%x\n", 241 cannot create a superblock
241 inode->i_ino, 242 inode today. change if
242 (unsigned long long)le64_to_cpu(fe->i_blkno), 7, 243 that is needed. */
243 fe->i_signature, le32_to_cpu(fe->i_flags)); 244 BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
244 goto bail; 245 BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
245 }
246 246
247 if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
248 mlog(ML_ERROR, "file entry generation does not match "
249 "superblock! osb->fs_generation=%x, "
250 "fe->i_fs_generation=%x\n",
251 osb->fs_generation, le32_to_cpu(fe->i_fs_generation));
252 goto bail;
253 }
254 247
255 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 248 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
256 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 249 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
@@ -284,14 +277,18 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
284 277
285 inode->i_nlink = le16_to_cpu(fe->i_links_count); 278 inode->i_nlink = le16_to_cpu(fe->i_links_count);
286 279
287 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) 280 if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
288 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; 281 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
282 inode->i_flags |= S_NOQUOTA;
283 }
289 284
290 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { 285 if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
291 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 286 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
292 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); 287 mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
293 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { 288 } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
294 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; 289 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
290 } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
291 inode->i_flags |= S_NOQUOTA;
295 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { 292 } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
296 mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino); 293 mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
297 /* we can't actually hit this as read_inode can't 294 /* we can't actually hit this as read_inode can't
@@ -354,10 +351,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
354 351
355 ocfs2_set_inode_flags(inode); 352 ocfs2_set_inode_flags(inode);
356 353
357 status = 0; 354 mlog_exit_void();
358bail:
359 mlog_exit(status);
360 return status;
361} 355}
362 356
363static int ocfs2_read_locked_inode(struct inode *inode, 357static int ocfs2_read_locked_inode(struct inode *inode,
@@ -460,11 +454,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
460 } 454 }
461 } 455 }
462 456
463 if (can_lock) 457 if (can_lock) {
464 status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, 458 status = ocfs2_read_inode_block_full(inode, &bh,
465 OCFS2_BH_IGNORE_CACHE); 459 OCFS2_BH_IGNORE_CACHE);
466 else 460 } else {
467 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); 461 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
462 if (!status)
463 status = ocfs2_validate_inode_block(osb->sb, bh);
464 }
468 if (status < 0) { 465 if (status < 0) {
469 mlog_errno(status); 466 mlog_errno(status);
470 goto bail; 467 goto bail;
@@ -472,12 +469,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
472 469
473 status = -EINVAL; 470 status = -EINVAL;
474 fe = (struct ocfs2_dinode *) bh->b_data; 471 fe = (struct ocfs2_dinode *) bh->b_data;
475 if (!OCFS2_IS_VALID_DINODE(fe)) {
476 mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
477 (unsigned long long)args->fi_blkno, 7,
478 fe->i_signature);
479 goto bail;
480 }
481 472
482 /* 473 /*
483 * This is a code bug. Right now the caller needs to 474 * This is a code bug. Right now the caller needs to
@@ -491,10 +482,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
491 482
492 if (S_ISCHR(le16_to_cpu(fe->i_mode)) || 483 if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
493 S_ISBLK(le16_to_cpu(fe->i_mode))) 484 S_ISBLK(le16_to_cpu(fe->i_mode)))
494 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); 485 inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
495 486
496 if (ocfs2_populate_inode(inode, fe, 0) < 0) 487 ocfs2_populate_inode(inode, fe, 0);
497 goto bail;
498 488
499 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); 489 BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
500 490
@@ -547,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
547 goto out; 537 goto out;
548 } 538 }
549 539
550 status = ocfs2_journal_access(handle, inode, fe_bh, 540 status = ocfs2_journal_access_di(handle, inode, fe_bh,
551 OCFS2_JOURNAL_ACCESS_WRITE); 541 OCFS2_JOURNAL_ACCESS_WRITE);
552 if (status < 0) { 542 if (status < 0) {
553 mlog_errno(status); 543 mlog_errno(status);
554 goto out; 544 goto out;
@@ -615,7 +605,8 @@ static int ocfs2_remove_inode(struct inode *inode,
615 goto bail; 605 goto bail;
616 } 606 }
617 607
618 handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); 608 handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
609 ocfs2_quota_trans_credits(inode->i_sb));
619 if (IS_ERR(handle)) { 610 if (IS_ERR(handle)) {
620 status = PTR_ERR(handle); 611 status = PTR_ERR(handle);
621 mlog_errno(status); 612 mlog_errno(status);
@@ -630,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode,
630 } 621 }
631 622
632 /* set the inodes dtime */ 623 /* set the inodes dtime */
633 status = ocfs2_journal_access(handle, inode, di_bh, 624 status = ocfs2_journal_access_di(handle, inode, di_bh,
634 OCFS2_JOURNAL_ACCESS_WRITE); 625 OCFS2_JOURNAL_ACCESS_WRITE);
635 if (status < 0) { 626 if (status < 0) {
636 mlog_errno(status); 627 mlog_errno(status);
637 goto bail_commit; 628 goto bail_commit;
@@ -647,6 +638,7 @@ static int ocfs2_remove_inode(struct inode *inode,
647 } 638 }
648 639
649 ocfs2_remove_from_cache(inode, di_bh); 640 ocfs2_remove_from_cache(inode, di_bh);
641 vfs_dq_free_inode(inode);
650 642
651 status = ocfs2_free_dinode(handle, inode_alloc_inode, 643 status = ocfs2_free_dinode(handle, inode_alloc_inode,
652 inode_alloc_bh, di); 644 inode_alloc_bh, di);
@@ -929,7 +921,10 @@ void ocfs2_delete_inode(struct inode *inode)
929 921
930 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 922 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
931 923
932 if (is_bad_inode(inode)) { 924 /* When we fail in read_inode() we mark inode as bad. The second test
925 * catches the case when inode allocation fails before allocating
926 * a block for inode. */
927 if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
933 mlog(0, "Skipping delete of bad inode\n"); 928 mlog(0, "Skipping delete of bad inode\n");
934 goto bail; 929 goto bail;
935 } 930 }
@@ -1195,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1195 mlog_entry("(inode %llu)\n", 1190 mlog_entry("(inode %llu)\n",
1196 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1191 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1197 1192
1198 status = ocfs2_journal_access(handle, inode, bh, 1193 status = ocfs2_journal_access_di(handle, inode, bh,
1199 OCFS2_JOURNAL_ACCESS_WRITE); 1194 OCFS2_JOURNAL_ACCESS_WRITE);
1200 if (status < 0) { 1195 if (status < 0) {
1201 mlog_errno(status); 1196 mlog_errno(status);
1202 goto leave; 1197 goto leave;
@@ -1264,3 +1259,89 @@ void ocfs2_refresh_inode(struct inode *inode,
1264 1259
1265 spin_unlock(&OCFS2_I(inode)->ip_lock); 1260 spin_unlock(&OCFS2_I(inode)->ip_lock);
1266} 1261}
1262
1263int ocfs2_validate_inode_block(struct super_block *sb,
1264 struct buffer_head *bh)
1265{
1266 int rc;
1267 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1268
1269 mlog(0, "Validating dinode %llu\n",
1270 (unsigned long long)bh->b_blocknr);
1271
1272 BUG_ON(!buffer_uptodate(bh));
1273
1274 /*
1275 * If the ecc fails, we return the error but otherwise
1276 * leave the filesystem running. We know any error is
1277 * local to this block.
1278 */
1279 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
1280 if (rc) {
1281 mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
1282 (unsigned long long)bh->b_blocknr);
1283 goto bail;
1284 }
1285
1286 /*
1287 * Errors after here are fatal.
1288 */
1289
1290 rc = -EINVAL;
1291
1292 if (!OCFS2_IS_VALID_DINODE(di)) {
1293 ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
1294 (unsigned long long)bh->b_blocknr, 7,
1295 di->i_signature);
1296 goto bail;
1297 }
1298
1299 if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
1300 ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
1301 (unsigned long long)bh->b_blocknr,
1302 (unsigned long long)le64_to_cpu(di->i_blkno));
1303 goto bail;
1304 }
1305
1306 if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
1307 ocfs2_error(sb,
1308 "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
1309 (unsigned long long)bh->b_blocknr);
1310 goto bail;
1311 }
1312
1313 if (le32_to_cpu(di->i_fs_generation) !=
1314 OCFS2_SB(sb)->fs_generation) {
1315 ocfs2_error(sb,
1316 "Invalid dinode #%llu: fs_generation is %u\n",
1317 (unsigned long long)bh->b_blocknr,
1318 le32_to_cpu(di->i_fs_generation));
1319 goto bail;
1320 }
1321
1322 rc = 0;
1323
1324bail:
1325 return rc;
1326}
1327
1328int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
1329 int flags)
1330{
1331 int rc;
1332 struct buffer_head *tmp = *bh;
1333
1334 rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
1335 flags, ocfs2_validate_inode_block);
1336
1337 /* If ocfs2_read_blocks() got us a new bh, pass it up. */
1338 if (!rc && !*bh)
1339 *bh = tmp;
1340
1341 return rc;
1342}
1343
1344int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
1345{
1346 return ocfs2_read_inode_block_full(inode, bh, 0);
1347}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f37af9bcc4a..eb3c302b38d3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
128 int sysfile_type); 128 int sysfile_type);
129int ocfs2_inode_init_private(struct inode *inode); 129int ocfs2_inode_init_private(struct inode *inode);
130int ocfs2_inode_revalidate(struct dentry *dentry); 130int ocfs2_inode_revalidate(struct dentry *dentry);
131int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, 131void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
132 int create_ino); 132 int create_ino);
133void ocfs2_read_inode(struct inode *inode); 133void ocfs2_read_inode(struct inode *inode);
134void ocfs2_read_inode2(struct inode *inode, void *opaque); 134void ocfs2_read_inode2(struct inode *inode, void *opaque);
135ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf, 135ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
@@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
142 struct buffer_head *bh); 142 struct buffer_head *bh);
143int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); 143int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
144int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); 144int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
145struct buffer_head *ocfs2_bread(struct inode *inode,
146 int block, int *err, int reada);
145 147
146void ocfs2_set_inode_flags(struct inode *inode); 148void ocfs2_set_inode_flags(struct inode *inode);
147void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi); 149void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
@@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
153 return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); 155 return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
154} 156}
155 157
158/* Validate that a bh contains a valid inode */
159int ocfs2_validate_inode_block(struct super_block *sb,
160 struct buffer_head *bh);
161/*
162 * Read an inode block into *bh. If *bh is NULL, a bh will be allocated.
163 * This is a cached read. The inode will be validated with
164 * ocfs2_validate_inode_block().
165 */
166int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
167/* The same, but can be passed OCFS2_BH_* flags */
168int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
169 int flags);
156#endif /* OCFS2_INODE_H */ 170#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 99fe9d584f3c..57d7d25a2b9a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -35,6 +35,7 @@
35#include "ocfs2.h" 35#include "ocfs2.h"
36 36
37#include "alloc.h" 37#include "alloc.h"
38#include "blockcheck.h"
38#include "dir.h" 39#include "dir.h"
39#include "dlmglue.h" 40#include "dlmglue.h"
40#include "extent_map.h" 41#include "extent_map.h"
@@ -45,6 +46,7 @@
45#include "slot_map.h" 46#include "slot_map.h"
46#include "super.h" 47#include "super.h"
47#include "sysfile.h" 48#include "sysfile.h"
49#include "quota.h"
48 50
49#include "buffer_head_io.h" 51#include "buffer_head_io.h"
50 52
@@ -52,10 +54,10 @@ DEFINE_SPINLOCK(trans_inc_lock);
52 54
53static int ocfs2_force_read_journal(struct inode *inode); 55static int ocfs2_force_read_journal(struct inode *inode);
54static int ocfs2_recover_node(struct ocfs2_super *osb, 56static int ocfs2_recover_node(struct ocfs2_super *osb,
55 int node_num); 57 int node_num, int slot_num);
56static int __ocfs2_recovery_thread(void *arg); 58static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 59static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 60static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 61static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty, int replayed); 62 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 63static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@@ -64,6 +66,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
64 int slot); 66 int slot);
65static int ocfs2_commit_thread(void *arg); 67static int ocfs2_commit_thread(void *arg);
66 68
69static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
70{
71 return __ocfs2_wait_on_mount(osb, 0);
72}
73
74static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
75{
76 return __ocfs2_wait_on_mount(osb, 1);
77}
78
79
67 80
68/* 81/*
69 * The recovery_list is a simple linked list of node numbers to recover. 82 * The recovery_list is a simple linked list of node numbers to recover.
@@ -256,11 +269,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
256 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); 269 BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
257 BUG_ON(max_buffs <= 0); 270 BUG_ON(max_buffs <= 0);
258 271
259 /* JBD might support this, but our journalling code doesn't yet. */ 272 /* Nested transaction? Just return the handle... */
260 if (journal_current_handle()) { 273 if (journal_current_handle())
261 mlog(ML_ERROR, "Recursive transaction attempted!\n"); 274 return jbd2_journal_start(journal, max_buffs);
262 BUG();
263 }
264 275
265 down_read(&osb->journal->j_trans_barrier); 276 down_read(&osb->journal->j_trans_barrier);
266 277
@@ -285,16 +296,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
285int ocfs2_commit_trans(struct ocfs2_super *osb, 296int ocfs2_commit_trans(struct ocfs2_super *osb,
286 handle_t *handle) 297 handle_t *handle)
287{ 298{
288 int ret; 299 int ret, nested;
289 struct ocfs2_journal *journal = osb->journal; 300 struct ocfs2_journal *journal = osb->journal;
290 301
291 BUG_ON(!handle); 302 BUG_ON(!handle);
292 303
304 nested = handle->h_ref > 1;
293 ret = jbd2_journal_stop(handle); 305 ret = jbd2_journal_stop(handle);
294 if (ret < 0) 306 if (ret < 0)
295 mlog_errno(ret); 307 mlog_errno(ret);
296 308
297 up_read(&journal->j_trans_barrier); 309 if (!nested)
310 up_read(&journal->j_trans_barrier);
298 311
299 return ret; 312 return ret;
300} 313}
@@ -357,10 +370,137 @@ bail:
357 return status; 370 return status;
358} 371}
359 372
360int ocfs2_journal_access(handle_t *handle, 373struct ocfs2_triggers {
361 struct inode *inode, 374 struct jbd2_buffer_trigger_type ot_triggers;
362 struct buffer_head *bh, 375 int ot_offset;
363 int type) 376};
377
378static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
379{
380 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
381}
382
383static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
384 struct buffer_head *bh,
385 void *data, size_t size)
386{
387 struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
388
389 /*
390 * We aren't guaranteed to have the superblock here, so we
391 * must unconditionally compute the ecc data.
392 * __ocfs2_journal_access() will only set the triggers if
393 * metaecc is enabled.
394 */
395 ocfs2_block_check_compute(data, size, data + ot->ot_offset);
396}
397
398/*
399 * Quota blocks have their own trigger because the struct ocfs2_block_check
400 * offset depends on the blocksize.
401 */
402static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
403 struct buffer_head *bh,
404 void *data, size_t size)
405{
406 struct ocfs2_disk_dqtrailer *dqt =
407 ocfs2_block_dqtrailer(size, data);
408
409 /*
410 * We aren't guaranteed to have the superblock here, so we
411 * must unconditionally compute the ecc data.
412 * __ocfs2_journal_access() will only set the triggers if
413 * metaecc is enabled.
414 */
415 ocfs2_block_check_compute(data, size, &dqt->dq_check);
416}
417
418/*
419 * Directory blocks also have their own trigger because the
420 * struct ocfs2_block_check offset depends on the blocksize.
421 */
422static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
423 struct buffer_head *bh,
424 void *data, size_t size)
425{
426 struct ocfs2_dir_block_trailer *trailer =
427 ocfs2_dir_trailer_from_size(size, data);
428
429 /*
430 * We aren't guaranteed to have the superblock here, so we
431 * must unconditionally compute the ecc data.
432 * __ocfs2_journal_access() will only set the triggers if
433 * metaecc is enabled.
434 */
435 ocfs2_block_check_compute(data, size, &trailer->db_check);
436}
437
438static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
439 struct buffer_head *bh)
440{
441 mlog(ML_ERROR,
442 "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
443 "bh->b_blocknr = %llu\n",
444 (unsigned long)bh,
445 (unsigned long long)bh->b_blocknr);
446
447 /* We aren't guaranteed to have the superblock here - but if we
448 * don't, it'll just crash. */
449 ocfs2_error(bh->b_assoc_map->host->i_sb,
450 "JBD2 has aborted our journal, ocfs2 cannot continue\n");
451}
452
453static struct ocfs2_triggers di_triggers = {
454 .ot_triggers = {
455 .t_commit = ocfs2_commit_trigger,
456 .t_abort = ocfs2_abort_trigger,
457 },
458 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
459};
460
461static struct ocfs2_triggers eb_triggers = {
462 .ot_triggers = {
463 .t_commit = ocfs2_commit_trigger,
464 .t_abort = ocfs2_abort_trigger,
465 },
466 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
467};
468
469static struct ocfs2_triggers gd_triggers = {
470 .ot_triggers = {
471 .t_commit = ocfs2_commit_trigger,
472 .t_abort = ocfs2_abort_trigger,
473 },
474 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
475};
476
477static struct ocfs2_triggers db_triggers = {
478 .ot_triggers = {
479 .t_commit = ocfs2_db_commit_trigger,
480 .t_abort = ocfs2_abort_trigger,
481 },
482};
483
484static struct ocfs2_triggers xb_triggers = {
485 .ot_triggers = {
486 .t_commit = ocfs2_commit_trigger,
487 .t_abort = ocfs2_abort_trigger,
488 },
489 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
490};
491
492static struct ocfs2_triggers dq_triggers = {
493 .ot_triggers = {
494 .t_commit = ocfs2_dq_commit_trigger,
495 .t_abort = ocfs2_abort_trigger,
496 },
497};
498
499static int __ocfs2_journal_access(handle_t *handle,
500 struct inode *inode,
501 struct buffer_head *bh,
502 struct ocfs2_triggers *triggers,
503 int type)
364{ 504{
365 int status; 505 int status;
366 506
@@ -406,6 +546,8 @@ int ocfs2_journal_access(handle_t *handle,
406 status = -EINVAL; 546 status = -EINVAL;
407 mlog(ML_ERROR, "Uknown access type!\n"); 547 mlog(ML_ERROR, "Uknown access type!\n");
408 } 548 }
549 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers)
550 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
409 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 551 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
410 552
411 if (status < 0) 553 if (status < 0)
@@ -416,6 +558,54 @@ int ocfs2_journal_access(handle_t *handle,
416 return status; 558 return status;
417} 559}
418 560
561int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
562 struct buffer_head *bh, int type)
563{
564 return __ocfs2_journal_access(handle, inode, bh, &di_triggers,
565 type);
566}
567
568int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
569 struct buffer_head *bh, int type)
570{
571 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers,
572 type);
573}
574
575int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
576 struct buffer_head *bh, int type)
577{
578 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers,
579 type);
580}
581
582int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
583 struct buffer_head *bh, int type)
584{
585 return __ocfs2_journal_access(handle, inode, bh, &db_triggers,
586 type);
587}
588
589int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
590 struct buffer_head *bh, int type)
591{
592 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers,
593 type);
594}
595
596int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
597 struct buffer_head *bh, int type)
598{
599 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers,
600 type);
601}
602
603int ocfs2_journal_access(handle_t *handle, struct inode *inode,
604 struct buffer_head *bh, int type)
605{
606 return __ocfs2_journal_access(handle, inode, bh, NULL, type);
607}
608
419int ocfs2_journal_dirty(handle_t *handle, 609int ocfs2_journal_dirty(handle_t *handle,
420 struct buffer_head *bh) 610 struct buffer_head *bh)
421{ 611{
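
For illustration only (not part of the patch): a metadata type with a fixed-offset check
field gets a trigger of the same shape as the di/eb/gd/xb triggers above. The names below
are hypothetical and the offset reuses the dinode's i_check purely as an example.

/* Illustrative sketch only -- not in the patch. */
static struct ocfs2_triggers example_triggers = {
	.ot_triggers = {
		.t_commit = ocfs2_commit_trigger,
		.t_abort  = ocfs2_abort_trigger,
	},
	/* Offset of the ocfs2_block_check within the block. */
	.ot_offset	= offsetof(struct ocfs2_dinode, i_check),
};

static int example_journal_access(handle_t *handle, struct inode *inode,
				  struct buffer_head *bh, int type)
{
	return __ocfs2_journal_access(handle, inode, bh, &example_triggers,
				      type);
}
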
@@ -434,20 +624,6 @@ int ocfs2_journal_dirty(handle_t *handle,
434 return status; 624 return status;
435} 625}
436 626
437#ifdef CONFIG_OCFS2_COMPAT_JBD
438int ocfs2_journal_dirty_data(handle_t *handle,
439 struct buffer_head *bh)
440{
441 int err = journal_dirty_data(handle, bh);
442 if (err)
443 mlog_errno(err);
444 /* TODO: When we can handle it, abort the handle and go RO on
445 * error here. */
446
447 return err;
448}
449#endif
450
451#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 627#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
452 628
453void ocfs2_set_journal_params(struct ocfs2_super *osb) 629void ocfs2_set_journal_params(struct ocfs2_super *osb)
@@ -587,17 +763,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
587 mlog_entry_void(); 763 mlog_entry_void();
588 764
589 fe = (struct ocfs2_dinode *)bh->b_data; 765 fe = (struct ocfs2_dinode *)bh->b_data;
590 if (!OCFS2_IS_VALID_DINODE(fe)) { 766
591 /* This is called from startup/shutdown which will 767 /* The journal bh on the osb always comes from ocfs2_journal_init()
592 * handle the errors in a specific manner, so no need 768 * and was validated there inside ocfs2_inode_lock_full(). It's a
593 * to call ocfs2_error() here. */ 769 * code bug if we mess it up. */
594 mlog(ML_ERROR, "Journal dinode %llu has invalid " 770 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
595 "signature: %.*s",
596 (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
597 fe->i_signature);
598 status = -EIO;
599 goto out;
600 }
601 771
602 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 772 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
603 if (dirty) 773 if (dirty)
@@ -609,11 +779,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
609 if (replayed) 779 if (replayed)
610 ocfs2_bump_recovery_generation(fe); 780 ocfs2_bump_recovery_generation(fe);
611 781
782 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
612 status = ocfs2_write_block(osb, bh, journal->j_inode); 783 status = ocfs2_write_block(osb, bh, journal->j_inode);
613 if (status < 0) 784 if (status < 0)
614 mlog_errno(status); 785 mlog_errno(status);
615 786
616out:
617 mlog_exit(status); 787 mlog_exit(status);
618 return status; 788 return status;
619} 789}
@@ -878,6 +1048,7 @@ struct ocfs2_la_recovery_item {
878 int lri_slot; 1048 int lri_slot;
879 struct ocfs2_dinode *lri_la_dinode; 1049 struct ocfs2_dinode *lri_la_dinode;
880 struct ocfs2_dinode *lri_tl_dinode; 1050 struct ocfs2_dinode *lri_tl_dinode;
1051 struct ocfs2_quota_recovery *lri_qrec;
881}; 1052};
882 1053
883/* Does the second half of the recovery process. By this point, the 1054/* Does the second half of the recovery process. By this point, the
@@ -898,6 +1069,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
898 struct ocfs2_super *osb = journal->j_osb; 1069 struct ocfs2_super *osb = journal->j_osb;
 	struct ocfs2_dinode *la_dinode, *tl_dinode;
 	struct ocfs2_la_recovery_item *item, *n;
+	struct ocfs2_quota_recovery *qrec;
 	LIST_HEAD(tmp_la_list);

 	mlog_entry_void();
@@ -913,6 +1085,8 @@ void ocfs2_complete_recovery(struct work_struct *work)

 		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);

+		ocfs2_wait_on_quotas(osb);
+
 		la_dinode = item->lri_la_dinode;
 		if (la_dinode) {
 			mlog(0, "Clean up local alloc %llu\n",
@@ -943,6 +1117,16 @@ void ocfs2_complete_recovery(struct work_struct *work)
 		if (ret < 0)
 			mlog_errno(ret);

+		qrec = item->lri_qrec;
+		if (qrec) {
+			mlog(0, "Recovering quota files");
+			ret = ocfs2_finish_quota_recovery(osb, qrec,
+							  item->lri_slot);
+			if (ret < 0)
+				mlog_errno(ret);
+			/* Recovery info is already freed now */
+		}
+
 		kfree(item);
 	}

@@ -956,7 +1140,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
 static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 					    int slot_num,
 					    struct ocfs2_dinode *la_dinode,
-					    struct ocfs2_dinode *tl_dinode)
+					    struct ocfs2_dinode *tl_dinode,
+					    struct ocfs2_quota_recovery *qrec)
 {
 	struct ocfs2_la_recovery_item *item;

@@ -971,6 +1156,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 		if (tl_dinode)
 			kfree(tl_dinode);

+		if (qrec)
+			ocfs2_free_quota_recovery(qrec);
+
 		mlog_errno(-ENOMEM);
 		return;
 	}
@@ -979,6 +1167,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
 	item->lri_la_dinode = la_dinode;
 	item->lri_slot = slot_num;
 	item->lri_tl_dinode = tl_dinode;
+	item->lri_qrec = qrec;

 	spin_lock(&journal->j_lock);
 	list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -998,6 +1187,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 		ocfs2_queue_recovery_completion(journal,
 						osb->slot_num,
 						osb->local_alloc_copy,
+						NULL,
 						NULL);
 	ocfs2_schedule_truncate_log_flush(osb, 0);

@@ -1006,11 +1196,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
 	}
 }

+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
+{
+	if (osb->quota_rec) {
+		ocfs2_queue_recovery_completion(osb->journal,
+						osb->slot_num,
+						NULL,
+						NULL,
+						osb->quota_rec);
+		osb->quota_rec = NULL;
+	}
+}
+
 static int __ocfs2_recovery_thread(void *arg)
 {
-	int status, node_num;
+	int status, node_num, slot_num;
 	struct ocfs2_super *osb = arg;
 	struct ocfs2_recovery_map *rm = osb->recovery_map;
+	int *rm_quota = NULL;
+	int rm_quota_used = 0, i;
+	struct ocfs2_quota_recovery *qrec;

 	mlog_entry_void();

@@ -1019,6 +1224,11 @@ static int __ocfs2_recovery_thread(void *arg)
 		goto bail;
 	}

+	rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
+	if (!rm_quota) {
+		status = -ENOMEM;
+		goto bail;
+	}
 restart:
 	status = ocfs2_super_lock(osb, 1);
 	if (status < 0) {
@@ -1032,8 +1242,28 @@ restart:
 		 * clear it until ocfs2_recover_node() has succeeded. */
 		node_num = rm->rm_entries[0];
 		spin_unlock(&osb->osb_lock);
-
-		status = ocfs2_recover_node(osb, node_num);
+		mlog(0, "checking node %d\n", node_num);
+		slot_num = ocfs2_node_num_to_slot(osb, node_num);
+		if (slot_num == -ENOENT) {
+			status = 0;
+			mlog(0, "no slot for this node, so no recovery"
+			     "required.\n");
+			goto skip_recovery;
+		}
+		mlog(0, "node %d was using slot %d\n", node_num, slot_num);
+
+		/* It is a bit subtle with quota recovery. We cannot do it
+		 * immediately because we have to obtain cluster locks from
+		 * quota files and we also don't want to just skip it because
+		 * then quota usage would be out of sync until some node takes
+		 * the slot. So we remember which nodes need quota recovery
+		 * and when everything else is done, we recover quotas. */
+		for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
+		if (i == rm_quota_used)
+			rm_quota[rm_quota_used++] = slot_num;
+
+		status = ocfs2_recover_node(osb, node_num, slot_num);
+skip_recovery:
 		if (!status) {
 			ocfs2_recovery_map_clear(osb, node_num);
 		} else {
@@ -1055,13 +1285,27 @@ restart:
 	if (status < 0)
 		mlog_errno(status);

+	/* Now it is right time to recover quotas... We have to do this under
+	 * superblock lock so that noone can start using the slot (and crash)
+	 * before we recover it */
+	for (i = 0; i < rm_quota_used; i++) {
+		qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+		if (IS_ERR(qrec)) {
+			status = PTR_ERR(qrec);
+			mlog_errno(status);
+			continue;
+		}
+		ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
+						NULL, NULL, qrec);
+	}
+
 	ocfs2_super_unlock(osb, 1);

 	/* We always run recovery on our own orphan dir - the dead
 	 * node(s) may have disallowd a previos inode delete. Re-processing
 	 * is therefore required. */
 	ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
-					NULL);
+					NULL, NULL);

 bail:
 	mutex_lock(&osb->recovery_lock);
@@ -1076,6 +1320,9 @@ bail:

 	mutex_unlock(&osb->recovery_lock);

+	if (rm_quota)
+		kfree(rm_quota);
+
 	mlog_exit(status);
 	/* no one is callint kthread_stop() for us so the kthread() api
 	 * requires that we call do_exit().  And it isn't exported, but
@@ -1135,8 +1382,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
 	}
 	SET_INODE_JOURNAL(inode);

-	status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
-				   OCFS2_BH_IGNORE_CACHE);
+	status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1268,6 +1514,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
 	osb->slot_recovery_generations[slot_num] =
 		ocfs2_get_recovery_generation(fe);

+	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
 	status = ocfs2_write_block(osb, bh, inode);
 	if (status < 0)
 		mlog_errno(status);
@@ -1304,31 +1551,19 @@ done:
  * far less concerning.
  */
 static int ocfs2_recover_node(struct ocfs2_super *osb,
-			      int node_num)
+			      int node_num, int slot_num)
 {
 	int status = 0;
-	int slot_num;
 	struct ocfs2_dinode *la_copy = NULL;
 	struct ocfs2_dinode *tl_copy = NULL;

-	mlog_entry("(node_num=%d, osb->node_num = %d)\n",
-		   node_num, osb->node_num);
-
-	mlog(0, "checking node %d\n", node_num);
+	mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
+		   node_num, slot_num, osb->node_num);

 	/* Should not ever be called to recover ourselves -- in that
 	 * case we should've called ocfs2_journal_load instead. */
 	BUG_ON(osb->node_num == node_num);

-	slot_num = ocfs2_node_num_to_slot(osb, node_num);
-	if (slot_num == -ENOENT) {
-		status = 0;
-		mlog(0, "no slot for this node, so no recovery required.\n");
-		goto done;
-	}
-
-	mlog(0, "node %d was using slot %d\n", node_num, slot_num);
-
 	status = ocfs2_replay_journal(osb, node_num, slot_num);
 	if (status < 0) {
 		if (status == -EBUSY) {
@@ -1364,7 +1599,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,

 	/* This will kfree the memory pointed to by la_copy and tl_copy */
 	ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
-					tl_copy);
+					tl_copy, NULL);

 	status = 0;
 done:
@@ -1659,13 +1894,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
 	return ret;
 }

-static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
 {
 	/* This check is good because ocfs2 will wait on our recovery
 	 * thread before changing it to something other than MOUNTED
 	 * or DISABLED. */
 	wait_event(osb->osb_mount_event,
-		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED ||
+		   (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
+		   atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
 		   atomic_read(&osb->vol_state) == VOLUME_DISABLED);

 	/* If there's an error on mount, then we may never get to the
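The hunk above turns ocfs2_wait_on_mount() into __ocfs2_wait_on_mount() with a quota flag, so callers can wait either for the plain mount state or for the new VOLUME_MOUNTED_QUOTAS state. A minimal sketch of the two obvious wrappers; only the names __ocfs2_wait_on_mount() and ocfs2_wait_on_quotas() appear in the hunks shown here, the wrapper bodies are an assumption:

static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
{
	return __ocfs2_wait_on_mount(osb, 0);
}

static int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
{
	return __ocfs2_wait_on_mount(osb, 1);
}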
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4d14e9a3cea..3c3532e1307c 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,12 +27,7 @@
 #define OCFS2_JOURNAL_H

 #include <linux/fs.h>
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# include <linux/jbd2.h>
-#else
-# include <linux/jbd.h>
-# include "ocfs2_jbd_compat.h"
-#endif
+#include <linux/jbd2.h>

 enum ocfs2_journal_state {
 	OCFS2_JOURNAL_FREE = 0,
@@ -173,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb,
 			   int node_num);
 int ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
 void ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);

 static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
 {
@@ -216,9 +212,12 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
  *  ocfs2_extend_trans     - Extend a handle by nblocks credits. This may
  *                           commit the handle to disk in the process, but will
  *                           not release any locks taken during the transaction.
- *  ocfs2_journal_access   - Notify the handle that we want to journal this
+ *  ocfs2_journal_access*  - Notify the handle that we want to journal this
  *                           buffer. Will have to call ocfs2_journal_dirty once
  *                           we've actually dirtied it. Type is one of . or .
+ *                           Always call the specific flavor of
+ *                           ocfs2_journal_access_*() unless you intend to
+ *                           manage the checksum by hand.
  *  ocfs2_journal_dirty    - Mark a journalled buffer as having dirty data.
  *  ocfs2_jbd2_file_inode  - Mark an inode so that its data goes out before
  *                           the current handle commits.
@@ -248,10 +247,29 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks);
 #define OCFS2_JOURNAL_ACCESS_WRITE 1
 #define OCFS2_JOURNAL_ACCESS_UNDO  2

-int ocfs2_journal_access(handle_t *handle,
-			 struct inode *inode,
-			 struct buffer_head *bh,
-			 int type);
+
+/* ocfs2_inode */
+int ocfs2_journal_access_di(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* ocfs2_extent_block */
+int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* ocfs2_group_desc */
+int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* ocfs2_xattr_block */
+int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* quota blocks */
+int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* dirblock */
+int ocfs2_journal_access_db(handle_t *handle, struct inode *inode,
+			    struct buffer_head *bh, int type);
+/* Anything that has no ecc */
+int ocfs2_journal_access(handle_t *handle, struct inode *inode,
+			 struct buffer_head *bh, int type);
+
 /*
  * A word about the journal_access/journal_dirty "dance". It is
  * entirely legal to journal_access a buffer more than once (as long
@@ -273,10 +291,6 @@ int ocfs2_journal_access(handle_t *handle,
  */
 int ocfs2_journal_dirty(handle_t *handle,
 			struct buffer_head *bh);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int ocfs2_journal_dirty_data(handle_t *handle,
-			     struct buffer_head *bh);
-#endif

 /*
  * Credit Macros:
@@ -293,6 +307,37 @@ int ocfs2_journal_dirty_data(handle_t *handle,
 /* extended attribute block update */
 #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1

+/* global quotafile inode update, data block */
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
+/*
+ * The two writes below can accidentally see global info dirty due
+ * to set_info() quotactl so make them prepared for the writes.
+ */
+/* quota data block, global info */
+/* Write to local quota file */
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+
+/* global quota data block, local quota data block, global quota inode,
+ * global quota info */
+#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+
+static inline int ocfs2_quota_trans_credits(struct super_block *sb)
+{
+	int credits = 0;
+
+	if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA))
+		credits += OCFS2_QWRITE_CREDITS;
+	if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA))
+		credits += OCFS2_QWRITE_CREDITS;
+	return credits;
+}
+
+/* Number of credits needed for removing quota structure from file */
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
+/* Number of credits needed for initialization of new quota structure */
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
+
 /* group extend. inode update and last group update. */
 #define OCFS2_GROUP_EXTEND_CREDITS  (OCFS2_INODE_UPDATE_CREDITS + 1)

@@ -303,8 +348,11 @@ int ocfs2_journal_dirty_data(handle_t *handle,
  * prev. group desc. if we relink. */
 #define OCFS2_SUBALLOC_ALLOC (3)

-#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
-					 + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_inline_to_extents_credits(struct super_block *sb)
+{
+	return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS +
+	       ocfs2_quota_trans_credits(sb);
+}

 /* dinode + group descriptor update. We don't relink on free yet. */
 #define OCFS2_SUBALLOC_FREE  (2)
@@ -313,16 +361,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
 #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
 					 + OCFS2_TRUNCATE_LOG_UPDATE)

-#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_remove_extent_credits(struct super_block *sb)
+{
+	return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS +
+	       ocfs2_quota_trans_credits(sb);
+}

 /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
  * bitmap block for the new bit) */
 #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)

 /* parent fe, parent block, new file entry, inode alloc fe, inode alloc
- * group descriptor + mkdir/symlink blocks */
-#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \
-			    + OCFS2_DIR_LINK_ADDITIONAL_CREDITS)
+ * group descriptor + mkdir/symlink blocks + quota update */
+static inline int ocfs2_mknod_credits(struct super_block *sb)
+{
+	return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
+	       ocfs2_quota_trans_credits(sb);
+}

 /* local alloc metadata change + main bitmap updates */
 #define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \
@@ -332,13 +387,21 @@ int ocfs2_journal_dirty_data(handle_t *handle,
  * for the dinode, one for the new block. */
 #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)

-/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
-#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
+ * update on dir */
+static inline int ocfs2_link_credits(struct super_block *sb)
+{
+	return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
+	       ocfs2_quota_trans_credits(sb);
+}

 /* inode + dir inode (if we unlink a dir), + dir entry block + orphan
  * dir inode link */
-#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \
-			     + OCFS2_LINK_CREDITS)
+static inline int ocfs2_unlink_credits(struct super_block *sb)
+{
+	/* The quota update from ocfs2_link_credits is unused here... */
+	return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
+}

 /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
  * inode alloc group descriptor */
@@ -347,8 +410,10 @@ int ocfs2_journal_dirty_data(handle_t *handle,
 /* dinode update, old dir dinode update, new dir dinode update, old
  * dir dir entry, new dir dir entry, dir entry update for renaming
  * directory + target unlink */
-#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
-			     + OCFS2_UNLINK_CREDITS)
+static inline int ocfs2_rename_credits(struct super_block *sb)
+{
+	return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
+}

 /* global bitmap dinode, group desc., relinked group,
  * suballocator dinode, group desc., relinked group,
@@ -386,18 +451,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
 	 * credit for the dinode there. */
 	extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);

-	return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
+	return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
+	       ocfs2_quota_trans_credits(sb);
 }

 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
 {
-	int blocks = OCFS2_MKNOD_CREDITS;
+	int blocks = ocfs2_mknod_credits(sb);

 	/* links can be longer than one block so we may update many
 	 * within our single allocated extent. */
 	blocks += ocfs2_clusters_to_blocks(sb, 1);

-	return blocks;
+	return blocks + ocfs2_quota_trans_credits(sb);
 }

 static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
@@ -434,6 +500,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
 	/* update to the truncate log. */
 	credits += OCFS2_TRUNCATE_LOG_UPDATE;

+	credits += ocfs2_quota_trans_credits(sb);
+
 	return credits;
 }

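With the credit macros now per-superblock helpers that fold in ocfs2_quota_trans_credits(), a caller sizes the handle at ocfs2_start_trans() time and then uses the typed ocfs2_journal_access_*() flavor that matches the block it dirties. A hedged sketch of that access/dirty dance for an inode buffer (hypothetical helper, not part of this patch; assumes the usual in-tree ocfs2 headers):

static int example_bump_links(struct ocfs2_super *osb, struct inode *inode,
			      struct buffer_head *di_bh)
{
	struct ocfs2_dinode *di;
	handle_t *handle;
	int status;

	/* ocfs2_link_credits() already includes the quota credits */
	handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* _di flavor: the buffer holds an ocfs2_dinode, so the checksum/ecc
	 * bookkeeping is done for us */
	status = ocfs2_journal_access_di(handle, inode, di_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0)
		goto out_commit;

	di = (struct ocfs2_dinode *) di_bh->b_data;
	le16_add_cpu(&di->i_links_count, 1);
	status = ocfs2_journal_dirty(handle, di_bh);
out_commit:
	ocfs2_commit_trans(osb, handle);
	return status;
}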
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 687b28713c32..ec70cdbe77fc 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -36,6 +36,7 @@
 #include "ocfs2.h"

 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "inode.h"
 #include "journal.h"
@@ -248,8 +249,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
 		goto bail;
 	}

-	status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
-				   &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+					     OCFS2_BH_IGNORE_CACHE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -382,8 +383,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
 	}
 	memcpy(alloc_copy, alloc, bh->b_size);

-	status = ocfs2_journal_access(handle, local_alloc_inode, bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_commit;
@@ -459,8 +460,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,

 	mutex_lock(&inode->i_mutex);

-	status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
-				   &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+					     OCFS2_BH_IGNORE_CACHE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -476,6 +477,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
 	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
 	ocfs2_clear_local_alloc(alloc);

+	ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
 	status = ocfs2_write_block(osb, alloc_bh, inode);
 	if (status < 0)
 		mlog_errno(status);
@@ -762,9 +764,9 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 	 * delete bits from it! */
 	*num_bits = bits_wanted;

-	status = ocfs2_journal_access(handle, local_alloc_inode,
-				      osb->local_alloc_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+					 osb->local_alloc_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1240,9 +1242,9 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
 	}
 	memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);

-	status = ocfs2_journal_access(handle, local_alloc_inode,
-				      osb->local_alloc_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, local_alloc_inode,
+					 osb->local_alloc_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
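Both recovery paths above bypass the journal and call ocfs2_write_block() directly, so they have to refresh the metaecc check field themselves with ocfs2_compute_meta_ecc() first. The same pattern reduced to a sketch (illustrative only; the helper name is made up, the calls and the i_check field are taken from the hunks above):

static int example_raw_dinode_write(struct ocfs2_super *osb,
				    struct buffer_head *bh,
				    struct inode *inode)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;

	/* recompute checksum/ecc over the modified block before it hits disk */
	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
	return ocfs2_write_block(osb, bh, inode);
}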
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2545e7402efe..084aba86c3b2 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -40,6 +40,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>
+#include <linux/quotaops.h>

 #define MLOG_MASK_PREFIX ML_NAMEI
 #include <cluster/masklog.h>
@@ -61,17 +62,18 @@
 #include "sysfile.h"
 #include "uptodate.h"
 #include "xattr.h"
+#include "acl.h"

 #include "buffer_head_io.h"

 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
-			      struct dentry *dentry, int mode,
+			      struct inode *inode,
+			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
 			      handle_t *handle,
-			      struct inode **ret_inode,
 			      struct ocfs2_alloc_context *inode_ac);

 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
@@ -186,6 +188,35 @@ bail:
 	return ret;
 }

+static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
+{
+	struct inode *inode;
+
+	inode = new_inode(dir->i_sb);
+	if (!inode) {
+		mlog(ML_ERROR, "new_inode failed!\n");
+		return NULL;
+	}
+
+	/* populate as many fields early on as possible - many of
+	 * these are used by the support functions here and in
+	 * callers. */
+	if (S_ISDIR(mode))
+		inode->i_nlink = 2;
+	else
+		inode->i_nlink = 1;
+	inode->i_uid = current_fsuid();
+	if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current_fsgid();
+	inode->i_mode = mode;
+	vfs_dq_init(inode);
+	return inode;
+}
+
 static int ocfs2_mknod(struct inode *dir,
 		       struct dentry *dentry,
 		       int mode,
@@ -201,6 +232,13 @@ static int ocfs2_mknod(struct inode *dir,
 	struct inode *inode = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *xattr_ac = NULL;
+	int want_clusters = 0;
+	int xattr_credits = 0;
+	struct ocfs2_security_xattr_info si = {
+		.enable = 1,
+	};
+	int did_quota_inode = 0;

 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
 		   (unsigned long)dev, dentry->d_name.len,
@@ -250,17 +288,46 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}

-	/* Reserve a cluster if creating an extent based directory. */
-	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
-		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
-		if (status < 0) {
-			if (status != -ENOSPC)
-				mlog_errno(status);
+	inode = ocfs2_get_init_inode(dir, mode);
+	if (!inode) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* get security xattr */
+	status = ocfs2_init_security_get(inode, dir, &si);
+	if (status) {
+		if (status == -EOPNOTSUPP)
+			si.enable = 0;
+		else {
+			mlog_errno(status);
 			goto leave;
 		}
 	}

-	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
+	/* calculate meta data/clusters for setting security and acl xattr */
+	status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
+				       &si, &want_clusters,
+				       &xattr_credits, &xattr_ac);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	/* Reserve a cluster if creating an extent based directory. */
+	if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
+		want_clusters += 1;
+
+	status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto leave;
+	}
+
+	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
+				   xattr_credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -268,10 +335,19 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}

+	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
+	 * to be called. */
+	if (sb_any_quota_active(osb->sb) &&
+	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+		status = -EDQUOT;
+		goto leave;
+	}
+	did_quota_inode = 1;
+
 	/* do the real work now. */
-	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
+	status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
 				    &new_fe_bh, parent_fe_bh, handle,
-				    &inode, inode_ac);
+				    inode_ac);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -285,8 +361,8 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}

-	status = ocfs2_journal_access(handle, dir, parent_fe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, dir, parent_fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -300,6 +376,22 @@ static int ocfs2_mknod(struct inode *dir,
 		inc_nlink(dir);
 	}

+	status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+				xattr_ac, data_ac);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
+	if (si.enable) {
+		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+						 xattr_ac, data_ac);
+		if (status < 0) {
+			mlog_errno(status);
+			goto leave;
+		}
+	}
+
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
 				 de_bh);
@@ -320,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir,
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
+	if (status < 0 && did_quota_inode)
+		vfs_dq_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);

@@ -331,9 +425,13 @@ leave:
 	brelse(new_fe_bh);
 	brelse(de_bh);
 	brelse(parent_fe_bh);
+	kfree(si.name);
+	kfree(si.value);

-	if ((status < 0) && inode)
+	if ((status < 0) && inode) {
+		clear_nlink(inode);
 		iput(inode);
+	}

 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
@@ -341,6 +439,9 @@ leave:
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);

+	if (xattr_ac)
+		ocfs2_free_alloc_context(xattr_ac);
+
 	mlog_exit(status);

 	return status;
@@ -348,12 +449,12 @@ leave:

 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 			      struct inode *dir,
-			      struct dentry *dentry, int mode,
+			      struct inode *inode,
+			      struct dentry *dentry,
 			      dev_t dev,
 			      struct buffer_head **new_fe_bh,
 			      struct buffer_head *parent_fe_bh,
 			      handle_t *handle,
-			      struct inode **ret_inode,
 			      struct ocfs2_alloc_context *inode_ac)
 {
 	int status = 0;
@@ -361,14 +462,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	struct ocfs2_extent_list *fel;
 	u64 fe_blkno = 0;
 	u16 suballoc_bit;
-	struct inode *inode = NULL;

-	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
-		   (unsigned long)dev, dentry->d_name.len,
+	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
+		   inode->i_mode, (unsigned long)dev, dentry->d_name.len,
 		   dentry->d_name.name);

 	*new_fe_bh = NULL;
-	*ret_inode = NULL;

 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
 				       &fe_blkno);
@@ -377,23 +476,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 		goto leave;
 	}

-	inode = new_inode(dir->i_sb);
-	if (!inode) {
-		status = -ENOMEM;
-		mlog(ML_ERROR, "new_inode failed!\n");
-		goto leave;
-	}
-
 	/* populate as many fields early on as possible - many of
 	 * these are used by the support functions here and in
 	 * callers. */
 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
 	OCFS2_I(inode)->ip_blkno = fe_blkno;
-	if (S_ISDIR(mode))
-		inode->i_nlink = 2;
-	else
-		inode->i_nlink = 1;
-	inode->i_mode = mode;
 	spin_lock(&osb->osb_lock);
 	inode->i_generation = osb->s_next_generation++;
 	spin_unlock(&osb->osb_lock);
@@ -406,8 +493,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	}
 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);

-	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_di(handle, inode, *new_fe_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -421,17 +508,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	fe->i_blkno = cpu_to_le64(fe_blkno);
 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
 	fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
-	fe->i_uid = cpu_to_le32(current_fsuid());
-	if (dir->i_mode & S_ISGID) {
-		fe->i_gid = cpu_to_le32(dir->i_gid);
-		if (S_ISDIR(mode))
-			mode |= S_ISGID;
-	} else
-		fe->i_gid = cpu_to_le32(current_fsgid());
-	fe->i_mode = cpu_to_le16(mode);
-	if (S_ISCHR(mode) || S_ISBLK(mode))
+	fe->i_uid = cpu_to_le32(inode->i_uid);
+	fe->i_gid = cpu_to_le32(inode->i_gid);
+	fe->i_mode = cpu_to_le16(inode->i_mode);
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
-
 	fe->i_links_count = cpu_to_le16(inode->i_nlink);

 	fe->i_last_eb_blk = 0;
@@ -446,7 +527,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	/*
 	 * If supported, directories start with inline data.
 	 */
-	if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
+	if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
 		u16 feat = le16_to_cpu(fe->i_dyn_features);

 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
@@ -465,15 +546,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 		goto leave;
 	}

-	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
-		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
-		     "i_blkno=%llu, i_ino=%lu\n",
-		     (unsigned long long)(*new_fe_bh)->b_blocknr,
-		     (unsigned long long)le64_to_cpu(fe->i_blkno),
-		     inode->i_ino);
-		BUG();
-	}
-
+	ocfs2_populate_inode(inode, fe, 1);
 	ocfs2_inode_set_new(osb, inode);
 	if (!ocfs2_mount_local(osb)) {
 		status = ocfs2_create_new_inode_locks(inode);
@@ -484,17 +557,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 	status = 0; /* error in ocfs2_create_new_inode_locks is not
 		     * critical */

-	*ret_inode = inode;
 leave:
 	if (status < 0) {
 		if (*new_fe_bh) {
 			brelse(*new_fe_bh);
 			*new_fe_bh = NULL;
 		}
-		if (inode) {
-			clear_nlink(inode);
-			iput(inode);
-		}
 	}

 	mlog_exit(status);
@@ -588,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}

-	handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		err = PTR_ERR(handle);
 		handle = NULL;
@@ -596,8 +664,8 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_unlock_inode;
 	}

-	err = ocfs2_journal_access(handle, inode, fe_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	err = ocfs2_journal_access_di(handle, inode, fe_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (err < 0) {
 		mlog_errno(err);
 		goto out_commit;
@@ -775,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir,
 		}
 	}

-	handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -783,8 +851,8 @@ static int ocfs2_unlink(struct inode *dir,
 		goto leave;
 	}

-	status = ocfs2_journal_access(handle, inode, fe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, inode, fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1181,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 	}

-	handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
+	handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1197,8 +1265,8 @@ static int ocfs2_rename(struct inode *old_dir,
 			goto bail;
 		}
 	}
-	status = ocfs2_journal_access(handle, new_inode, newfe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, new_inode, newfe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1244,8 +1312,8 @@ static int ocfs2_rename(struct inode *old_dir,
 		old_inode->i_ctime = CURRENT_TIME;
 		mark_inode_dirty(old_inode);

-		status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		if (status >= 0) {
 			old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;

@@ -1321,9 +1389,9 @@ static int ocfs2_rename(struct inode *old_dir,
 			     (int)old_dir_nlink, old_dir->i_nlink);
 	} else {
 		struct ocfs2_dinode *fe;
-		status = ocfs2_journal_access(handle, old_dir,
-					      old_dir_bh,
-					      OCFS2_JOURNAL_ACCESS_WRITE);
+		status = ocfs2_journal_access_di(handle, old_dir,
+						 old_dir_bh,
+						 OCFS2_JOURNAL_ACCESS_WRITE);
 		fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
 		fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
 		status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1496,6 +1564,13 @@ static int ocfs2_symlink(struct inode *dir,
 	handle_t *handle = NULL;
 	struct ocfs2_alloc_context *inode_ac = NULL;
 	struct ocfs2_alloc_context *data_ac = NULL;
+	struct ocfs2_alloc_context *xattr_ac = NULL;
+	int want_clusters = 0;
+	int xattr_credits = 0;
+	struct ocfs2_security_xattr_info si = {
+		.enable = 1,
+	};
+	int did_quota = 0, did_quota_inode = 0;

 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1542,17 +1617,46 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}

-	/* don't reserve bitmap space for fast symlinks. */
-	if (l > ocfs2_fast_symlink_chars(sb)) {
-		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
+	inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
+	if (!inode) {
+		status = -ENOMEM;
+		mlog_errno(status);
+		goto bail;
+	}
+
+	/* get security xattr */
+	status = ocfs2_init_security_get(inode, dir, &si);
+	if (status) {
+		if (status == -EOPNOTSUPP)
+			si.enable = 0;
+		else {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
+	/* calculate meta data/clusters for setting security xattr */
+	if (si.enable) {
+		status = ocfs2_calc_security_init(dir, &si, &want_clusters,
+						  &xattr_credits, &xattr_ac);
 		if (status < 0) {
-			if (status != -ENOSPC)
-				mlog_errno(status);
+			mlog_errno(status);
 			goto bail;
 		}
 	}

-	handle = ocfs2_start_trans(osb, credits);
+	/* don't reserve bitmap space for fast symlinks. */
+	if (l > ocfs2_fast_symlink_chars(sb))
+		want_clusters += 1;
+
+	status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+	if (status < 0) {
+		if (status != -ENOSPC)
+			mlog_errno(status);
+		goto bail;
+	}
+
+	handle = ocfs2_start_trans(osb, credits + xattr_credits);
 	if (IS_ERR(handle)) {
 		status = PTR_ERR(handle);
 		handle = NULL;
@@ -1560,10 +1664,18 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}

-	status = ocfs2_mknod_locked(osb, dir, dentry,
-				    S_IFLNK | S_IRWXUGO, 0,
-				    &new_fe_bh, parent_fe_bh, handle,
-				    &inode, inode_ac);
+	/* We don't use standard VFS wrapper because we don't want vfs_dq_init
+	 * to be called. */
+	if (sb_any_quota_active(osb->sb) &&
+	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+		status = -EDQUOT;
+		goto bail;
+	}
+	did_quota_inode = 1;
+
+	status = ocfs2_mknod_locked(osb, dir, inode, dentry,
+				    0, &new_fe_bh, parent_fe_bh, handle,
+				    inode_ac);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -1576,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir,
 		u32 offset = 0;

 		inode->i_op = &ocfs2_symlink_inode_operations;
+		if (vfs_dq_alloc_space_nodirty(inode,
+		    ocfs2_clusters_to_bytes(osb->sb, 1))) {
+			status = -EDQUOT;
+			goto bail;
+		}
+		did_quota = 1;
 		status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
 					      new_fe_bh,
 					      handle, data_ac, NULL,
@@ -1614,6 +1732,15 @@ static int ocfs2_symlink(struct inode *dir,
 		}
 	}

+	if (si.enable) {
+		status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+						 xattr_ac, data_ac);
+		if (status < 0) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	status = ocfs2_add_entry(handle, dentry, inode,
 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
 				 de_bh);
@@ -1632,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir,
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 bail:
+	if (status < 0 && did_quota)
+		vfs_dq_free_space_nodirty(inode,
+					ocfs2_clusters_to_bytes(osb->sb, 1));
+	if (status < 0 && did_quota_inode)
+		vfs_dq_free_inode(inode);
 	if (handle)
 		ocfs2_commit_trans(osb, handle);

@@ -1640,12 +1772,18 @@ bail:
 	brelse(new_fe_bh);
 	brelse(parent_fe_bh);
 	brelse(de_bh);
+	kfree(si.name);
+	kfree(si.value);
 	if (inode_ac)
 		ocfs2_free_alloc_context(inode_ac);
 	if (data_ac)
 		ocfs2_free_alloc_context(data_ac);
-	if ((status < 0) && inode)
+	if (xattr_ac)
+		ocfs2_free_alloc_context(xattr_ac);
+	if ((status < 0) && inode) {
+		clear_nlink(inode);
 		iput(inode);
+	}

 	mlog_exit(status);

@@ -1754,16 +1892,14 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,

 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);

-	status = ocfs2_read_block(orphan_dir_inode,
-				  OCFS2_I(orphan_dir_inode)->ip_blkno,
-				  &orphan_dir_bh);
+	status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
 	}

-	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
@@ -1850,8 +1986,8 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
 		goto leave;
 	}

-	status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto leave;
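The create paths above now share one ordering: build the VFS inode with ocfs2_get_init_inode(), reserve clusters and credits (xattr credits included), start the transaction, charge the inode to quota by calling ->alloc_inode() directly (skipping the VFS wrapper so vfs_dq_init() is not re-run), and only then call ocfs2_mknod_locked(); on error the charge is rolled back with vfs_dq_free_inode(). A condensed sketch of that skeleton with the error paths trimmed (illustrative, not a verbatim excerpt):

	inode = ocfs2_get_init_inode(dir, mode);
	/* ... reserve clusters, then: */
	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
				   xattr_credits);
	if (sb_any_quota_active(osb->sb) &&
	    osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
		status = -EDQUOT;
		goto leave;
	}
	did_quota_inode = 1;
	status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
				    &new_fe_bh, parent_fe_bh, handle, inode_ac);
leave:
	if (status < 0 && did_quota_inode)
		vfs_dq_free_inode(inode);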
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3fed9e3d8992..ad5c24a29edd 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -161,6 +161,7 @@ enum ocfs2_vol_state
 {
 	VOLUME_INIT = 0,
 	VOLUME_MOUNTED,
+	VOLUME_MOUNTED_QUOTAS,
 	VOLUME_DISMOUNTED,
 	VOLUME_DISABLED
 };
@@ -195,6 +196,9 @@ enum ocfs2_mount_options
 	OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
 	OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
 	OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
+	OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
+	OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
+	OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
 };

 #define OCFS2_OSB_SOFT_RO			0x0001
@@ -205,6 +209,7 @@ enum ocfs2_mount_options
 struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
+struct ocfs2_quota_recovery;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
@@ -286,10 +291,11 @@ struct ocfs2_super
 	char *local_alloc_debug_buf;
 #endif

-	/* Next two fields are for local node slot recovery during
+	/* Next three fields are for local node slot recovery during
 	 * mount. */
 	int dirty;
 	struct ocfs2_dinode *local_alloc_copy;
+	struct ocfs2_quota_recovery *quota_rec;

 	struct ocfs2_alloc_stats alloc_stats;
 	char dev_str[20];	/* "major,minor" of the device */
@@ -333,6 +339,10 @@ struct ocfs2_super

 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)

+/* Useful typedef for passing around journal access functions */
+typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode,
+					 struct buffer_head *bh, int type);
+
 static inline int ocfs2_should_order_data(struct inode *inode)
 {
 	if (!S_ISREG(inode->i_mode))
@@ -376,6 +386,13 @@ static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
 	return 0;
 }

+static inline int ocfs2_meta_ecc(struct ocfs2_super *osb)
+{
+	if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_META_ECC)
+		return 1;
+	return 0;
+}
+
 /* set / clear functions because cluster events can make these happen
  * in parallel so we want the transitions to be atomic. this also
  * means that any future flags osb_flags must be protected by spinlock
@@ -443,39 +460,19 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
 #define OCFS2_IS_VALID_DINODE(ptr)					\
 	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))

-#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
-	typeof(__di) ____di = (__di);					\
-	ocfs2_error((__sb),						\
-		"Dinode # %llu has bad signature %.*s",			\
-		(unsigned long long)le64_to_cpu((____di)->i_blkno), 7,	\
-		(____di)->i_signature);					\
-} while (0)
-
 #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
 	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))

-#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb)	do {		\
-	typeof(__eb) ____eb = (__eb);					\
-	ocfs2_error((__sb),						\
-		"Extent Block # %llu has bad signature %.*s",		\
-		(unsigned long long)le64_to_cpu((____eb)->h_blkno), 7,	\
-		(____eb)->h_signature);					\
-} while (0)
-
 #define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
 	(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))

-#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd)	do {		\
-	typeof(__gd) ____gd = (__gd);					\
-	ocfs2_error((__sb),						\
-		"Group Descriptor # %llu has bad signature %.*s",	\
-		(unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7,	\
-		(____gd)->bg_signature);				\
-} while (0)
-
 #define OCFS2_IS_VALID_XATTR_BLOCK(ptr)					\
 	(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))

+#define OCFS2_IS_VALID_DIR_TRAILER(ptr)					\
+	(!strcmp((ptr)->db_signature, OCFS2_DIR_TRAILER_SIGNATURE))
+
 static inline unsigned long ino_from_blkno(struct super_block *sb,
 					   u64 blkno)
 {
@@ -632,5 +629,6 @@ static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
 #define ocfs2_clear_bit ext2_clear_bit
 #define ocfs2_test_bit ext2_test_bit
 #define ocfs2_find_next_zero_bit ext2_find_next_zero_bit
+#define ocfs2_find_next_bit ext2_find_next_bit
 #endif  /* OCFS2_H */

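The new ocfs2_journal_access_func typedef lets shared code take the block-type-specific accessor as a parameter instead of hard-coding one flavor. One possible use, shown as a hypothetical helper that is not part of this patch:

static int example_prepare_write(handle_t *handle, struct inode *inode,
				 struct buffer_head *bh,
				 ocfs2_journal_access_func access)
{
	/* e.g. pass ocfs2_journal_access_db when bh holds a directory block */
	return access(handle, inode, bh, OCFS2_JOURNAL_ACCESS_WRITE);
}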
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5e0c0d0aef7d..c7ae45aaa36c 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -65,6 +65,7 @@
 #define OCFS2_EXTENT_BLOCK_SIGNATURE	"EXBLK01"
 #define OCFS2_GROUP_DESC_SIGNATURE      "GROUP01"
 #define OCFS2_XATTR_BLOCK_SIGNATURE	"XATTR01"
+#define OCFS2_DIR_TRAILER_SIGNATURE	"DIRTRL1"

 /* Compatibility flags */
 #define OCFS2_HAS_COMPAT_FEATURE(sb,mask)			\
@@ -93,8 +94,11 @@
 				 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
 				 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
 				 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
-				 | OCFS2_FEATURE_INCOMPAT_XATTR)
-#define OCFS2_FEATURE_RO_COMPAT_SUPP	OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
+				 | OCFS2_FEATURE_INCOMPAT_XATTR \
+				 | OCFS2_FEATURE_INCOMPAT_META_ECC)
+#define OCFS2_FEATURE_RO_COMPAT_SUPP	(OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
+					 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
+					 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)

 /*
  * Heartbeat-only devices are missing journals and other files.  The
@@ -147,6 +151,9 @@
 /* Support for extended attributes */
 #define OCFS2_FEATURE_INCOMPAT_XATTR		0x0200

+/* Metadata checksum and error correction */
+#define OCFS2_FEATURE_INCOMPAT_META_ECC		0x0800
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
@@ -163,6 +170,12 @@
  */
 #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN	0x0001

+/*
+ * Maintain quota information for this filesystem
+ */
+#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA	0x0002
+#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA	0x0004
+
 /* The byte offset of the first backup block will be 1G.
  * The following will be 4G, 16G, 64G, 256G and 1T.
  */
@@ -192,6 +205,7 @@
 #define OCFS2_HEARTBEAT_FL	(0x00000200)	/* Heartbeat area */
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 #define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
+#define OCFS2_QUOTA_FL		(0x00001000)	/* Quota file */

 /*
  * Flags on ocfs2_dinode.i_dyn_features
@@ -329,13 +343,17 @@ enum {
 #define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
 	HEARTBEAT_SYSTEM_INODE,
 	GLOBAL_BITMAP_SYSTEM_INODE,
-#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE
+	USER_QUOTA_SYSTEM_INODE,
+	GROUP_QUOTA_SYSTEM_INODE,
+#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
 	ORPHAN_DIR_SYSTEM_INODE,
 	EXTENT_ALLOC_SYSTEM_INODE,
 	INODE_ALLOC_SYSTEM_INODE,
 	JOURNAL_SYSTEM_INODE,
 	LOCAL_ALLOC_SYSTEM_INODE,
 	TRUNCATE_LOG_SYSTEM_INODE,
+	LOCAL_USER_QUOTA_SYSTEM_INODE,
+	LOCAL_GROUP_QUOTA_SYSTEM_INODE,
 	NUM_SYSTEM_INODES
 };

@@ -349,6 +367,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
 	[SLOT_MAP_SYSTEM_INODE]			= { "slot_map", 0, S_IFREG | 0644 },
 	[HEARTBEAT_SYSTEM_INODE]		= { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
 	[GLOBAL_BITMAP_SYSTEM_INODE]		= { "global_bitmap", 0, S_IFREG | 0644 },
+	[USER_QUOTA_SYSTEM_INODE]		= { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+	[GROUP_QUOTA_SYSTEM_INODE]		= { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 },

 	/* Slot-specific system inodes (one copy per slot) */
 	[ORPHAN_DIR_SYSTEM_INODE]	= { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
@@ -356,7 +376,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
356 [INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 }, 376 [INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
357 [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 }, 377 [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
358 [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }, 378 [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
359 [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 } 379 [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 },
380 [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
381 [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
360}; 382};
361 383
362/* Parameter passed from mount.ocfs2 to module */ 384/* Parameter passed from mount.ocfs2 to module */
@@ -410,6 +432,22 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
410#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super)) 432#define OCFS2_RAW_SB(dinode) (&((dinode)->id2.i_super))
411 433
412/* 434/*
435 * Block checking structure. This is used in metadata to validate the
436 * contents. If OCFS2_FEATURE_INCOMPAT_META_ECC is not set, it is all
437 * zeros.
438 */
439struct ocfs2_block_check {
440/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */
441 __le16 bc_ecc; /* Single-error-correction parity vector.
 442 This is a simple Hamming code dependent
443 on the blocksize. OCFS2's maximum
444 blocksize, 4K, requires 16 parity bits,
445 so we fit in __le16. */
446 __le16 bc_reserved1;
447/*08*/
448};
449
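A rough userspace sketch of the scheme the comments above describe (illustrative only, not code from this patch): bc_crc32e is the standard reflected CRC-32 (polynomial 0xEDB88320, the "802.3 Ethernet II" CRC), and bc_ecc is sized by the Hamming bound 2^p >= d + p + 1, which is why a 4KB block needs exactly 16 parity bits. The CRC init/xor-out conventions below are assumptions and need not match the on-disk value bit-for-bit.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Plain reflected CRC-32 over a buffer (polynomial 0xEDB88320). */
static uint32_t crc32_sketch(const void *buf, size_t len)
{
        const uint8_t *p = buf;
        uint32_t crc = 0xFFFFFFFFu;

        while (len--) {
                crc ^= *p++;
                for (int bit = 0; bit < 8; bit++)
                        crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1u));
        }
        return ~crc;
}

int main(void)
{
        uint8_t block[4096] = { 0 };            /* stand-in metadata block */
        unsigned d = sizeof(block) * 8, p = 0;

        /* Hamming bound: smallest p with 2^p >= d + p + 1; d = 32768 gives p = 16. */
        while ((1u << p) < d + p + 1)
                p++;

        printf("crc32e = 0x%08x, parity bits needed = %u\n",
               crc32_sketch(block, sizeof(block)), p);
        return 0;
}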
450/*
413 * On disk extent record for OCFS2 451 * On disk extent record for OCFS2
414 * It describes a range of clusters on disk. 452 * It describes a range of clusters on disk.
415 * 453 *
@@ -496,7 +534,7 @@ struct ocfs2_truncate_log {
496struct ocfs2_extent_block 534struct ocfs2_extent_block
497{ 535{
498/*00*/ __u8 h_signature[8]; /* Signature for verification */ 536/*00*/ __u8 h_signature[8]; /* Signature for verification */
499 __le64 h_reserved1; 537 struct ocfs2_block_check h_check; /* Error checking */
500/*10*/ __le16 h_suballoc_slot; /* Slot suballocator this 538/*10*/ __le16 h_suballoc_slot; /* Slot suballocator this
501 extent_header belongs to */ 539 extent_header belongs to */
502 __le16 h_suballoc_bit; /* Bit offset in suballocator 540 __le16 h_suballoc_bit; /* Bit offset in suballocator
@@ -666,7 +704,8 @@ struct ocfs2_dinode {
666 was set in i_flags */ 704 was set in i_flags */
667 __le16 i_dyn_features; 705 __le16 i_dyn_features;
668 __le64 i_xattr_loc; 706 __le64 i_xattr_loc;
669/*80*/ __le64 i_reserved2[7]; 707/*80*/ struct ocfs2_block_check i_check; /* Error checking */
708/*88*/ __le64 i_reserved2[6];
670/*B8*/ union { 709/*B8*/ union {
671 __le64 i_pad1; /* Generic way to refer to this 710 __le64 i_pad1; /* Generic way to refer to this
672 64bit union */ 711 64bit union */
@@ -715,6 +754,34 @@ struct ocfs2_dir_entry {
715} __attribute__ ((packed)); 754} __attribute__ ((packed));
716 755
717/* 756/*
757 * Per-block record for the unindexed directory btree. This is carefully
758 * crafted so that the rec_len and name_len records of an ocfs2_dir_entry are
759 * mirrored. That way, the directory manipulation code needs a minimal amount
760 * of update.
761 *
762 * NOTE: Keep this structure aligned to a multiple of 4 bytes.
763 */
764struct ocfs2_dir_block_trailer {
765/*00*/ __le64 db_compat_inode; /* Always zero. Was inode */
766
767 __le16 db_compat_rec_len; /* Backwards compatible with
768 * ocfs2_dir_entry. */
769 __u8 db_compat_name_len; /* Always zero. Was name_len */
770 __u8 db_reserved0;
771 __le16 db_reserved1;
772 __le16 db_free_rec_len; /* Size of largest empty hole
773 * in this block. (unused) */
774/*10*/ __u8 db_signature[8]; /* Signature for verification */
775 __le64 db_reserved2;
776 __le64 db_free_next; /* Next block in list (unused) */
777/*20*/ __le64 db_blkno; /* Offset on disk, in blocks */
778 __le64 db_parent_dinode; /* dinode which owns me, in
779 blocks */
780/*30*/ struct ocfs2_block_check db_check; /* Error checking */
781/*40*/
782};
783
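A minimal sketch of how such a trailer can be placed at the tail of a directory block (the struct below is a simplified, hypothetical stand-in, not the on-disk layout above): because db_compat_inode stays zero and db_compat_rec_len covers the trailer itself, code walking ocfs2_dir_entry records simply sees one final unused entry.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in mirroring the "compat" fields discussed above. */
struct dir_trailer_sketch {
        uint64_t db_compat_inode;       /* always 0: looks like an unused dirent */
        uint16_t db_compat_rec_len;     /* rec_len covering the trailer itself */
        uint8_t  db_compat_name_len;    /* always 0 */
        uint8_t  db_reserved0;
        /* signature, blkno, parent and check fields omitted in this sketch */
};

/* Drop a trailer into the last bytes of a directory block buffer. */
static void init_trailer(uint8_t *block, size_t blocksize)
{
        struct dir_trailer_sketch *t =
                (void *)(block + blocksize - sizeof(*t));

        memset(t, 0, sizeof(*t));
        t->db_compat_rec_len = (uint16_t)sizeof(*t);    /* dirent walkers skip it */
}

int main(void)
{
        uint8_t block[4096] = { 0 };

        init_trailer(block, sizeof(block));
        printf("trailer of %zu bytes placed at offset %zu\n",
               sizeof(struct dir_trailer_sketch),
               sizeof(block) - sizeof(struct dir_trailer_sketch));
        return 0;
}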
784/*
718 * On disk allocator group structure for OCFS2 785 * On disk allocator group structure for OCFS2
719 */ 786 */
720struct ocfs2_group_desc 787struct ocfs2_group_desc
@@ -733,7 +800,8 @@ struct ocfs2_group_desc
733/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in 800/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
734 blocks */ 801 blocks */
735 __le64 bg_blkno; /* Offset on disk, in blocks */ 802 __le64 bg_blkno; /* Offset on disk, in blocks */
736/*30*/ __le64 bg_reserved2[2]; 803/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
804 __le64 bg_reserved2;
737/*40*/ __u8 bg_bitmap[0]; 805/*40*/ __u8 bg_bitmap[0];
738}; 806};
739 807
@@ -776,7 +844,12 @@ struct ocfs2_xattr_header {
776 in this extent record, 844 in this extent record,
777 only valid in the first 845 only valid in the first
778 bucket. */ 846 bucket. */
779 __le64 xh_csum; 847 struct ocfs2_block_check xh_check; /* Error checking
848 (Note, this is only
849 used for xattr
850 buckets. A block uses
851 xb_check and sets
852 this field to zero.) */
780 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ 853 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
781}; 854};
782 855
@@ -827,7 +900,7 @@ struct ocfs2_xattr_block {
827 block group */ 900 block group */
828 __le32 xb_fs_generation; /* Must match super block */ 901 __le32 xb_fs_generation; /* Must match super block */
829/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ 902/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
830 __le64 xb_csum; 903 struct ocfs2_block_check xb_check; /* Error checking */
831/*20*/ __le16 xb_flags; /* Indicates whether this block contains 904/*20*/ __le16 xb_flags; /* Indicates whether this block contains
832 real xattr or a xattr tree. */ 905 real xattr or a xattr tree. */
833 __le16 xb_reserved0; 906 __le16 xb_reserved0;
@@ -868,6 +941,128 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
868 return xe->xe_type & OCFS2_XATTR_TYPE_MASK; 941 return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
869} 942}
870 943
944/*
945 * On disk structures for global quota file
946 */
947
948/* Magic numbers and known versions for global quota files */
949#define OCFS2_GLOBAL_QMAGICS {\
950 0x0cf52470, /* USRQUOTA */ \
951 0x0cf52471 /* GRPQUOTA */ \
952}
953
954#define OCFS2_GLOBAL_QVERSIONS {\
955 0, \
956 0, \
957}
958
959
960/* Each block of each quota file has a certain fixed number of bytes reserved
961 * for OCFS2 internal use at its end. OCFS2 can use it for things like
962 * checksums, etc. */
963#define OCFS2_QBLK_RESERVED_SPACE 8
964
965/* Generic header of all quota files */
966struct ocfs2_disk_dqheader {
967 __le32 dqh_magic; /* Magic number identifying file */
968 __le32 dqh_version; /* Quota format version */
969};
970
971#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
972
973/* Information header of global quota file (immediately follows the generic
974 * header) */
975struct ocfs2_global_disk_dqinfo {
976/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */
977 __le32 dqi_igrace; /* Grace time for inode softlimit excess */
978 __le32 dqi_syncms; /* Time after which we sync local changes to
979 * global quota file */
980 __le32 dqi_blocks; /* Number of blocks in quota file */
981/*10*/ __le32 dqi_free_blk; /* First free block in quota file */
982 __le32 dqi_free_entry; /* First block with free dquot entry in quota
983 * file */
984};
985
986/* Structure with global user / group information. We reserve some space
987 * for future use. */
988struct ocfs2_global_disk_dqblk {
989/*00*/ __le32 dqb_id; /* ID the structure belongs to */
990 __le32 dqb_use_count; /* Number of nodes having reference to this structure */
991 __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
992/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */
993 __le64 dqb_curinodes; /* current # allocated inodes */
994/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */
995 __le64 dqb_bsoftlimit; /* preferred limit on disk space */
996/*30*/ __le64 dqb_curspace; /* current space occupied */
997 __le64 dqb_btime; /* time limit for excessive disk use */
998/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */
999 __le64 dqb_pad1;
1000/*50*/ __le64 dqb_pad2;
1001};
1002
1003/*
1004 * On-disk structures for local quota file
1005 */
1006
1007/* Magic numbers and known versions for local quota files */
1008#define OCFS2_LOCAL_QMAGICS {\
1009 0x0cf524c0, /* USRQUOTA */ \
1010 0x0cf524c1 /* GRPQUOTA */ \
1011}
1012
1013#define OCFS2_LOCAL_QVERSIONS {\
1014 0, \
1015 0, \
1016}
1017
1018/* Quota flags in dqinfo header */
1019#define OLQF_CLEAN 0x0001 /* Quota file is empty (this should be after\
1020 * quota has been cleanly turned off) */
1021
1022#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
1023
1024/* Information header of local quota file (immediately follows the generic
1025 * header) */
1026struct ocfs2_local_disk_dqinfo {
1027 __le32 dqi_flags; /* Flags for quota file */
1028 __le32 dqi_chunks; /* Number of chunks of quota structures
1029 * with a bitmap */
1030 __le32 dqi_blocks; /* Number of blocks allocated for quota file */
1031};
1032
1033/* Header of one chunk of a quota file */
1034struct ocfs2_local_disk_chunk {
1035 __le32 dqc_free; /* Number of free entries in the bitmap */
1036 u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
1037 * chunk of quota file */
1038};
1039
1040/* One entry in local quota file */
1041struct ocfs2_local_disk_dqblk {
1042/*00*/ __le64 dqb_id; /* id this quota applies to */
1043 __le64 dqb_spacemod; /* Change in the amount of used space */
1044/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */
1045};
1046
1047
1048/*
1049 * The quota trailer lives at the end of each quota block.
1050 */
1051
1052struct ocfs2_disk_dqtrailer {
1053/*00*/ struct ocfs2_block_check dq_check; /* Error checking */
1054/*08*/ /* Cannot be larger than OCFS2_QBLK_RESERVED_SPACE */
1055};
1056
1057static inline struct ocfs2_disk_dqtrailer *ocfs2_block_dqtrailer(int blocksize,
1058 void *buf)
1059{
1060 char *ptr = buf;
1061 ptr += blocksize - OCFS2_QBLK_RESERVED_SPACE;
1062
1063 return (struct ocfs2_disk_dqtrailer *)ptr;
1064}
1065
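Tying the quota pieces above together, a minimal userspace sketch (assumptions: the magic is the USRQUOTA value from OCFS2_GLOBAL_QMAGICS, the info header immediately follows the 8-byte generic header, and the trailer occupies the last OCFS2_QBLK_RESERVED_SPACE bytes of every quota block; this is an illustration, not a parser for real images):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t get_le32(const uint8_t *p)      /* byte-order independent read */
{
        return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
               (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

#define QBLK_RESERVED           8u              /* OCFS2_QBLK_RESERVED_SPACE */
#define GLOBAL_USR_MAGIC        0x0cf52470u     /* first OCFS2_GLOBAL_QMAGICS entry */

int main(void)
{
        uint8_t block[4096];

        memset(block, 0, sizeof(block));
        /* Pretend this is block 0 of aquota.user: dqh_magic in little-endian. */
        block[0] = 0x70; block[1] = 0x24; block[2] = 0xf5; block[3] = 0x0c;

        printf("magic ok: %d, version: %u\n",
               get_le32(block) == GLOBAL_USR_MAGIC, get_le32(block + 4));
        printf("global info header at offset %d\n", 8); /* OCFS2_GLOBAL_INFO_OFF */
        printf("block trailer at offset %zu\n", sizeof(block) - QBLK_RESERVED);
        return 0;
}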
871#ifdef __KERNEL__ 1066#ifdef __KERNEL__
872static inline int ocfs2_fast_symlink_chars(struct super_block *sb) 1067static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
873{ 1068{
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
deleted file mode 100644
index b91c78f8f558..000000000000
--- a/fs/ocfs2/ocfs2_jbd_compat.h
+++ /dev/null
@@ -1,82 +0,0 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * ocfs2_jbd_compat.h
5 *
6 * Compatibility defines for JBD.
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_JBD_COMPAT_H
21#define OCFS2_JBD_COMPAT_H
22
23#ifndef CONFIG_OCFS2_COMPAT_JBD
24# error Should not have been included
25#endif
26
27struct jbd2_inode {
28 unsigned int dummy;
29};
30
31#define JBD2_BARRIER JFS_BARRIER
32#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
33
34#define jbd2_journal_ack_err journal_ack_err
35#define jbd2_journal_clear_err journal_clear_err
36#define jbd2_journal_destroy journal_destroy
37#define jbd2_journal_dirty_metadata journal_dirty_metadata
38#define jbd2_journal_errno journal_errno
39#define jbd2_journal_extend journal_extend
40#define jbd2_journal_flush journal_flush
41#define jbd2_journal_force_commit journal_force_commit
42#define jbd2_journal_get_write_access journal_get_write_access
43#define jbd2_journal_get_undo_access journal_get_undo_access
44#define jbd2_journal_init_inode journal_init_inode
45#define jbd2_journal_invalidatepage journal_invalidatepage
46#define jbd2_journal_load journal_load
47#define jbd2_journal_lock_updates journal_lock_updates
48#define jbd2_journal_restart journal_restart
49#define jbd2_journal_start journal_start
50#define jbd2_journal_start_commit journal_start_commit
51#define jbd2_journal_stop journal_stop
52#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
53#define jbd2_journal_unlock_updates journal_unlock_updates
54#define jbd2_journal_wipe journal_wipe
55#define jbd2_log_wait_commit log_wait_commit
56
57static inline int jbd2_journal_file_inode(handle_t *handle,
58 struct jbd2_inode *inode)
59{
60 return 0;
61}
62
63static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
64 loff_t new_size)
65{
66 return 0;
67}
68
69static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
70 struct inode *inode)
71{
72 return;
73}
74
75static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
76 struct jbd2_inode *jinode)
77{
78 return;
79}
80
81
82#endif /* OCFS2_JBD_COMPAT_H */
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 82c200f7a8f1..eb6f50c9ceca 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -46,6 +46,7 @@ enum ocfs2_lock_type {
46 OCFS2_LOCK_TYPE_DENTRY, 46 OCFS2_LOCK_TYPE_DENTRY,
47 OCFS2_LOCK_TYPE_OPEN, 47 OCFS2_LOCK_TYPE_OPEN,
48 OCFS2_LOCK_TYPE_FLOCK, 48 OCFS2_LOCK_TYPE_FLOCK,
49 OCFS2_LOCK_TYPE_QINFO,
49 OCFS2_NUM_LOCK_TYPES 50 OCFS2_NUM_LOCK_TYPES
50}; 51};
51 52
@@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
77 case OCFS2_LOCK_TYPE_FLOCK: 78 case OCFS2_LOCK_TYPE_FLOCK:
78 c = 'F'; 79 c = 'F';
79 break; 80 break;
81 case OCFS2_LOCK_TYPE_QINFO:
82 c = 'Q';
83 break;
80 default: 84 default:
81 c = '\0'; 85 c = '\0';
82 } 86 }
@@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
95 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", 99 [OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
96 [OCFS2_LOCK_TYPE_OPEN] = "Open", 100 [OCFS2_LOCK_TYPE_OPEN] = "Open",
97 [OCFS2_LOCK_TYPE_FLOCK] = "Flock", 101 [OCFS2_LOCK_TYPE_FLOCK] = "Flock",
102 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
98}; 103};
99 104
100static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 105static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
new file mode 100644
index 000000000000..7365e2e08706
--- /dev/null
+++ b/fs/ocfs2/quota.h
@@ -0,0 +1,119 @@
1/*
2 * quota.h for OCFS2
3 *
4 * On disk quota structures for local and global quota file, in-memory
5 * structures.
6 *
7 */
8
9#ifndef _OCFS2_QUOTA_H
10#define _OCFS2_QUOTA_H
11
12#include <linux/types.h>
13#include <linux/slab.h>
14#include <linux/quota.h>
15#include <linux/list.h>
16#include <linux/dqblk_qtree.h>
17
18#include "ocfs2.h"
19
20/* Common stuff */
21/* id number of quota format */
22#define QFMT_OCFS2 3
23
24/*
25 * In-memory structures
26 */
27struct ocfs2_dquot {
28 struct dquot dq_dquot; /* Generic VFS dquot */
29 loff_t dq_local_off; /* Offset in the local quota file */
30 struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
31 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
32 s64 dq_origspace; /* Last globally synced space usage */
33 s64 dq_originodes; /* Last globally synced inode usage */
34};
35
36/* Description of one chunk to recover in memory */
37struct ocfs2_recovery_chunk {
38 struct list_head rc_list; /* List of chunks */
39 int rc_chunk; /* Chunk number */
40 unsigned long *rc_bitmap; /* Bitmap of entries to recover */
41};
42
43struct ocfs2_quota_recovery {
44 struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */
45};
46
47/* In-memory structure with quota header information */
48struct ocfs2_mem_dqinfo {
49 unsigned int dqi_type; /* Quota type this structure describes */
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
57 struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
58 int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
59 struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
60 struct buffer_head *dqi_ibh; /* Buffer with information header */
61 struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
62 struct delayed_work dqi_sync_work; /* Work for syncing dquots */
63 struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
64 * information, in case we
65 * enable quotas on file
66 * needing it */
67};
68
69static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
70{
71 return container_of(dquot, struct ocfs2_dquot, dq_dquot);
72}
73
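OCFS2_DQUOT() above is the usual embed-and-recover idiom: struct dquot is embedded as the first member, so any struct dquot pointer handed back by the generic quota code can be converted back into the ocfs2 wrapper. A standalone sketch of the same pattern (all names below are made up for illustration):

#include <stddef.h>
#include <stdio.h>

#define container_of_sketch(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct generic_obj { int id; };                 /* stands in for struct dquot */

struct wrapper {                                /* stands in for struct ocfs2_dquot */
        struct generic_obj obj;                 /* embedded generic part */
        long private_state;                     /* filesystem-specific extras */
};

int main(void)
{
        struct wrapper w = { .obj = { .id = 42 }, .private_state = 7 };
        struct generic_obj *g = &w.obj;         /* what generic code passes around */
        struct wrapper *back = container_of_sketch(g, struct wrapper, obj);

        printf("id=%d private=%ld\n", back->obj.id, back->private_state);
        return 0;
}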
74struct ocfs2_quota_chunk {
75 struct list_head qc_chunk; /* List of quotafile chunks */
76 int qc_num; /* Number of quota chunk */
77 struct buffer_head *qc_headerbh; /* Buffer head with chunk header */
78};
79
80extern struct kmem_cache *ocfs2_dquot_cachep;
81extern struct kmem_cache *ocfs2_qf_chunk_cachep;
82
83extern struct qtree_fmt_operations ocfs2_global_ops;
84
85struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
86 struct ocfs2_super *osb, int slot_num);
87int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
88 struct ocfs2_quota_recovery *rec,
89 int slot_num);
90void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec);
91ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
92 size_t len, loff_t off);
93ssize_t ocfs2_quota_write(struct super_block *sb, int type,
94 const char *data, size_t len, loff_t off);
95int ocfs2_global_read_info(struct super_block *sb, int type);
96int ocfs2_global_write_info(struct super_block *sb, int type);
97int ocfs2_global_read_dquot(struct dquot *dquot);
98int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
99static inline int ocfs2_sync_dquot(struct dquot *dquot)
100{
101 return __ocfs2_sync_dquot(dquot, 0);
102}
103static inline int ocfs2_global_release_dquot(struct dquot *dquot)
104{
105 return __ocfs2_sync_dquot(dquot, 1);
106}
107
108int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
109void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
110int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
111 struct buffer_head **bh);
112
113extern struct dquot_operations ocfs2_quota_operations;
114extern struct quota_format_type ocfs2_quota_format;
115
116int ocfs2_quota_setup(void);
117void ocfs2_quota_shutdown(void);
118
119#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
new file mode 100644
index 000000000000..6aff8f2d3e49
--- /dev/null
+++ b/fs/ocfs2/quota_global.c
@@ -0,0 +1,1025 @@
1/*
2 * Implementation of operations over global quota file
3 */
4#include <linux/spinlock.h>
5#include <linux/fs.h>
6#include <linux/quota.h>
7#include <linux/quotaops.h>
8#include <linux/dqblk_qtree.h>
9#include <linux/jiffies.h>
10#include <linux/writeback.h>
11#include <linux/workqueue.h>
12
13#define MLOG_MASK_PREFIX ML_QUOTA
14#include <cluster/masklog.h>
15
16#include "ocfs2_fs.h"
17#include "ocfs2.h"
18#include "alloc.h"
19#include "blockcheck.h"
20#include "inode.h"
21#include "journal.h"
22#include "file.h"
23#include "sysfile.h"
24#include "dlmglue.h"
25#include "uptodate.h"
26#include "quota.h"
27
28static struct workqueue_struct *ocfs2_quota_wq = NULL;
29
30static void qsync_work_fn(struct work_struct *work);
31
32static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
33{
34 struct ocfs2_global_disk_dqblk *d = dp;
35 struct mem_dqblk *m = &dquot->dq_dqb;
36
37 /* Update from disk only entries not set by the admin */
38 if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) {
39 m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
40 m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
41 }
42 if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
43 m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
44 if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) {
45 m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
46 m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
47 }
48 if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
49 m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
50 if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags))
51 m->dqb_btime = le64_to_cpu(d->dqb_btime);
52 if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags))
53 m->dqb_itime = le64_to_cpu(d->dqb_itime);
54 OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count);
55}
56
57static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
58{
59 struct ocfs2_global_disk_dqblk *d = dp;
60 struct mem_dqblk *m = &dquot->dq_dqb;
61
62 d->dqb_id = cpu_to_le32(dquot->dq_id);
63 d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
64 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
65 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
66 d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
67 d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
68 d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72}
73
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
75{
76 struct ocfs2_global_disk_dqblk *d = dp;
77 struct ocfs2_mem_dqinfo *oinfo =
78 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
79
80 if (qtree_entry_unused(&oinfo->dqi_gi, dp))
81 return 0;
82 return le32_to_cpu(d->dqb_id) == dquot->dq_id;
83}
84
85struct qtree_fmt_operations ocfs2_global_ops = {
86 .mem2disk_dqblk = ocfs2_global_mem2diskdqb,
87 .disk2mem_dqblk = ocfs2_global_disk2memdqb,
88 .is_id = ocfs2_global_is_id,
89};
90
91static int ocfs2_validate_quota_block(struct super_block *sb,
92 struct buffer_head *bh)
93{
94 struct ocfs2_disk_dqtrailer *dqt =
95 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
96
97 mlog(0, "Validating quota block %llu\n",
98 (unsigned long long)bh->b_blocknr);
99
100 BUG_ON(!buffer_uptodate(bh));
101
102 /*
103 * If the ecc fails, we return the error but otherwise
104 * leave the filesystem running. We know any error is
105 * local to this block.
106 */
107 return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check);
108}
109
110int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
111 struct buffer_head **bh)
112{
113 int rc = 0;
114 struct buffer_head *tmp = *bh;
115
116 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
117 ocfs2_validate_quota_block);
118 if (rc)
119 mlog_errno(rc);
120
121 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
122 if (!rc && !*bh)
123 *bh = tmp;
124
125 return rc;
126}
127
128static int ocfs2_get_quota_block(struct inode *inode, int block,
129 struct buffer_head **bh)
130{
131 u64 pblock, pcount;
132 int err;
133
134 down_read(&OCFS2_I(inode)->ip_alloc_sem);
135 err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
136 up_read(&OCFS2_I(inode)->ip_alloc_sem);
137 if (err) {
138 mlog_errno(err);
139 return err;
140 }
141 *bh = sb_getblk(inode->i_sb, pblock);
142 if (!*bh) {
143 err = -EIO;
144 mlog_errno(err);
145 }
 146 return err;
147}
148
149/* Read data from global quotafile - avoid pagecache and such because we cannot
 150 * afford to acquire the locks... We use the quota cluster lock to serialize
151 * operations. Caller is responsible for acquiring it. */
152ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
153 size_t len, loff_t off)
154{
155 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
156 struct inode *gqinode = oinfo->dqi_gqinode;
157 loff_t i_size = i_size_read(gqinode);
158 int offset = off & (sb->s_blocksize - 1);
159 sector_t blk = off >> sb->s_blocksize_bits;
160 int err = 0;
161 struct buffer_head *bh;
162 size_t toread, tocopy;
163
164 if (off > i_size)
165 return 0;
166 if (off + len > i_size)
167 len = i_size - off;
168 toread = len;
169 while (toread > 0) {
170 tocopy = min_t(size_t, (sb->s_blocksize - offset), toread);
171 bh = NULL;
172 err = ocfs2_read_quota_block(gqinode, blk, &bh);
173 if (err) {
174 mlog_errno(err);
175 return err;
176 }
177 memcpy(data, bh->b_data + offset, tocopy);
178 brelse(bh);
179 offset = 0;
180 toread -= tocopy;
181 data += tocopy;
182 blk++;
183 }
184 return len;
185}
186
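For a concrete feel of the loop above, the same offset/block arithmetic with illustrative numbers (a 4096-byte block size is assumed):

#include <stddef.h>
#include <stdio.h>

int main(void)
{
        const size_t blocksize = 4096;
        long long off = 4000;                   /* byte offset into the quota file */
        size_t toread = 500;
        size_t offset = off & (blocksize - 1);  /* offset inside the first block */
        long long blk = off >> 12;              /* 4096 == 1 << 12 */

        while (toread > 0) {
                size_t tocopy = blocksize - offset < toread ?
                                blocksize - offset : toread;

                printf("read block %lld, copy %zu bytes from offset %zu\n",
                       blk, tocopy, offset);
                offset = 0;
                toread -= tocopy;
                blk++;
        }
        /* Prints: block 0, 96 bytes from offset 4000; block 1, 404 bytes from 0. */
        return 0;
}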
187/* Write to quotafile (we know the transaction is already started and has
188 * enough credits) */
189ssize_t ocfs2_quota_write(struct super_block *sb, int type,
190 const char *data, size_t len, loff_t off)
191{
192 struct mem_dqinfo *info = sb_dqinfo(sb, type);
193 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
194 struct inode *gqinode = oinfo->dqi_gqinode;
195 int offset = off & (sb->s_blocksize - 1);
196 sector_t blk = off >> sb->s_blocksize_bits;
197 int err = 0, new = 0, ja_type;
198 struct buffer_head *bh = NULL;
199 handle_t *handle = journal_current_handle();
200
201 if (!handle) {
202 mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
203 "because transaction was not started.\n",
204 (unsigned long long)off, (unsigned long long)len);
205 return -EIO;
206 }
207 if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) {
208 WARN_ON(1);
209 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
210 }
211
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
215 err = ocfs2_extend_no_holes(gqinode, off + len, off);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
217 if (err < 0)
218 goto out;
219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh,
221 off + len);
222 if (err < 0)
223 goto out;
224 new = 1;
225 }
226 /* Not rewriting whole block? */
227 if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
228 !new) {
229 err = ocfs2_read_quota_block(gqinode, blk, &bh);
230 ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
231 } else {
232 err = ocfs2_get_quota_block(gqinode, blk, &bh);
233 ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
234 }
235 if (err) {
236 mlog_errno(err);
237 return err;
238 }
239 lock_buffer(bh);
240 if (new)
241 memset(bh->b_data, 0, sb->s_blocksize);
242 memcpy(bh->b_data + offset, data, len);
243 flush_dcache_page(bh->b_page);
244 set_buffer_uptodate(bh);
245 unlock_buffer(bh);
246 ocfs2_set_buffer_uptodate(gqinode, bh);
247 err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type);
248 if (err < 0) {
249 brelse(bh);
250 goto out;
251 }
252 err = ocfs2_journal_dirty(handle, bh);
253 brelse(bh);
254 if (err < 0)
255 goto out;
256out:
257 if (err) {
258 mutex_unlock(&gqinode->i_mutex);
259 mlog_errno(err);
260 return err;
261 }
262 gqinode->i_version++;
263 ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
264 mutex_unlock(&gqinode->i_mutex);
265 return len;
266}
267
268int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
269{
270 int status;
271 struct buffer_head *bh = NULL;
272
273 status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
274 if (status < 0)
275 return status;
276 spin_lock(&dq_data_lock);
277 if (!oinfo->dqi_gqi_count++)
278 oinfo->dqi_gqi_bh = bh;
279 else
280 WARN_ON(bh != oinfo->dqi_gqi_bh);
281 spin_unlock(&dq_data_lock);
282 return 0;
283}
284
285void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
286{
287 ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
288 brelse(oinfo->dqi_gqi_bh);
289 spin_lock(&dq_data_lock);
290 if (!--oinfo->dqi_gqi_count)
291 oinfo->dqi_gqi_bh = NULL;
292 spin_unlock(&dq_data_lock);
293}
294
295/* Read information header from global quota file */
296int ocfs2_global_read_info(struct super_block *sb, int type)
297{
298 struct inode *gqinode = NULL;
299 unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
300 GROUP_QUOTA_SYSTEM_INODE };
301 struct ocfs2_global_disk_dqinfo dinfo;
302 struct mem_dqinfo *info = sb_dqinfo(sb, type);
303 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
304 int status;
305
306 mlog_entry_void();
307
308 /* Read global header */
309 gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
310 OCFS2_INVALID_SLOT);
311 if (!gqinode) {
312 mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n",
313 type);
314 status = -EINVAL;
315 goto out_err;
316 }
317 oinfo->dqi_gi.dqi_sb = sb;
318 oinfo->dqi_gi.dqi_type = type;
319 ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
320 oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
321 oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
322 oinfo->dqi_gqi_bh = NULL;
323 oinfo->dqi_gqi_count = 0;
324 oinfo->dqi_gqinode = gqinode;
325 status = ocfs2_lock_global_qf(oinfo, 0);
326 if (status < 0) {
327 mlog_errno(status);
328 goto out_err;
329 }
330 status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
331 sizeof(struct ocfs2_global_disk_dqinfo),
332 OCFS2_GLOBAL_INFO_OFF);
333 ocfs2_unlock_global_qf(oinfo, 0);
334 if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
335 mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
336 status);
337 if (status >= 0)
338 status = -EIO;
339 mlog_errno(status);
340 goto out_err;
341 }
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
349 oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits;
350 oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize -
351 OCFS2_QBLK_RESERVED_SPACE;
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff);
356
357out_err:
358 mlog_exit(status);
359 return status;
360}
361
 362/* Write information to global quota file. Expects exclusive lock on quota
363 * file inode and quota info */
364static int __ocfs2_global_write_info(struct super_block *sb, int type)
365{
366 struct mem_dqinfo *info = sb_dqinfo(sb, type);
367 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
368 struct ocfs2_global_disk_dqinfo dinfo;
369 ssize_t size;
370
371 spin_lock(&dq_data_lock);
372 info->dqi_flags &= ~DQF_INFO_DIRTY;
373 dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
374 dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
375 spin_unlock(&dq_data_lock);
376 dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
377 dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
378 dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
379 dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry);
380 size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
381 sizeof(struct ocfs2_global_disk_dqinfo),
382 OCFS2_GLOBAL_INFO_OFF);
383 if (size != sizeof(struct ocfs2_global_disk_dqinfo)) {
384 mlog(ML_ERROR, "Cannot write global quota info structure\n");
385 if (size >= 0)
386 size = -EIO;
387 return size;
388 }
389 return 0;
390}
391
392int ocfs2_global_write_info(struct super_block *sb, int type)
393{
394 int err;
395 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
396
397 err = ocfs2_qinfo_lock(info, 1);
398 if (err < 0)
399 return err;
400 err = __ocfs2_global_write_info(sb, type);
401 ocfs2_qinfo_unlock(info, 1);
402 return err;
403}
404
405/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot)
408{
409 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info =
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
412
413 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0)
415 goto out;
416 err = qtree_read_dquot(&info->dqi_gi, dquot);
417 if (err < 0)
418 goto out_qlock;
419 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
422 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 err = ocfs2_qinfo_lock(info, 1);
425 if (err < 0)
426 goto out_qlock;
427 ex = 1;
428 }
429 err = qtree_write_dquot(&info->dqi_gi, dquot);
430 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
431 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
432 if (!err)
433 err = err2;
434 }
435out_qlock:
436 if (ex)
437 ocfs2_qinfo_unlock(info, 1);
438 ocfs2_qinfo_unlock(info, 0);
439out:
440 if (err < 0)
441 mlog_errno(err);
442 return err;
443}
444
445/* Sync local information about quota modifications with global quota file.
446 * Caller must have started the transaction and obtained exclusive lock for
447 * global quota file inode */
448int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
449{
450 int err, err2;
451 struct super_block *sb = dquot->dq_sb;
452 int type = dquot->dq_type;
453 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
454 struct ocfs2_global_disk_dqblk dqblk;
455 s64 spacechange, inodechange;
456 time_t olditime, oldbtime;
457
458 err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
459 sizeof(struct ocfs2_global_disk_dqblk),
460 dquot->dq_off);
461 if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
462 if (err >= 0) {
463 mlog(ML_ERROR, "Short read from global quota file "
464 "(%u read)\n", err);
465 err = -EIO;
466 }
467 goto out;
468 }
469
470 /* Update space and inode usage. Get also other information from
471 * global quota file so that we don't overwrite any changes there.
 472 */
473 spin_lock(&dq_data_lock);
474 spacechange = dquot->dq_dqb.dqb_curspace -
475 OCFS2_DQUOT(dquot)->dq_origspace;
476 inodechange = dquot->dq_dqb.dqb_curinodes -
477 OCFS2_DQUOT(dquot)->dq_originodes;
478 olditime = dquot->dq_dqb.dqb_itime;
479 oldbtime = dquot->dq_dqb.dqb_btime;
480 ocfs2_global_disk2memdqb(dquot, &dqblk);
481 mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
482 dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
483 dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
484 if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
485 dquot->dq_dqb.dqb_curspace += spacechange;
486 if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
487 dquot->dq_dqb.dqb_curinodes += inodechange;
 488 /* Set the space grace time properly... */
489 if (dquot->dq_dqb.dqb_bsoftlimit &&
490 dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
491 if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
492 oldbtime > 0) {
493 if (dquot->dq_dqb.dqb_btime > 0)
494 dquot->dq_dqb.dqb_btime =
495 min(dquot->dq_dqb.dqb_btime, oldbtime);
496 else
497 dquot->dq_dqb.dqb_btime = oldbtime;
498 }
499 } else {
500 dquot->dq_dqb.dqb_btime = 0;
501 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
502 }
 503 /* Set the inode grace time properly... */
504 if (dquot->dq_dqb.dqb_isoftlimit &&
505 dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
506 if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
507 olditime > 0) {
508 if (dquot->dq_dqb.dqb_itime > 0)
509 dquot->dq_dqb.dqb_itime =
510 min(dquot->dq_dqb.dqb_itime, olditime);
511 else
512 dquot->dq_dqb.dqb_itime = olditime;
513 }
514 } else {
515 dquot->dq_dqb.dqb_itime = 0;
516 clear_bit(DQ_INODES_B, &dquot->dq_flags);
517 }
518 /* All information is properly updated, clear the flags */
519 __clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
520 __clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
521 __clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
522 __clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
523 __clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
524 __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
525 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
526 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
527 spin_unlock(&dq_data_lock);
528 err = ocfs2_qinfo_lock(info, freeing);
529 if (err < 0) {
530 mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
531 " (type=%d, id=%u)\n", dquot->dq_type,
532 (unsigned)dquot->dq_id);
533 goto out;
534 }
535 if (freeing)
536 OCFS2_DQUOT(dquot)->dq_use_count--;
537 err = qtree_write_dquot(&info->dqi_gi, dquot);
538 if (err < 0)
539 goto out_qlock;
540 if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
541 err = qtree_release_dquot(&info->dqi_gi, dquot);
542 if (info_dirty(sb_dqinfo(sb, type))) {
543 err2 = __ocfs2_global_write_info(sb, type);
544 if (!err)
545 err = err2;
546 }
547 }
548out_qlock:
549 ocfs2_qinfo_unlock(info, freeing);
550out:
551 if (err < 0)
552 mlog_errno(err);
553 return err;
554}
555
556/*
557 * Functions for periodic syncing of dquots with global file
558 */
559static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
560{
561 handle_t *handle;
562 struct super_block *sb = dquot->dq_sb;
563 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
564 struct ocfs2_super *osb = OCFS2_SB(sb);
565 int status = 0;
566
567 mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id,
568 dquot->dq_type, type, sb->s_id);
569 if (type != dquot->dq_type)
570 goto out;
571 status = ocfs2_lock_global_qf(oinfo, 1);
572 if (status < 0)
573 goto out;
574
575 handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
576 if (IS_ERR(handle)) {
577 status = PTR_ERR(handle);
578 mlog_errno(status);
579 goto out_ilock;
580 }
581 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
582 status = ocfs2_sync_dquot(dquot);
583 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
584 if (status < 0)
585 mlog_errno(status);
586 /* We have to write local structure as well... */
587 dquot_mark_dquot_dirty(dquot);
588 status = dquot_commit(dquot);
589 if (status < 0)
590 mlog_errno(status);
591 ocfs2_commit_trans(osb, handle);
592out_ilock:
593 ocfs2_unlock_global_qf(oinfo, 1);
594out:
595 mlog_exit(status);
596 return status;
597}
598
599static void qsync_work_fn(struct work_struct *work)
600{
601 struct ocfs2_mem_dqinfo *oinfo = container_of(work,
602 struct ocfs2_mem_dqinfo,
603 dqi_sync_work.work);
604 struct super_block *sb = oinfo->dqi_gqinode->i_sb;
605
606 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
607 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
608 oinfo->dqi_syncjiff);
609}
610
611/*
612 * Wrappers for generic quota functions
613 */
614
615static int ocfs2_write_dquot(struct dquot *dquot)
616{
617 handle_t *handle;
618 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
619 int status = 0;
620
621 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
622
623 handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
624 if (IS_ERR(handle)) {
625 status = PTR_ERR(handle);
626 mlog_errno(status);
627 goto out;
628 }
629 status = dquot_commit(dquot);
630 ocfs2_commit_trans(osb, handle);
631out:
632 mlog_exit(status);
633 return status;
634}
635
636int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
637{
638 struct ocfs2_mem_dqinfo *oinfo;
639 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
640 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
641
642 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
643 return 0;
644
645 oinfo = sb_dqinfo(sb, type)->dqi_priv;
646 /* We modify tree, leaf block, global info, local chunk header,
647 * global and local inode */
648 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
649 2 * OCFS2_INODE_UPDATE_CREDITS;
650}
651
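A small sketch mirroring the sum above, with the constants kept as parameters so it does not guess values defined elsewhere (e.g. OCFS2_INODE_UPDATE_CREDITS); the per-term comments follow the explanation given in the function:

#include <stdio.h>

static int qdel_credits_sketch(int qtree_depth, int inode_update_credits)
{
        return qtree_depth                      /* tree blocks down to the leaf   */
             + 2                                /* leaf block + global info block */
             + 1                                /* local chunk header             */
             + 2 * inode_update_credits;        /* global and local quota inodes  */
}

int main(void)
{
        /* e.g. a depth-2 quota tree and one credit per inode update */
        printf("credits = %d\n", qdel_credits_sketch(2, 1));   /* prints 7 */
        return 0;
}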
652static int ocfs2_release_dquot(struct dquot *dquot)
653{
654 handle_t *handle;
655 struct ocfs2_mem_dqinfo *oinfo =
656 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
657 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
658 int status = 0;
659
660 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
661
662 status = ocfs2_lock_global_qf(oinfo, 1);
663 if (status < 0)
664 goto out;
665 handle = ocfs2_start_trans(osb,
666 ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
667 if (IS_ERR(handle)) {
668 status = PTR_ERR(handle);
669 mlog_errno(status);
670 goto out_ilock;
671 }
672 status = dquot_release(dquot);
673 ocfs2_commit_trans(osb, handle);
674out_ilock:
675 ocfs2_unlock_global_qf(oinfo, 1);
676out:
677 mlog_exit(status);
678 return status;
679}
680
681int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
682{
683 struct ocfs2_mem_dqinfo *oinfo;
684 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
685 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
686 struct ocfs2_dinode *lfe, *gfe;
687
688 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
689 return 0;
690
691 oinfo = sb_dqinfo(sb, type)->dqi_priv;
692 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
693 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
694 /* We can extend local file + global file. In local file we
695 * can modify info, chunk header block and dquot block. In
696 * global file we can modify info, tree and leaf block */
697 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
698 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
699 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
700}
701
702static int ocfs2_acquire_dquot(struct dquot *dquot)
703{
704 handle_t *handle;
705 struct ocfs2_mem_dqinfo *oinfo =
706 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
707 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
708 int status = 0;
709
710 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
711 /* We need an exclusive lock, because we're going to update use count
712 * and instantiate possibly new dquot structure */
713 status = ocfs2_lock_global_qf(oinfo, 1);
714 if (status < 0)
715 goto out;
716 handle = ocfs2_start_trans(osb,
717 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
718 if (IS_ERR(handle)) {
719 status = PTR_ERR(handle);
720 mlog_errno(status);
721 goto out_ilock;
722 }
723 status = dquot_acquire(dquot);
724 ocfs2_commit_trans(osb, handle);
725out_ilock:
726 ocfs2_unlock_global_qf(oinfo, 1);
727out:
728 mlog_exit(status);
729 return status;
730}
731
732static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
733{
734 unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
735 (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) |
736 (1 << (DQ_LASTSET_B + QIF_INODES_B)) |
737 (1 << (DQ_LASTSET_B + QIF_SPACE_B)) |
738 (1 << (DQ_LASTSET_B + QIF_BTIME_B)) |
739 (1 << (DQ_LASTSET_B + QIF_ITIME_B));
740 int sync = 0;
741 int status;
742 struct super_block *sb = dquot->dq_sb;
743 int type = dquot->dq_type;
744 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
745 handle_t *handle;
746 struct ocfs2_super *osb = OCFS2_SB(sb);
747
748 mlog_entry("id=%u, type=%d", dquot->dq_id, type);
749 dquot_mark_dquot_dirty(dquot);
750
 751 /* In case the user set some limits, sync the dquot immediately to the global
 752 * quota file so that the information propagates more quickly */
753 spin_lock(&dq_data_lock);
754 if (dquot->dq_flags & mask)
755 sync = 1;
756 spin_unlock(&dq_data_lock);
757 if (!sync) {
758 status = ocfs2_write_dquot(dquot);
759 goto out;
760 }
761 status = ocfs2_lock_global_qf(oinfo, 1);
762 if (status < 0)
763 goto out;
764 handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
765 if (IS_ERR(handle)) {
766 status = PTR_ERR(handle);
767 mlog_errno(status);
768 goto out_ilock;
769 }
770 status = ocfs2_sync_dquot(dquot);
771 if (status < 0) {
772 mlog_errno(status);
773 goto out_trans;
774 }
775 /* Now write updated local dquot structure */
776 status = dquot_commit(dquot);
777out_trans:
778 ocfs2_commit_trans(osb, handle);
779out_ilock:
780 ocfs2_unlock_global_qf(oinfo, 1);
781out:
782 mlog_exit(status);
783 return status;
784}
785
786/* This should happen only after set_dqinfo(). */
787static int ocfs2_write_info(struct super_block *sb, int type)
788{
789 handle_t *handle;
790 int status = 0;
791 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
792
793 mlog_entry_void();
794
795 status = ocfs2_lock_global_qf(oinfo, 1);
796 if (status < 0)
797 goto out;
798 handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS);
799 if (IS_ERR(handle)) {
800 status = PTR_ERR(handle);
801 mlog_errno(status);
802 goto out_ilock;
803 }
804 status = dquot_commit_info(sb, type);
805 ocfs2_commit_trans(OCFS2_SB(sb), handle);
806out_ilock:
807 ocfs2_unlock_global_qf(oinfo, 1);
808out:
809 mlog_exit(status);
810 return status;
811}
812
 813/* This is difficult. We have to lock the quota inode and start a transaction
 814 * in this function, but we don't want to take the penalty of an exclusive
 815 * quota file lock when we are just going to use cached structures. So
 816 * we just take the read lock, check whether we have the dquot cached and,
 817 * if so, we don't have to take the write lock... */
818static int ocfs2_dquot_initialize(struct inode *inode, int type)
819{
820 handle_t *handle = NULL;
821 int status = 0;
822 struct super_block *sb = inode->i_sb;
823 struct ocfs2_mem_dqinfo *oinfo;
824 int exclusive = 0;
825 int cnt;
826 qid_t id;
827
828 mlog_entry_void();
829
830 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
831 if (type != -1 && cnt != type)
832 continue;
833 if (!sb_has_quota_active(sb, cnt))
834 continue;
835 oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
836 status = ocfs2_lock_global_qf(oinfo, 0);
837 if (status < 0)
838 goto out;
 839 /* This is just a performance optimization, not a reliable test.
 840 * Since we hold an inode lock, no one can actually release
841 * the structure until we are finished with initialization. */
842 if (inode->i_dquot[cnt] != NODQUOT) {
843 ocfs2_unlock_global_qf(oinfo, 0);
844 continue;
845 }
846 /* When we have inode lock, we know that no dquot_release() can
847 * run and thus we can safely check whether we need to
848 * read+modify global file to get quota information or whether
849 * our node already has it. */
850 if (cnt == USRQUOTA)
851 id = inode->i_uid;
852 else if (cnt == GRPQUOTA)
853 id = inode->i_gid;
854 else
855 BUG();
856 /* Obtain exclusion from quota off... */
857 down_write(&sb_dqopt(sb)->dqptr_sem);
858 exclusive = !dquot_is_cached(sb, id, cnt);
859 up_write(&sb_dqopt(sb)->dqptr_sem);
860 if (exclusive) {
861 status = ocfs2_lock_global_qf(oinfo, 1);
862 if (status < 0) {
863 exclusive = 0;
864 mlog_errno(status);
865 goto out_ilock;
866 }
867 handle = ocfs2_start_trans(OCFS2_SB(sb),
868 ocfs2_calc_qinit_credits(sb, cnt));
869 if (IS_ERR(handle)) {
870 status = PTR_ERR(handle);
871 mlog_errno(status);
872 goto out_ilock;
873 }
874 }
875 dquot_initialize(inode, cnt);
876 if (exclusive) {
877 ocfs2_commit_trans(OCFS2_SB(sb), handle);
878 ocfs2_unlock_global_qf(oinfo, 1);
879 }
880 ocfs2_unlock_global_qf(oinfo, 0);
881 }
882 mlog_exit(0);
883 return 0;
884out_ilock:
885 if (exclusive)
886 ocfs2_unlock_global_qf(oinfo, 1);
887 ocfs2_unlock_global_qf(oinfo, 0);
888out:
889 mlog_exit(status);
890 return status;
891}
892
893static int ocfs2_dquot_drop_slow(struct inode *inode)
894{
895 int status = 0;
896 int cnt;
897 int got_lock[MAXQUOTAS] = {0, 0};
898 handle_t *handle;
899 struct super_block *sb = inode->i_sb;
900 struct ocfs2_mem_dqinfo *oinfo;
901
902 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
903 if (!sb_has_quota_active(sb, cnt))
904 continue;
905 oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
906 status = ocfs2_lock_global_qf(oinfo, 1);
907 if (status < 0)
908 goto out;
909 got_lock[cnt] = 1;
910 }
911 handle = ocfs2_start_trans(OCFS2_SB(sb),
912 ocfs2_calc_qinit_credits(sb, USRQUOTA) +
913 ocfs2_calc_qinit_credits(sb, GRPQUOTA));
914 if (IS_ERR(handle)) {
915 status = PTR_ERR(handle);
916 mlog_errno(status);
917 goto out;
918 }
919 dquot_drop(inode);
920 ocfs2_commit_trans(OCFS2_SB(sb), handle);
921out:
922 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
923 if (got_lock[cnt]) {
924 oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
925 ocfs2_unlock_global_qf(oinfo, 1);
926 }
927 return status;
928}
929
930/* See the comment before ocfs2_dquot_initialize. */
931static int ocfs2_dquot_drop(struct inode *inode)
932{
933 int status = 0;
934 struct super_block *sb = inode->i_sb;
935 struct ocfs2_mem_dqinfo *oinfo;
936 int exclusive = 0;
937 int cnt;
938 int got_lock[MAXQUOTAS] = {0, 0};
939
940 mlog_entry_void();
941 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
942 if (!sb_has_quota_active(sb, cnt))
943 continue;
944 oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
945 status = ocfs2_lock_global_qf(oinfo, 0);
946 if (status < 0)
947 goto out;
948 got_lock[cnt] = 1;
949 }
 950 /* Lock against anyone releasing references so that when we check
 951 * we know we are not going to be the last ones to release the dquot */
952 down_write(&sb_dqopt(sb)->dqptr_sem);
953 /* Urgh, this is a terrible hack :( */
954 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
955 if (inode->i_dquot[cnt] != NODQUOT &&
956 atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
957 exclusive = 1;
958 break;
959 }
960 }
961 if (!exclusive)
962 dquot_drop_locked(inode);
963 up_write(&sb_dqopt(sb)->dqptr_sem);
964out:
965 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
966 if (got_lock[cnt]) {
967 oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
968 ocfs2_unlock_global_qf(oinfo, 0);
969 }
 970 /* In case we bailed out because we had to do expensive locking,
971 * do it now... */
972 if (exclusive)
973 status = ocfs2_dquot_drop_slow(inode);
974 mlog_exit(status);
975 return status;
976}
977
978static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
979{
980 struct ocfs2_dquot *dquot =
981 kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS);
982
983 if (!dquot)
984 return NULL;
985 return &dquot->dq_dquot;
986}
987
988static void ocfs2_destroy_dquot(struct dquot *dquot)
989{
990 kmem_cache_free(ocfs2_dquot_cachep, dquot);
991}
992
993struct dquot_operations ocfs2_quota_operations = {
994 .initialize = ocfs2_dquot_initialize,
995 .drop = ocfs2_dquot_drop,
996 .alloc_space = dquot_alloc_space,
997 .alloc_inode = dquot_alloc_inode,
998 .free_space = dquot_free_space,
999 .free_inode = dquot_free_inode,
1000 .transfer = dquot_transfer,
1001 .write_dquot = ocfs2_write_dquot,
1002 .acquire_dquot = ocfs2_acquire_dquot,
1003 .release_dquot = ocfs2_release_dquot,
1004 .mark_dirty = ocfs2_mark_dquot_dirty,
1005 .write_info = ocfs2_write_info,
1006 .alloc_dquot = ocfs2_alloc_dquot,
1007 .destroy_dquot = ocfs2_destroy_dquot,
1008};
1009
1010int ocfs2_quota_setup(void)
1011{
1012 ocfs2_quota_wq = create_workqueue("o2quot");
1013 if (!ocfs2_quota_wq)
1014 return -ENOMEM;
1015 return 0;
1016}
1017
1018void ocfs2_quota_shutdown(void)
1019{
1020 if (ocfs2_quota_wq) {
1021 flush_workqueue(ocfs2_quota_wq);
1022 destroy_workqueue(ocfs2_quota_wq);
1023 ocfs2_quota_wq = NULL;
1024 }
1025}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
new file mode 100644
index 000000000000..07deec5e9721
--- /dev/null
+++ b/fs/ocfs2/quota_local.c
@@ -0,0 +1,1253 @@
1/*
2 * Implementation of operations over local quota file
3 */
4
5#include <linux/fs.h>
6#include <linux/quota.h>
7#include <linux/quotaops.h>
8#include <linux/module.h>
9
10#define MLOG_MASK_PREFIX ML_QUOTA
11#include <cluster/masklog.h>
12
13#include "ocfs2_fs.h"
14#include "ocfs2.h"
15#include "inode.h"
16#include "alloc.h"
17#include "file.h"
18#include "buffer_head_io.h"
19#include "journal.h"
20#include "sysfile.h"
21#include "dlmglue.h"
22#include "quota.h"
23
24/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
26{
27 return ((sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) /
28 sizeof(struct ocfs2_local_disk_dqblk));
29}
30
31/* Number of blocks with entries in one chunk */
32static inline unsigned int ol_chunk_blocks(struct super_block *sb)
33{
34 return ((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
35 OCFS2_QBLK_RESERVED_SPACE) << 3) /
36 ol_quota_entries_per_block(sb);
37}
38
39/* Number of entries in a chunk bitmap */
40static unsigned int ol_chunk_entries(struct super_block *sb)
41{
42 return ol_chunk_blocks(sb) * ol_quota_entries_per_block(sb);
43}
44
45/* Offset of the chunk in quota file */
46static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
47{
48 /* 1 block for local quota file info, 1 block per chunk for chunk info */
49 return 1 + (ol_chunk_blocks(sb) + 1) * c;
50}
51
52static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off)
53{
54 int epb = ol_quota_entries_per_block(sb);
55
56 return ol_quota_chunk_block(sb, c) + 1 + off / epb;
57}
58
59static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off)
60{
61 int epb = ol_quota_entries_per_block(sb);
62
63 return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
64}
65
66/* Offset of the dquot structure in the quota file */
67static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
68{
69 return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) +
70 ol_dqblk_block_off(sb, c, off);
71}
72
73/* Compute block number from given offset */
74static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
75{
76 return off >> sb->s_blocksize_bits;
77}
78
79static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
80{
81 return off & ((1 << sb->s_blocksize_bits) - 1);
82}
83
84/* Compute offset in the chunk of a structure with the given offset */
85static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off)
86{
87 int epb = ol_quota_entries_per_block(sb);
88
89 return ((off >> sb->s_blocksize_bits) -
90 ol_quota_chunk_block(sb, c) - 1) * epb
91 + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) /
92 sizeof(struct ocfs2_local_disk_dqblk);
93}
94
 95/* Modify a buffer head under a journal transaction and write it into the fs */
96static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
97 void (*modify)(struct buffer_head *, void *), void *private)
98{
99 struct super_block *sb = inode->i_sb;
100 handle_t *handle;
101 int status;
102
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
104 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle);
106 mlog_errno(status);
107 return status;
108 }
109 status = ocfs2_journal_access_dq(handle, inode, bh,
110 OCFS2_JOURNAL_ACCESS_WRITE);
111 if (status < 0) {
112 mlog_errno(status);
113 ocfs2_commit_trans(OCFS2_SB(sb), handle);
114 return status;
115 }
116 lock_buffer(bh);
117 modify(bh, private);
118 unlock_buffer(bh);
119 status = ocfs2_journal_dirty(handle, bh);
120 if (status < 0) {
121 mlog_errno(status);
122 ocfs2_commit_trans(OCFS2_SB(sb), handle);
123 return status;
124 }
125 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
126 if (status < 0) {
127 mlog_errno(status);
128 return status;
129 }
130 return 0;
131}
132
133/* Check whether we understand the format of the quota files */
134static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
135{
136 unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS;
137 unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS;
138 unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS;
139 unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
140 unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
141 GROUP_QUOTA_SYSTEM_INODE };
142 struct buffer_head *bh = NULL;
143 struct inode *linode = sb_dqopt(sb)->files[type];
144 struct inode *ginode = NULL;
145 struct ocfs2_disk_dqheader *dqhead;
146 int status, ret = 0;
147
148 /* First check whether we understand local quota file */
149 status = ocfs2_read_quota_block(linode, 0, &bh);
150 if (status) {
151 mlog_errno(status);
152 mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
153 type);
154 goto out_err;
155 }
156 dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
157 if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) {
158 mlog(ML_ERROR, "quota file magic does not match (%u != %u),"
159 " type=%d\n", le32_to_cpu(dqhead->dqh_magic),
160 lmagics[type], type);
161 goto out_err;
162 }
163 if (le32_to_cpu(dqhead->dqh_version) != lversions[type]) {
164 mlog(ML_ERROR, "quota file version does not match (%u != %u),"
165 " type=%d\n", le32_to_cpu(dqhead->dqh_version),
166 lversions[type], type);
167 goto out_err;
168 }
169 brelse(bh);
170 bh = NULL;
171
172 /* Next check whether we understand global quota file */
173 ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
174 OCFS2_INVALID_SLOT);
175 if (!ginode) {
176 mlog(ML_ERROR, "cannot get global quota file inode "
177 "(type=%d)\n", type);
178 goto out_err;
179 }
180 /* Since the header is read only, we don't care about locking */
181 status = ocfs2_read_quota_block(ginode, 0, &bh);
182 if (status) {
183 mlog_errno(status);
184 mlog(ML_ERROR, "failed to read global quota file header "
185 "(type=%d)\n", type);
186 goto out_err;
187 }
188 dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
189 if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) {
190 mlog(ML_ERROR, "global quota file magic does not match "
191 "(%u != %u), type=%d\n",
192 le32_to_cpu(dqhead->dqh_magic), gmagics[type], type);
193 goto out_err;
194 }
195 if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) {
196 mlog(ML_ERROR, "global quota file version does not match "
197 "(%u != %u), type=%d\n",
198 le32_to_cpu(dqhead->dqh_version), gversions[type],
199 type);
200 goto out_err;
201 }
202
203 ret = 1;
204out_err:
205 brelse(bh);
206 iput(ginode);
207 return ret;
208}
209
210/* Release given list of quota file chunks */
211static void ocfs2_release_local_quota_bitmaps(struct list_head *head)
212{
213 struct ocfs2_quota_chunk *pos, *next;
214
215 list_for_each_entry_safe(pos, next, head, qc_chunk) {
216 list_del(&pos->qc_chunk);
217 brelse(pos->qc_headerbh);
218 kmem_cache_free(ocfs2_qf_chunk_cachep, pos);
219 }
220}
221
222/* Load quota bitmaps into memory */
223static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
224 struct ocfs2_local_disk_dqinfo *ldinfo,
225 struct list_head *head)
226{
227 struct ocfs2_quota_chunk *newchunk;
228 int i, status;
229
230 INIT_LIST_HEAD(head);
231 for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) {
232 newchunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
233 if (!newchunk) {
234 ocfs2_release_local_quota_bitmaps(head);
235 return -ENOMEM;
236 }
237 newchunk->qc_num = i;
238 newchunk->qc_headerbh = NULL;
239 status = ocfs2_read_quota_block(inode,
240 ol_quota_chunk_block(inode->i_sb, i),
241 &newchunk->qc_headerbh);
242 if (status) {
243 mlog_errno(status);
244 kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
245 ocfs2_release_local_quota_bitmaps(head);
246 return status;
247 }
248 list_add_tail(&newchunk->qc_chunk, head);
249 }
250 return 0;
251}
252
253static void olq_update_info(struct buffer_head *bh, void *private)
254{
255 struct mem_dqinfo *info = private;
256 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
257 struct ocfs2_local_disk_dqinfo *ldinfo;
258
259 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
260 OCFS2_LOCAL_INFO_OFF);
261 spin_lock(&dq_data_lock);
262 ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
263 ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
264 ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
265 spin_unlock(&dq_data_lock);
266}
267
268static int ocfs2_add_recovery_chunk(struct super_block *sb,
269 struct ocfs2_local_disk_chunk *dchunk,
270 int chunk,
271 struct list_head *head)
272{
273 struct ocfs2_recovery_chunk *rc;
274
275 rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS);
276 if (!rc)
277 return -ENOMEM;
278 rc->rc_chunk = chunk;
279 rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
280 if (!rc->rc_bitmap) {
281 kfree(rc);
282 return -ENOMEM;
283 }
284 memcpy(rc->rc_bitmap, dchunk->dqc_bitmap,
285 (ol_chunk_entries(sb) + 7) >> 3);
286 list_add_tail(&rc->rc_list, head);
287 return 0;
288}
289
290static void free_recovery_list(struct list_head *head)
291{
292 struct ocfs2_recovery_chunk *next;
293 struct ocfs2_recovery_chunk *rchunk;
294
295 list_for_each_entry_safe(rchunk, next, head, rc_list) {
296 list_del(&rchunk->rc_list);
297 kfree(rchunk->rc_bitmap);
298 kfree(rchunk);
299 }
300}
301
302void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec)
303{
304 int type;
305
306 for (type = 0; type < MAXQUOTAS; type++)
307 free_recovery_list(&(rec->r_list[type]));
308 kfree(rec);
309}
310
311/* Load the entries in our quota file that we have to recover */
312static int ocfs2_recovery_load_quota(struct inode *lqinode,
313 struct ocfs2_local_disk_dqinfo *ldinfo,
314 int type,
315 struct list_head *head)
316{
317 struct super_block *sb = lqinode->i_sb;
318 struct buffer_head *hbh;
319 struct ocfs2_local_disk_chunk *dchunk;
320 int i, chunks = le32_to_cpu(ldinfo->dqi_chunks);
321 int status = 0;
322
323 for (i = 0; i < chunks; i++) {
324 hbh = NULL;
325 status = ocfs2_read_quota_block(lqinode,
326 ol_quota_chunk_block(sb, i),
327 &hbh);
328 if (status) {
329 mlog_errno(status);
330 break;
331 }
332 dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
333 if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb))
334 status = ocfs2_add_recovery_chunk(sb, dchunk, i, head);
335 brelse(hbh);
336 if (status < 0)
337 break;
338 }
339 if (status < 0)
340 free_recovery_list(head);
341 return status;
342}
343
344static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void)
345{
346 int type;
347 struct ocfs2_quota_recovery *rec;
348
349 rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS);
350 if (!rec)
351 return NULL;
352 for (type = 0; type < MAXQUOTAS; type++)
353 INIT_LIST_HEAD(&(rec->r_list[type]));
354 return rec;
355}
356
357/* Load information we need for quota recovery into memory */
358struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
359 struct ocfs2_super *osb,
360 int slot_num)
361{
362 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
363 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
364 unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
365 LOCAL_GROUP_QUOTA_SYSTEM_INODE };
366 struct super_block *sb = osb->sb;
367 struct ocfs2_local_disk_dqinfo *ldinfo;
368 struct inode *lqinode;
369 struct buffer_head *bh;
370 int type;
371 int status = 0;
372 struct ocfs2_quota_recovery *rec;
373
374 mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num);
375 rec = ocfs2_alloc_quota_recovery();
376 if (!rec)
377 return ERR_PTR(-ENOMEM);
378 /* First init... */
379
380 for (type = 0; type < MAXQUOTAS; type++) {
381 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
382 continue;
383 /* At this point, journal of the slot is already replayed so
384 * we can trust metadata and data of the quota file */
385 lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
386 if (!lqinode) {
387 status = -ENOENT;
388 goto out;
389 }
390 status = ocfs2_inode_lock_full(lqinode, NULL, 1,
391 OCFS2_META_LOCK_RECOVERY);
392 if (status < 0) {
393 mlog_errno(status);
394 goto out_put;
395 }
396 /* Now read local header */
397 bh = NULL;
398 status = ocfs2_read_quota_block(lqinode, 0, &bh);
399 if (status) {
400 mlog_errno(status);
401 mlog(ML_ERROR, "failed to read quota file info header "
402 "(slot=%d type=%d)\n", slot_num, type);
403 goto out_lock;
404 }
405 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
406 OCFS2_LOCAL_INFO_OFF);
407 status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
408 &rec->r_list[type]);
409 brelse(bh);
410out_lock:
411 ocfs2_inode_unlock(lqinode, 1);
412out_put:
413 iput(lqinode);
414 if (status < 0)
415 break;
416 }
417out:
418 if (status < 0) {
419 ocfs2_free_quota_recovery(rec);
420 rec = ERR_PTR(status);
421 }
422 return rec;
423}
424
425/* Sync changes from the local quota file into the global quota file and
426 * reinitialize the local quota file.
427 * The function expects the local quota file to be already locked and
428 * dqonoff_mutex to be held. */
429static int ocfs2_recover_local_quota_file(struct inode *lqinode,
430 int type,
431 struct ocfs2_quota_recovery *rec)
432{
433 struct super_block *sb = lqinode->i_sb;
434 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
435 struct ocfs2_local_disk_chunk *dchunk;
436 struct ocfs2_local_disk_dqblk *dqblk;
437 struct dquot *dquot;
438 handle_t *handle;
439 struct buffer_head *hbh = NULL, *qbh = NULL;
440 int status = 0;
441 int bit, chunk;
442 struct ocfs2_recovery_chunk *rchunk, *next;
443 qsize_t spacechange, inodechange;
444
445 mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
446
447 status = ocfs2_lock_global_qf(oinfo, 1);
448 if (status < 0)
449 goto out;
450
451 list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
452 chunk = rchunk->rc_chunk;
453 hbh = NULL;
454 status = ocfs2_read_quota_block(lqinode,
455 ol_quota_chunk_block(sb, chunk),
456 &hbh);
457 if (status) {
458 mlog_errno(status);
459 break;
460 }
461 dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
462 for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
463 qbh = NULL;
464 status = ocfs2_read_quota_block(lqinode,
465 ol_dqblk_block(sb, chunk, bit),
466 &qbh);
467 if (status) {
468 mlog_errno(status);
469 break;
470 }
471 dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
472 ol_dqblk_block_off(sb, chunk, bit));
473 dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type);
474 if (!dquot) {
475 status = -EIO;
476 mlog(ML_ERROR, "Failed to get quota structure "
477 "for id %u, type %d. Cannot finish quota "
478 "file recovery.\n",
479 (unsigned)le64_to_cpu(dqblk->dqb_id),
480 type);
481 goto out_put_bh;
482 }
483 handle = ocfs2_start_trans(OCFS2_SB(sb),
484 OCFS2_QSYNC_CREDITS);
485 if (IS_ERR(handle)) {
486 status = PTR_ERR(handle);
487 mlog_errno(status);
488 goto out_put_dquot;
489 }
490 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
491 spin_lock(&dq_data_lock);
492 /* Add usage from quota entry into quota changes
493 * of our node. Auxiliary variables are important
494 * due to signedness */
495 spacechange = le64_to_cpu(dqblk->dqb_spacemod);
496 inodechange = le64_to_cpu(dqblk->dqb_inodemod);
497 dquot->dq_dqb.dqb_curspace += spacechange;
498 dquot->dq_dqb.dqb_curinodes += inodechange;
499 spin_unlock(&dq_data_lock);
500 /* We want to drop reference held by the crashed
501 * node. Since we have our own reference we know
502 * global structure actually won't be freed. */
503 status = ocfs2_global_release_dquot(dquot);
504 if (status < 0) {
505 mlog_errno(status);
506 goto out_commit;
507 }
508 /* Release local quota file entry */
509 status = ocfs2_journal_access_dq(handle, lqinode,
510 qbh, OCFS2_JOURNAL_ACCESS_WRITE);
511 if (status < 0) {
512 mlog_errno(status);
513 goto out_commit;
514 }
515 lock_buffer(qbh);
516 WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap));
517 ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
518 le32_add_cpu(&dchunk->dqc_free, 1);
519 unlock_buffer(qbh);
520 status = ocfs2_journal_dirty(handle, qbh);
521 if (status < 0)
522 mlog_errno(status);
523out_commit:
524 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
525 ocfs2_commit_trans(OCFS2_SB(sb), handle);
526out_put_dquot:
527 dqput(dquot);
528out_put_bh:
529 brelse(qbh);
530 if (status < 0)
531 break;
532 }
533 brelse(hbh);
534 list_del(&rchunk->rc_list);
535 kfree(rchunk->rc_bitmap);
536 kfree(rchunk);
537 if (status < 0)
538 break;
539 }
540 ocfs2_unlock_global_qf(oinfo, 1);
541out:
542 if (status < 0)
543 free_recovery_list(&(rec->r_list[type]));
544 mlog_exit(status);
545 return status;
546}
547
548/* Recover local quota files for given node different from us */
549int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
550 struct ocfs2_quota_recovery *rec,
551 int slot_num)
552{
553 unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
554 LOCAL_GROUP_QUOTA_SYSTEM_INODE };
555 struct super_block *sb = osb->sb;
556 struct ocfs2_local_disk_dqinfo *ldinfo;
557 struct buffer_head *bh;
558 handle_t *handle;
559 int type;
560 int status = 0;
561 struct inode *lqinode;
562 unsigned int flags;
563
564 mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num);
565 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
566 for (type = 0; type < MAXQUOTAS; type++) {
567 if (list_empty(&(rec->r_list[type])))
568 continue;
569 mlog(0, "Recovering quota in slot %d\n", slot_num);
570 lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
571 if (!lqinode) {
572 status = -ENOENT;
573 goto out;
574 }
575 status = ocfs2_inode_lock_full(lqinode, NULL, 1,
576 OCFS2_META_LOCK_NOQUEUE);
577 /* Someone else is holding the lock? Then he must be
578 * doing the recovery. Just skip the file... */
579 if (status == -EAGAIN) {
580 mlog(ML_NOTICE, "skipping quota recovery for slot %d "
581 "because quota file is locked.\n", slot_num);
582 status = 0;
583 goto out_put;
584 } else if (status < 0) {
585 mlog_errno(status);
586 goto out_put;
587 }
588 /* Now read local header */
589 bh = NULL;
590 status = ocfs2_read_quota_block(lqinode, 0, &bh);
591 if (status) {
592 mlog_errno(status);
593 mlog(ML_ERROR, "failed to read quota file info header "
594 "(slot=%d type=%d)\n", slot_num, type);
595 goto out_lock;
596 }
597 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
598 OCFS2_LOCAL_INFO_OFF);
599 /* Is recovery still needed? */
600 flags = le32_to_cpu(ldinfo->dqi_flags);
601 if (!(flags & OLQF_CLEAN))
602 status = ocfs2_recover_local_quota_file(lqinode,
603 type,
604 rec);
605 /* We don't want to mark file as clean when it is actually
606 * active */
607 if (slot_num == osb->slot_num)
608 goto out_bh;
609 /* Mark quota file as clean if we are recovering quota file of
610 * some other node. */
611 handle = ocfs2_start_trans(osb, 1);
612 if (IS_ERR(handle)) {
613 status = PTR_ERR(handle);
614 mlog_errno(status);
615 goto out_bh;
616 }
617 status = ocfs2_journal_access_dq(handle, lqinode, bh,
618 OCFS2_JOURNAL_ACCESS_WRITE);
619 if (status < 0) {
620 mlog_errno(status);
621 goto out_trans;
622 }
623 lock_buffer(bh);
624 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
625 unlock_buffer(bh);
626 status = ocfs2_journal_dirty(handle, bh);
627 if (status < 0)
628 mlog_errno(status);
629out_trans:
630 ocfs2_commit_trans(osb, handle);
631out_bh:
632 brelse(bh);
633out_lock:
634 ocfs2_inode_unlock(lqinode, 1);
635out_put:
636 iput(lqinode);
637 if (status < 0)
638 break;
639 }
640out:
641 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
642 kfree(rec);
643 return status;
644}
645
646/* Read information header from quota file */
647static int ocfs2_local_read_info(struct super_block *sb, int type)
648{
649 struct ocfs2_local_disk_dqinfo *ldinfo;
650 struct mem_dqinfo *info = sb_dqinfo(sb, type);
651 struct ocfs2_mem_dqinfo *oinfo;
652 struct inode *lqinode = sb_dqopt(sb)->files[type];
653 int status;
654 struct buffer_head *bh = NULL;
655 struct ocfs2_quota_recovery *rec;
656 int locked = 0;
657
658 info->dqi_maxblimit = 0x7fffffffffffffffLL;
659 info->dqi_maxilimit = 0x7fffffffffffffffLL;
660 oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
661 if (!oinfo) {
662 mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
663 " info.");
664 goto out_err;
665 }
666 info->dqi_priv = oinfo;
667 oinfo->dqi_type = type;
668 INIT_LIST_HEAD(&oinfo->dqi_chunk);
669 oinfo->dqi_rec = NULL;
670 oinfo->dqi_lqi_bh = NULL;
671 oinfo->dqi_ibh = NULL;
672
673 status = ocfs2_global_read_info(sb, type);
674 if (status < 0)
675 goto out_err;
676
677 status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
678 if (status < 0) {
679 mlog_errno(status);
680 goto out_err;
681 }
682 locked = 1;
683
684 /* Now read local header */
685 status = ocfs2_read_quota_block(lqinode, 0, &bh);
686 if (status) {
687 mlog_errno(status);
688 mlog(ML_ERROR, "failed to read quota file info header "
689 "(type=%d)\n", type);
690 goto out_err;
691 }
692 ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
693 OCFS2_LOCAL_INFO_OFF);
694 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
695 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
696 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
697 oinfo->dqi_ibh = bh;
698
 699	/* Did we crash while the local quota file was in use? */
700 if (!(info->dqi_flags & OLQF_CLEAN)) {
701 rec = OCFS2_SB(sb)->quota_rec;
702 if (!rec) {
703 rec = ocfs2_alloc_quota_recovery();
704 if (!rec) {
705 status = -ENOMEM;
706 mlog_errno(status);
707 goto out_err;
708 }
709 OCFS2_SB(sb)->quota_rec = rec;
710 }
711
712 status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
713 &rec->r_list[type]);
714 if (status < 0) {
715 mlog_errno(status);
716 goto out_err;
717 }
718 }
719
720 status = ocfs2_load_local_quota_bitmaps(lqinode,
721 ldinfo,
722 &oinfo->dqi_chunk);
723 if (status < 0) {
724 mlog_errno(status);
725 goto out_err;
726 }
727
728 /* Now mark quota file as used */
729 info->dqi_flags &= ~OLQF_CLEAN;
730 status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
731 if (status < 0) {
732 mlog_errno(status);
733 goto out_err;
734 }
735
736 return 0;
737out_err:
738 if (oinfo) {
739 iput(oinfo->dqi_gqinode);
740 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
741 ocfs2_lock_res_free(&oinfo->dqi_gqlock);
742 brelse(oinfo->dqi_lqi_bh);
743 if (locked)
744 ocfs2_inode_unlock(lqinode, 1);
745 ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
746 kfree(oinfo);
747 }
748 brelse(bh);
749 return -1;
750}
751
752/* Write local info to quota file */
753static int ocfs2_local_write_info(struct super_block *sb, int type)
754{
755 struct mem_dqinfo *info = sb_dqinfo(sb, type);
756 struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
757 ->dqi_ibh;
758 int status;
759
760 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
761 info);
762 if (status < 0) {
763 mlog_errno(status);
764 return -1;
765 }
766
767 return 0;
768}
769
770/* Release info from memory */
771static int ocfs2_local_free_info(struct super_block *sb, int type)
772{
773 struct mem_dqinfo *info = sb_dqinfo(sb, type);
774 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
775 struct ocfs2_quota_chunk *chunk;
776 struct ocfs2_local_disk_chunk *dchunk;
777 int mark_clean = 1, len;
778 int status;
779
 780	/* At this point we know there are no more dquots, so even if
 781	 * there is still a sync queued on the pdflush queue, it will find
 782	 * no dquots and return without doing anything */
783 cancel_delayed_work_sync(&oinfo->dqi_sync_work);
784 iput(oinfo->dqi_gqinode);
785 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
786 ocfs2_lock_res_free(&oinfo->dqi_gqlock);
787 list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
788 dchunk = (struct ocfs2_local_disk_chunk *)
789 (chunk->qc_headerbh->b_data);
790 if (chunk->qc_num < oinfo->dqi_chunks - 1) {
791 len = ol_chunk_entries(sb);
792 } else {
793 len = (oinfo->dqi_blocks -
794 ol_quota_chunk_block(sb, chunk->qc_num) - 1)
795 * ol_quota_entries_per_block(sb);
796 }
797 /* Not all entries free? Bug! */
798 if (le32_to_cpu(dchunk->dqc_free) != len) {
799 mlog(ML_ERROR, "releasing quota file with used "
800 "entries (type=%d)\n", type);
801 mark_clean = 0;
802 }
803 }
804 ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
805
806 /* dqonoff_mutex protects us against racing with recovery thread... */
807 if (oinfo->dqi_rec) {
808 ocfs2_free_quota_recovery(oinfo->dqi_rec);
809 mark_clean = 0;
810 }
811
812 if (!mark_clean)
813 goto out;
814
815 /* Mark local file as clean */
816 info->dqi_flags |= OLQF_CLEAN;
817 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
818 oinfo->dqi_ibh,
819 olq_update_info,
820 info);
821 if (status < 0) {
822 mlog_errno(status);
823 goto out;
824 }
825
826out:
827 ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
828 brelse(oinfo->dqi_ibh);
829 brelse(oinfo->dqi_lqi_bh);
830 kfree(oinfo);
831 return 0;
832}
833
834static void olq_set_dquot(struct buffer_head *bh, void *private)
835{
836 struct ocfs2_dquot *od = private;
837 struct ocfs2_local_disk_dqblk *dqblk;
838 struct super_block *sb = od->dq_dquot.dq_sb;
839
840 dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
841 + ol_dqblk_block_offset(sb, od->dq_local_off));
842
843 dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
844 spin_lock(&dq_data_lock);
845 dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
846 od->dq_origspace);
847 dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
848 od->dq_originodes);
849 spin_unlock(&dq_data_lock);
850 mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
851 od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
852 (long long)le64_to_cpu(dqblk->dqb_inodemod));
853}
854
855/* Write dquot to local quota file */
856static int ocfs2_local_write_dquot(struct dquot *dquot)
857{
858 struct super_block *sb = dquot->dq_sb;
859 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
860 struct buffer_head *bh = NULL;
861 int status;
862
863 status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
864 ol_dqblk_file_block(sb, od->dq_local_off),
865 &bh);
866 if (status) {
867 mlog_errno(status);
868 goto out;
869 }
870 status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh,
871 olq_set_dquot, od);
872 if (status < 0) {
873 mlog_errno(status);
874 goto out;
875 }
876out:
877 brelse(bh);
878 return status;
879}
880
881/* Find free entry in local quota file */
882static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
883 int type,
884 int *offset)
885{
886 struct mem_dqinfo *info = sb_dqinfo(sb, type);
887 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
888 struct ocfs2_quota_chunk *chunk;
889 struct ocfs2_local_disk_chunk *dchunk;
890 int found = 0, len;
891
892 list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
893 dchunk = (struct ocfs2_local_disk_chunk *)
894 chunk->qc_headerbh->b_data;
895 if (le32_to_cpu(dchunk->dqc_free) > 0) {
896 found = 1;
897 break;
898 }
899 }
900 if (!found)
901 return NULL;
902
903 if (chunk->qc_num < oinfo->dqi_chunks - 1) {
904 len = ol_chunk_entries(sb);
905 } else {
906 len = (oinfo->dqi_blocks -
907 ol_quota_chunk_block(sb, chunk->qc_num) - 1)
908 * ol_quota_entries_per_block(sb);
909 }
910
911 found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0);
912 /* We failed? */
913 if (found == len) {
914 mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u"
915 " entries free (type=%d)\n", chunk->qc_num,
916 le32_to_cpu(dchunk->dqc_free), type);
917 return ERR_PTR(-EIO);
918 }
919 *offset = found;
920 return chunk;
921}
922
923/* Add new chunk to the local quota file */
924static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
925 struct super_block *sb,
926 int type,
927 int *offset)
928{
929 struct mem_dqinfo *info = sb_dqinfo(sb, type);
930 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
931 struct inode *lqinode = sb_dqopt(sb)->files[type];
932 struct ocfs2_quota_chunk *chunk = NULL;
933 struct ocfs2_local_disk_chunk *dchunk;
934 int status;
935 handle_t *handle;
936 struct buffer_head *bh = NULL;
937 u64 p_blkno;
938
 939	/* We are protected by dqio_mutex so no locking is needed */
940 status = ocfs2_extend_no_holes(lqinode,
941 lqinode->i_size + 2 * sb->s_blocksize,
942 lqinode->i_size);
943 if (status < 0) {
944 mlog_errno(status);
945 goto out;
946 }
947 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
948 lqinode->i_size + 2 * sb->s_blocksize);
949 if (status < 0) {
950 mlog_errno(status);
951 goto out;
952 }
953
954 chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
955 if (!chunk) {
956 status = -ENOMEM;
957 mlog_errno(status);
958 goto out;
959 }
960
961 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
962 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
963 &p_blkno, NULL, NULL);
964 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
965 if (status < 0) {
966 mlog_errno(status);
967 goto out;
968 }
969 bh = sb_getblk(sb, p_blkno);
970 if (!bh) {
971 status = -ENOMEM;
972 mlog_errno(status);
973 goto out;
974 }
975 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
976
977 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
978 if (IS_ERR(handle)) {
979 status = PTR_ERR(handle);
980 mlog_errno(status);
981 goto out;
982 }
983
984 status = ocfs2_journal_access_dq(handle, lqinode, bh,
985 OCFS2_JOURNAL_ACCESS_WRITE);
986 if (status < 0) {
987 mlog_errno(status);
988 goto out_trans;
989 }
990 lock_buffer(bh);
991 dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb));
992 memset(dchunk->dqc_bitmap, 0,
993 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
994 OCFS2_QBLK_RESERVED_SPACE);
995 set_buffer_uptodate(bh);
996 unlock_buffer(bh);
997 status = ocfs2_journal_dirty(handle, bh);
998 if (status < 0) {
999 mlog_errno(status);
1000 goto out_trans;
1001 }
1002
1003 oinfo->dqi_blocks += 2;
1004 oinfo->dqi_chunks++;
1005 status = ocfs2_local_write_info(sb, type);
1006 if (status < 0) {
1007 mlog_errno(status);
1008 goto out_trans;
1009 }
1010 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
1011 if (status < 0) {
1012 mlog_errno(status);
1013 goto out;
1014 }
1015
1016 list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk);
1017 chunk->qc_num = list_entry(chunk->qc_chunk.prev,
1018 struct ocfs2_quota_chunk,
1019 qc_chunk)->qc_num + 1;
1020 chunk->qc_headerbh = bh;
1021 *offset = 0;
1022 return chunk;
1023out_trans:
1024 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1025out:
1026 brelse(bh);
1027 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1028 return ERR_PTR(status);
1029}
1030
1031/* Extend the local quota file so that it can hold another dquot entry */
1032static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1033 struct super_block *sb,
1034 int type,
1035 int *offset)
1036{
1037 struct mem_dqinfo *info = sb_dqinfo(sb, type);
1038 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
1039 struct ocfs2_quota_chunk *chunk;
1040 struct inode *lqinode = sb_dqopt(sb)->files[type];
1041 struct ocfs2_local_disk_chunk *dchunk;
1042 int epb = ol_quota_entries_per_block(sb);
1043 unsigned int chunk_blocks;
1044 int status;
1045 handle_t *handle;
1046
1047 if (list_empty(&oinfo->dqi_chunk))
1048 return ocfs2_local_quota_add_chunk(sb, type, offset);
1049 /* Is the last chunk full? */
1050 chunk = list_entry(oinfo->dqi_chunk.prev,
1051 struct ocfs2_quota_chunk, qc_chunk);
1052 chunk_blocks = oinfo->dqi_blocks -
1053 ol_quota_chunk_block(sb, chunk->qc_num) - 1;
1054 if (ol_chunk_blocks(sb) == chunk_blocks)
1055 return ocfs2_local_quota_add_chunk(sb, type, offset);
1056
1057	/* We are protected by dqio_mutex so no locking is needed */
1058 status = ocfs2_extend_no_holes(lqinode,
1059 lqinode->i_size + sb->s_blocksize,
1060 lqinode->i_size);
1061 if (status < 0) {
1062 mlog_errno(status);
1063 goto out;
1064 }
1065 status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
1066 lqinode->i_size + sb->s_blocksize);
1067 if (status < 0) {
1068 mlog_errno(status);
1069 goto out;
1070 }
1071 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
1072 if (IS_ERR(handle)) {
1073 status = PTR_ERR(handle);
1074 mlog_errno(status);
1075 goto out;
1076 }
1077 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1078 OCFS2_JOURNAL_ACCESS_WRITE);
1079 if (status < 0) {
1080 mlog_errno(status);
1081 goto out_trans;
1082 }
1083
1084 dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data;
1085 lock_buffer(chunk->qc_headerbh);
1086 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
1087 unlock_buffer(chunk->qc_headerbh);
1088 status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
1089 if (status < 0) {
1090 mlog_errno(status);
1091 goto out_trans;
1092 }
1093 oinfo->dqi_blocks++;
1094 status = ocfs2_local_write_info(sb, type);
1095 if (status < 0) {
1096 mlog_errno(status);
1097 goto out_trans;
1098 }
1099
1100 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
1101 if (status < 0) {
1102 mlog_errno(status);
1103 goto out;
1104 }
1105 *offset = chunk_blocks * epb;
1106 return chunk;
1107out_trans:
1108 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1109out:
1110 return ERR_PTR(status);
1111}
1112
1113static void olq_alloc_dquot(struct buffer_head *bh, void *private)
1114{
1115 int *offset = private;
1116 struct ocfs2_local_disk_chunk *dchunk;
1117
1118 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
1119 ocfs2_set_bit(*offset, dchunk->dqc_bitmap);
1120 le32_add_cpu(&dchunk->dqc_free, -1);
1121}
1122
1123/* Create dquot in the local file for given id */
1124static int ocfs2_create_local_dquot(struct dquot *dquot)
1125{
1126 struct super_block *sb = dquot->dq_sb;
1127 int type = dquot->dq_type;
1128 struct inode *lqinode = sb_dqopt(sb)->files[type];
1129 struct ocfs2_quota_chunk *chunk;
1130 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1131 int offset;
1132 int status;
1133
1134 chunk = ocfs2_find_free_entry(sb, type, &offset);
1135 if (!chunk) {
1136 chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
1137 if (IS_ERR(chunk))
1138 return PTR_ERR(chunk);
1139 } else if (IS_ERR(chunk)) {
1140 return PTR_ERR(chunk);
1141 }
1142 od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
1143 od->dq_chunk = chunk;
1144
1145 /* Initialize dquot structure on disk */
1146 status = ocfs2_local_write_dquot(dquot);
1147 if (status < 0) {
1148 mlog_errno(status);
1149 goto out;
1150 }
1151
1152 /* Mark structure as allocated */
1153 status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot,
1154 &offset);
1155 if (status < 0) {
1156 mlog_errno(status);
1157 goto out;
1158 }
1159out:
1160 return status;
1161}
1162
1163/* Create entry in local file for dquot, load data from the global file */
1164static int ocfs2_local_read_dquot(struct dquot *dquot)
1165{
1166 int status;
1167
1168 mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
1169
1170 status = ocfs2_global_read_dquot(dquot);
1171 if (status < 0) {
1172 mlog_errno(status);
1173 goto out_err;
1174 }
1175
1176 /* Now create entry in the local quota file */
1177 status = ocfs2_create_local_dquot(dquot);
1178 if (status < 0) {
1179 mlog_errno(status);
1180 goto out_err;
1181 }
1182 mlog_exit(0);
1183 return 0;
1184out_err:
1185 mlog_exit(status);
1186 return status;
1187}
1188
1189/* Release dquot structure from local quota file. ocfs2_release_dquot() has
1190 * already started a transaction and obtained exclusive lock for global
1191 * quota file. */
1192static int ocfs2_local_release_dquot(struct dquot *dquot)
1193{
1194 int status;
1195 int type = dquot->dq_type;
1196 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1197 struct super_block *sb = dquot->dq_sb;
1198 struct ocfs2_local_disk_chunk *dchunk;
1199 int offset;
1200 handle_t *handle = journal_current_handle();
1201
1202 BUG_ON(!handle);
1203 /* First write all local changes to global file */
1204 status = ocfs2_global_release_dquot(dquot);
1205 if (status < 0) {
1206 mlog_errno(status);
1207 goto out;
1208 }
1209
1210 status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type],
1211 od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
1212 if (status < 0) {
1213 mlog_errno(status);
1214 goto out;
1215 }
1216 offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num,
1217 od->dq_local_off);
1218 dchunk = (struct ocfs2_local_disk_chunk *)
1219 (od->dq_chunk->qc_headerbh->b_data);
1220 /* Mark structure as freed */
1221 lock_buffer(od->dq_chunk->qc_headerbh);
1222 ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
1223 le32_add_cpu(&dchunk->dqc_free, 1);
1224 unlock_buffer(od->dq_chunk->qc_headerbh);
1225 status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
1226 if (status < 0) {
1227 mlog_errno(status);
1228 goto out;
1229 }
1230 status = 0;
1231out:
1232 /* Clear the read bit so that next time someone uses this
1233 * dquot he reads fresh info from disk and allocates local
1234 * dquot structure */
1235 clear_bit(DQ_READ_B, &dquot->dq_flags);
1236 return status;
1237}
1238
1239static struct quota_format_ops ocfs2_format_ops = {
1240 .check_quota_file = ocfs2_local_check_quota_file,
1241 .read_file_info = ocfs2_local_read_info,
1242 .write_file_info = ocfs2_global_write_info,
1243 .free_file_info = ocfs2_local_free_info,
1244 .read_dqblk = ocfs2_local_read_dquot,
1245 .commit_dqblk = ocfs2_local_write_dquot,
1246 .release_dqblk = ocfs2_local_release_dquot,
1247};
1248
1249struct quota_format_type ocfs2_quota_format = {
1250 .qf_fmt_id = QFMT_OCFS2,
1251 .qf_ops = &ocfs2_format_ops,
1252 .qf_owner = THIS_MODULE
1253};
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index ffd48db229a7..424adaa5f900 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
 		   new_clusters, first_new_cluster);
 
-	ret = ocfs2_journal_access(handle, bm_inode, group_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -141,8 +141,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
 	}
 
 	/* update the inode accordingly. */
-	ret = ocfs2_journal_access(handle, bm_inode, bm_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_rollback;
@@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 
 	fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
 
+	/* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
+	 * so any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
 	if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
 				 ocfs2_group_bitmap_size(osb->sb) * 8) {
 		mlog(ML_ERROR, "The disk is too old and small. "
@@ -322,30 +326,18 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
 		goto out_unlock;
 	}
 
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
-		ret = -EIO;
-		goto out_unlock;
-	}
-
 	first_new_cluster = le32_to_cpu(fe->i_clusters);
 	lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
 					      first_new_cluster - 1);
 
-	ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
+	ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
+					  &group_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_unlock;
 	}
-
 	group = (struct ocfs2_group_desc *)group_bh->b_data;
 
-	ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_unlock;
-	}
-
 	cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
 	if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
 		le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
@@ -398,41 +390,16 @@ static int ocfs2_check_new_group(struct inode *inode,
 				 struct buffer_head *group_bh)
 {
 	int ret;
-	struct ocfs2_group_desc *gd;
+	struct ocfs2_group_desc *gd =
+		(struct ocfs2_group_desc *)group_bh->b_data;
 	u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
-	unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
-				le16_to_cpu(di->id2.i_chain.cl_bpc);
-
 
-	gd = (struct ocfs2_group_desc *)group_bh->b_data;
+	ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
+	if (ret)
+		goto out;
 
-	ret = -EIO;
-	if (!OCFS2_IS_VALID_GROUP_DESC(gd))
-		mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno));
-	else if (di->i_blkno != gd->bg_parent_dinode)
-		mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
-		     "pointer (%llu, expected %llu)\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
-		     (unsigned long long)le64_to_cpu(di->i_blkno));
-	else if (le16_to_cpu(gd->bg_bits) > max_bits)
-		mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     le16_to_cpu(gd->bg_bits));
-	else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
-		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
-		     "claims that %u are free\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     le16_to_cpu(gd->bg_bits),
-		     le16_to_cpu(gd->bg_free_bits_count));
-	else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
-		mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
-		     "max bitmap bits of %u\n",
-		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
-		     le16_to_cpu(gd->bg_bits),
-		     8 * le16_to_cpu(gd->bg_size));
-	else if (le16_to_cpu(gd->bg_chain) != input->chain)
+	ret = -EINVAL;
+	if (le16_to_cpu(gd->bg_chain) != input->chain)
 		mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
 		     "while input has %u set.\n",
 		     (unsigned long long)le64_to_cpu(gd->bg_blkno),
@@ -451,6 +418,7 @@ static int ocfs2_check_new_group(struct inode *inode,
 	else
 		ret = 0;
 
+out:
 	return ret;
 }
 
@@ -568,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 	cl = &fe->id2.i_chain;
 	cr = &cl->cl_recs[input->chain];
 
-	ret = ocfs2_journal_access(handle, main_bm_inode, group_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
@@ -584,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
 		goto out_commit;
 	}
 
-	ret = ocfs2_journal_access(handle, main_bm_inode, main_bm_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out_commit;
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bdda2d8f8508..40661e7824e9 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 	 * this is not true, the read of -1 (UINT64_MAX) will fail.
 	 */
 	ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
-				OCFS2_BH_IGNORE_CACHE);
+				OCFS2_BH_IGNORE_CACHE, NULL);
 	if (ret == 0) {
 		spin_lock(&osb->osb_lock);
 		ocfs2_update_slot_info(si);
@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 
 		bh = NULL; /* Acquire a fresh bh */
 		status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
-					   OCFS2_BH_IGNORE_CACHE);
+					   OCFS2_BH_IGNORE_CACHE, NULL);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c5ff18b46b57..a69628603e18 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -35,6 +35,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "blockcheck.h"
 #include "dlmglue.h"
 #include "inode.h"
 #include "journal.h"
@@ -145,62 +146,183 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
 	return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
-/* somewhat more expensive than our other checks, so use sparingly. */
-int ocfs2_check_group_descriptor(struct super_block *sb,
-				 struct ocfs2_dinode *di,
-				 struct ocfs2_group_desc *gd)
+#define do_error(fmt, ...)						\
+	do{								\
+		if (clean_error)					\
+			mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);	\
+		else							\
+			ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
+	} while (0)
+
+static int ocfs2_validate_gd_self(struct super_block *sb,
+				  struct buffer_head *bh,
+				  int clean_error)
 {
-	unsigned int max_bits;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
 
 	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
-		return -EIO;
+		do_error("Group descriptor #%llu has bad signature %.*s",
+			 (unsigned long long)bh->b_blocknr, 7,
+			 gd->bg_signature);
+		return -EINVAL;
 	}
 
+	if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
+		do_error("Group descriptor #%llu has an invalid bg_blkno "
+			 "of %llu",
+			 (unsigned long long)bh->b_blocknr,
+			 (unsigned long long)le64_to_cpu(gd->bg_blkno));
+		return -EINVAL;
+	}
+
+	if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
+		do_error("Group descriptor #%llu has an invalid "
+			 "fs_generation of #%u",
+			 (unsigned long long)bh->b_blocknr,
+			 le32_to_cpu(gd->bg_generation));
+		return -EINVAL;
+	}
+
+	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+		do_error("Group descriptor #%llu has bit count %u but "
+			 "claims that %u are free",
+			 (unsigned long long)bh->b_blocknr,
+			 le16_to_cpu(gd->bg_bits),
+			 le16_to_cpu(gd->bg_free_bits_count));
+		return -EINVAL;
+	}
+
+	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+		do_error("Group descriptor #%llu has bit count %u but "
+			 "max bitmap bits of %u",
+			 (unsigned long long)bh->b_blocknr,
+			 le16_to_cpu(gd->bg_bits),
+			 8 * le16_to_cpu(gd->bg_size));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ocfs2_validate_gd_parent(struct super_block *sb,
+				    struct ocfs2_dinode *di,
+				    struct buffer_head *bh,
+				    int clean_error)
+{
+	unsigned int max_bits;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
 	if (di->i_blkno != gd->bg_parent_dinode) {
-		ocfs2_error(sb, "Group descriptor # %llu has bad parent "
-			    "pointer (%llu, expected %llu)",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
-			    (unsigned long long)le64_to_cpu(di->i_blkno));
-		return -EIO;
+		do_error("Group descriptor #%llu has bad parent "
+			 "pointer (%llu, expected %llu)",
+			 (unsigned long long)bh->b_blocknr,
+			 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+			 (unsigned long long)le64_to_cpu(di->i_blkno));
+		return -EINVAL;
 	}
 
 	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
 	if (le16_to_cpu(gd->bg_bits) > max_bits) {
-		ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_bits));
-		return -EIO;
+		do_error("Group descriptor #%llu has bit count of %u",
+			 (unsigned long long)bh->b_blocknr,
+			 le16_to_cpu(gd->bg_bits));
+		return -EINVAL;
 	}
 
 	if (le16_to_cpu(gd->bg_chain) >=
 	    le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
-		ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_chain));
-		return -EIO;
+		do_error("Group descriptor #%llu has bad chain %u",
+			 (unsigned long long)bh->b_blocknr,
+			 le16_to_cpu(gd->bg_chain));
+		return -EINVAL;
 	}
 
-	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
-		ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
-			    "claims that %u are free",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_bits),
-			    le16_to_cpu(gd->bg_free_bits_count));
-		return -EIO;
-	}
+	return 0;
+}
 
-	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
-		ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
-			    "max bitmap bits of %u",
-			    (unsigned long long)le64_to_cpu(gd->bg_blkno),
-			    le16_to_cpu(gd->bg_bits),
-			    8 * le16_to_cpu(gd->bg_size));
-		return -EIO;
+#undef do_error
+
+/*
+ * This version only prints errors. It does not fail the filesystem, and
+ * exists only for resize.
+ */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+				 struct ocfs2_dinode *di,
+				 struct buffer_head *bh)
+{
+	int rc;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running. We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
+	if (rc) {
+		mlog(ML_ERROR,
+		     "Checksum failed for group descriptor %llu\n",
+		     (unsigned long long)bh->b_blocknr);
+	} else
+		rc = ocfs2_validate_gd_self(sb, bh, 1);
+	if (!rc)
+		rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
+
+	return rc;
+}
+
+static int ocfs2_validate_group_descriptor(struct super_block *sb,
+					   struct buffer_head *bh)
+{
+	int rc;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
+	mlog(0, "Validating group descriptor %llu\n",
+	     (unsigned long long)bh->b_blocknr);
+
+	BUG_ON(!buffer_uptodate(bh));
+
+	/*
+	 * If the ecc fails, we return the error but otherwise
+	 * leave the filesystem running. We know any error is
+	 * local to this block.
+	 */
+	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
+	if (rc)
+		return rc;
+
+	/*
+	 * Errors after here are fatal.
+	 */
+
+	return ocfs2_validate_gd_self(sb, bh, 0);
+}
+
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+				u64 gd_blkno, struct buffer_head **bh)
+{
+	int rc;
+	struct buffer_head *tmp = *bh;
+
+	rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+			      ocfs2_validate_group_descriptor);
+	if (rc)
+		goto out;
+
+	rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
+	if (rc) {
+		brelse(tmp);
+		goto out;
 	}
 
-	return 0;
+	/* If ocfs2_read_block() got us a new bh, pass it up. */
+	if (!*bh)
+		*bh = tmp;
+
+out:
+	return rc;
 }
 
 static int ocfs2_block_group_fill(handle_t *handle,
@@ -225,10 +347,10 @@ static int ocfs2_block_group_fill(handle_t *handle,
 		goto bail;
 	}
 
-	status = ocfs2_journal_access(handle,
-				      alloc_inode,
-				      bg_bh,
-				      OCFS2_JOURNAL_ACCESS_CREATE);
+	status = ocfs2_journal_access_gd(handle,
+					 alloc_inode,
+					 bg_bh,
+					 OCFS2_JOURNAL_ACCESS_CREATE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -358,8 +480,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 
-	status = ocfs2_journal_access(handle, alloc_inode,
-				      bh, OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, alloc_inode,
+					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -441,11 +563,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
 	ac->ac_alloc_slot = slot;
 
 	fe = (struct ocfs2_dinode *) bh->b_data;
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
-		status = -EIO;
-		goto bail;
-	}
+
+	/* The bh was validated by the inode read inside
+	 * ocfs2_inode_lock(). Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
 	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
 		ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
 			    (unsigned long long)le64_to_cpu(fe->i_blkno));
@@ -790,10 +912,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
 	int offset, start, found, status = 0;
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
-		return -EIO;
-	}
+	/* Callers got this descriptor from
+	 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
 
 	found = start = best_offset = best_size = 0;
 	bitmap = bg->bg_bitmap;
@@ -858,11 +979,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 
 	mlog_entry_void();
 
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-		status = -EIO;
-		goto bail;
-	}
+	/* All callers get the descriptor via
+	 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
 	BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
 
 	mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
@@ -871,10 +990,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
 	if (ocfs2_is_cluster_bitmap(alloc_inode))
 		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
 
-	status = ocfs2_journal_access(handle,
-				      alloc_inode,
-				      group_bh,
-				      journal_type);
+	status = ocfs2_journal_access_gd(handle,
+					 alloc_inode,
+					 group_bh,
+					 journal_type);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -931,21 +1050,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
 	struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
 
-	if (!OCFS2_IS_VALID_DINODE(fe)) {
-		OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
-		status = -EIO;
-		goto out;
-	}
-	if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-		status = -EIO;
-		goto out;
-	}
-	if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
-		status = -EIO;
-		goto out;
-	}
+	/* The caller got these descriptors from
+	 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
+	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
 
 	mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
 	     (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
@@ -956,8 +1064,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 	bg_ptr = le64_to_cpu(bg->bg_next_group);
 	prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
 
-	status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -971,8 +1079,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -986,8 +1094,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
 		goto out_rollback;
 	}
 
-	status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
-				      OCFS2_JOURNAL_ACCESS_WRITE);
+	status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
 		goto out_rollback;
@@ -1008,7 +1116,7 @@ out_rollback:
 		bg->bg_next_group = cpu_to_le64(bg_ptr);
 		prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
 	}
-out:
+
 	mlog_exit(status);
 	return status;
 }
@@ -1138,8 +1246,8 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
 	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
 	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
 
-	ret = ocfs2_journal_access(handle, inode, di_bh,
-				   OCFS2_JOURNAL_ACCESS_WRITE);
+	ret = ocfs2_journal_access_di(handle, inode, di_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -1170,21 +1278,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
 	u16 found;
 	struct buffer_head *group_bh = NULL;
 	struct ocfs2_group_desc *gd;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
 	struct inode *alloc_inode = ac->ac_inode;
 
-	ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
+	ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
+					  &group_bh);
 	if (ret < 0) {
 		mlog_errno(ret);
 		return ret;
 	}
 
 	gd = (struct ocfs2_group_desc *) group_bh->b_data;
-	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
-		OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
-		ret = -EIO;
-		goto out;
-	}
-
 	ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
 				  ac->ac_max_block, bit_off, &found);
1190 if (ret < 0) { 1294 if (ret < 0) {
@@ -1241,19 +1345,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1241 bits_wanted, chain, 1345 bits_wanted, chain,
1242 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1346 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1243 1347
1244 status = ocfs2_read_block(alloc_inode, 1348 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1245 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1349 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1246 &group_bh); 1350 &group_bh);
1247 if (status < 0) { 1351 if (status < 0) {
1248 mlog_errno(status); 1352 mlog_errno(status);
1249 goto bail; 1353 goto bail;
1250 } 1354 }
1251 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1355 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1252 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1253 if (status) {
1254 mlog_errno(status);
1255 goto bail;
1256 }
1257 1356
1258 status = -ENOSPC; 1357 status = -ENOSPC;
1259 /* for now, the chain search is a bit simplistic. We just use 1358 /* for now, the chain search is a bit simplistic. We just use
@@ -1271,18 +1370,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1271 next_group = le64_to_cpu(bg->bg_next_group); 1370 next_group = le64_to_cpu(bg->bg_next_group);
1272 prev_group_bh = group_bh; 1371 prev_group_bh = group_bh;
1273 group_bh = NULL; 1372 group_bh = NULL;
1274 status = ocfs2_read_block(alloc_inode, 1373 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1275 next_group, &group_bh); 1374 next_group, &group_bh);
1276 if (status < 0) { 1375 if (status < 0) {
1277 mlog_errno(status); 1376 mlog_errno(status);
1278 goto bail; 1377 goto bail;
1279 } 1378 }
1280 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1379 bg = (struct ocfs2_group_desc *) group_bh->b_data;
1281 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1282 if (status) {
1283 mlog_errno(status);
1284 goto bail;
1285 }
1286 } 1380 }
1287 if (status < 0) { 1381 if (status < 0) {
1288 if (status != -ENOSPC) 1382 if (status != -ENOSPC)
@@ -1324,10 +1418,10 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1324 1418
1325 /* Ok, claim our bits now: set the info on dinode, chainlist 1419 /* Ok, claim our bits now: set the info on dinode, chainlist
1326 * and then the group */ 1420 * and then the group */
1327 status = ocfs2_journal_access(handle, 1421 status = ocfs2_journal_access_di(handle,
1328 alloc_inode, 1422 alloc_inode,
1329 ac->ac_bh, 1423 ac->ac_bh,
1330 OCFS2_JOURNAL_ACCESS_WRITE); 1424 OCFS2_JOURNAL_ACCESS_WRITE);
1331 if (status < 0) { 1425 if (status < 0) {
1332 mlog_errno(status); 1426 mlog_errno(status);
1333 goto bail; 1427 goto bail;
@@ -1392,11 +1486,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1392 BUG_ON(!ac->ac_bh); 1486 BUG_ON(!ac->ac_bh);
1393 1487
1394 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1488 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1395 if (!OCFS2_IS_VALID_DINODE(fe)) { 1489
1396 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); 1490 /* The bh was validated by the inode read during
1397 status = -EIO; 1491 * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */
1398 goto bail; 1492 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1399 } 1493
1400 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1494 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1401 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1495 le32_to_cpu(fe->id1.bitmap1.i_total)) {
1402 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1496 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
@@ -1725,19 +1819,17 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1725 1819
1726 mlog_entry_void(); 1820 mlog_entry_void();
1727 1821
1728 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1822 /* The caller got this descriptor from
1729 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1823 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1730 status = -EIO; 1824 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1731 goto bail;
1732 }
1733 1825
1734 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1826 mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1735 1827
1736 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1828 if (ocfs2_is_cluster_bitmap(alloc_inode))
1737 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1829 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1738 1830
1739 status = ocfs2_journal_access(handle, alloc_inode, group_bh, 1831 status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh,
1740 journal_type); 1832 journal_type);
1741 if (status < 0) { 1833 if (status < 0) {
1742 mlog_errno(status); 1834 mlog_errno(status);
1743 goto bail; 1835 goto bail;
@@ -1782,29 +1874,26 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1782 1874
1783 mlog_entry_void(); 1875 mlog_entry_void();
1784 1876
1785 if (!OCFS2_IS_VALID_DINODE(fe)) { 1877 /* The alloc_bh comes from ocfs2_free_dinode() or
1786 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1878 * ocfs2_free_clusters(). The callers have all locked the
1787 		status = -EIO;				 1879	 * validates the dinode buffer. Any corruption that has happened
1788 goto bail; 1880 * validates the dinode buffer. Any corruption that has happended
1789 } 1881 * is a code bug. */
1882 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1790 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 1883 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1791 1884
1792 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", 1885 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1793 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 1886 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1794 (unsigned long long)bg_blkno, start_bit); 1887 (unsigned long long)bg_blkno, start_bit);
1795 1888
1796 status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); 1889 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
1890 &group_bh);
1797 if (status < 0) { 1891 if (status < 0) {
1798 mlog_errno(status); 1892 mlog_errno(status);
1799 goto bail; 1893 goto bail;
1800 } 1894 }
1801
1802 group = (struct ocfs2_group_desc *) group_bh->b_data; 1895 group = (struct ocfs2_group_desc *) group_bh->b_data;
1803 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); 1896
1804 if (status) {
1805 mlog_errno(status);
1806 goto bail;
1807 }
1808 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1897 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1809 1898
1810 status = ocfs2_block_group_clear_bits(handle, alloc_inode, 1899 status = ocfs2_block_group_clear_bits(handle, alloc_inode,
@@ -1815,8 +1904,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1815 goto bail; 1904 goto bail;
1816 } 1905 }
1817 1906
1818 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, 1907 status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh,
1819 OCFS2_JOURNAL_ACCESS_WRITE); 1908 OCFS2_JOURNAL_ACCESS_WRITE);
1820 if (status < 0) { 1909 if (status < 0) {
1821 mlog_errno(status); 1910 mlog_errno(status);
1822 goto bail; 1911 goto bail;
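
The suballoc.c hunks above replace the per-call descriptor checks (return -EIO and unwind) with a contract: ocfs2_read_group_descriptor() validates the block once at read time, and later users of the buffer simply BUG_ON() a broken descriptor, treating corruption after a validated read as a code bug. As a rough illustration only, here is a toy user-space model of that "validate on read, assert on use" pattern; every name below is hypothetical and none of it is the ocfs2 code itself.

#include <assert.h>
#include <stdio.h>
#include <string.h>

struct group_desc {
	char signature[8];
	unsigned int free_bits;
};

/* The single place where descriptor sanity is checked.  Callers that get
 * a descriptor from here may assume it is valid. */
static int read_group_descriptor(struct group_desc *gd)
{
	/* In the real code this would read the block from disk and run the
	 * full validation (signature, blkno, fs_generation, checksum). */
	strcpy(gd->signature, "GROUP01");
	gd->free_bits = 42;
	if (strcmp(gd->signature, "GROUP01") != 0)
		return -1;	/* -EIO in the kernel */
	return 0;
}

/* A consumer: corruption seen here can only mean a bug, so assert instead
 * of returning -EIO and unwinding. */
static void set_bits(struct group_desc *gd, unsigned int num)
{
	assert(strcmp(gd->signature, "GROUP01") == 0);	/* BUG_ON() analogue */
	assert(gd->free_bits >= num);
	gd->free_bits -= num;
}

int main(void)
{
	struct group_desc gd;

	if (read_group_descriptor(&gd)) {
		fprintf(stderr, "read failed\n");
		return 1;
	}
	set_bits(&gd, 3);
	printf("free bits now %u\n", gd.free_bits);
	return 0;
}
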
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 4df159d8f450..e3c13c77f9e8 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -164,10 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
164 * and return that block offset. */ 164 * and return that block offset. */
165u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); 165u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
166 166
167/* somewhat more expensive than our other checks, so use sparingly. */ 167/*
168 * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
169 * finds a problem. A caller that wants to check a group descriptor
170 * without going readonly should read the block with ocfs2_read_block[s]()
 171 * and then check it with this function. In practice only resize does this.
172 * Everyone else should be using ocfs2_read_group_descriptor().
173 */
168int ocfs2_check_group_descriptor(struct super_block *sb, 174int ocfs2_check_group_descriptor(struct super_block *sb,
169 struct ocfs2_dinode *di, 175 struct ocfs2_dinode *di,
170 struct ocfs2_group_desc *gd); 176 struct buffer_head *bh);
177/*
178 * Read a group descriptor block into *bh. If *bh is NULL, a bh will be
179 * allocated. This is a cached read. The descriptor will be validated with
180 * ocfs2_validate_group_descriptor().
181 */
182int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
183 u64 gd_blkno, struct buffer_head **bh);
184
171int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, 185int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
172 u32 clusters_to_add, u32 extents_to_split, 186 u32 clusters_to_add, u32 extents_to_split,
173 struct ocfs2_alloc_context **data_ac, 187 struct ocfs2_alloc_context **data_ac,
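
The new suballoc.h comment spells out the split: normal callers use ocfs2_read_group_descriptor(), which is a cached read that runs a validator before the buffer is handed back, while resize reads the raw block and calls ocfs2_check_group_descriptor() itself to avoid going read-only. The shape of that read-with-validator helper, as a small self-contained sketch (hypothetical names, not the kernel API):

#include <stdio.h>
#include <string.h>

struct block { char data[16]; };

/* The validator runs once, at read time, before the caller sees the block. */
typedef int (*validate_fn)(const struct block *blk);

static int read_block(struct block *out, const char *backing,
		      validate_fn validate)
{
	memcpy(out->data, backing, sizeof(out->data));	/* the "disk" read */
	return validate ? validate(out) : 0;
}

static int validate_group_desc(const struct block *blk)
{
	if (memcmp(blk->data, "GROUP01", 7) != 0) {
		fprintf(stderr, "bad group descriptor signature\n");
		return -1;	/* the kernel would call ocfs2_error() here */
	}
	return 0;
}

int main(void)
{
	char disk[16] = "GROUP01";
	struct block blk;

	if (read_block(&blk, disk, validate_group_desc))
		return 1;
	puts("descriptor validated at read time");
	return 0;
}

A caller that must not trip the read-only path would pass a NULL validator and run the check itself, which is what the comment reserves for resize.
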
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 304b63ac78cf..43ed11345b59 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,6 +41,7 @@
41#include <linux/debugfs.h> 41#include <linux/debugfs.h>
42#include <linux/mount.h> 42#include <linux/mount.h>
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <linux/quotaops.h>
44 45
45#define MLOG_MASK_PREFIX ML_SUPER 46#define MLOG_MASK_PREFIX ML_SUPER
46#include <cluster/masklog.h> 47#include <cluster/masklog.h>
@@ -51,6 +52,7 @@
51#include "ocfs1_fs_compat.h" 52#include "ocfs1_fs_compat.h"
52 53
53#include "alloc.h" 54#include "alloc.h"
55#include "blockcheck.h"
54#include "dlmglue.h" 56#include "dlmglue.h"
55#include "export.h" 57#include "export.h"
56#include "extent_map.h" 58#include "extent_map.h"
@@ -65,10 +67,13 @@
65#include "uptodate.h" 67#include "uptodate.h"
66#include "ver.h" 68#include "ver.h"
67#include "xattr.h" 69#include "xattr.h"
70#include "quota.h"
68 71
69#include "buffer_head_io.h" 72#include "buffer_head_io.h"
70 73
71static struct kmem_cache *ocfs2_inode_cachep = NULL; 74static struct kmem_cache *ocfs2_inode_cachep = NULL;
75struct kmem_cache *ocfs2_dquot_cachep;
76struct kmem_cache *ocfs2_qf_chunk_cachep;
72 77
  73/* OCFS2 needs to schedule several different types of work which	  78/* OCFS2 needs to schedule several different types of work which
74 * require cluster locking, disk I/O, recovery waits, etc. Since these 79 * require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -124,6 +129,9 @@ static int ocfs2_get_sector(struct super_block *sb,
124static void ocfs2_write_super(struct super_block *sb); 129static void ocfs2_write_super(struct super_block *sb);
125static struct inode *ocfs2_alloc_inode(struct super_block *sb); 130static struct inode *ocfs2_alloc_inode(struct super_block *sb);
126static void ocfs2_destroy_inode(struct inode *inode); 131static void ocfs2_destroy_inode(struct inode *inode);
132static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
133static int ocfs2_enable_quotas(struct ocfs2_super *osb);
134static void ocfs2_disable_quotas(struct ocfs2_super *osb);
127 135
128static const struct super_operations ocfs2_sops = { 136static const struct super_operations ocfs2_sops = {
129 .statfs = ocfs2_statfs, 137 .statfs = ocfs2_statfs,
@@ -137,6 +145,8 @@ static const struct super_operations ocfs2_sops = {
137 .put_super = ocfs2_put_super, 145 .put_super = ocfs2_put_super,
138 .remount_fs = ocfs2_remount, 146 .remount_fs = ocfs2_remount,
139 .show_options = ocfs2_show_options, 147 .show_options = ocfs2_show_options,
148 .quota_read = ocfs2_quota_read,
149 .quota_write = ocfs2_quota_write,
140}; 150};
141 151
142enum { 152enum {
@@ -158,6 +168,10 @@ enum {
158 Opt_user_xattr, 168 Opt_user_xattr,
159 Opt_nouser_xattr, 169 Opt_nouser_xattr,
160 Opt_inode64, 170 Opt_inode64,
171 Opt_acl,
172 Opt_noacl,
173 Opt_usrquota,
174 Opt_grpquota,
161 Opt_err, 175 Opt_err,
162}; 176};
163 177
@@ -180,6 +194,10 @@ static const match_table_t tokens = {
180 {Opt_user_xattr, "user_xattr"}, 194 {Opt_user_xattr, "user_xattr"},
181 {Opt_nouser_xattr, "nouser_xattr"}, 195 {Opt_nouser_xattr, "nouser_xattr"},
182 {Opt_inode64, "inode64"}, 196 {Opt_inode64, "inode64"},
197 {Opt_acl, "acl"},
198 {Opt_noacl, "noacl"},
199 {Opt_usrquota, "usrquota"},
200 {Opt_grpquota, "grpquota"},
183 {Opt_err, NULL} 201 {Opt_err, NULL}
184}; 202};
185 203
@@ -221,6 +239,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
221 return 0; 239 return 0;
222} 240}
223 241
242static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
243{
244 if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
245 && (ino == USER_QUOTA_SYSTEM_INODE
246 || ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
247 return 0;
248 if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
249 && (ino == GROUP_QUOTA_SYSTEM_INODE
250 || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
251 return 0;
252 return 1;
253}
254
224static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) 255static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
225{ 256{
226 struct inode *new = NULL; 257 struct inode *new = NULL;
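
ocfs2_need_system_inode() above keys the quota system inodes off the RO-compat feature bits, so a filesystem without the usrquota/grpquota features never even loads those inodes. A minimal model of that gating (the flag values and inode names here are made up for illustration):

#include <stdio.h>

#define FEAT_USRQUOTA	0x1	/* stand-ins for the RO-compat bits */
#define FEAT_GRPQUOTA	0x2

enum sys_inode { BAD_BLOCK, USER_QUOTA, GROUP_QUOTA, LOCAL_ALLOC, NUM_INODES };

static int need_system_inode(unsigned int features, int ino)
{
	if (!(features & FEAT_USRQUOTA) && ino == USER_QUOTA)
		return 0;
	if (!(features & FEAT_GRPQUOTA) && ino == GROUP_QUOTA)
		return 0;
	return 1;
}

int main(void)
{
	unsigned int features = FEAT_USRQUOTA;	/* grpquota not enabled */
	int i;

	for (i = BAD_BLOCK; i < NUM_INODES; i++)
		printf("inode %d: %s\n", i,
		       need_system_inode(features, i) ? "load" : "skip");
	return 0;
}
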
@@ -247,6 +278,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
247 278
248 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; 279 for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
249 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { 280 i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
281 if (!ocfs2_need_system_inode(osb, i))
282 continue;
250 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 283 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
251 if (!new) { 284 if (!new) {
252 ocfs2_release_system_inodes(osb); 285 ocfs2_release_system_inodes(osb);
@@ -277,6 +310,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
277 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; 310 for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
278 i < NUM_SYSTEM_INODES; 311 i < NUM_SYSTEM_INODES;
279 i++) { 312 i++) {
313 if (!ocfs2_need_system_inode(osb, i))
314 continue;
280 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); 315 new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
281 if (!new) { 316 if (!new) {
282 ocfs2_release_system_inodes(osb); 317 ocfs2_release_system_inodes(osb);
@@ -426,6 +461,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
426 461
427 /* We're going to/from readonly mode. */ 462 /* We're going to/from readonly mode. */
428 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 463 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
464 /* Disable quota accounting before remounting RO */
465 if (*flags & MS_RDONLY) {
466 ret = ocfs2_susp_quotas(osb, 0);
467 if (ret < 0)
468 goto out;
469 }
429 /* Lock here so the check of HARD_RO and the potential 470 /* Lock here so the check of HARD_RO and the potential
430 * setting of SOFT_RO is atomic. */ 471 * setting of SOFT_RO is atomic. */
431 spin_lock(&osb->osb_lock); 472 spin_lock(&osb->osb_lock);
@@ -461,11 +502,28 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
461 } 502 }
462unlock_osb: 503unlock_osb:
463 spin_unlock(&osb->osb_lock); 504 spin_unlock(&osb->osb_lock);
505 /* Enable quota accounting after remounting RW */
506 if (!ret && !(*flags & MS_RDONLY)) {
507 if (sb_any_quota_suspended(sb))
508 ret = ocfs2_susp_quotas(osb, 1);
509 else
510 ret = ocfs2_enable_quotas(osb);
511 if (ret < 0) {
512 /* Return back changes... */
513 spin_lock(&osb->osb_lock);
514 sb->s_flags |= MS_RDONLY;
515 osb->osb_flags |= OCFS2_OSB_SOFT_RO;
516 spin_unlock(&osb->osb_lock);
517 goto out;
518 }
519 }
464 } 520 }
465 521
466 if (!ret) { 522 if (!ret) {
467 /* Only save off the new mount options in case of a successful 523 /* Only save off the new mount options in case of a successful
468 * remount. */ 524 * remount. */
525 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
526 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
469 osb->s_mount_opt = parsed_options.mount_opt; 527 osb->s_mount_opt = parsed_options.mount_opt;
470 osb->s_atime_quantum = parsed_options.atime_quantum; 528 osb->s_atime_quantum = parsed_options.atime_quantum;
471 osb->preferred_slot = parsed_options.slot; 529 osb->preferred_slot = parsed_options.slot;
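
The remount hunks above order the quota work around the ro/rw flip: accounting is suspended before the filesystem goes read-only, and it is re-enabled (or unsuspended) only after a successful switch back to read-write, falling back to soft read-only if that fails. A toy sketch of that ordering, with illustrative names and no error details:

#include <stdio.h>

static int suspend_quotas(void) { puts("quotas suspended"); return 0; }
static int resume_quotas(void)  { puts("quotas resumed");   return 0; }
static int flip_mount_state(int to_ro)
{
	printf("remount %s\n", to_ro ? "ro" : "rw");
	return 0;
}

static int remount(int want_ro)
{
	int ret = 0;

	if (want_ro)				/* disable accounting first */
		ret = suspend_quotas();
	if (!ret)
		ret = flip_mount_state(want_ro);
	if (!ret && !want_ro) {			/* re-enable only once rw */
		ret = resume_quotas();
		if (ret)
			flip_mount_state(1);	/* fall back to read-only */
	}
	return ret;
}

int main(void)
{
	remount(1);
	remount(0);
	return 0;
}
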
@@ -619,6 +677,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
619 return 0; 677 return 0;
620} 678}
621 679
680static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
681{
682 int type;
683 struct super_block *sb = osb->sb;
684 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
685 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
686 int status = 0;
687
688 for (type = 0; type < MAXQUOTAS; type++) {
689 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
690 continue;
691 if (unsuspend)
692 status = vfs_quota_enable(
693 sb_dqopt(sb)->files[type],
694 type, QFMT_OCFS2,
695 DQUOT_SUSPENDED);
696 else
697 status = vfs_quota_disable(sb, type,
698 DQUOT_SUSPENDED);
699 if (status < 0)
700 break;
701 }
702 if (status < 0)
703 mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
704 "remount (error = %d).\n", status);
705 return status;
706}
707
708static int ocfs2_enable_quotas(struct ocfs2_super *osb)
709{
710 struct inode *inode[MAXQUOTAS] = { NULL, NULL };
711 struct super_block *sb = osb->sb;
712 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
713 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
714 unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
715 LOCAL_GROUP_QUOTA_SYSTEM_INODE };
716 int status;
717 int type;
718
719 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
720 for (type = 0; type < MAXQUOTAS; type++) {
721 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
722 continue;
723 inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
724 osb->slot_num);
725 if (!inode[type]) {
726 status = -ENOENT;
727 goto out_quota_off;
728 }
729 status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
730 DQUOT_USAGE_ENABLED);
731 if (status < 0)
732 goto out_quota_off;
733 }
734
735 for (type = 0; type < MAXQUOTAS; type++)
736 iput(inode[type]);
737 return 0;
738out_quota_off:
739 ocfs2_disable_quotas(osb);
740 for (type = 0; type < MAXQUOTAS; type++)
741 iput(inode[type]);
742 mlog_errno(status);
743 return status;
744}
745
746static void ocfs2_disable_quotas(struct ocfs2_super *osb)
747{
748 int type;
749 struct inode *inode;
750 struct super_block *sb = osb->sb;
751
752 /* We mostly ignore errors in this function because there's not much
753 * we can do when we see them */
754 for (type = 0; type < MAXQUOTAS; type++) {
755 if (!sb_has_quota_loaded(sb, type))
756 continue;
757 inode = igrab(sb->s_dquot.files[type]);
758 /* Turn off quotas. This will remove all dquot structures from
759 * memory and so they will be automatically synced to global
760 * quota files */
761 vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
762 DQUOT_LIMITS_ENABLED);
763 if (!inode)
764 continue;
765 iput(inode);
766 }
767}
768
769/* Handle quota on quotactl */
770static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
771 char *path, int remount)
772{
773 unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
774 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
775
776 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
777 return -EINVAL;
778
779 if (remount)
780 return 0; /* Just ignore it has been handled in
781 * ocfs2_remount() */
782 return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
783 format_id, DQUOT_LIMITS_ENABLED);
784}
785
786/* Handle quota off quotactl */
787static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
788{
789 if (remount)
790 return 0; /* Ignore now and handle later in
791 * ocfs2_remount() */
792 return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
793}
794
795static struct quotactl_ops ocfs2_quotactl_ops = {
796 .quota_on = ocfs2_quota_on,
797 .quota_off = ocfs2_quota_off,
798 .quota_sync = vfs_quota_sync,
799 .get_info = vfs_get_dqinfo,
800 .set_info = vfs_set_dqinfo,
801 .get_dqblk = vfs_get_dqblk,
802 .set_dqblk = vfs_set_dqblk,
803};
804
622static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) 805static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
623{ 806{
624 struct dentry *root; 807 struct dentry *root;
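
The enable/disable/suspend helpers in the hunk above all follow the same loop: walk every quota type, skip types whose RO-compat feature bit is not set, and on any failure tear down whatever was already turned on before returning the error. That "partially enabled means fully disable" loop, as a hedged stand-alone sketch rather than the kernel code:

#include <stdio.h>

#define MAXQUOTAS 2

static int enabled[MAXQUOTAS];
static const int feature_set[MAXQUOTAS] = { 1, 0 };	/* usr on, grp off */

static int quota_enable(int type)
{
	enabled[type] = 1;
	printf("type %d enabled\n", type);
	return 0;	/* flip to -1 to exercise the error path */
}

static void quota_disable(int type)
{
	if (enabled[type]) {
		enabled[type] = 0;
		printf("type %d disabled\n", type);
	}
}

static int enable_quotas(void)
{
	int type, status = 0;

	for (type = 0; type < MAXQUOTAS; type++) {
		if (!feature_set[type])
			continue;	/* feature bit not set: skip the type */
		status = quota_enable(type);
		if (status < 0)
			goto out_quota_off;
	}
	return 0;

out_quota_off:
	for (type = 0; type < MAXQUOTAS; type++)
		quota_disable(type);	/* undo anything already enabled */
	return status;
}

int main(void)
{
	return enable_quotas() ? 1 : 0;
}

The quotactl hooks then only handle the limits side; usage accounting stays tied to mount state, which is why the quota_on/quota_off handlers ignore the remount case.
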
@@ -651,12 +834,32 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
651 } 834 }
652 brelse(bh); 835 brelse(bh);
653 bh = NULL; 836 bh = NULL;
837
838 if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
839 parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
840
654 osb->s_mount_opt = parsed_options.mount_opt; 841 osb->s_mount_opt = parsed_options.mount_opt;
655 osb->s_atime_quantum = parsed_options.atime_quantum; 842 osb->s_atime_quantum = parsed_options.atime_quantum;
656 osb->preferred_slot = parsed_options.slot; 843 osb->preferred_slot = parsed_options.slot;
657 osb->osb_commit_interval = parsed_options.commit_interval; 844 osb->osb_commit_interval = parsed_options.commit_interval;
658 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 845 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
659 osb->local_alloc_bits = osb->local_alloc_default_bits; 846 osb->local_alloc_bits = osb->local_alloc_default_bits;
847 if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
848 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
849 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
850 status = -EINVAL;
851 mlog(ML_ERROR, "User quotas were requested, but this "
852 "filesystem does not have the feature enabled.\n");
853 goto read_super_error;
854 }
855 if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
856 !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
857 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
858 status = -EINVAL;
859 mlog(ML_ERROR, "Group quotas were requested, but this "
860 "filesystem does not have the feature enabled.\n");
861 goto read_super_error;
862 }
660 863
661 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 864 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
662 if (status) 865 if (status)
@@ -664,6 +867,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
664 867
665 sb->s_magic = OCFS2_SUPER_MAGIC; 868 sb->s_magic = OCFS2_SUPER_MAGIC;
666 869
870 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
871 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
872
667 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 873 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
668 * heartbeat=none */ 874 * heartbeat=none */
669 if (bdev_read_only(sb->s_bdev)) { 875 if (bdev_read_only(sb->s_bdev)) {
@@ -758,6 +964,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
758 atomic_set(&osb->vol_state, VOLUME_MOUNTED); 964 atomic_set(&osb->vol_state, VOLUME_MOUNTED);
759 wake_up(&osb->osb_mount_event); 965 wake_up(&osb->osb_mount_event);
760 966
967 /* Now we can initialize quotas because we can afford to wait
 968	 * for cluster lock recovery now. That also means that truncation
 969	 * log recovery can happen, but it waits for proper quota setup. */
970 if (!(sb->s_flags & MS_RDONLY)) {
971 status = ocfs2_enable_quotas(osb);
972 if (status < 0) {
973 /* We have to err-out specially here because
974 * s_root is already set */
975 mlog_errno(status);
976 atomic_set(&osb->vol_state, VOLUME_DISABLED);
977 wake_up(&osb->osb_mount_event);
978 mlog_exit(status);
979 return status;
980 }
981 }
982
983 ocfs2_complete_quota_recovery(osb);
984
985 /* Now we wake up again for processes waiting for quotas */
986 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
987 wake_up(&osb->osb_mount_event);
988
761 mlog_exit(status); 989 mlog_exit(status);
762 return status; 990 return status;
763 991
@@ -945,6 +1173,41 @@ static int ocfs2_parse_options(struct super_block *sb,
945 case Opt_inode64: 1173 case Opt_inode64:
946 mopt->mount_opt |= OCFS2_MOUNT_INODE64; 1174 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
947 break; 1175 break;
1176 case Opt_usrquota:
1177 /* We check only on remount, otherwise features
1178 * aren't yet initialized. */
1179 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1180 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1181 mlog(ML_ERROR, "User quota requested but "
1182 "filesystem feature is not set\n");
1183 status = 0;
1184 goto bail;
1185 }
1186 mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
1187 break;
1188 case Opt_grpquota:
1189 if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
1190 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1191 mlog(ML_ERROR, "Group quota requested but "
1192 "filesystem feature is not set\n");
1193 status = 0;
1194 goto bail;
1195 }
1196 mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
1197 break;
1198#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1199 case Opt_acl:
1200 mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
1201 break;
1202 case Opt_noacl:
1203 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1204 break;
1205#else
1206 case Opt_acl:
1207 case Opt_noacl:
1208 printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
1209 break;
1210#endif
948 default: 1211 default:
949 mlog(ML_ERROR, 1212 mlog(ML_ERROR,
950 "Unrecognized mount option \"%s\" " 1213 "Unrecognized mount option \"%s\" "
@@ -1008,6 +1271,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1008 if (osb->osb_cluster_stack[0]) 1271 if (osb->osb_cluster_stack[0])
1009 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, 1272 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
1010 osb->osb_cluster_stack); 1273 osb->osb_cluster_stack);
1274 if (opts & OCFS2_MOUNT_USRQUOTA)
1275 seq_printf(s, ",usrquota");
1276 if (opts & OCFS2_MOUNT_GRPQUOTA)
1277 seq_printf(s, ",grpquota");
1011 1278
1012 if (opts & OCFS2_MOUNT_NOUSERXATTR) 1279 if (opts & OCFS2_MOUNT_NOUSERXATTR)
1013 seq_printf(s, ",nouser_xattr"); 1280 seq_printf(s, ",nouser_xattr");
@@ -1017,6 +1284,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1017 if (opts & OCFS2_MOUNT_INODE64) 1284 if (opts & OCFS2_MOUNT_INODE64)
1018 seq_printf(s, ",inode64"); 1285 seq_printf(s, ",inode64");
1019 1286
1287#ifdef CONFIG_OCFS2_FS_POSIX_ACL
1288 if (opts & OCFS2_MOUNT_POSIX_ACL)
1289 seq_printf(s, ",acl");
1290 else
1291 seq_printf(s, ",noacl");
1292#endif
1293
1020 return 0; 1294 return 0;
1021} 1295}
1022 1296
@@ -1052,10 +1326,16 @@ static int __init ocfs2_init(void)
1052 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1326 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1053 } 1327 }
1054 1328
1329 status = ocfs2_quota_setup();
1330 if (status)
1331 goto leave;
1332
1055 ocfs2_set_locking_protocol(); 1333 ocfs2_set_locking_protocol();
1056 1334
1335 status = register_quota_format(&ocfs2_quota_format);
1057leave: 1336leave:
1058 if (status < 0) { 1337 if (status < 0) {
1338 ocfs2_quota_shutdown();
1059 ocfs2_free_mem_caches(); 1339 ocfs2_free_mem_caches();
1060 exit_ocfs2_uptodate_cache(); 1340 exit_ocfs2_uptodate_cache();
1061 } 1341 }
@@ -1072,11 +1352,15 @@ static void __exit ocfs2_exit(void)
1072{ 1352{
1073 mlog_entry_void(); 1353 mlog_entry_void();
1074 1354
1355 ocfs2_quota_shutdown();
1356
1075 if (ocfs2_wq) { 1357 if (ocfs2_wq) {
1076 flush_workqueue(ocfs2_wq); 1358 flush_workqueue(ocfs2_wq);
1077 destroy_workqueue(ocfs2_wq); 1359 destroy_workqueue(ocfs2_wq);
1078 } 1360 }
1079 1361
1362 unregister_quota_format(&ocfs2_quota_format);
1363
1080 debugfs_remove(ocfs2_debugfs_root); 1364 debugfs_remove(ocfs2_debugfs_root);
1081 1365
1082 ocfs2_free_mem_caches(); 1366 ocfs2_free_mem_caches();
@@ -1192,8 +1476,27 @@ static int ocfs2_initialize_mem_caches(void)
1192 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 1476 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
1193 SLAB_MEM_SPREAD), 1477 SLAB_MEM_SPREAD),
1194 ocfs2_inode_init_once); 1478 ocfs2_inode_init_once);
1195 if (!ocfs2_inode_cachep) 1479 ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
1480 sizeof(struct ocfs2_dquot),
1481 0,
1482 (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
1483 SLAB_MEM_SPREAD),
1484 NULL);
1485 ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
1486 sizeof(struct ocfs2_quota_chunk),
1487 0,
1488 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
1489 NULL);
1490 if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
1491 !ocfs2_qf_chunk_cachep) {
1492 if (ocfs2_inode_cachep)
1493 kmem_cache_destroy(ocfs2_inode_cachep);
1494 if (ocfs2_dquot_cachep)
1495 kmem_cache_destroy(ocfs2_dquot_cachep);
1496 if (ocfs2_qf_chunk_cachep)
1497 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1196 return -ENOMEM; 1498 return -ENOMEM;
1499 }
1197 1500
1198 return 0; 1501 return 0;
1199} 1502}
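
The cache-initialization hunk now creates three slab caches and, if any of them fails, destroys whichever ones did get created before returning -ENOMEM, so nothing is left half set up. The same all-or-nothing setup pattern in miniature, with malloc/free standing in for kmem_cache_create/destroy:

#include <stdio.h>
#include <stdlib.h>

static void *inode_cache, *dquot_cache, *qf_chunk_cache;

static int init_caches(void)
{
	inode_cache = malloc(64);
	dquot_cache = malloc(64);
	qf_chunk_cache = malloc(64);

	if (!inode_cache || !dquot_cache || !qf_chunk_cache) {
		/* destroy whatever was created; leave nothing half set up */
		free(inode_cache);
		free(dquot_cache);
		free(qf_chunk_cache);
		inode_cache = dquot_cache = qf_chunk_cache = NULL;
		return -1;	/* -ENOMEM in the kernel */
	}
	return 0;
}

int main(void)
{
	if (init_caches())
		return 1;
	puts("all caches created");
	return 0;
}
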
@@ -1202,8 +1505,15 @@ static void ocfs2_free_mem_caches(void)
1202{ 1505{
1203 if (ocfs2_inode_cachep) 1506 if (ocfs2_inode_cachep)
1204 kmem_cache_destroy(ocfs2_inode_cachep); 1507 kmem_cache_destroy(ocfs2_inode_cachep);
1205
1206 ocfs2_inode_cachep = NULL; 1508 ocfs2_inode_cachep = NULL;
1509
1510 if (ocfs2_dquot_cachep)
1511 kmem_cache_destroy(ocfs2_dquot_cachep);
1512 ocfs2_dquot_cachep = NULL;
1513
1514 if (ocfs2_qf_chunk_cachep)
1515 kmem_cache_destroy(ocfs2_qf_chunk_cachep);
1516 ocfs2_qf_chunk_cachep = NULL;
1207} 1517}
1208 1518
1209static int ocfs2_get_sector(struct super_block *sb, 1519static int ocfs2_get_sector(struct super_block *sb,
@@ -1303,6 +1613,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1303 osb = OCFS2_SB(sb); 1613 osb = OCFS2_SB(sb);
1304 BUG_ON(!osb); 1614 BUG_ON(!osb);
1305 1615
1616 ocfs2_disable_quotas(osb);
1617
1306 ocfs2_shutdown_local_alloc(osb); 1618 ocfs2_shutdown_local_alloc(osb);
1307 1619
1308 ocfs2_truncate_log_shutdown(osb); 1620 ocfs2_truncate_log_shutdown(osb);
@@ -1413,6 +1725,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1413 sb->s_fs_info = osb; 1725 sb->s_fs_info = osb;
1414 sb->s_op = &ocfs2_sops; 1726 sb->s_op = &ocfs2_sops;
1415 sb->s_export_op = &ocfs2_export_ops; 1727 sb->s_export_op = &ocfs2_export_ops;
1728 sb->s_qcop = &ocfs2_quotactl_ops;
1729 sb->dq_op = &ocfs2_quota_operations;
1416 sb->s_xattr = ocfs2_xattr_handlers; 1730 sb->s_xattr = ocfs2_xattr_handlers;
1417 sb->s_time_gran = 1; 1731 sb->s_time_gran = 1;
1418 sb->s_flags |= MS_NOATIME; 1732 sb->s_flags |= MS_NOATIME;
@@ -1676,6 +1990,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1676 1990
1677 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, 1991 if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
1678 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) { 1992 strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
1993 /* We have to do a raw check of the feature here */
1994 if (le32_to_cpu(di->id2.i_super.s_feature_incompat) &
1995 OCFS2_FEATURE_INCOMPAT_META_ECC) {
1996 status = ocfs2_block_check_validate(bh->b_data,
1997 bh->b_size,
1998 &di->i_check);
1999 if (status)
2000 goto out;
2001 }
1679 status = -EINVAL; 2002 status = -EINVAL;
1680 if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) { 2003 if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
1681 mlog(ML_ERROR, "found superblock with incorrect block " 2004 mlog(ML_ERROR, "found superblock with incorrect block "
@@ -1717,6 +2040,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1717 } 2040 }
1718 } 2041 }
1719 2042
2043out:
1720 mlog_exit(status); 2044 mlog_exit(status);
1721 return status; 2045 return status;
1722} 2046}
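
The superblock path now peeks at the raw incompat bits and, when META_ECC is advertised, verifies the block check before trusting any other field of the dinode. A toy version of that "checksum first, then parse" ordering; the checksum here is a trivial XOR purely for illustration and has nothing to do with the real CRC32/ECC code in blockcheck.c:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_super {
	uint32_t feature_incompat;
	uint32_t check;		/* stored checksum over payload */
	char payload[24];
};

#define FEAT_META_ECC 0x1

static uint32_t toy_checksum(const char *buf, size_t len)
{
	uint32_t c = 0;

	while (len--)
		c ^= (uint32_t)(unsigned char)buf[len] << (len % 24);
	return c;
}

static int verify_super(const struct toy_super *sb)
{
	if ((sb->feature_incompat & FEAT_META_ECC) &&
	    toy_checksum(sb->payload, sizeof(sb->payload)) != sb->check)
		return -1;	/* don't even look at the other fields */
	/* ... the usual blocksize/clustersize checks would follow ... */
	return 0;
}

int main(void)
{
	struct toy_super sb = { .feature_incompat = FEAT_META_ECC };

	strcpy(sb.payload, "OCFS2-ish payload");
	sb.check = toy_checksum(sb.payload, sizeof(sb.payload));
	printf("superblock %s\n", verify_super(&sb) ? "rejected" : "accepted");
	return 0;
}
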
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index cbd03dfdc7b9..ed0a0cfd68d2 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
84 84
85 mlog_entry_void(); 85 mlog_entry_void();
86 86
87 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); 87 status = ocfs2_read_inode_block(inode, bh);
88 if (status < 0) { 88 if (status < 0) {
89 mlog_errno(status); 89 mlog_errno(status);
90 link = ERR_PTR(status); 90 link = ERR_PTR(status);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 74d7367ade13..e1d638af6ac3 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -35,12 +35,14 @@
35#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/module.h> 36#include <linux/module.h>
37#include <linux/string.h> 37#include <linux/string.h>
38#include <linux/security.h>
38 39
39#define MLOG_MASK_PREFIX ML_XATTR 40#define MLOG_MASK_PREFIX ML_XATTR
40#include <cluster/masklog.h> 41#include <cluster/masklog.h>
41 42
42#include "ocfs2.h" 43#include "ocfs2.h"
43#include "alloc.h" 44#include "alloc.h"
45#include "blockcheck.h"
44#include "dlmglue.h" 46#include "dlmglue.h"
45#include "file.h" 47#include "file.h"
46#include "symlink.h" 48#include "symlink.h"
@@ -61,12 +63,32 @@ struct ocfs2_xattr_def_value_root {
61}; 63};
62 64
63struct ocfs2_xattr_bucket { 65struct ocfs2_xattr_bucket {
64 struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; 66 /* The inode these xattrs are associated with */
65 struct ocfs2_xattr_header *xh; 67 struct inode *bu_inode;
68
69 /* The actual buffers that make up the bucket */
70 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
71
72 /* How many blocks make up one bucket for this filesystem */
73 int bu_blocks;
74};
75
76struct ocfs2_xattr_set_ctxt {
77 handle_t *handle;
78 struct ocfs2_alloc_context *meta_ac;
79 struct ocfs2_alloc_context *data_ac;
80 struct ocfs2_cached_dealloc_ctxt dealloc;
66}; 81};
67 82
68#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 83#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
69#define OCFS2_XATTR_INLINE_SIZE 80 84#define OCFS2_XATTR_INLINE_SIZE 80
85#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
86 - sizeof(struct ocfs2_xattr_header) \
87 - sizeof(__u32))
88#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
89 - sizeof(struct ocfs2_xattr_block) \
90 - sizeof(struct ocfs2_xattr_header) \
91 - sizeof(__u32))
70 92
71static struct ocfs2_xattr_def_value_root def_xv = { 93static struct ocfs2_xattr_def_value_root def_xv = {
72 .xv.xr_list.l_count = cpu_to_le16(1), 94 .xv.xr_list.l_count = cpu_to_le16(1),
@@ -74,13 +96,25 @@ static struct ocfs2_xattr_def_value_root def_xv = {
74 96
75struct xattr_handler *ocfs2_xattr_handlers[] = { 97struct xattr_handler *ocfs2_xattr_handlers[] = {
76 &ocfs2_xattr_user_handler, 98 &ocfs2_xattr_user_handler,
99#ifdef CONFIG_OCFS2_FS_POSIX_ACL
100 &ocfs2_xattr_acl_access_handler,
101 &ocfs2_xattr_acl_default_handler,
102#endif
77 &ocfs2_xattr_trusted_handler, 103 &ocfs2_xattr_trusted_handler,
104 &ocfs2_xattr_security_handler,
78 NULL 105 NULL
79}; 106};
80 107
81static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { 108static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
82 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 109 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
110#ifdef CONFIG_OCFS2_FS_POSIX_ACL
111 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
112 = &ocfs2_xattr_acl_access_handler,
113 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
114 = &ocfs2_xattr_acl_default_handler,
115#endif
83 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 116 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
117 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
84}; 118};
85 119
86struct ocfs2_xattr_info { 120struct ocfs2_xattr_info {
@@ -98,7 +132,7 @@ struct ocfs2_xattr_search {
98 */ 132 */
99 struct buffer_head *xattr_bh; 133 struct buffer_head *xattr_bh;
100 struct ocfs2_xattr_header *header; 134 struct ocfs2_xattr_header *header;
101 struct ocfs2_xattr_bucket bucket; 135 struct ocfs2_xattr_bucket *bucket;
102 void *base; 136 void *base;
103 void *end; 137 void *end;
104 struct ocfs2_xattr_entry *here; 138 struct ocfs2_xattr_entry *here;
@@ -127,14 +161,20 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
127 size_t buffer_size); 161 size_t buffer_size);
128 162
129static int ocfs2_xattr_create_index_block(struct inode *inode, 163static int ocfs2_xattr_create_index_block(struct inode *inode,
130 struct ocfs2_xattr_search *xs); 164 struct ocfs2_xattr_search *xs,
165 struct ocfs2_xattr_set_ctxt *ctxt);
131 166
132static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 167static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
133 struct ocfs2_xattr_info *xi, 168 struct ocfs2_xattr_info *xi,
134 struct ocfs2_xattr_search *xs); 169 struct ocfs2_xattr_search *xs,
170 struct ocfs2_xattr_set_ctxt *ctxt);
135 171
136static int ocfs2_delete_xattr_index_block(struct inode *inode, 172static int ocfs2_delete_xattr_index_block(struct inode *inode,
137 struct buffer_head *xb_bh); 173 struct buffer_head *xb_bh);
174static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
175 u64 src_blk, u64 last_blk, u64 to_blk,
176 unsigned int start_bucket,
177 u32 *first_hash);
138 178
139static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 179static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
140{ 180{
@@ -154,6 +194,216 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
154 return len / sizeof(struct ocfs2_xattr_entry); 194 return len / sizeof(struct ocfs2_xattr_entry);
155} 195}
156 196
197#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
198#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
199#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
200
201static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
202{
203 struct ocfs2_xattr_bucket *bucket;
204 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
205
206 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
207
208 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
209 if (bucket) {
210 bucket->bu_inode = inode;
211 bucket->bu_blocks = blks;
212 }
213
214 return bucket;
215}
216
217static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
218{
219 int i;
220
221 for (i = 0; i < bucket->bu_blocks; i++) {
222 brelse(bucket->bu_bhs[i]);
223 bucket->bu_bhs[i] = NULL;
224 }
225}
226
227static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
228{
229 if (bucket) {
230 ocfs2_xattr_bucket_relse(bucket);
231 bucket->bu_inode = NULL;
232 kfree(bucket);
233 }
234}
235
236/*
237 * A bucket that has never been written to disk doesn't need to be
238 * read. We just need the buffer_heads. Don't call this for
239 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes
240 * them fully.
241 */
242static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
243 u64 xb_blkno)
244{
245 int i, rc = 0;
246
247 for (i = 0; i < bucket->bu_blocks; i++) {
248 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
249 xb_blkno + i);
250 if (!bucket->bu_bhs[i]) {
251 rc = -EIO;
252 mlog_errno(rc);
253 break;
254 }
255
256 if (!ocfs2_buffer_uptodate(bucket->bu_inode,
257 bucket->bu_bhs[i]))
258 ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
259 bucket->bu_bhs[i]);
260 }
261
262 if (rc)
263 ocfs2_xattr_bucket_relse(bucket);
264 return rc;
265}
266
267/* Read the xattr bucket at xb_blkno */
268static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
269 u64 xb_blkno)
270{
271 int rc;
272
273 rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
274 bucket->bu_blocks, bucket->bu_bhs, 0,
275 NULL);
276 if (!rc) {
277 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
278 bucket->bu_bhs,
279 bucket->bu_blocks,
280 &bucket_xh(bucket)->xh_check);
281 if (rc)
282 mlog_errno(rc);
283 }
284
285 if (rc)
286 ocfs2_xattr_bucket_relse(bucket);
287 return rc;
288}
289
290static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
291 struct ocfs2_xattr_bucket *bucket,
292 int type)
293{
294 int i, rc = 0;
295
296 for (i = 0; i < bucket->bu_blocks; i++) {
297 rc = ocfs2_journal_access(handle, bucket->bu_inode,
298 bucket->bu_bhs[i], type);
299 if (rc) {
300 mlog_errno(rc);
301 break;
302 }
303 }
304
305 return rc;
306}
307
308static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
309 struct ocfs2_xattr_bucket *bucket)
310{
311 int i;
312
313 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
314 bucket->bu_bhs, bucket->bu_blocks,
315 &bucket_xh(bucket)->xh_check);
316
317 for (i = 0; i < bucket->bu_blocks; i++)
318 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
319}
320
321static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
322 struct ocfs2_xattr_bucket *src)
323{
324 int i;
325 int blocksize = src->bu_inode->i_sb->s_blocksize;
326
327 BUG_ON(dest->bu_blocks != src->bu_blocks);
328 BUG_ON(dest->bu_inode != src->bu_inode);
329
330 for (i = 0; i < src->bu_blocks; i++) {
331 memcpy(bucket_block(dest, i), bucket_block(src, i),
332 blocksize);
333 }
334}
335
336static int ocfs2_validate_xattr_block(struct super_block *sb,
337 struct buffer_head *bh)
338{
339 int rc;
340 struct ocfs2_xattr_block *xb =
341 (struct ocfs2_xattr_block *)bh->b_data;
342
343 mlog(0, "Validating xattr block %llu\n",
344 (unsigned long long)bh->b_blocknr);
345
346 BUG_ON(!buffer_uptodate(bh));
347
348 /*
349 * If the ecc fails, we return the error but otherwise
350 * leave the filesystem running. We know any error is
351 * local to this block.
352 */
353 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
354 if (rc)
355 return rc;
356
357 /*
358 * Errors after here are fatal
359 */
360
361 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
362 ocfs2_error(sb,
363 "Extended attribute block #%llu has bad "
364 "signature %.*s",
365 (unsigned long long)bh->b_blocknr, 7,
366 xb->xb_signature);
367 return -EINVAL;
368 }
369
370 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
371 ocfs2_error(sb,
372 "Extended attribute block #%llu has an "
373 "invalid xb_blkno of %llu",
374 (unsigned long long)bh->b_blocknr,
375 (unsigned long long)le64_to_cpu(xb->xb_blkno));
376 return -EINVAL;
377 }
378
379 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
380 ocfs2_error(sb,
381 "Extended attribute block #%llu has an invalid "
382 "xb_fs_generation of #%u",
383 (unsigned long long)bh->b_blocknr,
384 le32_to_cpu(xb->xb_fs_generation));
385 return -EINVAL;
386 }
387
388 return 0;
389}
390
391static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
392 struct buffer_head **bh)
393{
394 int rc;
395 struct buffer_head *tmp = *bh;
396
397 rc = ocfs2_read_block(inode, xb_blkno, &tmp,
398 ocfs2_validate_xattr_block);
399
400 /* If ocfs2_read_block() got us a new bh, pass it up. */
401 if (!rc && !*bh)
402 *bh = tmp;
403
404 return rc;
405}
406
157static inline const char *ocfs2_xattr_prefix(int name_index) 407static inline const char *ocfs2_xattr_prefix(int name_index)
158{ 408{
159 struct xattr_handler *handler = NULL; 409 struct xattr_handler *handler = NULL;
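
Everything in the hunk above funnels bucket access through one small lifecycle: allocate a bucket object, read (or init) its blocks, take journal access on all of them, modify, recompute the metadata ECC and dirty each buffer, then release. A compact model of that lifecycle with the per-block loop made explicit; this is toy code, not the ocfs2 structures:

#include <stdio.h>
#include <stdlib.h>

#define BLOCKS_PER_BUCKET 4

struct bucket {
	char *blocks[BLOCKS_PER_BUCKET];
	int nr_blocks;
};

static struct bucket *bucket_new(int nr)
{
	struct bucket *b = calloc(1, sizeof(*b));

	if (b)
		b->nr_blocks = nr;
	return b;
}

static int bucket_read(struct bucket *b)
{
	int i;

	for (i = 0; i < b->nr_blocks; i++) {
		b->blocks[i] = calloc(1, 512);	/* "read" one block */
		if (!b->blocks[i])
			return -1;
	}
	return 0;
}

static void bucket_journal_dirty(struct bucket *b)
{
	int i;

	/* the real helper recomputes xh_check here, then dirties each bh */
	for (i = 0; i < b->nr_blocks; i++)
		printf("block %d dirtied\n", i);
}

static void bucket_free(struct bucket *b)
{
	int i;

	if (!b)
		return;
	for (i = 0; i < b->nr_blocks; i++)
		free(b->blocks[i]);
	free(b);
}

int main(void)
{
	struct bucket *b = bucket_new(BLOCKS_PER_BUCKET);

	if (!b || bucket_read(b)) {
		bucket_free(b);
		return 1;
	}
	bucket_journal_dirty(b);
	bucket_free(b);
	return 0;
}
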
@@ -200,54 +450,163 @@ static void ocfs2_xattr_hash_entry(struct inode *inode,
200 return; 450 return;
201} 451}
202 452
453static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
454{
455 int size = 0;
456
457 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
458 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
459 else
460 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
461 size += sizeof(struct ocfs2_xattr_entry);
462
463 return size;
464}
465
466int ocfs2_calc_security_init(struct inode *dir,
467 struct ocfs2_security_xattr_info *si,
468 int *want_clusters,
469 int *xattr_credits,
470 struct ocfs2_alloc_context **xattr_ac)
471{
472 int ret = 0;
473 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
474 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
475 si->value_len);
476
477 /*
 478	 * The maximum inline space a security xattr can take is
 479	 * 256 (name) + 80 (value) + 16 (entry) = 352 bytes,
 480	 * so reserving one metadata block for it is enough.
481 */
482 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
483 s_size > OCFS2_XATTR_FREE_IN_IBODY) {
484 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
485 if (ret) {
486 mlog_errno(ret);
487 return ret;
488 }
489 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
490 }
491
 492	/* reserve clusters for an xattr value that will be stored in the B-tree */
493 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
494 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
495 si->value_len);
496
497 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
498 new_clusters);
499 *want_clusters += new_clusters;
500 }
501 return ret;
502}
503
504int ocfs2_calc_xattr_init(struct inode *dir,
505 struct buffer_head *dir_bh,
506 int mode,
507 struct ocfs2_security_xattr_info *si,
508 int *want_clusters,
509 int *xattr_credits,
510 struct ocfs2_alloc_context **xattr_ac)
511{
512 int ret = 0;
513 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
514 int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
515
516 if (si->enable)
517 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
518 si->value_len);
519
520 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
521 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
522 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
523 "", NULL, 0);
524 if (acl_len > 0) {
525 a_size = ocfs2_xattr_entry_real_size(0, acl_len);
526 if (S_ISDIR(mode))
527 a_size <<= 1;
528 } else if (acl_len != 0 && acl_len != -ENODATA) {
529 mlog_errno(ret);
530 return ret;
531 }
532 }
533
534 if (!(s_size + a_size))
535 return ret;
536
537 /*
 538	 * The maximum inline space a security xattr can take is
 539	 * 256 (name) + 80 (value) + 16 (entry) = 352 bytes, and the
 540	 * maximum inline space an acl xattr can take is
 541	 * 80 (value) + 16 (entry) * 2 (if a directory) = 192 bytes.
 542	 * With a 512-byte blocksize we may need to reserve one more
 543	 * cluster for an xattr bucket; otherwise reserving one
 544	 * metadata block for them is enough.
545 */
546 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
547 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
548 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
549 if (ret) {
550 mlog_errno(ret);
551 return ret;
552 }
553 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
554 }
555
556 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
557 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
558 *want_clusters += 1;
559 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
560 }
561
562 /*
 563	 * reserve credits and clusters for xattrs that have a large value
 564	 * and must be stored outside the inode body
565 */
566 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
567 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
568 si->value_len);
569 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
570 new_clusters);
571 *want_clusters += new_clusters;
572 }
573 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
574 acl_len > OCFS2_XATTR_INLINE_SIZE) {
 575		/* a directory gets two ACLs: DEFAULT and ACCESS */
576 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
577 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
578 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
579 new_clusters);
580 *want_clusters += new_clusters;
581 }
582
583 return ret;
584}
585
203static int ocfs2_xattr_extend_allocation(struct inode *inode, 586static int ocfs2_xattr_extend_allocation(struct inode *inode,
204 u32 clusters_to_add, 587 u32 clusters_to_add,
205 struct buffer_head *xattr_bh, 588 struct ocfs2_xattr_value_buf *vb,
206 struct ocfs2_xattr_value_root *xv) 589 struct ocfs2_xattr_set_ctxt *ctxt)
207{ 590{
208 int status = 0; 591 int status = 0;
209 int restart_func = 0; 592 handle_t *handle = ctxt->handle;
210 int credits = 0;
211 handle_t *handle = NULL;
212 struct ocfs2_alloc_context *data_ac = NULL;
213 struct ocfs2_alloc_context *meta_ac = NULL;
214 enum ocfs2_alloc_restarted why; 593 enum ocfs2_alloc_restarted why;
215 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 594 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
216 u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); 595 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
217 struct ocfs2_extent_tree et; 596 struct ocfs2_extent_tree et;
218 597
219 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); 598 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
220 599
221 ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); 600 ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
222
223restart_all:
224
225 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
226 &data_ac, &meta_ac);
227 if (status) {
228 mlog_errno(status);
229 goto leave;
230 }
231
232 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
233 clusters_to_add);
234 handle = ocfs2_start_trans(osb, credits);
235 if (IS_ERR(handle)) {
236 status = PTR_ERR(handle);
237 handle = NULL;
238 mlog_errno(status);
239 goto leave;
240 }
241 601
242restarted_transaction: 602 status = vb->vb_access(handle, inode, vb->vb_bh,
243 status = ocfs2_journal_access(handle, inode, xattr_bh, 603 OCFS2_JOURNAL_ACCESS_WRITE);
244 OCFS2_JOURNAL_ACCESS_WRITE);
245 if (status < 0) { 604 if (status < 0) {
246 mlog_errno(status); 605 mlog_errno(status);
247 goto leave; 606 goto leave;
248 } 607 }
249 608
250 prev_clusters = le32_to_cpu(xv->xr_clusters); 609 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
251 status = ocfs2_add_clusters_in_btree(osb, 610 status = ocfs2_add_clusters_in_btree(osb,
252 inode, 611 inode,
253 &logical_start, 612 &logical_start,
@@ -255,157 +614,84 @@ restarted_transaction:
255 0, 614 0,
256 &et, 615 &et,
257 handle, 616 handle,
258 data_ac, 617 ctxt->data_ac,
259 meta_ac, 618 ctxt->meta_ac,
260 &why); 619 &why);
261 if ((status < 0) && (status != -EAGAIN)) { 620 if (status < 0) {
262 if (status != -ENOSPC) 621 mlog_errno(status);
263 mlog_errno(status);
264 goto leave; 622 goto leave;
265 } 623 }
266 624
267 status = ocfs2_journal_dirty(handle, xattr_bh); 625 status = ocfs2_journal_dirty(handle, vb->vb_bh);
268 if (status < 0) { 626 if (status < 0) {
269 mlog_errno(status); 627 mlog_errno(status);
270 goto leave; 628 goto leave;
271 } 629 }
272 630
273 clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; 631 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
274 632
275 if (why != RESTART_NONE && clusters_to_add) { 633 /*
276 if (why == RESTART_META) { 634 * We should have already allocated enough space before the transaction,
277 mlog(0, "restarting function.\n"); 635 * so no need to restart.
278 restart_func = 1; 636 */
279 } else { 637 BUG_ON(why != RESTART_NONE || clusters_to_add);
280 BUG_ON(why != RESTART_TRANS);
281
282 mlog(0, "restarting transaction.\n");
283 /* TODO: This can be more intelligent. */
284 credits = ocfs2_calc_extend_credits(osb->sb,
285 et.et_root_el,
286 clusters_to_add);
287 status = ocfs2_extend_trans(handle, credits);
288 if (status < 0) {
289 /* handle still has to be committed at
290 * this point. */
291 status = -ENOMEM;
292 mlog_errno(status);
293 goto leave;
294 }
295 goto restarted_transaction;
296 }
297 }
298 638
299leave: 639leave:
300 if (handle) {
301 ocfs2_commit_trans(osb, handle);
302 handle = NULL;
303 }
304 if (data_ac) {
305 ocfs2_free_alloc_context(data_ac);
306 data_ac = NULL;
307 }
308 if (meta_ac) {
309 ocfs2_free_alloc_context(meta_ac);
310 meta_ac = NULL;
311 }
312 if ((!status) && restart_func) {
313 restart_func = 0;
314 goto restart_all;
315 }
316 640
317 return status; 641 return status;
318} 642}
319 643
320static int __ocfs2_remove_xattr_range(struct inode *inode, 644static int __ocfs2_remove_xattr_range(struct inode *inode,
321 struct buffer_head *root_bh, 645 struct ocfs2_xattr_value_buf *vb,
322 struct ocfs2_xattr_value_root *xv,
323 u32 cpos, u32 phys_cpos, u32 len, 646 u32 cpos, u32 phys_cpos, u32 len,
324 struct ocfs2_cached_dealloc_ctxt *dealloc) 647 struct ocfs2_xattr_set_ctxt *ctxt)
325{ 648{
326 int ret; 649 int ret;
327 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 650 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
328 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 651 handle_t *handle = ctxt->handle;
329 struct inode *tl_inode = osb->osb_tl_inode;
330 handle_t *handle;
331 struct ocfs2_alloc_context *meta_ac = NULL;
332 struct ocfs2_extent_tree et; 652 struct ocfs2_extent_tree et;
333 653
334 ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); 654 ocfs2_init_xattr_value_extent_tree(&et, inode, vb);
335 655
336 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 656 ret = vb->vb_access(handle, inode, vb->vb_bh,
657 OCFS2_JOURNAL_ACCESS_WRITE);
337 if (ret) { 658 if (ret) {
338 mlog_errno(ret); 659 mlog_errno(ret);
339 return ret;
340 }
341
342 mutex_lock(&tl_inode->i_mutex);
343
344 if (ocfs2_truncate_log_needs_flush(osb)) {
345 ret = __ocfs2_flush_truncate_log(osb);
346 if (ret < 0) {
347 mlog_errno(ret);
348 goto out;
349 }
350 }
351
352 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
353 if (IS_ERR(handle)) {
354 ret = PTR_ERR(handle);
355 mlog_errno(ret);
356 goto out; 660 goto out;
357 } 661 }
358 662
359 ret = ocfs2_journal_access(handle, inode, root_bh, 663 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
360 OCFS2_JOURNAL_ACCESS_WRITE); 664 &ctxt->dealloc);
361 if (ret) {
362 mlog_errno(ret);
363 goto out_commit;
364 }
365
366 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
367 dealloc);
368 if (ret) { 665 if (ret) {
369 mlog_errno(ret); 666 mlog_errno(ret);
370 goto out_commit; 667 goto out;
371 } 668 }
372 669
373 le32_add_cpu(&xv->xr_clusters, -len); 670 le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
374 671
375 ret = ocfs2_journal_dirty(handle, root_bh); 672 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
376 if (ret) { 673 if (ret) {
377 mlog_errno(ret); 674 mlog_errno(ret);
378 goto out_commit; 675 goto out;
379 } 676 }
380 677
381 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 678 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
382 if (ret) 679 if (ret)
383 mlog_errno(ret); 680 mlog_errno(ret);
384 681
385out_commit:
386 ocfs2_commit_trans(osb, handle);
387out: 682out:
388 mutex_unlock(&tl_inode->i_mutex);
389
390 if (meta_ac)
391 ocfs2_free_alloc_context(meta_ac);
392
393 return ret; 683 return ret;
394} 684}
395 685
396static int ocfs2_xattr_shrink_size(struct inode *inode, 686static int ocfs2_xattr_shrink_size(struct inode *inode,
397 u32 old_clusters, 687 u32 old_clusters,
398 u32 new_clusters, 688 u32 new_clusters,
399 struct buffer_head *root_bh, 689 struct ocfs2_xattr_value_buf *vb,
400 struct ocfs2_xattr_value_root *xv) 690 struct ocfs2_xattr_set_ctxt *ctxt)
401{ 691{
402 int ret = 0; 692 int ret = 0;
403 u32 trunc_len, cpos, phys_cpos, alloc_size; 693 u32 trunc_len, cpos, phys_cpos, alloc_size;
404 u64 block; 694 u64 block;
405 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
406 struct ocfs2_cached_dealloc_ctxt dealloc;
407
408 ocfs2_init_dealloc_ctxt(&dealloc);
409 695
410 if (old_clusters <= new_clusters) 696 if (old_clusters <= new_clusters)
411 return 0; 697 return 0;
@@ -414,7 +700,8 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
414 trunc_len = old_clusters - new_clusters; 700 trunc_len = old_clusters - new_clusters;
415 while (trunc_len) { 701 while (trunc_len) {
416 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, 702 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
417 &alloc_size, &xv->xr_list); 703 &alloc_size,
704 &vb->vb_xv->xr_list);
418 if (ret) { 705 if (ret) {
419 mlog_errno(ret); 706 mlog_errno(ret);
420 goto out; 707 goto out;
@@ -423,9 +710,9 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
423 if (alloc_size > trunc_len) 710 if (alloc_size > trunc_len)
424 alloc_size = trunc_len; 711 alloc_size = trunc_len;
425 712
426 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, 713 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
427 phys_cpos, alloc_size, 714 phys_cpos, alloc_size,
428 &dealloc); 715 ctxt);
429 if (ret) { 716 if (ret) {
430 mlog_errno(ret); 717 mlog_errno(ret);
431 goto out; 718 goto out;
@@ -439,20 +726,17 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
439 } 726 }
440 727
441out: 728out:
442 ocfs2_schedule_truncate_log_flush(osb, 1);
443 ocfs2_run_deallocs(osb, &dealloc);
444
445 return ret; 729 return ret;
446} 730}
447 731
448static int ocfs2_xattr_value_truncate(struct inode *inode, 732static int ocfs2_xattr_value_truncate(struct inode *inode,
449 struct buffer_head *root_bh, 733 struct ocfs2_xattr_value_buf *vb,
450 struct ocfs2_xattr_value_root *xv, 734 int len,
451 int len) 735 struct ocfs2_xattr_set_ctxt *ctxt)
452{ 736{
453 int ret; 737 int ret;
454 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); 738 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
455 u32 old_clusters = le32_to_cpu(xv->xr_clusters); 739 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
456 740
457 if (new_clusters == old_clusters) 741 if (new_clusters == old_clusters)
458 return 0; 742 return 0;
@@ -460,11 +744,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
460 if (new_clusters > old_clusters) 744 if (new_clusters > old_clusters)
461 ret = ocfs2_xattr_extend_allocation(inode, 745 ret = ocfs2_xattr_extend_allocation(inode,
462 new_clusters - old_clusters, 746 new_clusters - old_clusters,
463 root_bh, xv); 747 vb, ctxt);
464 else 748 else
465 ret = ocfs2_xattr_shrink_size(inode, 749 ret = ocfs2_xattr_shrink_size(inode,
466 old_clusters, new_clusters, 750 old_clusters, new_clusters,
467 root_bh, xv); 751 vb, ctxt);
468 752
469 return ret; 753 return ret;
470} 754}
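
The value-truncate path above only decides between extend and shrink based on cluster counts. A minimal user-space sketch of that dispatch, assuming a hypothetical 4 KB cluster size and stubbed extend/shrink helpers (not the real ocfs2 API):

#include <stdio.h>

#define CLUSTER_SIZE 4096u            /* assumption: 4 KB clusters */

/* Round a byte length up to whole clusters, like ocfs2_clusters_for_bytes(). */
static unsigned int clusters_for_bytes(unsigned long long bytes)
{
	return (unsigned int)((bytes + CLUSTER_SIZE - 1) / CLUSTER_SIZE);
}

/* Stubs standing in for the extend/shrink paths. */
static int extend_value(unsigned int add)
{
	printf("extend by %u clusters\n", add);
	return 0;
}

static int shrink_value(unsigned int old_c, unsigned int new_c)
{
	printf("shrink from %u to %u clusters\n", old_c, new_c);
	return 0;
}

/* Dispatch mirroring ocfs2_xattr_value_truncate(): no-op, extend, or shrink. */
static int value_truncate(unsigned int old_clusters, unsigned long long new_len)
{
	unsigned int new_clusters = clusters_for_bytes(new_len);

	if (new_clusters == old_clusters)
		return 0;
	if (new_clusters > old_clusters)
		return extend_value(new_clusters - old_clusters);
	return shrink_value(old_clusters, new_clusters);
}

int main(void)
{
	value_truncate(2, 20000);   /* 20000 bytes -> 5 clusters: extend by 3 */
	value_truncate(5, 100);     /* 100 bytes   -> 1 cluster:  shrink to 1 */
	return 0;
}
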
@@ -554,18 +838,14 @@ static int ocfs2_xattr_block_list(struct inode *inode,
554 if (!di->i_xattr_loc) 838 if (!di->i_xattr_loc)
555 return ret; 839 return ret;
556 840
557 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); 841 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
842 &blk_bh);
558 if (ret < 0) { 843 if (ret < 0) {
559 mlog_errno(ret); 844 mlog_errno(ret);
560 return ret; 845 return ret;
561 } 846 }
562 847
563 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 848 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
564 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
565 ret = -EIO;
566 goto cleanup;
567 }
568
569 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 849 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
570 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 850 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
571 ret = ocfs2_xattr_list_entries(inode, header, 851 ret = ocfs2_xattr_list_entries(inode, header,
@@ -575,7 +855,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
575 ret = ocfs2_xattr_tree_list_index_block(inode, xt, 855 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
576 buffer, buffer_size); 856 buffer, buffer_size);
577 } 857 }
578cleanup: 858
579 brelse(blk_bh); 859 brelse(blk_bh);
580 860
581 return ret; 861 return ret;
@@ -685,7 +965,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
685 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 965 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
686 /* Copy ocfs2_xattr_value */ 966 /* Copy ocfs2_xattr_value */
687 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 967 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
688 ret = ocfs2_read_block(inode, blkno, &bh); 968 ret = ocfs2_read_block(inode, blkno, &bh, NULL);
689 if (ret) { 969 if (ret) {
690 mlog_errno(ret); 970 mlog_errno(ret);
691 goto out; 971 goto out;
@@ -769,7 +1049,12 @@ static int ocfs2_xattr_block_get(struct inode *inode,
769 size_t size; 1049 size_t size;
770 int ret = -ENODATA, name_offset, name_len, block_off, i; 1050 int ret = -ENODATA, name_offset, name_len, block_off, i;
771 1051
772 memset(&xs->bucket, 0, sizeof(xs->bucket)); 1052 xs->bucket = ocfs2_xattr_bucket_new(inode);
1053 if (!xs->bucket) {
1054 ret = -ENOMEM;
1055 mlog_errno(ret);
1056 goto cleanup;
1057 }
773 1058
774 ret = ocfs2_xattr_block_find(inode, name_index, name, xs); 1059 ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
775 if (ret) { 1060 if (ret) {
@@ -795,11 +1080,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
795 1080
796 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1081 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
797 ret = ocfs2_xattr_bucket_get_name_value(inode, 1082 ret = ocfs2_xattr_bucket_get_name_value(inode,
798 xs->bucket.xh, 1083 bucket_xh(xs->bucket),
799 i, 1084 i,
800 &block_off, 1085 &block_off,
801 &name_offset); 1086 &name_offset);
802 xs->base = xs->bucket.bhs[block_off]->b_data; 1087 xs->base = bucket_block(xs->bucket, block_off);
803 } 1088 }
804 if (ocfs2_xattr_is_local(xs->here)) { 1089 if (ocfs2_xattr_is_local(xs->here)) {
805 memcpy(buffer, (void *)xs->base + 1090 memcpy(buffer, (void *)xs->base +
@@ -817,21 +1102,15 @@ static int ocfs2_xattr_block_get(struct inode *inode,
817 } 1102 }
818 ret = size; 1103 ret = size;
819cleanup: 1104cleanup:
820 for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) 1105 ocfs2_xattr_bucket_free(xs->bucket);
821 brelse(xs->bucket.bhs[i]);
822 memset(&xs->bucket, 0, sizeof(xs->bucket));
823 1106
824 brelse(xs->xattr_bh); 1107 brelse(xs->xattr_bh);
825 xs->xattr_bh = NULL; 1108 xs->xattr_bh = NULL;
826 return ret; 1109 return ret;
827} 1110}
828 1111
829/* ocfs2_xattr_get() 1112int ocfs2_xattr_get_nolock(struct inode *inode,
830 * 1113 struct buffer_head *di_bh,
831 * Copy an extended attribute into the buffer provided.
832 * Buffer is NULL to compute the size of buffer required.
833 */
834static int ocfs2_xattr_get(struct inode *inode,
835 int name_index, 1114 int name_index,
836 const char *name, 1115 const char *name,
837 void *buffer, 1116 void *buffer,
@@ -839,7 +1118,6 @@ static int ocfs2_xattr_get(struct inode *inode,
839{ 1118{
840 int ret; 1119 int ret;
841 struct ocfs2_dinode *di = NULL; 1120 struct ocfs2_dinode *di = NULL;
842 struct buffer_head *di_bh = NULL;
843 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1121 struct ocfs2_inode_info *oi = OCFS2_I(inode);
844 struct ocfs2_xattr_search xis = { 1122 struct ocfs2_xattr_search xis = {
845 .not_found = -ENODATA, 1123 .not_found = -ENODATA,
@@ -854,11 +1132,6 @@ static int ocfs2_xattr_get(struct inode *inode,
854 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 1132 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
855 ret = -ENODATA; 1133 ret = -ENODATA;
856 1134
857 ret = ocfs2_inode_lock(inode, &di_bh, 0);
858 if (ret < 0) {
859 mlog_errno(ret);
860 return ret;
861 }
862 xis.inode_bh = xbs.inode_bh = di_bh; 1135 xis.inode_bh = xbs.inode_bh = di_bh;
863 di = (struct ocfs2_dinode *)di_bh->b_data; 1136 di = (struct ocfs2_dinode *)di_bh->b_data;
864 1137
@@ -869,6 +1142,32 @@ static int ocfs2_xattr_get(struct inode *inode,
869 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 1142 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
870 buffer_size, &xbs); 1143 buffer_size, &xbs);
871 up_read(&oi->ip_xattr_sem); 1144 up_read(&oi->ip_xattr_sem);
1145
1146 return ret;
1147}
1148
1149/* ocfs2_xattr_get()
1150 *
1151 * Copy an extended attribute into the buffer provided.
1152 * Buffer is NULL to compute the size of buffer required.
1153 */
1154static int ocfs2_xattr_get(struct inode *inode,
1155 int name_index,
1156 const char *name,
1157 void *buffer,
1158 size_t buffer_size)
1159{
1160 int ret;
1161 struct buffer_head *di_bh = NULL;
1162
1163 ret = ocfs2_inode_lock(inode, &di_bh, 0);
1164 if (ret < 0) {
1165 mlog_errno(ret);
1166 return ret;
1167 }
1168 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1169 name, buffer, buffer_size);
1170
872 ocfs2_inode_unlock(inode, 0); 1171 ocfs2_inode_unlock(inode, 0);
873 1172
874 brelse(di_bh); 1173 brelse(di_bh);
@@ -877,44 +1176,36 @@ static int ocfs2_xattr_get(struct inode *inode,
877} 1176}
878 1177
879static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1178static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1179 handle_t *handle,
880 struct ocfs2_xattr_value_root *xv, 1180 struct ocfs2_xattr_value_root *xv,
881 const void *value, 1181 const void *value,
882 int value_len) 1182 int value_len)
883{ 1183{
884 int ret = 0, i, cp_len, credits; 1184 int ret = 0, i, cp_len;
885 u16 blocksize = inode->i_sb->s_blocksize; 1185 u16 blocksize = inode->i_sb->s_blocksize;
886 u32 p_cluster, num_clusters; 1186 u32 p_cluster, num_clusters;
887 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 1187 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
888 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1188 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
889 u64 blkno; 1189 u64 blkno;
890 struct buffer_head *bh = NULL; 1190 struct buffer_head *bh = NULL;
891 handle_t *handle;
892 1191
893 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1192 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
894 1193
895 credits = clusters * bpc;
896 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
897 if (IS_ERR(handle)) {
898 ret = PTR_ERR(handle);
899 mlog_errno(ret);
900 goto out;
901 }
902
903 while (cpos < clusters) { 1194 while (cpos < clusters) {
904 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1195 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
905 &num_clusters, &xv->xr_list); 1196 &num_clusters, &xv->xr_list);
906 if (ret) { 1197 if (ret) {
907 mlog_errno(ret); 1198 mlog_errno(ret);
908 goto out_commit; 1199 goto out;
909 } 1200 }
910 1201
911 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1202 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
912 1203
913 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1204 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
914 ret = ocfs2_read_block(inode, blkno, &bh); 1205 ret = ocfs2_read_block(inode, blkno, &bh, NULL);
915 if (ret) { 1206 if (ret) {
916 mlog_errno(ret); 1207 mlog_errno(ret);
917 goto out_commit; 1208 goto out;
918 } 1209 }
919 1210
920 ret = ocfs2_journal_access(handle, 1211 ret = ocfs2_journal_access(handle,
@@ -923,7 +1214,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
923 OCFS2_JOURNAL_ACCESS_WRITE); 1214 OCFS2_JOURNAL_ACCESS_WRITE);
924 if (ret < 0) { 1215 if (ret < 0) {
925 mlog_errno(ret); 1216 mlog_errno(ret);
926 goto out_commit; 1217 goto out;
927 } 1218 }
928 1219
929 cp_len = value_len > blocksize ? blocksize : value_len; 1220 cp_len = value_len > blocksize ? blocksize : value_len;
@@ -937,7 +1228,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
937 ret = ocfs2_journal_dirty(handle, bh); 1228 ret = ocfs2_journal_dirty(handle, bh);
938 if (ret < 0) { 1229 if (ret < 0) {
939 mlog_errno(ret); 1230 mlog_errno(ret);
940 goto out_commit; 1231 goto out;
941 } 1232 }
942 brelse(bh); 1233 brelse(bh);
943 bh = NULL; 1234 bh = NULL;
@@ -951,8 +1242,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
951 } 1242 }
952 cpos += num_clusters; 1243 cpos += num_clusters;
953 } 1244 }
954out_commit:
955 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
956out: 1245out:
957 brelse(bh); 1246 brelse(bh);
958 1247
@@ -960,28 +1249,22 @@ out:
960} 1249}
961 1250
962static int ocfs2_xattr_cleanup(struct inode *inode, 1251static int ocfs2_xattr_cleanup(struct inode *inode,
1252 handle_t *handle,
963 struct ocfs2_xattr_info *xi, 1253 struct ocfs2_xattr_info *xi,
964 struct ocfs2_xattr_search *xs, 1254 struct ocfs2_xattr_search *xs,
1255 struct ocfs2_xattr_value_buf *vb,
965 size_t offs) 1256 size_t offs)
966{ 1257{
967 handle_t *handle = NULL;
968 int ret = 0; 1258 int ret = 0;
969 size_t name_len = strlen(xi->name); 1259 size_t name_len = strlen(xi->name);
970 void *val = xs->base + offs; 1260 void *val = xs->base + offs;
971 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1261 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
972 1262
973 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1263 ret = vb->vb_access(handle, inode, vb->vb_bh,
974 OCFS2_XATTR_BLOCK_UPDATE_CREDITS); 1264 OCFS2_JOURNAL_ACCESS_WRITE);
975 if (IS_ERR(handle)) {
976 ret = PTR_ERR(handle);
977 mlog_errno(ret);
978 goto out;
979 }
980 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
981 OCFS2_JOURNAL_ACCESS_WRITE);
982 if (ret) { 1265 if (ret) {
983 mlog_errno(ret); 1266 mlog_errno(ret);
984 goto out_commit; 1267 goto out;
985 } 1268 }
986 /* Decrease xattr count */ 1269 /* Decrease xattr count */
987 le16_add_cpu(&xs->header->xh_count, -1); 1270 le16_add_cpu(&xs->header->xh_count, -1);
@@ -989,35 +1272,27 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
989 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); 1272 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
990 memset(val, 0, size); 1273 memset(val, 0, size);
991 1274
992 ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1275 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
993 if (ret < 0) 1276 if (ret < 0)
994 mlog_errno(ret); 1277 mlog_errno(ret);
995out_commit:
996 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
997out: 1278out:
998 return ret; 1279 return ret;
999} 1280}
1000 1281
1001static int ocfs2_xattr_update_entry(struct inode *inode, 1282static int ocfs2_xattr_update_entry(struct inode *inode,
1283 handle_t *handle,
1002 struct ocfs2_xattr_info *xi, 1284 struct ocfs2_xattr_info *xi,
1003 struct ocfs2_xattr_search *xs, 1285 struct ocfs2_xattr_search *xs,
1286 struct ocfs2_xattr_value_buf *vb,
1004 size_t offs) 1287 size_t offs)
1005{ 1288{
1006 handle_t *handle = NULL; 1289 int ret;
1007 int ret = 0;
1008 1290
1009 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1291 ret = vb->vb_access(handle, inode, vb->vb_bh,
1010 OCFS2_XATTR_BLOCK_UPDATE_CREDITS); 1292 OCFS2_JOURNAL_ACCESS_WRITE);
1011 if (IS_ERR(handle)) {
1012 ret = PTR_ERR(handle);
1013 mlog_errno(ret);
1014 goto out;
1015 }
1016 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1017 OCFS2_JOURNAL_ACCESS_WRITE);
1018 if (ret) { 1293 if (ret) {
1019 mlog_errno(ret); 1294 mlog_errno(ret);
1020 goto out_commit; 1295 goto out;
1021 } 1296 }
1022 1297
1023 xs->here->xe_name_offset = cpu_to_le16(offs); 1298 xs->here->xe_name_offset = cpu_to_le16(offs);
@@ -1028,11 +1303,9 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
1028 ocfs2_xattr_set_local(xs->here, 0); 1303 ocfs2_xattr_set_local(xs->here, 0);
1029 ocfs2_xattr_hash_entry(inode, xs->header, xs->here); 1304 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1030 1305
1031 ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1306 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1032 if (ret < 0) 1307 if (ret < 0)
1033 mlog_errno(ret); 1308 mlog_errno(ret);
1034out_commit:
1035 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1036out: 1309out:
1037 return ret; 1310 return ret;
1038} 1311}
@@ -1045,6 +1318,8 @@ out:
1045static int ocfs2_xattr_set_value_outside(struct inode *inode, 1318static int ocfs2_xattr_set_value_outside(struct inode *inode,
1046 struct ocfs2_xattr_info *xi, 1319 struct ocfs2_xattr_info *xi,
1047 struct ocfs2_xattr_search *xs, 1320 struct ocfs2_xattr_search *xs,
1321 struct ocfs2_xattr_set_ctxt *ctxt,
1322 struct ocfs2_xattr_value_buf *vb,
1048 size_t offs) 1323 size_t offs)
1049{ 1324{
1050 size_t name_len = strlen(xi->name); 1325 size_t name_len = strlen(xi->name);
@@ -1062,20 +1337,20 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
1062 xv->xr_list.l_tree_depth = 0; 1337 xv->xr_list.l_tree_depth = 0;
1063 xv->xr_list.l_count = cpu_to_le16(1); 1338 xv->xr_list.l_count = cpu_to_le16(1);
1064 xv->xr_list.l_next_free_rec = 0; 1339 xv->xr_list.l_next_free_rec = 0;
1340 vb->vb_xv = xv;
1065 1341
1066 ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, 1342 ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1067 xi->value_len);
1068 if (ret < 0) { 1343 if (ret < 0) {
1069 mlog_errno(ret); 1344 mlog_errno(ret);
1070 return ret; 1345 return ret;
1071 } 1346 }
1072 ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value, 1347 ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1073 xi->value_len);
1074 if (ret < 0) { 1348 if (ret < 0) {
1075 mlog_errno(ret); 1349 mlog_errno(ret);
1076 return ret; 1350 return ret;
1077 } 1351 }
1078 ret = ocfs2_xattr_update_entry(inode, xi, xs, offs); 1352 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv,
1353 xi->value, xi->value_len);
1079 if (ret < 0) 1354 if (ret < 0)
1080 mlog_errno(ret); 1355 mlog_errno(ret);
1081 1356
@@ -1195,6 +1470,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode,
1195static int ocfs2_xattr_set_entry(struct inode *inode, 1470static int ocfs2_xattr_set_entry(struct inode *inode,
1196 struct ocfs2_xattr_info *xi, 1471 struct ocfs2_xattr_info *xi,
1197 struct ocfs2_xattr_search *xs, 1472 struct ocfs2_xattr_search *xs,
1473 struct ocfs2_xattr_set_ctxt *ctxt,
1198 int flag) 1474 int flag)
1199{ 1475{
1200 struct ocfs2_xattr_entry *last; 1476 struct ocfs2_xattr_entry *last;
@@ -1202,7 +1478,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1202 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 1478 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1203 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); 1479 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1204 size_t size_l = 0; 1480 size_t size_l = 0;
1205 handle_t *handle = NULL; 1481 handle_t *handle = ctxt->handle;
1206 int free, i, ret; 1482 int free, i, ret;
1207 struct ocfs2_xattr_info xi_l = { 1483 struct ocfs2_xattr_info xi_l = {
1208 .name_index = xi->name_index, 1484 .name_index = xi->name_index,
@@ -1210,6 +1486,16 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1210 .value = xi->value, 1486 .value = xi->value,
1211 .value_len = xi->value_len, 1487 .value_len = xi->value_len,
1212 }; 1488 };
1489 struct ocfs2_xattr_value_buf vb = {
1490 .vb_bh = xs->xattr_bh,
1491 .vb_access = ocfs2_journal_access_di,
1492 };
1493
1494 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1495 BUG_ON(xs->xattr_bh == xs->inode_bh);
1496 vb.vb_access = ocfs2_journal_access_xb;
1497 } else
1498 BUG_ON(xs->xattr_bh != xs->inode_bh);
1213 1499
1214 /* Compute min_offs, last and free space. */ 1500 /* Compute min_offs, last and free space. */
1215 last = xs->header->xh_entries; 1501 last = xs->header->xh_entries;
@@ -1265,15 +1551,14 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1265 if (ocfs2_xattr_is_local(xs->here) && size == size_l) { 1551 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1266 /* Replace existing local xattr with tree root */ 1552 /* Replace existing local xattr with tree root */
1267 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, 1553 ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1268 offs); 1554 ctxt, &vb, offs);
1269 if (ret < 0) 1555 if (ret < 0)
1270 mlog_errno(ret); 1556 mlog_errno(ret);
1271 goto out; 1557 goto out;
1272 } else if (!ocfs2_xattr_is_local(xs->here)) { 1558 } else if (!ocfs2_xattr_is_local(xs->here)) {
1273 /* For existing xattr which has value outside */ 1559 /* For existing xattr which has value outside */
1274 struct ocfs2_xattr_value_root *xv = NULL; 1560 vb.vb_xv = (struct ocfs2_xattr_value_root *)
1275 xv = (struct ocfs2_xattr_value_root *)(val + 1561 (val + OCFS2_XATTR_SIZE(name_len));
1276 OCFS2_XATTR_SIZE(name_len));
1277 1562
1278 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1563 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1279 /* 1564 /*
@@ -1282,27 +1567,30 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1282 * then set new value with set_value_outside(). 1567 * then set new value with set_value_outside().
1283 */ 1568 */
1284 ret = ocfs2_xattr_value_truncate(inode, 1569 ret = ocfs2_xattr_value_truncate(inode,
1285 xs->xattr_bh, 1570 &vb,
1286 xv, 1571 xi->value_len,
1287 xi->value_len); 1572 ctxt);
1288 if (ret < 0) { 1573 if (ret < 0) {
1289 mlog_errno(ret); 1574 mlog_errno(ret);
1290 goto out; 1575 goto out;
1291 } 1576 }
1292 1577
1293 ret = __ocfs2_xattr_set_value_outside(inode, 1578 ret = ocfs2_xattr_update_entry(inode,
1294 xv, 1579 handle,
1295 xi->value, 1580 xi,
1296 xi->value_len); 1581 xs,
1582 &vb,
1583 offs);
1297 if (ret < 0) { 1584 if (ret < 0) {
1298 mlog_errno(ret); 1585 mlog_errno(ret);
1299 goto out; 1586 goto out;
1300 } 1587 }
1301 1588
1302 ret = ocfs2_xattr_update_entry(inode, 1589 ret = __ocfs2_xattr_set_value_outside(inode,
1303 xi, 1590 handle,
1304 xs, 1591 vb.vb_xv,
1305 offs); 1592 xi->value,
1593 xi->value_len);
1306 if (ret < 0) 1594 if (ret < 0)
1307 mlog_errno(ret); 1595 mlog_errno(ret);
1308 goto out; 1596 goto out;
@@ -1312,44 +1600,28 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1312 	 * just truncate the old value to zero. 1600
1313 */ 1601 */
1314 ret = ocfs2_xattr_value_truncate(inode, 1602 ret = ocfs2_xattr_value_truncate(inode,
1315 xs->xattr_bh, 1603 &vb,
1316 xv, 1604 0,
1317 0); 1605 ctxt);
1318 if (ret < 0) 1606 if (ret < 0)
1319 mlog_errno(ret); 1607 mlog_errno(ret);
1320 } 1608 }
1321 } 1609 }
1322 } 1610 }
1323 1611
1324 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1612 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
1325 OCFS2_INODE_UPDATE_CREDITS); 1613 OCFS2_JOURNAL_ACCESS_WRITE);
1326 if (IS_ERR(handle)) {
1327 ret = PTR_ERR(handle);
1328 mlog_errno(ret);
1329 goto out;
1330 }
1331
1332 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1333 OCFS2_JOURNAL_ACCESS_WRITE);
1334 if (ret) { 1614 if (ret) {
1335 mlog_errno(ret); 1615 mlog_errno(ret);
1336 goto out_commit; 1616 goto out;
1337 } 1617 }
1338 1618
1339 if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1619 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1340 /* set extended attribute in external block. */ 1620 ret = vb.vb_access(handle, inode, vb.vb_bh,
1341 ret = ocfs2_extend_trans(handle, 1621 OCFS2_JOURNAL_ACCESS_WRITE);
1342 OCFS2_INODE_UPDATE_CREDITS +
1343 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1344 if (ret) {
1345 mlog_errno(ret);
1346 goto out_commit;
1347 }
1348 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1349 OCFS2_JOURNAL_ACCESS_WRITE);
1350 if (ret) { 1622 if (ret) {
1351 mlog_errno(ret); 1623 mlog_errno(ret);
1352 goto out_commit; 1624 goto out;
1353 } 1625 }
1354 } 1626 }
1355 1627
@@ -1363,7 +1635,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1363 ret = ocfs2_journal_dirty(handle, xs->xattr_bh); 1635 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1364 if (ret < 0) { 1636 if (ret < 0) {
1365 mlog_errno(ret); 1637 mlog_errno(ret);
1366 goto out_commit; 1638 goto out;
1367 } 1639 }
1368 } 1640 }
1369 1641
@@ -1391,25 +1663,19 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1391 oi->ip_dyn_features |= flag; 1663 oi->ip_dyn_features |= flag;
1392 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 1664 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1393 spin_unlock(&oi->ip_lock); 1665 spin_unlock(&oi->ip_lock);
1394 /* Update inode ctime */
1395 inode->i_ctime = CURRENT_TIME;
1396 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1397 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1398 1666
1399 ret = ocfs2_journal_dirty(handle, xs->inode_bh); 1667 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1400 if (ret < 0) 1668 if (ret < 0)
1401 mlog_errno(ret); 1669 mlog_errno(ret);
1402 1670
1403out_commit:
1404 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1405
1406 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { 1671 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1407 /* 1672 /*
1408 * Set value outside in B tree. 1673 * Set value outside in B tree.
1409 * This is the second step for value size > INLINE_SIZE. 1674 * This is the second step for value size > INLINE_SIZE.
1410 */ 1675 */
1411 size_t offs = le16_to_cpu(xs->here->xe_name_offset); 1676 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1412 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs); 1677 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1678 &vb, offs);
1413 if (ret < 0) { 1679 if (ret < 0) {
1414 int ret2; 1680 int ret2;
1415 1681
@@ -1418,41 +1684,56 @@ out_commit:
1418 * If set value outside failed, we have to clean 1684 * If set value outside failed, we have to clean
1419 * the junk tree root we have already set in local. 1685 * the junk tree root we have already set in local.
1420 */ 1686 */
1421 ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs); 1687 ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1688 xi, xs, &vb, offs);
1422 if (ret2 < 0) 1689 if (ret2 < 0)
1423 mlog_errno(ret2); 1690 mlog_errno(ret2);
1424 } 1691 }
1425 } 1692 }
1426out: 1693out:
1427 return ret; 1694 return ret;
1428
1429} 1695}
1430 1696
1431static int ocfs2_remove_value_outside(struct inode*inode, 1697static int ocfs2_remove_value_outside(struct inode*inode,
1432 struct buffer_head *bh, 1698 struct ocfs2_xattr_value_buf *vb,
1433 struct ocfs2_xattr_header *header) 1699 struct ocfs2_xattr_header *header)
1434{ 1700{
1435 int ret = 0, i; 1701 int ret = 0, i;
1702 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1703 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1704
1705 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1706
1707 ctxt.handle = ocfs2_start_trans(osb,
1708 ocfs2_remove_extent_credits(osb->sb));
1709 if (IS_ERR(ctxt.handle)) {
1710 ret = PTR_ERR(ctxt.handle);
1711 mlog_errno(ret);
1712 goto out;
1713 }
1436 1714
1437 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 1715 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1438 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 1716 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1439 1717
1440 if (!ocfs2_xattr_is_local(entry)) { 1718 if (!ocfs2_xattr_is_local(entry)) {
1441 struct ocfs2_xattr_value_root *xv;
1442 void *val; 1719 void *val;
1443 1720
1444 val = (void *)header + 1721 val = (void *)header +
1445 le16_to_cpu(entry->xe_name_offset); 1722 le16_to_cpu(entry->xe_name_offset);
1446 xv = (struct ocfs2_xattr_value_root *) 1723 vb->vb_xv = (struct ocfs2_xattr_value_root *)
1447 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 1724 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1448 ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); 1725 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1449 if (ret < 0) { 1726 if (ret < 0) {
1450 mlog_errno(ret); 1727 mlog_errno(ret);
1451 return ret; 1728 break;
1452 } 1729 }
1453 } 1730 }
1454 } 1731 }
1455 1732
1733 ocfs2_commit_trans(osb, ctxt.handle);
1734 ocfs2_schedule_truncate_log_flush(osb, 1);
1735 ocfs2_run_deallocs(osb, &ctxt.dealloc);
1736out:
1456 return ret; 1737 return ret;
1457} 1738}
1458 1739
@@ -1463,12 +1744,16 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
1463 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1744 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1464 struct ocfs2_xattr_header *header; 1745 struct ocfs2_xattr_header *header;
1465 int ret; 1746 int ret;
1747 struct ocfs2_xattr_value_buf vb = {
1748 .vb_bh = di_bh,
1749 .vb_access = ocfs2_journal_access_di,
1750 };
1466 1751
1467 header = (struct ocfs2_xattr_header *) 1752 header = (struct ocfs2_xattr_header *)
1468 ((void *)di + inode->i_sb->s_blocksize - 1753 ((void *)di + inode->i_sb->s_blocksize -
1469 le16_to_cpu(di->i_xattr_inline_size)); 1754 le16_to_cpu(di->i_xattr_inline_size));
1470 1755
1471 ret = ocfs2_remove_value_outside(inode, di_bh, header); 1756 ret = ocfs2_remove_value_outside(inode, &vb, header);
1472 1757
1473 return ret; 1758 return ret;
1474} 1759}
@@ -1478,11 +1763,15 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
1478{ 1763{
1479 struct ocfs2_xattr_block *xb; 1764 struct ocfs2_xattr_block *xb;
1480 int ret = 0; 1765 int ret = 0;
1766 struct ocfs2_xattr_value_buf vb = {
1767 .vb_bh = blk_bh,
1768 .vb_access = ocfs2_journal_access_xb,
1769 };
1481 1770
1482 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1771 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1483 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1772 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1484 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 1773 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1485 ret = ocfs2_remove_value_outside(inode, blk_bh, header); 1774 ret = ocfs2_remove_value_outside(inode, &vb, header);
1486 } else 1775 } else
1487 ret = ocfs2_delete_xattr_index_block(inode, blk_bh); 1776 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1488 1777
@@ -1502,24 +1791,19 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1502 u64 blk, bg_blkno; 1791 u64 blk, bg_blkno;
1503 u16 bit; 1792 u16 bit;
1504 1793
1505 ret = ocfs2_read_block(inode, block, &blk_bh); 1794 ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1506 if (ret < 0) { 1795 if (ret < 0) {
1507 mlog_errno(ret); 1796 mlog_errno(ret);
1508 goto out; 1797 goto out;
1509 } 1798 }
1510 1799
1511 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1512 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1513 ret = -EIO;
1514 goto out;
1515 }
1516
1517 ret = ocfs2_xattr_block_remove(inode, blk_bh); 1800 ret = ocfs2_xattr_block_remove(inode, blk_bh);
1518 if (ret < 0) { 1801 if (ret < 0) {
1519 mlog_errno(ret); 1802 mlog_errno(ret);
1520 goto out; 1803 goto out;
1521 } 1804 }
1522 1805
1806 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1523 blk = le64_to_cpu(xb->xb_blkno); 1807 blk = le64_to_cpu(xb->xb_blkno);
1524 bit = le16_to_cpu(xb->xb_suballoc_bit); 1808 bit = le16_to_cpu(xb->xb_suballoc_bit);
1525 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1809 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1606,8 +1890,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1606 mlog_errno(ret); 1890 mlog_errno(ret);
1607 goto out; 1891 goto out;
1608 } 1892 }
1609 ret = ocfs2_journal_access(handle, inode, di_bh, 1893 ret = ocfs2_journal_access_di(handle, inode, di_bh,
1610 OCFS2_JOURNAL_ACCESS_WRITE); 1894 OCFS2_JOURNAL_ACCESS_WRITE);
1611 if (ret) { 1895 if (ret) {
1612 mlog_errno(ret); 1896 mlog_errno(ret);
1613 goto out_commit; 1897 goto out_commit;
@@ -1714,7 +1998,8 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
1714 */ 1998 */
1715static int ocfs2_xattr_ibody_set(struct inode *inode, 1999static int ocfs2_xattr_ibody_set(struct inode *inode,
1716 struct ocfs2_xattr_info *xi, 2000 struct ocfs2_xattr_info *xi,
1717 struct ocfs2_xattr_search *xs) 2001 struct ocfs2_xattr_search *xs,
2002 struct ocfs2_xattr_set_ctxt *ctxt)
1718{ 2003{
1719 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2004 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1720 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2005 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
@@ -1731,7 +2016,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
1731 } 2016 }
1732 } 2017 }
1733 2018
1734 ret = ocfs2_xattr_set_entry(inode, xi, xs, 2019 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
1735 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); 2020 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1736out: 2021out:
1737 up_write(&oi->ip_alloc_sem); 2022 up_write(&oi->ip_alloc_sem);
@@ -1758,19 +2043,15 @@ static int ocfs2_xattr_block_find(struct inode *inode,
1758 if (!di->i_xattr_loc) 2043 if (!di->i_xattr_loc)
1759 return ret; 2044 return ret;
1760 2045
1761 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); 2046 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2047 &blk_bh);
1762 if (ret < 0) { 2048 if (ret < 0) {
1763 mlog_errno(ret); 2049 mlog_errno(ret);
1764 return ret; 2050 return ret;
1765 } 2051 }
1766 2052
1767 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1768 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1769 ret = -EIO;
1770 goto cleanup;
1771 }
1772
1773 xs->xattr_bh = blk_bh; 2053 xs->xattr_bh = blk_bh;
2054 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1774 2055
1775 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 2056 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1776 xs->header = &xb->xb_attrs.xb_header; 2057 xs->header = &xb->xb_attrs.xb_header;
@@ -1804,13 +2085,13 @@ cleanup:
1804 */ 2085 */
1805static int ocfs2_xattr_block_set(struct inode *inode, 2086static int ocfs2_xattr_block_set(struct inode *inode,
1806 struct ocfs2_xattr_info *xi, 2087 struct ocfs2_xattr_info *xi,
1807 struct ocfs2_xattr_search *xs) 2088 struct ocfs2_xattr_search *xs,
2089 struct ocfs2_xattr_set_ctxt *ctxt)
1808{ 2090{
1809 struct buffer_head *new_bh = NULL; 2091 struct buffer_head *new_bh = NULL;
1810 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2092 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1811 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; 2093 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1812 struct ocfs2_alloc_context *meta_ac = NULL; 2094 handle_t *handle = ctxt->handle;
1813 handle_t *handle = NULL;
1814 struct ocfs2_xattr_block *xblk = NULL; 2095 struct ocfs2_xattr_block *xblk = NULL;
1815 u16 suballoc_bit_start; 2096 u16 suballoc_bit_start;
1816 u32 num_got; 2097 u32 num_got;
@@ -1818,45 +2099,29 @@ static int ocfs2_xattr_block_set(struct inode *inode,
1818 int ret; 2099 int ret;
1819 2100
1820 if (!xs->xattr_bh) { 2101 if (!xs->xattr_bh) {
1821 /* 2102 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh,
1822 * Alloc one external block for extended attribute 2103 OCFS2_JOURNAL_ACCESS_CREATE);
1823 * outside of inode.
1824 */
1825 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1826 if (ret < 0) { 2104 if (ret < 0) {
1827 mlog_errno(ret); 2105 mlog_errno(ret);
1828 goto out; 2106 goto end;
1829 }
1830 handle = ocfs2_start_trans(osb,
1831 OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1832 if (IS_ERR(handle)) {
1833 ret = PTR_ERR(handle);
1834 mlog_errno(ret);
1835 goto out;
1836 }
1837 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1838 OCFS2_JOURNAL_ACCESS_CREATE);
1839 if (ret < 0) {
1840 mlog_errno(ret);
1841 goto out_commit;
1842 } 2107 }
1843 2108
1844 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 2109 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
1845 &suballoc_bit_start, &num_got, 2110 &suballoc_bit_start, &num_got,
1846 &first_blkno); 2111 &first_blkno);
1847 if (ret < 0) { 2112 if (ret < 0) {
1848 mlog_errno(ret); 2113 mlog_errno(ret);
1849 goto out_commit; 2114 goto end;
1850 } 2115 }
1851 2116
1852 new_bh = sb_getblk(inode->i_sb, first_blkno); 2117 new_bh = sb_getblk(inode->i_sb, first_blkno);
1853 ocfs2_set_new_buffer_uptodate(inode, new_bh); 2118 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1854 2119
1855 ret = ocfs2_journal_access(handle, inode, new_bh, 2120 ret = ocfs2_journal_access_xb(handle, inode, new_bh,
1856 OCFS2_JOURNAL_ACCESS_CREATE); 2121 OCFS2_JOURNAL_ACCESS_CREATE);
1857 if (ret < 0) { 2122 if (ret < 0) {
1858 mlog_errno(ret); 2123 mlog_errno(ret);
1859 goto out_commit; 2124 goto end;
1860 } 2125 }
1861 2126
1862 /* Initialize ocfs2_xattr_block */ 2127 /* Initialize ocfs2_xattr_block */
@@ -1874,44 +2139,555 @@ static int ocfs2_xattr_block_set(struct inode *inode,
1874 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2139 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1875 xs->here = xs->header->xh_entries; 2140 xs->here = xs->header->xh_entries;
1876 2141
1877
1878 ret = ocfs2_journal_dirty(handle, new_bh); 2142 ret = ocfs2_journal_dirty(handle, new_bh);
1879 if (ret < 0) { 2143 if (ret < 0) {
1880 mlog_errno(ret); 2144 mlog_errno(ret);
1881 goto out_commit; 2145 goto end;
1882 } 2146 }
1883 di->i_xattr_loc = cpu_to_le64(first_blkno); 2147 di->i_xattr_loc = cpu_to_le64(first_blkno);
1884 ret = ocfs2_journal_dirty(handle, xs->inode_bh); 2148 ocfs2_journal_dirty(handle, xs->inode_bh);
1885 if (ret < 0)
1886 mlog_errno(ret);
1887out_commit:
1888 ocfs2_commit_trans(osb, handle);
1889out:
1890 if (meta_ac)
1891 ocfs2_free_alloc_context(meta_ac);
1892 if (ret < 0)
1893 return ret;
1894 } else 2149 } else
1895 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2150 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1896 2151
1897 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { 2152 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1898 /* Set extended attribute into external block */ 2153 /* Set extended attribute into external block */
1899 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); 2154 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2155 OCFS2_HAS_XATTR_FL);
1900 if (!ret || ret != -ENOSPC) 2156 if (!ret || ret != -ENOSPC)
1901 goto end; 2157 goto end;
1902 2158
1903 ret = ocfs2_xattr_create_index_block(inode, xs); 2159 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
1904 if (ret) 2160 if (ret)
1905 goto end; 2161 goto end;
1906 } 2162 }
1907 2163
1908 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); 2164 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
1909 2165
1910end: 2166end:
1911 2167
1912 return ret; 2168 return ret;
1913} 2169}
1914 2170
2171/* Check whether the new xattr can be inserted into the inode. */
2172static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2173 struct ocfs2_xattr_info *xi,
2174 struct ocfs2_xattr_search *xs)
2175{
2176 u64 value_size;
2177 struct ocfs2_xattr_entry *last;
2178 int free, i;
2179 size_t min_offs = xs->end - xs->base;
2180
2181 if (!xs->header)
2182 return 0;
2183
2184 last = xs->header->xh_entries;
2185
2186 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2187 size_t offs = le16_to_cpu(last->xe_name_offset);
2188 if (offs < min_offs)
2189 min_offs = offs;
2190 last += 1;
2191 }
2192
2193 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
2194 if (free < 0)
2195 return 0;
2196
2197 BUG_ON(!xs->not_found);
2198
2199 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2200 value_size = OCFS2_XATTR_ROOT_SIZE;
2201 else
2202 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2203
2204 if (free >= sizeof(struct ocfs2_xattr_entry) +
2205 OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2206 return 1;
2207
2208 return 0;
2209}
2210
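
ocfs2_xattr_can_be_in_inode() boils down to a free-space comparison: the gap between the end of the entry array and the lowest name/value offset must hold a new entry, the padded name, and either the padded value or just a value tree root. A rough sketch of that check, with made-up sizes standing in for the OCFS2_XATTR_* macros:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical on-disk sizes, stand-ins for the real OCFS2_XATTR_* macros. */
#define XATTR_ENTRY_SIZE   16u
#define XATTR_ALIGN(len)   (((len) + 3u) & ~3u)   /* assumed 4-byte alignment */
#define XATTR_ROOT_SIZE    64u                    /* size of a value tree root */
#define XATTR_INLINE_MAX   80u                    /* max value stored in place */

/*
 * Decide whether a new xattr fits in the space between the end of the
 * entry array and the lowest name/value offset, mirroring the check in
 * ocfs2_xattr_can_be_in_inode().
 */
static int xattr_fits(size_t free_space, const char *name, size_t value_len)
{
	size_t value_size;

	/* Large values only store a tree root locally; the data goes outside. */
	if (value_len > XATTR_INLINE_MAX)
		value_size = XATTR_ROOT_SIZE;
	else
		value_size = XATTR_ALIGN(value_len);

	return free_space >= XATTR_ENTRY_SIZE +
			     XATTR_ALIGN(strlen(name)) + value_size;
}

int main(void)
{
	printf("%d\n", xattr_fits(256, "user.comment", 40));   /* small value: fits */
	printf("%d\n", xattr_fits(64,  "user.comment", 4096)); /* root only, still too big */
	return 0;
}
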
2211static int ocfs2_calc_xattr_set_need(struct inode *inode,
2212 struct ocfs2_dinode *di,
2213 struct ocfs2_xattr_info *xi,
2214 struct ocfs2_xattr_search *xis,
2215 struct ocfs2_xattr_search *xbs,
2216 int *clusters_need,
2217 int *meta_need,
2218 int *credits_need)
2219{
2220 int ret = 0, old_in_xb = 0;
2221 int clusters_add = 0, meta_add = 0, credits = 0;
2222 struct buffer_head *bh = NULL;
2223 struct ocfs2_xattr_block *xb = NULL;
2224 struct ocfs2_xattr_entry *xe = NULL;
2225 struct ocfs2_xattr_value_root *xv = NULL;
2226 char *base = NULL;
2227 int name_offset, name_len = 0;
2228 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2229 xi->value_len);
2230 u64 value_size;
2231
2232 /*
 2233	 * Calculate the clusters we need to write.
 2234	 * Whether we replace an old xattr or add a new one, these
 2235	 * credits are needed to write out the value.
2236 */
2237 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2238 credits += new_clusters *
2239 ocfs2_clusters_to_blocks(inode->i_sb, 1);
2240
2241 if (xis->not_found && xbs->not_found) {
2242 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2243
2244 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2245 clusters_add += new_clusters;
2246 credits += ocfs2_calc_extend_credits(inode->i_sb,
2247 &def_xv.xv.xr_list,
2248 new_clusters);
2249 }
2250
2251 goto meta_guess;
2252 }
2253
2254 if (!xis->not_found) {
2255 xe = xis->here;
2256 name_offset = le16_to_cpu(xe->xe_name_offset);
2257 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2258 base = xis->base;
2259 credits += OCFS2_INODE_UPDATE_CREDITS;
2260 } else {
2261 int i, block_off = 0;
2262 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2263 xe = xbs->here;
2264 name_offset = le16_to_cpu(xe->xe_name_offset);
2265 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2266 i = xbs->here - xbs->header->xh_entries;
2267 old_in_xb = 1;
2268
2269 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2270 ret = ocfs2_xattr_bucket_get_name_value(inode,
2271 bucket_xh(xbs->bucket),
2272 i, &block_off,
2273 &name_offset);
2274 base = bucket_block(xbs->bucket, block_off);
2275 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2276 } else {
2277 base = xbs->base;
2278 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2279 }
2280 }
2281
2282 /*
 2283	 * Deleting an xattr doesn't need metadata or cluster allocation,
 2284	 * so just calculate the credits and return.
2285 *
2286 * The credits for removing the value tree will be extended
2287 * by ocfs2_remove_extent itself.
2288 */
2289 if (!xi->value) {
2290 if (!ocfs2_xattr_is_local(xe))
2291 credits += ocfs2_remove_extent_credits(inode->i_sb);
2292
2293 goto out;
2294 }
2295
2296 /* do cluster allocation guess first. */
2297 value_size = le64_to_cpu(xe->xe_value_size);
2298
2299 if (old_in_xb) {
2300 /*
 2301	 * In xattr set we always try to place the xe in the inode first,
 2302	 * so if it can be inserted into the inode successfully, the old
 2303	 * copy will be removed from the xattr block and this xattr will
 2304	 * live in the inode as a new in-inode entry.
2305 */
2306 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2307 clusters_add += new_clusters;
2308 credits += ocfs2_remove_extent_credits(inode->i_sb) +
2309 OCFS2_INODE_UPDATE_CREDITS;
2310 if (!ocfs2_xattr_is_local(xe))
2311 credits += ocfs2_calc_extend_credits(
2312 inode->i_sb,
2313 &def_xv.xv.xr_list,
2314 new_clusters);
2315 goto out;
2316 }
2317 }
2318
2319 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2320 /* the new values will be stored outside. */
2321 u32 old_clusters = 0;
2322
2323 if (!ocfs2_xattr_is_local(xe)) {
2324 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2325 value_size);
2326 xv = (struct ocfs2_xattr_value_root *)
2327 (base + name_offset + name_len);
2328 value_size = OCFS2_XATTR_ROOT_SIZE;
2329 } else
2330 xv = &def_xv.xv;
2331
2332 if (old_clusters >= new_clusters) {
2333 credits += ocfs2_remove_extent_credits(inode->i_sb);
2334 goto out;
2335 } else {
2336 meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2337 clusters_add += new_clusters - old_clusters;
2338 credits += ocfs2_calc_extend_credits(inode->i_sb,
2339 &xv->xr_list,
2340 new_clusters -
2341 old_clusters);
2342 if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2343 goto out;
2344 }
2345 } else {
2346 /*
 2347	 * Now the new value will be stored inline. So if the new
 2348	 * value is no larger than the value root size or the old
 2349	 * value, we don't need any allocation; otherwise we have
 2350	 * to guess the metadata allocation.
2351 */
2352 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2353 (!ocfs2_xattr_is_local(xe) &&
2354 OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2355 goto out;
2356 }
2357
2358meta_guess:
2359 /* calculate metadata allocation. */
2360 if (di->i_xattr_loc) {
2361 if (!xbs->xattr_bh) {
2362 ret = ocfs2_read_xattr_block(inode,
2363 le64_to_cpu(di->i_xattr_loc),
2364 &bh);
2365 if (ret) {
2366 mlog_errno(ret);
2367 goto out;
2368 }
2369
2370 xb = (struct ocfs2_xattr_block *)bh->b_data;
2371 } else
2372 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2373
2374 /*
 2375	 * If there is already an xattr tree, good, we can calculate
 2376	 * credits like any other b-tree. Otherwise we may end up
 2377	 * creating a tree; the credit calculation is borrowed from
 2378	 * ocfs2_calc_extend_credits() with root_el = NULL, and the
 2379	 * new tree will be cluster based, so no metadata is needed.
2380 */
2381 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2382 struct ocfs2_extent_list *el =
2383 &xb->xb_attrs.xb_root.xt_list;
2384 meta_add += ocfs2_extend_meta_needed(el);
2385 credits += ocfs2_calc_extend_credits(inode->i_sb,
2386 el, 1);
2387 } else
2388 credits += OCFS2_SUBALLOC_ALLOC + 1;
2389
2390 /*
2391 * This cluster will be used either for new bucket or for
2392 * new xattr block.
2393 * If the cluster size is the same as the bucket size, one
2394 * more is needed since we may need to extend the bucket
2395 * also.
2396 */
2397 clusters_add += 1;
2398 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2399 if (OCFS2_XATTR_BUCKET_SIZE ==
2400 OCFS2_SB(inode->i_sb)->s_clustersize) {
2401 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2402 clusters_add += 1;
2403 }
2404 } else {
2405 meta_add += 1;
2406 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2407 }
2408out:
2409 if (clusters_need)
2410 *clusters_need = clusters_add;
2411 if (meta_need)
2412 *meta_need = meta_add;
2413 if (credits_need)
2414 *credits_need = credits;
2415 brelse(bh);
2416 return ret;
2417}
2418
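
The cluster-allocation guess above reserves only the growth of an outside value; shrinking needs no new clusters, just credits for removing extents. A simplified sketch of that arithmetic, assuming 4 KB clusters and hypothetical helper names:

#include <stdio.h>

#define CLUSTER_SIZE 4096u   /* assumption: 4 KB clusters */

static unsigned int clusters_for_bytes(unsigned long long bytes)
{
	return (unsigned int)((bytes + CLUSTER_SIZE - 1) / CLUSTER_SIZE);
}

/*
 * Mirror the "cluster allocation guess": growing an outside value needs
 * (new - old) extra clusters, shrinking needs none (only remove-extent
 * credits).
 */
static unsigned int clusters_to_reserve(unsigned long long old_bytes,
					unsigned long long new_bytes)
{
	unsigned int old_c = clusters_for_bytes(old_bytes);
	unsigned int new_c = clusters_for_bytes(new_bytes);

	return new_c > old_c ? new_c - old_c : 0;
}

int main(void)
{
	printf("%u\n", clusters_to_reserve(5000, 20000)); /* 2 -> 5: reserve 3 */
	printf("%u\n", clusters_to_reserve(20000, 5000)); /* shrink: reserve 0 */
	return 0;
}
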
2419static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2420 struct ocfs2_dinode *di,
2421 struct ocfs2_xattr_info *xi,
2422 struct ocfs2_xattr_search *xis,
2423 struct ocfs2_xattr_search *xbs,
2424 struct ocfs2_xattr_set_ctxt *ctxt,
2425 int *credits)
2426{
2427 int clusters_add, meta_add, ret;
2428 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2429
2430 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2431
2432 ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2433
2434 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2435 &clusters_add, &meta_add, credits);
2436 if (ret) {
2437 mlog_errno(ret);
2438 return ret;
2439 }
2440
2441 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2442 "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2443
2444 if (meta_add) {
2445 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2446 &ctxt->meta_ac);
2447 if (ret) {
2448 mlog_errno(ret);
2449 goto out;
2450 }
2451 }
2452
2453 if (clusters_add) {
2454 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2455 if (ret)
2456 mlog_errno(ret);
2457 }
2458out:
2459 if (ret) {
2460 if (ctxt->meta_ac) {
2461 ocfs2_free_alloc_context(ctxt->meta_ac);
2462 ctxt->meta_ac = NULL;
2463 }
2464
2465 /*
 2466		 * We cannot have an error and a non-NULL ctxt->data_ac.
2467 */
2468 }
2469
2470 return ret;
2471}
2472
2473static int __ocfs2_xattr_set_handle(struct inode *inode,
2474 struct ocfs2_dinode *di,
2475 struct ocfs2_xattr_info *xi,
2476 struct ocfs2_xattr_search *xis,
2477 struct ocfs2_xattr_search *xbs,
2478 struct ocfs2_xattr_set_ctxt *ctxt)
2479{
2480 int ret = 0, credits, old_found;
2481
2482 if (!xi->value) {
2483 /* Remove existing extended attribute */
2484 if (!xis->not_found)
2485 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2486 else if (!xbs->not_found)
2487 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2488 } else {
 2489		/* We always try to set the extended attribute in the inode first. */
2490 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2491 if (!ret && !xbs->not_found) {
2492 /*
 2493			 * If that succeeded and the extended attribute also exists
 2494			 * in an external block, remove the external copy.
2495 */
2496 xi->value = NULL;
2497 xi->value_len = 0;
2498
2499 old_found = xis->not_found;
2500 xis->not_found = -ENODATA;
2501 ret = ocfs2_calc_xattr_set_need(inode,
2502 di,
2503 xi,
2504 xis,
2505 xbs,
2506 NULL,
2507 NULL,
2508 &credits);
2509 xis->not_found = old_found;
2510 if (ret) {
2511 mlog_errno(ret);
2512 goto out;
2513 }
2514
2515 ret = ocfs2_extend_trans(ctxt->handle, credits +
2516 ctxt->handle->h_buffer_credits);
2517 if (ret) {
2518 mlog_errno(ret);
2519 goto out;
2520 }
2521 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2522 } else if (ret == -ENOSPC) {
2523 if (di->i_xattr_loc && !xbs->xattr_bh) {
2524 ret = ocfs2_xattr_block_find(inode,
2525 xi->name_index,
2526 xi->name, xbs);
2527 if (ret)
2528 goto out;
2529
2530 old_found = xis->not_found;
2531 xis->not_found = -ENODATA;
2532 ret = ocfs2_calc_xattr_set_need(inode,
2533 di,
2534 xi,
2535 xis,
2536 xbs,
2537 NULL,
2538 NULL,
2539 &credits);
2540 xis->not_found = old_found;
2541 if (ret) {
2542 mlog_errno(ret);
2543 goto out;
2544 }
2545
2546 ret = ocfs2_extend_trans(ctxt->handle, credits +
2547 ctxt->handle->h_buffer_credits);
2548 if (ret) {
2549 mlog_errno(ret);
2550 goto out;
2551 }
2552 }
2553 /*
 2554			 * If there is no space in the inode, we will set the
 2555			 * extended attribute in an external block.
2556 */
2557 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2558 if (ret)
2559 goto out;
2560 if (!xis->not_found) {
2561 /*
 2562				 * If that succeeded and the extended attribute
 2563				 * also exists in the inode, remove the in-inode copy.
2564 */
2565 xi->value = NULL;
2566 xi->value_len = 0;
2567 xbs->not_found = -ENODATA;
2568 ret = ocfs2_calc_xattr_set_need(inode,
2569 di,
2570 xi,
2571 xis,
2572 xbs,
2573 NULL,
2574 NULL,
2575 &credits);
2576 if (ret) {
2577 mlog_errno(ret);
2578 goto out;
2579 }
2580
2581 ret = ocfs2_extend_trans(ctxt->handle, credits +
2582 ctxt->handle->h_buffer_credits);
2583 if (ret) {
2584 mlog_errno(ret);
2585 goto out;
2586 }
2587 ret = ocfs2_xattr_ibody_set(inode, xi,
2588 xis, ctxt);
2589 }
2590 }
2591 }
2592
2593 if (!ret) {
2594 /* Update inode ctime. */
2595 ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh,
2596 OCFS2_JOURNAL_ACCESS_WRITE);
2597 if (ret) {
2598 mlog_errno(ret);
2599 goto out;
2600 }
2601
2602 inode->i_ctime = CURRENT_TIME;
2603 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2604 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2605 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2606 }
2607out:
2608 return ret;
2609}
2610
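
__ocfs2_xattr_set_handle() implements a try-inode-first strategy: attempt the in-inode body, fall back to the external block on -ENOSPC, then delete whichever stale copy remains. A stripped-down sketch of that control flow with stubbed store functions (not the kernel API):

#include <errno.h>
#include <stdio.h>

/* Stub stores standing in for the inode body and the external xattr block. */
static int ibody_set(const char *name, const void *val)
{
	(void)name;
	return val ? -ENOSPC : 0;   /* model "inode body is full" */
}

static int block_set(const char *name, const void *val)
{
	(void)name;
	(void)val;
	return 0;
}

/*
 * Simplified version of the set strategy: try the inode body first; on
 * -ENOSPC fall back to the external block, then delete any stale copy
 * left in the inode by setting it to NULL there.
 */
static int xattr_set(const char *name, const void *value, int had_ibody_copy)
{
	int ret = ibody_set(name, value);

	if (ret != -ENOSPC)
		return ret;

	ret = block_set(name, value);
	if (ret)
		return ret;

	if (had_ibody_copy)
		ret = ibody_set(name, NULL);   /* remove the old in-inode copy */
	return ret;
}

int main(void)
{
	printf("%d\n", xattr_set("user.comment", "big value", 1));
	return 0;
}
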
2611/*
2612 * This function is only called during inode creation, to
2613 * initialize the security/ACL xattrs of the new inode.
2614 * All transaction credits have been reserved in mknod.
2615 */
2616int ocfs2_xattr_set_handle(handle_t *handle,
2617 struct inode *inode,
2618 struct buffer_head *di_bh,
2619 int name_index,
2620 const char *name,
2621 const void *value,
2622 size_t value_len,
2623 int flags,
2624 struct ocfs2_alloc_context *meta_ac,
2625 struct ocfs2_alloc_context *data_ac)
2626{
2627 struct ocfs2_dinode *di;
2628 int ret;
2629
2630 struct ocfs2_xattr_info xi = {
2631 .name_index = name_index,
2632 .name = name,
2633 .value = value,
2634 .value_len = value_len,
2635 };
2636
2637 struct ocfs2_xattr_search xis = {
2638 .not_found = -ENODATA,
2639 };
2640
2641 struct ocfs2_xattr_search xbs = {
2642 .not_found = -ENODATA,
2643 };
2644
2645 struct ocfs2_xattr_set_ctxt ctxt = {
2646 .handle = handle,
2647 .meta_ac = meta_ac,
2648 .data_ac = data_ac,
2649 };
2650
2651 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2652 return -EOPNOTSUPP;
2653
2654 /*
 2655	 * In the extreme case we may need an xattr bucket when
 2656	 * the block size is too small, and we have already reserved
 2657	 * the credits for the bucket in mknod.
2658 */
2659 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2660 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2661 if (!xbs.bucket) {
2662 mlog_errno(-ENOMEM);
2663 return -ENOMEM;
2664 }
2665 }
2666
2667 xis.inode_bh = xbs.inode_bh = di_bh;
2668 di = (struct ocfs2_dinode *)di_bh->b_data;
2669
2670 down_write(&OCFS2_I(inode)->ip_xattr_sem);
2671
2672 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2673 if (ret)
2674 goto cleanup;
2675 if (xis.not_found) {
2676 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2677 if (ret)
2678 goto cleanup;
2679 }
2680
2681 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2682
2683cleanup:
2684 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2685 brelse(xbs.xattr_bh);
2686 ocfs2_xattr_bucket_free(xbs.bucket);
2687
2688 return ret;
2689}
2690
1915/* 2691/*
1916 * ocfs2_xattr_set() 2692 * ocfs2_xattr_set()
1917 * 2693 *
@@ -1928,8 +2704,10 @@ int ocfs2_xattr_set(struct inode *inode,
1928{ 2704{
1929 struct buffer_head *di_bh = NULL; 2705 struct buffer_head *di_bh = NULL;
1930 struct ocfs2_dinode *di; 2706 struct ocfs2_dinode *di;
1931 int ret; 2707 int ret, credits;
1932 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 2708 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2709 struct inode *tl_inode = osb->osb_tl_inode;
2710 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1933 2711
1934 struct ocfs2_xattr_info xi = { 2712 struct ocfs2_xattr_info xi = {
1935 .name_index = name_index, 2713 .name_index = name_index,
@@ -1949,10 +2727,20 @@ int ocfs2_xattr_set(struct inode *inode,
1949 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) 2727 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1950 return -EOPNOTSUPP; 2728 return -EOPNOTSUPP;
1951 2729
2730 /*
2731 * Only xbs will be used on indexed trees. xis doesn't need a
2732 * bucket.
2733 */
2734 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2735 if (!xbs.bucket) {
2736 mlog_errno(-ENOMEM);
2737 return -ENOMEM;
2738 }
2739
1952 ret = ocfs2_inode_lock(inode, &di_bh, 1); 2740 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1953 if (ret < 0) { 2741 if (ret < 0) {
1954 mlog_errno(ret); 2742 mlog_errno(ret);
1955 return ret; 2743 goto cleanup_nolock;
1956 } 2744 }
1957 xis.inode_bh = xbs.inode_bh = di_bh; 2745 xis.inode_bh = xbs.inode_bh = di_bh;
1958 di = (struct ocfs2_dinode *)di_bh->b_data; 2746 di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1984,55 +2772,53 @@ int ocfs2_xattr_set(struct inode *inode,
1984 goto cleanup; 2772 goto cleanup;
1985 } 2773 }
1986 2774
1987 if (!value) { 2775
1988 /* Remove existing extended attribute */ 2776 mutex_lock(&tl_inode->i_mutex);
1989 if (!xis.not_found) 2777
1990 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); 2778 if (ocfs2_truncate_log_needs_flush(osb)) {
1991 else if (!xbs.not_found) 2779 ret = __ocfs2_flush_truncate_log(osb);
1992 ret = ocfs2_xattr_block_set(inode, &xi, &xbs); 2780 if (ret < 0) {
1993 } else { 2781 mutex_unlock(&tl_inode->i_mutex);
1994 /* We always try to set extended attribute into inode first*/ 2782 mlog_errno(ret);
1995 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); 2783 goto cleanup;
1996 if (!ret && !xbs.not_found) {
1997 /*
1998 * If succeed and that extended attribute existing in
1999 * external block, then we will remove it.
2000 */
2001 xi.value = NULL;
2002 xi.value_len = 0;
2003 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2004 } else if (ret == -ENOSPC) {
2005 if (di->i_xattr_loc && !xbs.xattr_bh) {
2006 ret = ocfs2_xattr_block_find(inode, name_index,
2007 name, &xbs);
2008 if (ret)
2009 goto cleanup;
2010 }
2011 /*
2012 * If no space in inode, we will set extended attribute
2013 * into external block.
2014 */
2015 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2016 if (ret)
2017 goto cleanup;
2018 if (!xis.not_found) {
2019 /*
2020 * If succeed and that extended attribute
2021 * existing in inode, we will remove it.
2022 */
2023 xi.value = NULL;
2024 xi.value_len = 0;
2025 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2026 }
2027 } 2784 }
2028 } 2785 }
2786 mutex_unlock(&tl_inode->i_mutex);
2787
2788 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2789 &xbs, &ctxt, &credits);
2790 if (ret) {
2791 mlog_errno(ret);
2792 goto cleanup;
2793 }
2794
2795 /* we need to update the inode's ctime field, so add credit for it. */
2796 credits += OCFS2_INODE_UPDATE_CREDITS;
2797 ctxt.handle = ocfs2_start_trans(osb, credits);
2798 if (IS_ERR(ctxt.handle)) {
2799 ret = PTR_ERR(ctxt.handle);
2800 mlog_errno(ret);
2801 goto cleanup;
2802 }
2803
2804 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2805
2806 ocfs2_commit_trans(osb, ctxt.handle);
2807
2808 if (ctxt.data_ac)
2809 ocfs2_free_alloc_context(ctxt.data_ac);
2810 if (ctxt.meta_ac)
2811 ocfs2_free_alloc_context(ctxt.meta_ac);
2812 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2813 ocfs2_schedule_truncate_log_flush(osb, 1);
2814 ocfs2_run_deallocs(osb, &ctxt.dealloc);
2029cleanup: 2815cleanup:
2030 up_write(&OCFS2_I(inode)->ip_xattr_sem); 2816 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2031 ocfs2_inode_unlock(inode, 1); 2817 ocfs2_inode_unlock(inode, 1);
2818cleanup_nolock:
2032 brelse(di_bh); 2819 brelse(di_bh);
2033 brelse(xbs.xattr_bh); 2820 brelse(xbs.xattr_bh);
2034 for (i = 0; i < blk_per_bucket; i++) 2821 ocfs2_xattr_bucket_free(xbs.bucket);
2035 brelse(xbs.bucket.bhs[i]);
2036 2822
2037 return ret; 2823 return ret;
2038} 2824}
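The reworked ocfs2_xattr_set() above reserves its allocators and sizes the journal credits up front (ocfs2_init_xattr_set_ctxt plus OCFS2_INODE_UPDATE_CREDITS for the ctime update), runs the whole change under a single transaction held in the set context, and only releases allocators and deferred deallocations after the commit. A minimal standalone sketch of that ordering, using hypothetical stand-in types and stubs rather than the ocfs2 API:

#include <stdio.h>

/* Hypothetical stand-ins for the resources the set path juggles. */
struct alloc_ctx { int reserved; };
struct handle    { int credits; };
struct set_ctxt  { struct handle *handle; struct alloc_ctx *meta_ac, *data_ac; };

#define INODE_UPDATE_CREDITS 1	/* assumed value, mirrors OCFS2_INODE_UPDATE_CREDITS */

static struct handle *start_trans(int credits)
{
	static struct handle h;

	h.credits = credits;
	printf("start one transaction with %d credits\n", credits);
	return &h;
}

static void commit_trans(struct handle *h)
{
	printf("commit transaction (%d credits)\n", h->credits);
}

static int do_xattr_set(struct set_ctxt *ctxt)
{
	/* stands in for __ocfs2_xattr_set_handle() doing all the work */
	printf("apply the xattr change inside the one transaction\n");
	return 0;
}

int main(void)
{
	struct alloc_ctx meta = { 1 }, data = { 1 };
	struct set_ctxt ctxt = { NULL, &meta, &data };
	int credits = 4;			/* pretend the sizing helper computed this */

	credits += INODE_UPDATE_CREDITS;	/* ctime update is journaled in the same handle */
	ctxt.handle = start_trans(credits);
	do_xattr_set(&ctxt);
	commit_trans(ctxt.handle);

	/* allocators and deferred deallocations are released only after commit */
	meta.reserved = data.reserved = 0;
	return 0;
}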
@@ -2107,7 +2893,7 @@ typedef int (xattr_bucket_func)(struct inode *inode,
2107 void *para); 2893 void *para);
2108 2894
2109static int ocfs2_find_xe_in_bucket(struct inode *inode, 2895static int ocfs2_find_xe_in_bucket(struct inode *inode,
2110 struct buffer_head *header_bh, 2896 struct ocfs2_xattr_bucket *bucket,
2111 int name_index, 2897 int name_index,
2112 const char *name, 2898 const char *name,
2113 u32 name_hash, 2899 u32 name_hash,
@@ -2115,11 +2901,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
2115 int *found) 2901 int *found)
2116{ 2902{
2117 int i, ret = 0, cmp = 1, block_off, new_offset; 2903 int i, ret = 0, cmp = 1, block_off, new_offset;
2118 struct ocfs2_xattr_header *xh = 2904 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2119 (struct ocfs2_xattr_header *)header_bh->b_data;
2120 size_t name_len = strlen(name); 2905 size_t name_len = strlen(name);
2121 struct ocfs2_xattr_entry *xe = NULL; 2906 struct ocfs2_xattr_entry *xe = NULL;
2122 struct buffer_head *name_bh = NULL;
2123 char *xe_name; 2907 char *xe_name;
2124 2908
2125 /* 2909 /*
@@ -2150,19 +2934,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
2150 break; 2934 break;
2151 } 2935 }
2152 2936
2153 ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
2154 &name_bh);
2155 if (ret) {
2156 mlog_errno(ret);
2157 break;
2158 }
2159 xe_name = name_bh->b_data + new_offset;
2160 2937
2161 cmp = memcmp(name, xe_name, name_len); 2938 xe_name = bucket_block(bucket, block_off) + new_offset;
2162 brelse(name_bh); 2939 if (!memcmp(name, xe_name, name_len)) {
2163 name_bh = NULL;
2164
2165 if (cmp == 0) {
2166 *xe_index = i; 2940 *xe_index = i;
2167 *found = 1; 2941 *found = 1;
2168 ret = 0; 2942 ret = 0;
@@ -2192,39 +2966,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2192 struct ocfs2_xattr_search *xs) 2966 struct ocfs2_xattr_search *xs)
2193{ 2967{
2194 int ret, found = 0; 2968 int ret, found = 0;
2195 struct buffer_head *bh = NULL;
2196 struct buffer_head *lower_bh = NULL;
2197 struct ocfs2_xattr_header *xh = NULL; 2969 struct ocfs2_xattr_header *xh = NULL;
2198 struct ocfs2_xattr_entry *xe = NULL; 2970 struct ocfs2_xattr_entry *xe = NULL;
2199 u16 index = 0; 2971 u16 index = 0;
2200 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 2972 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2201 int low_bucket = 0, bucket, high_bucket; 2973 int low_bucket = 0, bucket, high_bucket;
2974 struct ocfs2_xattr_bucket *search;
2202 u32 last_hash; 2975 u32 last_hash;
2203 u64 blkno; 2976 u64 blkno, lower_blkno = 0;
2204 2977
2205 ret = ocfs2_read_block(inode, p_blkno, &bh); 2978 search = ocfs2_xattr_bucket_new(inode);
2979 if (!search) {
2980 ret = -ENOMEM;
2981 mlog_errno(ret);
2982 goto out;
2983 }
2984
2985 ret = ocfs2_read_xattr_bucket(search, p_blkno);
2206 if (ret) { 2986 if (ret) {
2207 mlog_errno(ret); 2987 mlog_errno(ret);
2208 goto out; 2988 goto out;
2209 } 2989 }
2210 2990
2211 xh = (struct ocfs2_xattr_header *)bh->b_data; 2991 xh = bucket_xh(search);
2212 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; 2992 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2213
2214 while (low_bucket <= high_bucket) { 2993 while (low_bucket <= high_bucket) {
2215 brelse(bh); 2994 ocfs2_xattr_bucket_relse(search);
2216 bh = NULL;
2217 bucket = (low_bucket + high_bucket) / 2;
2218 2995
2996 bucket = (low_bucket + high_bucket) / 2;
2219 blkno = p_blkno + bucket * blk_per_bucket; 2997 blkno = p_blkno + bucket * blk_per_bucket;
2220 2998 ret = ocfs2_read_xattr_bucket(search, blkno);
2221 ret = ocfs2_read_block(inode, blkno, &bh);
2222 if (ret) { 2999 if (ret) {
2223 mlog_errno(ret); 3000 mlog_errno(ret);
2224 goto out; 3001 goto out;
2225 } 3002 }
2226 3003
2227 xh = (struct ocfs2_xattr_header *)bh->b_data; 3004 xh = bucket_xh(search);
2228 xe = &xh->xh_entries[0]; 3005 xe = &xh->xh_entries[0];
2229 if (name_hash < le32_to_cpu(xe->xe_name_hash)) { 3006 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2230 high_bucket = bucket - 1; 3007 high_bucket = bucket - 1;
@@ -2241,10 +3018,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2241 3018
2242 last_hash = le32_to_cpu(xe->xe_name_hash); 3019 last_hash = le32_to_cpu(xe->xe_name_hash);
2243 3020
2244 /* record lower_bh which may be the insert place. */ 3021 /* record lower_blkno which may be the insert place. */
2245 brelse(lower_bh); 3022 lower_blkno = blkno;
2246 lower_bh = bh;
2247 bh = NULL;
2248 3023
2249 if (name_hash > le32_to_cpu(xe->xe_name_hash)) { 3024 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2250 low_bucket = bucket + 1; 3025 low_bucket = bucket + 1;
@@ -2252,7 +3027,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2252 } 3027 }
2253 3028
2254 /* the searched xattr should reside in this bucket if exists. */ 3029 /* the searched xattr should reside in this bucket if exists. */
2255 ret = ocfs2_find_xe_in_bucket(inode, lower_bh, 3030 ret = ocfs2_find_xe_in_bucket(inode, search,
2256 name_index, name, name_hash, 3031 name_index, name, name_hash,
2257 &index, &found); 3032 &index, &found);
2258 if (ret) { 3033 if (ret) {
@@ -2267,46 +3042,29 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
2267 * When the xattr's hash value is in the gap of 2 buckets, we will 3042 * When the xattr's hash value is in the gap of 2 buckets, we will
2268 * always set it to the previous bucket. 3043 * always set it to the previous bucket.
2269 */ 3044 */
2270 if (!lower_bh) { 3045 if (!lower_blkno)
2271 /* 3046 lower_blkno = p_blkno;
2272 * We can't find any bucket whose first name_hash is less 3047
2273 * than the find name_hash. 3048 /* This should be in cache - we just read it during the search */
2274 */ 3049 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
2275 BUG_ON(bh->b_blocknr != p_blkno); 3050 if (ret) {
2276 lower_bh = bh; 3051 mlog_errno(ret);
2277 bh = NULL; 3052 goto out;
2278 } 3053 }
2279 xs->bucket.bhs[0] = lower_bh;
2280 xs->bucket.xh = (struct ocfs2_xattr_header *)
2281 xs->bucket.bhs[0]->b_data;
2282 lower_bh = NULL;
2283 3054
2284 xs->header = xs->bucket.xh; 3055 xs->header = bucket_xh(xs->bucket);
2285 xs->base = xs->bucket.bhs[0]->b_data; 3056 xs->base = bucket_block(xs->bucket, 0);
2286 xs->end = xs->base + inode->i_sb->s_blocksize; 3057 xs->end = xs->base + inode->i_sb->s_blocksize;
2287 3058
2288 if (found) { 3059 if (found) {
2289 /*
2290 * If we have found the xattr enty, read all the blocks in
2291 * this bucket.
2292 */
2293 ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
2294 blk_per_bucket - 1, &xs->bucket.bhs[1],
2295 0);
2296 if (ret) {
2297 mlog_errno(ret);
2298 goto out;
2299 }
2300
2301 xs->here = &xs->header->xh_entries[index]; 3060 xs->here = &xs->header->xh_entries[index];
2302 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, 3061 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2303 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index); 3062 (unsigned long long)bucket_blkno(xs->bucket), index);
2304 } else 3063 } else
2305 ret = -ENODATA; 3064 ret = -ENODATA;
2306 3065
2307out: 3066out:
2308 brelse(bh); 3067 ocfs2_xattr_bucket_free(search);
2309 brelse(lower_bh);
2310 return ret; 3068 return ret;
2311} 3069}
2312 3070
@@ -2357,53 +3115,50 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2357 xattr_bucket_func *func, 3115 xattr_bucket_func *func,
2358 void *para) 3116 void *para)
2359{ 3117{
2360 int i, j, ret = 0; 3118 int i, ret = 0;
2361 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2362 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); 3119 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2363 u32 num_buckets = clusters * bpc; 3120 u32 num_buckets = clusters * bpc;
2364 struct ocfs2_xattr_bucket bucket; 3121 struct ocfs2_xattr_bucket *bucket;
2365 3122
2366 memset(&bucket, 0, sizeof(bucket)); 3123 bucket = ocfs2_xattr_bucket_new(inode);
3124 if (!bucket) {
3125 mlog_errno(-ENOMEM);
3126 return -ENOMEM;
3127 }
2367 3128
2368 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 3129 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2369 clusters, (unsigned long long)blkno); 3130 clusters, (unsigned long long)blkno);
2370 3131
2371 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { 3132 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
2372 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, 3133 ret = ocfs2_read_xattr_bucket(bucket, blkno);
2373 bucket.bhs, 0);
2374 if (ret) { 3134 if (ret) {
2375 mlog_errno(ret); 3135 mlog_errno(ret);
2376 goto out; 3136 break;
2377 } 3137 }
2378 3138
2379 bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2380 /* 3139 /*
2381 * The real bucket num in this series of blocks is stored 3140 * The real bucket num in this series of blocks is stored
2382 * in the 1st bucket. 3141 * in the 1st bucket.
2383 */ 3142 */
2384 if (i == 0) 3143 if (i == 0)
2385 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); 3144 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
2386 3145
2387 mlog(0, "iterating xattr bucket %llu, first hash %u\n", 3146 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2388 (unsigned long long)blkno, 3147 (unsigned long long)blkno,
2389 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); 3148 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
2390 if (func) { 3149 if (func) {
2391 ret = func(inode, &bucket, para); 3150 ret = func(inode, bucket, para);
2392 if (ret) { 3151 if (ret)
2393 mlog_errno(ret); 3152 mlog_errno(ret);
2394 break; 3153 /* Fall through to bucket_relse() */
2395 }
2396 } 3154 }
2397 3155
2398 for (j = 0; j < blk_per_bucket; j++) 3156 ocfs2_xattr_bucket_relse(bucket);
2399 brelse(bucket.bhs[j]); 3157 if (ret)
2400 memset(&bucket, 0, sizeof(bucket)); 3158 break;
2401 } 3159 }
2402 3160
2403out: 3161 ocfs2_xattr_bucket_free(bucket);
2404 for (j = 0; j < blk_per_bucket; j++)
2405 brelse(bucket.bhs[j]);
2406
2407 return ret; 3162 return ret;
2408} 3163}
2409 3164
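ocfs2_iterate_xattr_buckets() above starts with an upper bound of clusters * buckets_per_cluster and trims it to the real count stored in the first bucket's header, applying the callback to each bucket as it goes and stopping on the first error. A standalone sketch of that pattern with hypothetical types, the on-disk read replaced by an in-memory array:

#include <stdio.h>
#include <stdint.h>

struct xh { uint16_t num_buckets; uint32_t first_hash; };	/* hypothetical header */

typedef int (bucket_func)(const struct xh *xh, void *para);

/*
 * Walk at most max_buckets headers; the first header says how many
 * buckets this series really contains. Stop early if the callback fails.
 */
static int iterate_buckets(const struct xh *headers, unsigned int max_buckets,
			   bucket_func *func, void *para)
{
	unsigned int i, num_buckets = max_buckets;
	int ret = 0;

	for (i = 0; i < num_buckets; i++) {
		if (i == 0)
			num_buckets = headers[0].num_buckets;	/* real count lives here */

		if (func) {
			ret = func(&headers[i], para);
			if (ret)
				break;
		}
	}

	return ret;
}

static int print_bucket(const struct xh *xh, void *para)
{
	printf("bucket with first hash %u\n", xh->first_hash);
	return 0;
}

int main(void)
{
	/* 4 slots reserved, but the first header says only 3 are in use. */
	struct xh headers[4] = { {3, 10}, {3, 40}, {3, 90}, {0, 0} };

	return iterate_buckets(headers, 4, print_bucket, NULL);
}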
@@ -2441,21 +3196,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
2441 int i, block_off, new_offset; 3196 int i, block_off, new_offset;
2442 const char *prefix, *name; 3197 const char *prefix, *name;
2443 3198
2444 for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { 3199 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
2445 struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; 3200 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
2446 type = ocfs2_xattr_get_type(entry); 3201 type = ocfs2_xattr_get_type(entry);
2447 prefix = ocfs2_xattr_prefix(type); 3202 prefix = ocfs2_xattr_prefix(type);
2448 3203
2449 if (prefix) { 3204 if (prefix) {
2450 ret = ocfs2_xattr_bucket_get_name_value(inode, 3205 ret = ocfs2_xattr_bucket_get_name_value(inode,
2451 bucket->xh, 3206 bucket_xh(bucket),
2452 i, 3207 i,
2453 &block_off, 3208 &block_off,
2454 &new_offset); 3209 &new_offset);
2455 if (ret) 3210 if (ret)
2456 break; 3211 break;
2457 3212
2458 name = (const char *)bucket->bhs[block_off]->b_data + 3213 name = (const char *)bucket_block(bucket, block_off) +
2459 new_offset; 3214 new_offset;
2460 ret = ocfs2_xattr_list_entry(xl->buffer, 3215 ret = ocfs2_xattr_list_entry(xl->buffer,
2461 xl->buffer_size, 3216 xl->buffer_size,
@@ -2540,32 +3295,34 @@ static void swap_xe(void *a, void *b, int size)
2540/* 3295/*
2541 * When the ocfs2_xattr_block is filled up, new bucket will be created 3296 * When the ocfs2_xattr_block is filled up, new bucket will be created
2542 * and all the xattr entries will be moved to the new bucket. 3297 * and all the xattr entries will be moved to the new bucket.
3298 * The header goes at the start of the bucket, and the names+values are
3299 * filled from the end. This is why *target starts as the last buffer.
2543 * Note: we need to sort the entries since they are not saved in order 3300 * Note: we need to sort the entries since they are not saved in order
2544 * in the ocfs2_xattr_block. 3301 * in the ocfs2_xattr_block.
2545 */ 3302 */
2546static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, 3303static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2547 struct buffer_head *xb_bh, 3304 struct buffer_head *xb_bh,
2548 struct buffer_head *xh_bh, 3305 struct ocfs2_xattr_bucket *bucket)
2549 struct buffer_head *data_bh)
2550{ 3306{
2551 int i, blocksize = inode->i_sb->s_blocksize; 3307 int i, blocksize = inode->i_sb->s_blocksize;
3308 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2552 u16 offset, size, off_change; 3309 u16 offset, size, off_change;
2553 struct ocfs2_xattr_entry *xe; 3310 struct ocfs2_xattr_entry *xe;
2554 struct ocfs2_xattr_block *xb = 3311 struct ocfs2_xattr_block *xb =
2555 (struct ocfs2_xattr_block *)xb_bh->b_data; 3312 (struct ocfs2_xattr_block *)xb_bh->b_data;
2556 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; 3313 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2557 struct ocfs2_xattr_header *xh = 3314 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2558 (struct ocfs2_xattr_header *)xh_bh->b_data;
2559 u16 count = le16_to_cpu(xb_xh->xh_count); 3315 u16 count = le16_to_cpu(xb_xh->xh_count);
2560 char *target = xh_bh->b_data, *src = xb_bh->b_data; 3316 char *src = xb_bh->b_data;
3317 char *target = bucket_block(bucket, blks - 1);
2561 3318
2562 mlog(0, "cp xattr from block %llu to bucket %llu\n", 3319 mlog(0, "cp xattr from block %llu to bucket %llu\n",
2563 (unsigned long long)xb_bh->b_blocknr, 3320 (unsigned long long)xb_bh->b_blocknr,
2564 (unsigned long long)xh_bh->b_blocknr); 3321 (unsigned long long)bucket_blkno(bucket));
3322
3323 for (i = 0; i < blks; i++)
3324 memset(bucket_block(bucket, i), 0, blocksize);
2565 3325
2566 memset(xh_bh->b_data, 0, blocksize);
2567 if (data_bh)
2568 memset(data_bh->b_data, 0, blocksize);
2569 /* 3326 /*
2570 * Since the xe_name_offset is based on ocfs2_xattr_header, 3327 * Since the xe_name_offset is based on ocfs2_xattr_header,
2571 * there is a offset change corresponding to the change of 3328 * there is a offset change corresponding to the change of
@@ -2577,8 +3334,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2577 size = blocksize - offset; 3334 size = blocksize - offset;
2578 3335
2579 /* copy all the names and values. */ 3336 /* copy all the names and values. */
2580 if (data_bh)
2581 target = data_bh->b_data;
2582 memcpy(target + offset, src + offset, size); 3337 memcpy(target + offset, src + offset, size);
2583 3338
2584 /* Init new header now. */ 3339 /* Init new header now. */
@@ -2588,7 +3343,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2588 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); 3343 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2589 3344
2590 /* copy all the entries. */ 3345 /* copy all the entries. */
2591 target = xh_bh->b_data; 3346 target = bucket_block(bucket, 0);
2592 offset = offsetof(struct ocfs2_xattr_header, xh_entries); 3347 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2593 size = count * sizeof(struct ocfs2_xattr_entry); 3348 size = count * sizeof(struct ocfs2_xattr_entry);
2594 memcpy(target + offset, (char *)xb_xh + offset, size); 3349 memcpy(target + offset, (char *)xb_xh + offset, size);
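The copy helper above relies on the bucket layout spelled out in its comment: the ocfs2_xattr_header and its entry array grow from the start of the bucket while the name/value blobs are packed downward from the end, with xh_free_start marking the boundary between the two. A standalone sketch of that two-ended layout inside a flat buffer, with hypothetical sizes and field names:

#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>

#define BUCKET_SIZE 4096	/* hypothetical; ocfs2 uses OCFS2_XATTR_BUCKET_SIZE */
#define MAX_ENTRIES 32		/* hypothetical cap, just to keep the sketch simple */

struct entry  { uint16_t name_offset; uint16_t len; };
struct header { uint16_t count; uint16_t free_start; struct entry entries[MAX_ENTRIES]; };
union bucket  { struct header xh; unsigned char bytes[BUCKET_SIZE]; };

/* Append one name/value blob: entry slot at the front, bytes at the back. */
static int add_blob(union bucket *b, const char *blob)
{
	struct header *xh = &b->xh;
	uint16_t len = (uint16_t)strlen(blob);
	size_t front = offsetof(struct header, entries) +
		       (size_t)(xh->count + 1) * sizeof(struct entry);

	if (xh->count >= MAX_ENTRIES || front + len > xh->free_start)
		return -1;			/* front and back would collide */

	xh->free_start -= len;
	memcpy(b->bytes + xh->free_start, blob, len);
	xh->entries[xh->count].name_offset = xh->free_start;
	xh->entries[xh->count].len = len;
	xh->count++;
	return 0;
}

int main(void)
{
	union bucket b;

	memset(&b, 0, sizeof(b));
	b.xh.free_start = BUCKET_SIZE;		/* empty bucket: everything past the header is free */
	add_blob(&b, "user.foo=bar");
	add_blob(&b, "user.spam=eggs");

	printf("%u entries, free region is [%zu, %u)\n", (unsigned)b.xh.count,
	       offsetof(struct header, entries) + b.xh.count * sizeof(struct entry),
	       (unsigned)b.xh.free_start);
	return 0;
}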
@@ -2614,73 +3369,47 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2614 * While if the entry is in index b-tree, "bucket" indicates the 3369 * While if the entry is in index b-tree, "bucket" indicates the
2615 * real place of the xattr. 3370 * real place of the xattr.
2616 */ 3371 */
2617static int ocfs2_xattr_update_xattr_search(struct inode *inode, 3372static void ocfs2_xattr_update_xattr_search(struct inode *inode,
2618 struct ocfs2_xattr_search *xs, 3373 struct ocfs2_xattr_search *xs,
2619 struct buffer_head *old_bh, 3374 struct buffer_head *old_bh)
2620 struct buffer_head *new_bh)
2621{ 3375{
2622 int ret = 0;
2623 char *buf = old_bh->b_data; 3376 char *buf = old_bh->b_data;
2624 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; 3377 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2625 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; 3378 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2626 int i, blocksize = inode->i_sb->s_blocksize; 3379 int i;
2627 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2628
2629 xs->bucket.bhs[0] = new_bh;
2630 get_bh(new_bh);
2631 xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2632 xs->header = xs->bucket.xh;
2633 3380
2634 xs->base = new_bh->b_data; 3381 xs->header = bucket_xh(xs->bucket);
3382 xs->base = bucket_block(xs->bucket, 0);
2635 xs->end = xs->base + inode->i_sb->s_blocksize; 3383 xs->end = xs->base + inode->i_sb->s_blocksize;
2636 3384
2637 if (!xs->not_found) { 3385 if (xs->not_found)
2638 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { 3386 return;
2639 ret = ocfs2_read_blocks(inode,
2640 xs->bucket.bhs[0]->b_blocknr + 1,
2641 blk_per_bucket - 1, &xs->bucket.bhs[1],
2642 0);
2643 if (ret) {
2644 mlog_errno(ret);
2645 return ret;
2646 }
2647
2648 }
2649 i = xs->here - old_xh->xh_entries;
2650 xs->here = &xs->header->xh_entries[i];
2651 }
2652 3387
2653 return ret; 3388 i = xs->here - old_xh->xh_entries;
3389 xs->here = &xs->header->xh_entries[i];
2654} 3390}
2655 3391
2656static int ocfs2_xattr_create_index_block(struct inode *inode, 3392static int ocfs2_xattr_create_index_block(struct inode *inode,
2657 struct ocfs2_xattr_search *xs) 3393 struct ocfs2_xattr_search *xs,
3394 struct ocfs2_xattr_set_ctxt *ctxt)
2658{ 3395{
2659 int ret, credits = OCFS2_SUBALLOC_ALLOC; 3396 int ret;
2660 u32 bit_off, len; 3397 u32 bit_off, len;
2661 u64 blkno; 3398 u64 blkno;
2662 handle_t *handle; 3399 handle_t *handle = ctxt->handle;
2663 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3400 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2664 struct ocfs2_inode_info *oi = OCFS2_I(inode); 3401 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2665 struct ocfs2_alloc_context *data_ac;
2666 struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2667 struct buffer_head *xb_bh = xs->xattr_bh; 3402 struct buffer_head *xb_bh = xs->xattr_bh;
2668 struct ocfs2_xattr_block *xb = 3403 struct ocfs2_xattr_block *xb =
2669 (struct ocfs2_xattr_block *)xb_bh->b_data; 3404 (struct ocfs2_xattr_block *)xb_bh->b_data;
2670 struct ocfs2_xattr_tree_root *xr; 3405 struct ocfs2_xattr_tree_root *xr;
2671 u16 xb_flags = le16_to_cpu(xb->xb_flags); 3406 u16 xb_flags = le16_to_cpu(xb->xb_flags);
2672 u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2673 3407
2674 mlog(0, "create xattr index block for %llu\n", 3408 mlog(0, "create xattr index block for %llu\n",
2675 (unsigned long long)xb_bh->b_blocknr); 3409 (unsigned long long)xb_bh->b_blocknr);
2676 3410
2677 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); 3411 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2678 3412 BUG_ON(!xs->bucket);
2679 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2680 if (ret) {
2681 mlog_errno(ret);
2682 goto out;
2683 }
2684 3413
2685 /* 3414 /*
2686 * XXX: 3415 * XXX:
@@ -2689,29 +3418,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2689 */ 3418 */
2690 down_write(&oi->ip_alloc_sem); 3419 down_write(&oi->ip_alloc_sem);
2691 3420
2692 /* 3421 ret = ocfs2_journal_access_xb(handle, inode, xb_bh,
2693 * 3 more credits, one for xattr block update, one for the 1st block 3422 OCFS2_JOURNAL_ACCESS_WRITE);
2694 * of the new xattr bucket and one for the value/data.
2695 */
2696 credits += 3;
2697 handle = ocfs2_start_trans(osb, credits);
2698 if (IS_ERR(handle)) {
2699 ret = PTR_ERR(handle);
2700 mlog_errno(ret);
2701 goto out_sem;
2702 }
2703
2704 ret = ocfs2_journal_access(handle, inode, xb_bh,
2705 OCFS2_JOURNAL_ACCESS_WRITE);
2706 if (ret) { 3423 if (ret) {
2707 mlog_errno(ret); 3424 mlog_errno(ret);
2708 goto out_commit; 3425 goto out;
2709 } 3426 }
2710 3427
2711 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); 3428 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3429 1, 1, &bit_off, &len);
2712 if (ret) { 3430 if (ret) {
2713 mlog_errno(ret); 3431 mlog_errno(ret);
2714 goto out_commit; 3432 goto out;
2715 } 3433 }
2716 3434
2717 /* 3435 /*
@@ -2724,51 +3442,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2724 mlog(0, "allocate 1 cluster from %llu to xattr block\n", 3442 mlog(0, "allocate 1 cluster from %llu to xattr block\n",
2725 (unsigned long long)blkno); 3443 (unsigned long long)blkno);
2726 3444
2727 xh_bh = sb_getblk(inode->i_sb, blkno); 3445 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
2728 if (!xh_bh) { 3446 if (ret) {
2729 ret = -EIO;
2730 mlog_errno(ret); 3447 mlog_errno(ret);
2731 goto out_commit; 3448 goto out;
2732 } 3449 }
2733 3450
2734 ocfs2_set_new_buffer_uptodate(inode, xh_bh); 3451 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
2735 3452 OCFS2_JOURNAL_ACCESS_CREATE);
2736 ret = ocfs2_journal_access(handle, inode, xh_bh,
2737 OCFS2_JOURNAL_ACCESS_CREATE);
2738 if (ret) { 3453 if (ret) {
2739 mlog_errno(ret); 3454 mlog_errno(ret);
2740 goto out_commit; 3455 goto out;
2741 }
2742
2743 if (bpb > 1) {
2744 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2745 if (!data_bh) {
2746 ret = -EIO;
2747 mlog_errno(ret);
2748 goto out_commit;
2749 }
2750
2751 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2752
2753 ret = ocfs2_journal_access(handle, inode, data_bh,
2754 OCFS2_JOURNAL_ACCESS_CREATE);
2755 if (ret) {
2756 mlog_errno(ret);
2757 goto out_commit;
2758 }
2759 } 3456 }
2760 3457
2761 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); 3458 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3459 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
2762 3460
2763 ocfs2_journal_dirty(handle, xh_bh); 3461 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
2764 if (data_bh)
2765 ocfs2_journal_dirty(handle, data_bh);
2766
2767 ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2768 if (ret) {
2769 mlog_errno(ret);
2770 goto out_commit;
2771 }
2772 3462
2773 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 3463 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2774 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 3464 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2787,24 +3477,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2787 3477
2788 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); 3478 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2789 3479
2790 ret = ocfs2_journal_dirty(handle, xb_bh); 3480 ocfs2_journal_dirty(handle, xb_bh);
2791 if (ret) {
2792 mlog_errno(ret);
2793 goto out_commit;
2794 }
2795
2796out_commit:
2797 ocfs2_commit_trans(osb, handle);
2798
2799out_sem:
2800 up_write(&oi->ip_alloc_sem);
2801 3481
2802out: 3482out:
2803 if (data_ac) 3483 up_write(&oi->ip_alloc_sem);
2804 ocfs2_free_alloc_context(data_ac);
2805
2806 brelse(xh_bh);
2807 brelse(data_bh);
2808 3484
2809 return ret; 3485 return ret;
2810} 3486}
@@ -2829,29 +3505,18 @@ static int cmp_xe_offset(const void *a, const void *b)
2829 * so that we can spare some space for insertion. 3505 * so that we can spare some space for insertion.
2830 */ 3506 */
2831static int ocfs2_defrag_xattr_bucket(struct inode *inode, 3507static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3508 handle_t *handle,
2832 struct ocfs2_xattr_bucket *bucket) 3509 struct ocfs2_xattr_bucket *bucket)
2833{ 3510{
2834 int ret, i; 3511 int ret, i;
2835 size_t end, offset, len, value_len; 3512 size_t end, offset, len, value_len;
2836 struct ocfs2_xattr_header *xh; 3513 struct ocfs2_xattr_header *xh;
2837 char *entries, *buf, *bucket_buf = NULL; 3514 char *entries, *buf, *bucket_buf = NULL;
2838 u64 blkno = bucket->bhs[0]->b_blocknr; 3515 u64 blkno = bucket_blkno(bucket);
2839 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2840 u16 xh_free_start; 3516 u16 xh_free_start;
2841 size_t blocksize = inode->i_sb->s_blocksize; 3517 size_t blocksize = inode->i_sb->s_blocksize;
2842 handle_t *handle;
2843 struct buffer_head **bhs;
2844 struct ocfs2_xattr_entry *xe; 3518 struct ocfs2_xattr_entry *xe;
2845 3519
2846 bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2847 GFP_NOFS);
2848 if (!bhs)
2849 return -ENOMEM;
2850
2851 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
2852 if (ret)
2853 goto out;
2854
2855 /* 3520 /*
2856 * In order to make the operation more efficient and generic, 3521 * In order to make the operation more efficient and generic,
2857 * we copy all the blocks into a contiguous memory and do the 3522 * we copy all the blocks into a contiguous memory and do the
@@ -2865,26 +3530,16 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2865 } 3530 }
2866 3531
2867 buf = bucket_buf; 3532 buf = bucket_buf;
2868 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) 3533 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
2869 memcpy(buf, bhs[i]->b_data, blocksize); 3534 memcpy(buf, bucket_block(bucket, i), blocksize);
2870 3535
2871 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); 3536 ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
2872 if (IS_ERR(handle)) { 3537 OCFS2_JOURNAL_ACCESS_WRITE);
2873 ret = PTR_ERR(handle); 3538 if (ret < 0) {
2874 handle = NULL;
2875 mlog_errno(ret); 3539 mlog_errno(ret);
2876 goto out; 3540 goto out;
2877 } 3541 }
2878 3542
2879 for (i = 0; i < blk_per_bucket; i++) {
2880 ret = ocfs2_journal_access(handle, inode, bhs[i],
2881 OCFS2_JOURNAL_ACCESS_WRITE);
2882 if (ret < 0) {
2883 mlog_errno(ret);
2884 goto commit;
2885 }
2886 }
2887
2888 xh = (struct ocfs2_xattr_header *)bucket_buf; 3543 xh = (struct ocfs2_xattr_header *)bucket_buf;
2889 entries = (char *)xh->xh_entries; 3544 entries = (char *)xh->xh_entries;
2890 xh_free_start = le16_to_cpu(xh->xh_free_start); 3545 xh_free_start = le16_to_cpu(xh->xh_free_start);
@@ -2940,7 +3595,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2940 "bucket %llu\n", (unsigned long long)blkno); 3595 "bucket %llu\n", (unsigned long long)blkno);
2941 3596
2942 if (xh_free_start == end) 3597 if (xh_free_start == end)
2943 goto commit; 3598 goto out;
2944 3599
2945 memset(bucket_buf + xh_free_start, 0, end - xh_free_start); 3600 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2946 xh->xh_free_start = cpu_to_le16(end); 3601 xh->xh_free_start = cpu_to_le16(end);
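ocfs2_defrag_xattr_bucket() above works on a contiguous copy of the bucket: the entries are ordered by their name_offset so the name/value blobs can be slid one by one against the end of the buffer, closing the holes left by deleted xattrs, and the newly freed region between the old and new free_start is then zeroed. A standalone sketch of that compaction step on a flat buffer, with hypothetical structures:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define BUCKET_SIZE 256				/* hypothetical, kept small for the demo */

struct entry { uint16_t offset; uint16_t len; };

/* Sort helper: larger offsets first, so we can pack from the end down. */
static int cmp_offset_desc(const void *a, const void *b)
{
	const struct entry *ea = a, *eb = b;

	return (int)eb->offset - (int)ea->offset;
}

/* Slide every blob up against 'end', removing the holes between them. */
static uint16_t defrag(unsigned char *buf, struct entry *entries, int count,
		       uint16_t free_start)
{
	uint16_t end = BUCKET_SIZE;
	int i;

	qsort(entries, count, sizeof(*entries), cmp_offset_desc);

	for (i = 0; i < count; i++) {
		struct entry *xe = &entries[i];

		if (xe->offset + xe->len != end)
			memmove(buf + end - xe->len, buf + xe->offset, xe->len);
		end -= xe->len;
		xe->offset = end;
	}

	/* Zero the region that was freed up, between the old and new free_start. */
	memset(buf + free_start, 0, end - free_start);
	return end;				/* new free_start */
}

int main(void)
{
	unsigned char buf[BUCKET_SIZE] = {0};
	struct entry entries[2] = { {200, 10}, {100, 20} };	/* two blobs with a hole between them */

	memset(buf + 200, 'A', 10);
	memset(buf + 100, 'B', 20);

	printf("new free_start = %u\n", (unsigned)defrag(buf, entries, 2, 100));
	return 0;
}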
@@ -2951,169 +3606,94 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2951 cmp_xe, swap_xe); 3606 cmp_xe, swap_xe);
2952 3607
2953 buf = bucket_buf; 3608 buf = bucket_buf;
2954 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { 3609 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
2955 memcpy(bhs[i]->b_data, buf, blocksize); 3610 memcpy(bucket_block(bucket, i), buf, blocksize);
2956 ocfs2_journal_dirty(handle, bhs[i]); 3611 ocfs2_xattr_bucket_journal_dirty(handle, bucket);
2957 }
2958 3612
2959commit:
2960 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2961out: 3613out:
2962
2963 if (bhs) {
2964 for (i = 0; i < blk_per_bucket; i++)
2965 brelse(bhs[i]);
2966 }
2967 kfree(bhs);
2968
2969 kfree(bucket_buf); 3614 kfree(bucket_buf);
2970 return ret; 3615 return ret;
2971} 3616}
2972 3617
2973/* 3618/*
2974 * Move half nums of the xattr bucket in the previous cluster to this new 3619 * prev_blkno points to the start of an existing extent. new_blkno
2975 * cluster. We only touch the last cluster of the previous extend record. 3620 * points to a newly allocated extent. Because we know each of our
3621 * clusters contains more than one bucket, we can easily split one cluster
3622 * at a bucket boundary. So we take the last cluster of the existing
3623 * extent and split it down the middle. We move the last half of the
3624 * buckets in the last cluster of the existing extent over to the new
3625 * extent.
3626 *
3627 * first_bh is the buffer at prev_blkno so we can update the existing
3628 * extent's bucket count. header_bh is the bucket where we were hoping
3629 * to insert our xattr. If the bucket move places the target in the new
3630 * extent, we'll update first_bh and header_bh after modifying the old
3631 * extent.
2976 * 3632 *
2977 * first_bh is the first buffer_head of a series of bucket in the same 3633 * first_hash will be set as the 1st xe's name_hash in the new extent.
2978 * extent rec and header_bh is the header of one bucket in this cluster.
2979 * They will be updated if we move the data header_bh contains to the new
2980 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
2981 */ 3634 */
2982static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, 3635static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
2983 handle_t *handle, 3636 handle_t *handle,
2984 struct buffer_head **first_bh, 3637 struct ocfs2_xattr_bucket *first,
2985 struct buffer_head **header_bh, 3638 struct ocfs2_xattr_bucket *target,
2986 u64 new_blkno, 3639 u64 new_blkno,
2987 u64 prev_blkno,
2988 u32 num_clusters, 3640 u32 num_clusters,
2989 u32 *first_hash) 3641 u32 *first_hash)
2990{ 3642{
2991 int i, ret, credits; 3643 int ret;
2992 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 3644 struct super_block *sb = inode->i_sb;
2993 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 3645 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
2994 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 3646 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
2995 int blocksize = inode->i_sb->s_blocksize; 3647 int to_move = num_buckets / 2;
2996 struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL; 3648 u64 src_blkno;
2997 struct ocfs2_xattr_header *new_xh; 3649 u64 last_cluster_blkno = bucket_blkno(first) +
2998 struct ocfs2_xattr_header *xh = 3650 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
2999 (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3000
3001 BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3002 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3003
3004 prev_bh = *first_bh;
3005 get_bh(prev_bh);
3006 xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3007 3651
3008 prev_blkno += (num_clusters - 1) * bpc + bpc / 2; 3652 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3653 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3009 3654
3010 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 3655 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3011 (unsigned long long)prev_blkno, (unsigned long long)new_blkno); 3656 (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3012 3657
3013 /* 3658 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3014 * We need to update the 1st half of the new cluster and 3659 last_cluster_blkno, new_blkno,
3015 * 1 more for the update of the 1st bucket of the previous 3660 to_move, first_hash);
3016 * extent record.
3017 */
3018 credits = bpc / 2 + 1;
3019 ret = ocfs2_extend_trans(handle, credits);
3020 if (ret) { 3661 if (ret) {
3021 mlog_errno(ret); 3662 mlog_errno(ret);
3022 goto out; 3663 goto out;
3023 } 3664 }
3024 3665
3025 ret = ocfs2_journal_access(handle, inode, prev_bh, 3666 /* This is the first bucket that got moved */
3026 OCFS2_JOURNAL_ACCESS_WRITE); 3667 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3027 if (ret) {
3028 mlog_errno(ret);
3029 goto out;
3030 }
3031 3668
3032 for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) { 3669 /*
3033 old_bh = new_bh = NULL; 3670 * If the target bucket was part of the moved buckets, we need to
3034 new_bh = sb_getblk(inode->i_sb, new_blkno); 3671 * update first and target.
3035 if (!new_bh) { 3672 */
3036 ret = -EIO; 3673 if (bucket_blkno(target) >= src_blkno) {
3037 mlog_errno(ret); 3674 /* Find the block for the new target bucket */
3038 goto out; 3675 src_blkno = new_blkno +
3039 } 3676 (bucket_blkno(target) - src_blkno);
3040 3677
3041 ocfs2_set_new_buffer_uptodate(inode, new_bh); 3678 ocfs2_xattr_bucket_relse(first);
3679 ocfs2_xattr_bucket_relse(target);
3042 3680
3043 ret = ocfs2_journal_access(handle, inode, new_bh, 3681 /*
3044 OCFS2_JOURNAL_ACCESS_CREATE); 3682 * These shouldn't fail - the buffers are in the
3045 if (ret < 0) { 3683 * journal from ocfs2_cp_xattr_bucket().
3684 */
3685 ret = ocfs2_read_xattr_bucket(first, new_blkno);
3686 if (ret) {
3046 mlog_errno(ret); 3687 mlog_errno(ret);
3047 brelse(new_bh);
3048 goto out; 3688 goto out;
3049 } 3689 }
3050 3690 ret = ocfs2_read_xattr_bucket(target, src_blkno);
3051 ret = ocfs2_read_block(inode, prev_blkno, &old_bh); 3691 if (ret)
3052 if (ret < 0) {
3053 mlog_errno(ret); 3692 mlog_errno(ret);
3054 brelse(new_bh);
3055 goto out;
3056 }
3057 3693
3058 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3059
3060 if (i == 0) {
3061 new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3062 new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3063
3064 if (first_hash)
3065 *first_hash = le32_to_cpu(
3066 new_xh->xh_entries[0].xe_name_hash);
3067 new_first_bh = new_bh;
3068 get_bh(new_first_bh);
3069 }
3070
3071 ocfs2_journal_dirty(handle, new_bh);
3072
3073 if (*header_bh == old_bh) {
3074 brelse(*header_bh);
3075 *header_bh = new_bh;
3076 get_bh(*header_bh);
3077
3078 brelse(*first_bh);
3079 *first_bh = new_first_bh;
3080 get_bh(*first_bh);
3081 }
3082 brelse(new_bh);
3083 brelse(old_bh);
3084 } 3694 }
3085 3695
3086 le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3087
3088 ocfs2_journal_dirty(handle, prev_bh);
3089out: 3696out:
3090 brelse(prev_bh);
3091 brelse(new_first_bh);
3092 return ret;
3093}
3094
3095static int ocfs2_read_xattr_bucket(struct inode *inode,
3096 u64 blkno,
3097 struct buffer_head **bhs,
3098 int new)
3099{
3100 int ret = 0;
3101 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3102
3103 if (!new)
3104 return ocfs2_read_blocks(inode, blkno,
3105 blk_per_bucket, bhs, 0);
3106
3107 for (i = 0; i < blk_per_bucket; i++) {
3108 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3109 if (bhs[i] == NULL) {
3110 ret = -EIO;
3111 mlog_errno(ret);
3112 break;
3113 }
3114 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3115 }
3116
3117 return ret; 3697 return ret;
3118} 3698}
3119 3699
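ocfs2_mv_xattr_bucket_cross_cluster() above moves the upper half of the buckets in the old extent's last cluster over to the newly allocated extent, and if the bucket we were targeting was among the moved ones, its new block is simply its old distance from the first moved bucket added to new_blkno. A standalone sketch of that address arithmetic, with hypothetical geometry and numbers:

#include <stdio.h>
#include <stdint.h>

/*
 * last_cluster_blkno is the start of the old extent's last cluster; its
 * upper half (to_move buckets) gets relocated to new_blkno.
 */
static uint64_t remap_target(uint64_t last_cluster_blkno, uint64_t new_blkno,
			     uint64_t target_blkno, unsigned int buckets_per_cluster,
			     unsigned int blks_per_bucket)
{
	unsigned int to_move = buckets_per_cluster / 2;
	uint64_t first_moved = last_cluster_blkno +
			       (uint64_t)to_move * blks_per_bucket;

	if (target_blkno < first_moved)
		return target_blkno;		/* target stayed in the old extent */

	/* same offset from the start of the moved range, now based at new_blkno */
	return new_blkno + (target_blkno - first_moved);
}

int main(void)
{
	uint64_t last = 1000, new_blkno = 5000;

	/* 8 buckets of 4 blocks: bucket 5 (blocks 1020..1023) is in the moved half */
	printf("target 1020 -> %llu\n",
	       (unsigned long long)remap_target(last, new_blkno, 1020, 8, 4));
	/* bucket 2 (blocks 1008..1011) stays put */
	printf("target 1008 -> %llu\n",
	       (unsigned long long)remap_target(last, new_blkno, 1008, 8, 4));
	return 0;
}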
@@ -3178,8 +3758,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3178{ 3758{
3179 int ret, i; 3759 int ret, i;
3180 int count, start, len, name_value_len = 0, xe_len, name_offset = 0; 3760 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3181 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3761 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3182 struct buffer_head **s_bhs, **t_bhs = NULL;
3183 struct ocfs2_xattr_header *xh; 3762 struct ocfs2_xattr_header *xh;
3184 struct ocfs2_xattr_entry *xe; 3763 struct ocfs2_xattr_entry *xe;
3185 int blocksize = inode->i_sb->s_blocksize; 3764 int blocksize = inode->i_sb->s_blocksize;
@@ -3187,47 +3766,52 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3187 mlog(0, "move some of xattrs from bucket %llu to %llu\n", 3766 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3188 (unsigned long long)blk, (unsigned long long)new_blk); 3767 (unsigned long long)blk, (unsigned long long)new_blk);
3189 3768
3190 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3769 s_bucket = ocfs2_xattr_bucket_new(inode);
3191 if (!s_bhs) 3770 t_bucket = ocfs2_xattr_bucket_new(inode);
3192 return -ENOMEM; 3771 if (!s_bucket || !t_bucket) {
3193 3772 ret = -ENOMEM;
3194 ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3195 if (ret) {
3196 mlog_errno(ret); 3773 mlog_errno(ret);
3197 goto out; 3774 goto out;
3198 } 3775 }
3199 3776
3200 ret = ocfs2_journal_access(handle, inode, s_bhs[0], 3777 ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3201 OCFS2_JOURNAL_ACCESS_WRITE);
3202 if (ret) { 3778 if (ret) {
3203 mlog_errno(ret); 3779 mlog_errno(ret);
3204 goto out; 3780 goto out;
3205 } 3781 }
3206 3782
3207 t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3783 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3208 if (!t_bhs) { 3784 OCFS2_JOURNAL_ACCESS_WRITE);
3209 ret = -ENOMEM; 3785 if (ret) {
3786 mlog_errno(ret);
3210 goto out; 3787 goto out;
3211 } 3788 }
3212 3789
3213 ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); 3790 /*
3791 * Even if !new_bucket_head, we're overwriting t_bucket. Thus,
3792 * there's no need to read it.
3793 */
3794 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3214 if (ret) { 3795 if (ret) {
3215 mlog_errno(ret); 3796 mlog_errno(ret);
3216 goto out; 3797 goto out;
3217 } 3798 }
3218 3799
3219 for (i = 0; i < blk_per_bucket; i++) { 3800 /*
3220 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3801 * Hey, if we're overwriting t_bucket, what difference does
3221 new_bucket_head ? 3802 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the
3222 OCFS2_JOURNAL_ACCESS_CREATE : 3803 * same part of ocfs2_cp_xattr_bucket().
3223 OCFS2_JOURNAL_ACCESS_WRITE); 3804 */
3224 if (ret) { 3805 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3225 mlog_errno(ret); 3806 new_bucket_head ?
3226 goto out; 3807 OCFS2_JOURNAL_ACCESS_CREATE :
3227 } 3808 OCFS2_JOURNAL_ACCESS_WRITE);
3809 if (ret) {
3810 mlog_errno(ret);
3811 goto out;
3228 } 3812 }
3229 3813
3230 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3814 xh = bucket_xh(s_bucket);
3231 count = le16_to_cpu(xh->xh_count); 3815 count = le16_to_cpu(xh->xh_count);
3232 start = ocfs2_xattr_find_divide_pos(xh); 3816 start = ocfs2_xattr_find_divide_pos(xh);
3233 3817
@@ -3239,10 +3823,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3239 * The hash value is set as one larger than 3823 * The hash value is set as one larger than
3240 * that of the last entry in the previous bucket. 3824 * that of the last entry in the previous bucket.
3241 */ 3825 */
3242 for (i = 0; i < blk_per_bucket; i++) 3826 for (i = 0; i < t_bucket->bu_blocks; i++)
3243 memset(t_bhs[i]->b_data, 0, blocksize); 3827 memset(bucket_block(t_bucket, i), 0, blocksize);
3244 3828
3245 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3829 xh = bucket_xh(t_bucket);
3246 xh->xh_free_start = cpu_to_le16(blocksize); 3830 xh->xh_free_start = cpu_to_le16(blocksize);
3247 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; 3831 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3248 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); 3832 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3251,11 +3835,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
3251 } 3835 }
3252 3836
3253 /* copy the whole bucket to the new first. */ 3837 /* copy the whole bucket to the new first. */
3254 for (i = 0; i < blk_per_bucket; i++) 3838 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3255 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3256 3839
3257 /* update the new bucket. */ 3840 /* update the new bucket. */
3258 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3841 xh = bucket_xh(t_bucket);
3259 3842
3260 /* 3843 /*
3261 * Calculate the total name/value len and xh_free_start for 3844 * Calculate the total name/value len and xh_free_start for
@@ -3319,11 +3902,7 @@ set_num_buckets:
3319 else 3902 else
3320 xh->xh_num_buckets = 0; 3903 xh->xh_num_buckets = 0;
3321 3904
3322 for (i = 0; i < blk_per_bucket; i++) { 3905 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3323 ocfs2_journal_dirty(handle, t_bhs[i]);
3324 if (ret)
3325 mlog_errno(ret);
3326 }
3327 3906
3328 /* store the first_hash of the new bucket. */ 3907 /* store the first_hash of the new bucket. */
3329 if (first_hash) 3908 if (first_hash)
@@ -3337,29 +3916,18 @@ set_num_buckets:
3337 if (start == count) 3916 if (start == count)
3338 goto out; 3917 goto out;
3339 3918
3340 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3919 xh = bucket_xh(s_bucket);
3341 memset(&xh->xh_entries[start], 0, 3920 memset(&xh->xh_entries[start], 0,
3342 sizeof(struct ocfs2_xattr_entry) * (count - start)); 3921 sizeof(struct ocfs2_xattr_entry) * (count - start));
3343 xh->xh_count = cpu_to_le16(start); 3922 xh->xh_count = cpu_to_le16(start);
3344 xh->xh_free_start = cpu_to_le16(name_offset); 3923 xh->xh_free_start = cpu_to_le16(name_offset);
3345 xh->xh_name_value_len = cpu_to_le16(name_value_len); 3924 xh->xh_name_value_len = cpu_to_le16(name_value_len);
3346 3925
3347 ocfs2_journal_dirty(handle, s_bhs[0]); 3926 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3348 if (ret)
3349 mlog_errno(ret);
3350 3927
3351out: 3928out:
3352 if (s_bhs) { 3929 ocfs2_xattr_bucket_free(s_bucket);
3353 for (i = 0; i < blk_per_bucket; i++) 3930 ocfs2_xattr_bucket_free(t_bucket);
3354 brelse(s_bhs[i]);
3355 }
3356 kfree(s_bhs);
3357
3358 if (t_bhs) {
3359 for (i = 0; i < blk_per_bucket; i++)
3360 brelse(t_bhs[i]);
3361 }
3362 kfree(t_bhs);
3363 3931
3364 return ret; 3932 return ret;
3365} 3933}
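ocfs2_divide_xattr_bucket() above splits a full bucket at a position chosen by ocfs2_xattr_find_divide_pos(), which is not shown in this hunk; the property that matters is that entries sharing a name hash stay in one bucket, since lookup narrows to a single bucket by hash. A plausible standalone sketch of such a divide choice over a hash-sorted entry array (this is an assumed policy for illustration, not the kernel's exact heuristic):

#include <stdio.h>
#include <stdint.h>

/*
 * Start from the middle and push the split point forward until it no
 * longer lands inside a run of equal hashes. Returning count means
 * "everything shares one hash, don't split here" (the caller handles
 * that case by starting the new bucket at last hash + 1).
 */
static int find_divide_pos(const uint32_t *hash, int count)
{
	int split = count / 2;

	while (split > 0 && split < count && hash[split] == hash[split - 1])
		split++;

	return split;
}

int main(void)
{
	uint32_t hashes[] = { 3, 3, 7, 7, 7, 7, 9, 12 };

	/* the middle (index 4) sits inside the run of 7s, so the split moves to 6 */
	printf("divide at %d\n", find_divide_pos(hashes, 8));
	return 0;
}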
@@ -3376,10 +3944,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3376 u64 t_blkno, 3944 u64 t_blkno,
3377 int t_is_new) 3945 int t_is_new)
3378{ 3946{
3379 int ret, i; 3947 int ret;
3380 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3948 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3381 int blocksize = inode->i_sb->s_blocksize;
3382 struct buffer_head **s_bhs, **t_bhs = NULL;
3383 3949
3384 BUG_ON(s_blkno == t_blkno); 3950 BUG_ON(s_blkno == t_blkno);
3385 3951
@@ -3387,92 +3953,115 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3387 (unsigned long long)s_blkno, (unsigned long long)t_blkno, 3953 (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3388 t_is_new); 3954 t_is_new);
3389 3955
3390 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, 3956 s_bucket = ocfs2_xattr_bucket_new(inode);
3391 GFP_NOFS); 3957 t_bucket = ocfs2_xattr_bucket_new(inode);
3392 if (!s_bhs) 3958 if (!s_bucket || !t_bucket) {
3393 return -ENOMEM; 3959 ret = -ENOMEM;
3960 mlog_errno(ret);
3961 goto out;
3962 }
3394 3963
3395 ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); 3964 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
3396 if (ret) 3965 if (ret)
3397 goto out; 3966 goto out;
3398 3967
3399 t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, 3968 /*
3400 GFP_NOFS); 3969 * Even if !t_is_new, we're overwriting t_bucket. Thus,
3401 if (!t_bhs) { 3970 * there's no need to read it.
3402 ret = -ENOMEM; 3971 */
3972 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
3973 if (ret)
3403 goto out; 3974 goto out;
3404 }
3405 3975
3406 ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); 3976 /*
3977 * Hey, if we're overwriting t_bucket, what difference does
3978 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new
3979 * cluster to fill, we came here from
3980 * ocfs2_mv_xattr_buckets(), and it is really new -
3981 * ACCESS_CREATE is required. But we also might have moved data
3982 * out of t_bucket before extending back into it.
3983 * ocfs2_add_new_xattr_bucket() can do this - its call to
3984 * ocfs2_add_new_xattr_cluster() may have created a new extent
3985 * and copied out the end of the old extent. Then it re-extends
3986 * the old extent back to create space for new xattrs. That's
3987 * how we get here, and the bucket isn't really new.
3988 */
3989 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3990 t_is_new ?
3991 OCFS2_JOURNAL_ACCESS_CREATE :
3992 OCFS2_JOURNAL_ACCESS_WRITE);
3407 if (ret) 3993 if (ret)
3408 goto out; 3994 goto out;
3409 3995
3410 for (i = 0; i < blk_per_bucket; i++) { 3996 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3411 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3997 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3412 t_is_new ?
3413 OCFS2_JOURNAL_ACCESS_CREATE :
3414 OCFS2_JOURNAL_ACCESS_WRITE);
3415 if (ret)
3416 goto out;
3417 }
3418
3419 for (i = 0; i < blk_per_bucket; i++) {
3420 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3421 ocfs2_journal_dirty(handle, t_bhs[i]);
3422 }
3423 3998
3424out: 3999out:
3425 if (s_bhs) { 4000 ocfs2_xattr_bucket_free(t_bucket);
3426 for (i = 0; i < blk_per_bucket; i++) 4001 ocfs2_xattr_bucket_free(s_bucket);
3427 brelse(s_bhs[i]);
3428 }
3429 kfree(s_bhs);
3430
3431 if (t_bhs) {
3432 for (i = 0; i < blk_per_bucket; i++)
3433 brelse(t_bhs[i]);
3434 }
3435 kfree(t_bhs);
3436 4002
3437 return ret; 4003 return ret;
3438} 4004}
3439 4005
3440/* 4006/*
3441 * Copy one xattr cluster from src_blk to to_blk. 4007 * src_blk points to the start of an existing extent. last_blk points to
3442 * The to_blk will become the first bucket header of the cluster, so its 4008 * last cluster in that extent. to_blk points to a newly allocated
3443 * xh_num_buckets will be initialized as the bucket num in the cluster. 4009 * extent. We copy the buckets from the cluster at last_blk to the new
4010 * extent. If start_bucket is non-zero, we skip that many buckets before
4011 * we start copying. The new extent's xh_num_buckets gets set to the
4012 * number of buckets we copied. The old extent's xh_num_buckets shrinks
4013 * by the same amount.
3444 */ 4014 */
3445static int ocfs2_cp_xattr_cluster(struct inode *inode, 4015static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
3446 handle_t *handle, 4016 u64 src_blk, u64 last_blk, u64 to_blk,
3447 struct buffer_head *first_bh, 4017 unsigned int start_bucket,
3448 u64 src_blk,
3449 u64 to_blk,
3450 u32 *first_hash) 4018 u32 *first_hash)
3451{ 4019{
3452 int i, ret, credits; 4020 int i, ret, credits;
3453 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4021 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3454 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 4022 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3455 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); 4023 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3456 struct buffer_head *bh = NULL; 4024 struct ocfs2_xattr_bucket *old_first, *new_first;
3457 struct ocfs2_xattr_header *xh; 4025
3458 u64 to_blk_start = to_blk; 4026 mlog(0, "mv xattrs from cluster %llu to %llu\n",
4027 (unsigned long long)last_blk, (unsigned long long)to_blk);
4028
4029 BUG_ON(start_bucket >= num_buckets);
4030 if (start_bucket) {
4031 num_buckets -= start_bucket;
4032 last_blk += (start_bucket * blks_per_bucket);
4033 }
4034
4035 /* The first bucket of the original extent */
4036 old_first = ocfs2_xattr_bucket_new(inode);
4037 /* The first bucket of the new extent */
4038 new_first = ocfs2_xattr_bucket_new(inode);
4039 if (!old_first || !new_first) {
4040 ret = -ENOMEM;
4041 mlog_errno(ret);
4042 goto out;
4043 }
3459 4044
3460 mlog(0, "cp xattrs from cluster %llu to %llu\n", 4045 ret = ocfs2_read_xattr_bucket(old_first, src_blk);
3461 (unsigned long long)src_blk, (unsigned long long)to_blk); 4046 if (ret) {
4047 mlog_errno(ret);
4048 goto out;
4049 }
3462 4050
3463 /* 4051 /*
3464 * We need to update the new cluster and 1 more for the update of 4052 * We need to update the first bucket of the old extent and all
3465 * the 1st bucket of the previous extent rec. 4053 * the buckets going to the new extent.
3466 */ 4054 */
3467 credits = bpc + 1; 4055 credits = ((num_buckets + 1) * blks_per_bucket) +
4056 handle->h_buffer_credits;
3468 ret = ocfs2_extend_trans(handle, credits); 4057 ret = ocfs2_extend_trans(handle, credits);
3469 if (ret) { 4058 if (ret) {
3470 mlog_errno(ret); 4059 mlog_errno(ret);
3471 goto out; 4060 goto out;
3472 } 4061 }
3473 4062
3474 ret = ocfs2_journal_access(handle, inode, first_bh, 4063 ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
3475 OCFS2_JOURNAL_ACCESS_WRITE); 4064 OCFS2_JOURNAL_ACCESS_WRITE);
3476 if (ret) { 4065 if (ret) {
3477 mlog_errno(ret); 4066 mlog_errno(ret);
3478 goto out; 4067 goto out;
@@ -3480,45 +4069,45 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
3480 4069
3481 for (i = 0; i < num_buckets; i++) { 4070 for (i = 0; i < num_buckets; i++) {
3482 ret = ocfs2_cp_xattr_bucket(inode, handle, 4071 ret = ocfs2_cp_xattr_bucket(inode, handle,
3483 src_blk, to_blk, 1); 4072 last_blk + (i * blks_per_bucket),
4073 to_blk + (i * blks_per_bucket),
4074 1);
3484 if (ret) { 4075 if (ret) {
3485 mlog_errno(ret); 4076 mlog_errno(ret);
3486 goto out; 4077 goto out;
3487 } 4078 }
3488
3489 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3490 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3491 } 4079 }
3492 4080
3493 /* update the old bucket header. */ 4081 /*
3494 xh = (struct ocfs2_xattr_header *)first_bh->b_data; 4082 * Get the new bucket ready before we dirty anything
3495 le16_add_cpu(&xh->xh_num_buckets, -num_buckets); 4083 * (This actually shouldn't fail, because we already dirtied
3496 4084 * it once in ocfs2_cp_xattr_bucket()).
3497 ocfs2_journal_dirty(handle, first_bh); 4085 */
3498 4086 ret = ocfs2_read_xattr_bucket(new_first, to_blk);
3499 /* update the new bucket header. */ 4087 if (ret) {
3500 ret = ocfs2_read_block(inode, to_blk_start, &bh);
3501 if (ret < 0) {
3502 mlog_errno(ret); 4088 mlog_errno(ret);
3503 goto out; 4089 goto out;
3504 } 4090 }
3505 4091 ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
3506 ret = ocfs2_journal_access(handle, inode, bh, 4092 OCFS2_JOURNAL_ACCESS_WRITE);
3507 OCFS2_JOURNAL_ACCESS_WRITE);
3508 if (ret) { 4093 if (ret) {
3509 mlog_errno(ret); 4094 mlog_errno(ret);
3510 goto out; 4095 goto out;
3511 } 4096 }
3512 4097
3513 xh = (struct ocfs2_xattr_header *)bh->b_data; 4098 /* Now update the headers */
3514 xh->xh_num_buckets = cpu_to_le16(num_buckets); 4099 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4100 ocfs2_xattr_bucket_journal_dirty(handle, old_first);
3515 4101
3516 ocfs2_journal_dirty(handle, bh); 4102 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4103 ocfs2_xattr_bucket_journal_dirty(handle, new_first);
3517 4104
3518 if (first_hash) 4105 if (first_hash)
3519 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4106 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4107
3520out: 4108out:
3521 brelse(bh); 4109 ocfs2_xattr_bucket_free(new_first);
4110 ocfs2_xattr_bucket_free(old_first);
3522 return ret; 4111 return ret;
3523} 4112}
3524 4113
@@ -3534,7 +4123,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
3534 u32 *first_hash) 4123 u32 *first_hash)
3535{ 4124{
3536 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4125 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3537 int ret, credits = 2 * blk_per_bucket; 4126 int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
3538 4127
3539 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4128 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3540 4129
@@ -3577,43 +4166,49 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
3577 */ 4166 */
3578static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, 4167static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3579 handle_t *handle, 4168 handle_t *handle,
3580 struct buffer_head **first_bh, 4169 struct ocfs2_xattr_bucket *first,
3581 struct buffer_head **header_bh, 4170 struct ocfs2_xattr_bucket *target,
3582 u64 new_blk, 4171 u64 new_blk,
3583 u64 prev_blk,
3584 u32 prev_clusters, 4172 u32 prev_clusters,
3585 u32 *v_start, 4173 u32 *v_start,
3586 int *extend) 4174 int *extend)
3587{ 4175{
3588 int ret = 0; 4176 int ret;
3589 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3590 4177
3591 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 4178 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3592 (unsigned long long)prev_blk, prev_clusters, 4179 (unsigned long long)bucket_blkno(first), prev_clusters,
3593 (unsigned long long)new_blk); 4180 (unsigned long long)new_blk);
3594 4181
3595 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) 4182 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
3596 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 4183 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3597 handle, 4184 handle,
3598 first_bh, 4185 first, target,
3599 header_bh,
3600 new_blk, 4186 new_blk,
3601 prev_blk,
3602 prev_clusters, 4187 prev_clusters,
3603 v_start); 4188 v_start);
3604 else { 4189 if (ret)
3605 u64 last_blk = prev_blk + bpc * (prev_clusters - 1); 4190 mlog_errno(ret);
3606 4191 } else {
3607 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) 4192 /* The start of the last cluster in the first extent */
3608 ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh, 4193 u64 last_blk = bucket_blkno(first) +
3609 last_blk, new_blk, 4194 ((prev_clusters - 1) *
4195 ocfs2_clusters_to_blocks(inode->i_sb, 1));
4196
4197 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4198 ret = ocfs2_mv_xattr_buckets(inode, handle,
4199 bucket_blkno(first),
4200 last_blk, new_blk, 0,
3610 v_start); 4201 v_start);
3611 else { 4202 if (ret)
4203 mlog_errno(ret);
4204 } else {
3612 ret = ocfs2_divide_xattr_cluster(inode, handle, 4205 ret = ocfs2_divide_xattr_cluster(inode, handle,
3613 last_blk, new_blk, 4206 last_blk, new_blk,
3614 v_start); 4207 v_start);
4208 if (ret)
4209 mlog_errno(ret);
3615 4210
3616 if ((*header_bh)->b_blocknr == last_blk && extend) 4211 if ((bucket_blkno(target) == last_blk) && extend)
3617 *extend = 0; 4212 *extend = 0;
3618 } 4213 }
3619 } 4214 }
@@ -3639,56 +4234,37 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3639 */ 4234 */
3640static int ocfs2_add_new_xattr_cluster(struct inode *inode, 4235static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3641 struct buffer_head *root_bh, 4236 struct buffer_head *root_bh,
3642 struct buffer_head **first_bh, 4237 struct ocfs2_xattr_bucket *first,
3643 struct buffer_head **header_bh, 4238 struct ocfs2_xattr_bucket *target,
3644 u32 *num_clusters, 4239 u32 *num_clusters,
3645 u32 prev_cpos, 4240 u32 prev_cpos,
3646 u64 prev_blkno, 4241 int *extend,
3647 int *extend) 4242 struct ocfs2_xattr_set_ctxt *ctxt)
3648{ 4243{
3649 int ret, credits; 4244 int ret;
3650 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 4245 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3651 u32 prev_clusters = *num_clusters; 4246 u32 prev_clusters = *num_clusters;
3652 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; 4247 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3653 u64 block; 4248 u64 block;
3654 handle_t *handle = NULL; 4249 handle_t *handle = ctxt->handle;
3655 struct ocfs2_alloc_context *data_ac = NULL;
3656 struct ocfs2_alloc_context *meta_ac = NULL;
3657 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4250 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3658 struct ocfs2_extent_tree et; 4251 struct ocfs2_extent_tree et;
3659 4252
3660 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 4253 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3661 "previous xattr blkno = %llu\n", 4254 "previous xattr blkno = %llu\n",
3662 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4255 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3663 prev_cpos, (unsigned long long)prev_blkno); 4256 prev_cpos, (unsigned long long)bucket_blkno(first));
3664 4257
3665 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 4258 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3666 4259
3667 ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, 4260 ret = ocfs2_journal_access_xb(handle, inode, root_bh,
3668 &data_ac, &meta_ac); 4261 OCFS2_JOURNAL_ACCESS_WRITE);
3669 if (ret) {
3670 mlog_errno(ret);
3671 goto leave;
3672 }
3673
3674 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
3675 clusters_to_add);
3676 handle = ocfs2_start_trans(osb, credits);
3677 if (IS_ERR(handle)) {
3678 ret = PTR_ERR(handle);
3679 handle = NULL;
3680 mlog_errno(ret);
3681 goto leave;
3682 }
3683
3684 ret = ocfs2_journal_access(handle, inode, root_bh,
3685 OCFS2_JOURNAL_ACCESS_WRITE);
3686 if (ret < 0) { 4262 if (ret < 0) {
3687 mlog_errno(ret); 4263 mlog_errno(ret);
3688 goto leave; 4264 goto leave;
3689 } 4265 }
3690 4266
3691 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 4267 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
3692 clusters_to_add, &bit_off, &num_bits); 4268 clusters_to_add, &bit_off, &num_bits);
3693 if (ret < 0) { 4269 if (ret < 0) {
3694 if (ret != -ENOSPC) 4270 if (ret != -ENOSPC)
@@ -3702,7 +4278,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3702 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", 4278 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3703 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4279 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3704 4280
3705 if (prev_blkno + prev_clusters * bpc == block && 4281 if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
3706 (prev_clusters + num_bits) << osb->s_clustersize_bits <= 4282 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3707 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { 4283 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3708 /* 4284 /*
@@ -3721,10 +4297,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3721 } else { 4297 } else {
3722 ret = ocfs2_adjust_xattr_cross_cluster(inode, 4298 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3723 handle, 4299 handle,
3724 first_bh, 4300 first,
3725 header_bh, 4301 target,
3726 block, 4302 block,
3727 prev_blkno,
3728 prev_clusters, 4303 prev_clusters,
3729 &v_start, 4304 &v_start,
3730 extend); 4305 extend);
@@ -3734,149 +4309,137 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3734 } 4309 }
3735 } 4310 }
3736 4311
3737 if (handle->h_buffer_credits < credits) {
3738 /*
3739 * The journal has been restarted before, and don't
3740 * have enough space for the insertion, so extend it
3741 * here.
3742 */
3743 ret = ocfs2_extend_trans(handle, credits);
3744 if (ret) {
3745 mlog_errno(ret);
3746 goto leave;
3747 }
3748 }
3749 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 4312 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3750 num_bits, (unsigned long long)block, v_start); 4313 num_bits, (unsigned long long)block, v_start);
3751 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, 4314 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3752 num_bits, 0, meta_ac); 4315 num_bits, 0, ctxt->meta_ac);
3753 if (ret < 0) { 4316 if (ret < 0) {
3754 mlog_errno(ret); 4317 mlog_errno(ret);
3755 goto leave; 4318 goto leave;
3756 } 4319 }
3757 4320
3758 ret = ocfs2_journal_dirty(handle, root_bh); 4321 ret = ocfs2_journal_dirty(handle, root_bh);
3759 if (ret < 0) { 4322 if (ret < 0)
3760 mlog_errno(ret); 4323 mlog_errno(ret);
3761 goto leave;
3762 }
3763 4324
3764leave: 4325leave:
3765 if (handle)
3766 ocfs2_commit_trans(osb, handle);
3767 if (data_ac)
3768 ocfs2_free_alloc_context(data_ac);
3769 if (meta_ac)
3770 ocfs2_free_alloc_context(meta_ac);
3771
3772 return ret; 4326 return ret;
3773} 4327}
3774 4328
3775/* 4329/*
3776 * Extend a new xattr bucket and move xattrs to the end one by one until 4330 * We are given an extent. 'first' is the bucket at the very front of
3777 * We meet with start_bh. Only move half of the xattrs to the bucket after it. 4331 * the extent. The extent has space for an additional bucket past
4332 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number
4333 * of the target bucket. We wish to shift every bucket past the target
4334 * down one, filling in that additional space. When we get back to the
4335 * target, we split the target between itself and the now-empty bucket
4336 * at target+1 (aka, target_blkno + blks_per_bucket).
3778 */ 4337 */
3779static int ocfs2_extend_xattr_bucket(struct inode *inode, 4338static int ocfs2_extend_xattr_bucket(struct inode *inode,
3780 struct buffer_head *first_bh, 4339 handle_t *handle,
3781 struct buffer_head *start_bh, 4340 struct ocfs2_xattr_bucket *first,
4341 u64 target_blk,
3782 u32 num_clusters) 4342 u32 num_clusters)
3783{ 4343{
3784 int ret, credits; 4344 int ret, credits;
3785 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4345 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3786 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4346 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3787 u64 start_blk = start_bh->b_blocknr, end_blk; 4347 u64 end_blk;
3788 u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); 4348 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
3789 handle_t *handle;
3790 struct ocfs2_xattr_header *first_xh =
3791 (struct ocfs2_xattr_header *)first_bh->b_data;
3792 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3793 4349
3794 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 4350 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3795 "from %llu, len = %u\n", (unsigned long long)start_blk, 4351 "from %llu, len = %u\n", (unsigned long long)target_blk,
3796 (unsigned long long)first_bh->b_blocknr, num_clusters); 4352 (unsigned long long)bucket_blkno(first), num_clusters);
3797 4353
3798 BUG_ON(bucket >= num_buckets); 4354 /* The extent must have room for an additional bucket */
4355 BUG_ON(new_bucket >=
4356 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
3799 4357
3800 end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket; 4358 /* end_blk points to the last existing bucket */
4359 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
3801 4360
3802 /* 4361 /*
3803 * We will touch all the buckets after the start_bh(include it). 4362 * end_blk is the start of the last existing bucket.
3804 * Add one more bucket and modify the first_bh. 4363 * Thus, (end_blk - target_blk) covers the target bucket and
4364 * every bucket after it up to, but not including, the last
4365 * existing bucket. Then we add the last existing bucket, the
4366 * new bucket, and the first bucket (3 * blk_per_bucket).
3805 */ 4367 */
3806 credits = end_blk - start_blk + 2 * blk_per_bucket + 1; 4368 credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
3807 handle = ocfs2_start_trans(osb, credits); 4369 handle->h_buffer_credits;
3808 if (IS_ERR(handle)) { 4370 ret = ocfs2_extend_trans(handle, credits);
3809 ret = PTR_ERR(handle); 4371 if (ret) {
3810 handle = NULL;
3811 mlog_errno(ret); 4372 mlog_errno(ret);
3812 goto out; 4373 goto out;
3813 } 4374 }
3814 4375
3815 ret = ocfs2_journal_access(handle, inode, first_bh, 4376 ret = ocfs2_xattr_bucket_journal_access(handle, first,
3816 OCFS2_JOURNAL_ACCESS_WRITE); 4377 OCFS2_JOURNAL_ACCESS_WRITE);
3817 if (ret) { 4378 if (ret) {
3818 mlog_errno(ret); 4379 mlog_errno(ret);
3819 goto commit; 4380 goto out;
3820 } 4381 }
3821 4382
3822 while (end_blk != start_blk) { 4383 while (end_blk != target_blk) {
3823 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, 4384 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3824 end_blk + blk_per_bucket, 0); 4385 end_blk + blk_per_bucket, 0);
3825 if (ret) 4386 if (ret)
3826 goto commit; 4387 goto out;
3827 end_blk -= blk_per_bucket; 4388 end_blk -= blk_per_bucket;
3828 } 4389 }
3829 4390
3830 /* Move half of the xattr in start_blk to the next bucket. */ 4391 /* Move half of the xattrs in target_blk to the next bucket. */
3831 ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk, 4392 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
3832 start_blk + blk_per_bucket, NULL, 0); 4393 target_blk + blk_per_bucket, NULL, 0);
3833 4394
3834 le16_add_cpu(&first_xh->xh_num_buckets, 1); 4395 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
3835 ocfs2_journal_dirty(handle, first_bh); 4396 ocfs2_xattr_bucket_journal_dirty(handle, first);
3836 4397
3837commit:
3838 ocfs2_commit_trans(osb, handle);
3839out: 4398out:
3840 return ret; 4399 return ret;
3841} 4400}
3842 4401
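ocfs2_extend_xattr_bucket() now runs inside the caller's handle, extending it by enough credits for every block between the target and the last existing bucket plus three whole buckets (last, new, and first), instead of starting and committing its own transaction. The bucket motion itself is a shift-and-split. A minimal userspace sketch of that idea, assuming toy buckets that only count entries (none of these names are kernel code):

/* Conceptual sketch: shift every bucket after "target" down by one
 * slot, then split target's entries with the now-empty slot at
 * target + 1. */
#include <stdio.h>

#define NBUCKETS 8

struct toy_bucket { int nentries; };

static void extend_bucket(struct toy_bucket *b, int used, int target)
{
    /* Walk from the last existing bucket back toward the target,
     * copying each one into the slot after it (like
     * ocfs2_cp_xattr_bucket()). */
    for (int i = used - 1; i > target; i--)
        b[i + 1] = b[i];

    /* Give half of target's entries to the now-empty next slot
     * (like ocfs2_divide_xattr_bucket()). */
    int half = b[target].nentries / 2;
    b[target + 1].nentries = half;
    b[target].nentries -= half;
}

int main(void)
{
    struct toy_bucket b[NBUCKETS] = { {10}, {12}, {9}, {11} };

    extend_bucket(b, 4, 1);
    for (int i = 0; i < 5; i++)
        printf("bucket %d: %d entries\n", i, b[i].nentries);
    return 0;
}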
3843/* 4402/*
3844 * Add new xattr bucket in an extent record and adjust the buckets accordingly. 4403 * Add new xattr bucket in an extent record and adjust the buckets
3845 * xb_bh is the ocfs2_xattr_block. 4404 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the
3846 * We will move all the buckets starting from header_bh to the next place. As 4405 * bucket we want to insert into.
3847 * for this one, half num of its xattrs will be moved to the next one. 4406 *
4407 * In the easy case, we will move all the buckets after target down by
4408 * one. Half of target's xattrs will be moved to the next bucket.
3848 * 4409 *
3849 * We will allocate a new cluster if current cluster is full and adjust 4410 * If current cluster is full, we'll allocate a new one. This may not
3850 * header_bh and first_bh if the insert place is moved to the new cluster. 4411 * be contiguous. The underlying calls will make sure that there is
4412 * space for the insert, shifting buckets around if necessary.
4413 * 'target' may be moved by those calls.
3851 */ 4414 */
3852static int ocfs2_add_new_xattr_bucket(struct inode *inode, 4415static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3853 struct buffer_head *xb_bh, 4416 struct buffer_head *xb_bh,
3854 struct buffer_head *header_bh) 4417 struct ocfs2_xattr_bucket *target,
4418 struct ocfs2_xattr_set_ctxt *ctxt)
3855{ 4419{
3856 struct ocfs2_xattr_header *first_xh = NULL;
3857 struct buffer_head *first_bh = NULL;
3858 struct ocfs2_xattr_block *xb = 4420 struct ocfs2_xattr_block *xb =
3859 (struct ocfs2_xattr_block *)xb_bh->b_data; 4421 (struct ocfs2_xattr_block *)xb_bh->b_data;
3860 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; 4422 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3861 struct ocfs2_extent_list *el = &xb_root->xt_list; 4423 struct ocfs2_extent_list *el = &xb_root->xt_list;
3862 struct ocfs2_xattr_header *xh = 4424 u32 name_hash =
3863 (struct ocfs2_xattr_header *)header_bh->b_data; 4425 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
3864 u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 4426 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3865 struct super_block *sb = inode->i_sb;
3866 struct ocfs2_super *osb = OCFS2_SB(sb);
3867 int ret, num_buckets, extend = 1; 4427 int ret, num_buckets, extend = 1;
3868 u64 p_blkno; 4428 u64 p_blkno;
3869 u32 e_cpos, num_clusters; 4429 u32 e_cpos, num_clusters;
4430 /* The bucket at the front of the extent */
4431 struct ocfs2_xattr_bucket *first;
3870 4432
3871 mlog(0, "Add new xattr bucket starting form %llu\n", 4433 mlog(0, "Add new xattr bucket starting from %llu\n",
3872 (unsigned long long)header_bh->b_blocknr); 4434 (unsigned long long)bucket_blkno(target));
3873 4435
3874 /* 4436 /* The first bucket of the original extent */
3875 * Add reference for header_bh here because it may be 4437 first = ocfs2_xattr_bucket_new(inode);
3876 * changed in ocfs2_add_new_xattr_cluster and we need 4438 if (!first) {
3877 * to free it in the end. 4439 ret = -ENOMEM;
3878 */ 4440 mlog_errno(ret);
3879 get_bh(header_bh); 4441 goto out;
4442 }
3880 4443
3881 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, 4444 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3882 &num_clusters, el); 4445 &num_clusters, el);
@@ -3885,40 +4448,45 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3885 goto out; 4448 goto out;
3886 } 4449 }
3887 4450
3888 ret = ocfs2_read_block(inode, p_blkno, &first_bh); 4451 ret = ocfs2_read_xattr_bucket(first, p_blkno);
3889 if (ret) { 4452 if (ret) {
3890 mlog_errno(ret); 4453 mlog_errno(ret);
3891 goto out; 4454 goto out;
3892 } 4455 }
3893 4456
3894 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; 4457 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3895 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; 4458 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
3896 4459 /*
3897 if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) { 4460 * This can move first+target if the target bucket moves
4461 * to the new extent.
4462 */
3898 ret = ocfs2_add_new_xattr_cluster(inode, 4463 ret = ocfs2_add_new_xattr_cluster(inode,
3899 xb_bh, 4464 xb_bh,
3900 &first_bh, 4465 first,
3901 &header_bh, 4466 target,
3902 &num_clusters, 4467 &num_clusters,
3903 e_cpos, 4468 e_cpos,
3904 p_blkno, 4469 &extend,
3905 &extend); 4470 ctxt);
3906 if (ret) { 4471 if (ret) {
3907 mlog_errno(ret); 4472 mlog_errno(ret);
3908 goto out; 4473 goto out;
3909 } 4474 }
3910 } 4475 }
3911 4476
3912 if (extend) 4477 if (extend) {
3913 ret = ocfs2_extend_xattr_bucket(inode, 4478 ret = ocfs2_extend_xattr_bucket(inode,
3914 first_bh, 4479 ctxt->handle,
3915 header_bh, 4480 first,
4481 bucket_blkno(target),
3916 num_clusters); 4482 num_clusters);
3917 if (ret) 4483 if (ret)
3918 mlog_errno(ret); 4484 mlog_errno(ret);
4485 }
4486
3919out: 4487out:
3920 brelse(first_bh); 4488 ocfs2_xattr_bucket_free(first);
3921 brelse(header_bh); 4489
3922 return ret; 4490 return ret;
3923} 4491}
3924 4492
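The new comment before ocfs2_add_new_xattr_bucket() carries the key behavioural point: growing the extent can relocate 'target', so nothing derived from it may be cached across the call. Later in this patch, ocfs2_xattr_set_entry_index_block() drops xs->bucket and re-runs ocfs2_xattr_index_block_find() for exactly that reason. The same pattern in plain userspace C, with a reallocated array standing in for the extent (all names below are mine, not ocfs2's):

/* Grow-then-insert: growing the container may move it, so the insert
 * position is recomputed after the growth step. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int *insert_sorted(int *arr, size_t *len, size_t *cap, int val)
{
    if (*len == *cap) {                       /* "extent" is full: grow */
        int *bigger = realloc(arr, 2 * *cap * sizeof(*arr));
        if (!bigger)
            return arr;                       /* give up, keep old array */
        arr = bigger;                         /* may have moved */
        *cap *= 2;
    }

    size_t pos = 0;                           /* re-find the slot */
    while (pos < *len && arr[pos] < val)
        pos++;

    memmove(&arr[pos + 1], &arr[pos], (*len - pos) * sizeof(*arr));
    arr[pos] = val;
    (*len)++;
    return arr;
}

int main(void)
{
    size_t len = 0, cap = 2;
    int *arr = malloc(cap * sizeof(*arr));
    int vals[] = { 5, 1, 4, 2 };

    if (!arr)
        return 1;
    for (size_t i = 0; i < sizeof(vals) / sizeof(vals[0]); i++)
        arr = insert_sorted(arr, &len, &cap, vals[i]);
    for (size_t i = 0; i < len; i++)
        printf("%d ", arr[i]);
    printf("\n");
    free(arr);
    return 0;
}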
@@ -3929,7 +4497,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3929 int block_off = offs >> inode->i_sb->s_blocksize_bits; 4497 int block_off = offs >> inode->i_sb->s_blocksize_bits;
3930 4498
3931 offs = offs % inode->i_sb->s_blocksize; 4499 offs = offs % inode->i_sb->s_blocksize;
3932 return bucket->bhs[block_off]->b_data + offs; 4500 return bucket_block(bucket, block_off) + offs;
3933} 4501}
3934 4502
3935/* 4503/*
@@ -3984,7 +4552,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3984 xe->xe_value_size = 0; 4552 xe->xe_value_size = 0;
3985 4553
3986 val = ocfs2_xattr_bucket_get_val(inode, 4554 val = ocfs2_xattr_bucket_get_val(inode,
3987 &xs->bucket, offs); 4555 xs->bucket, offs);
3988 memset(val + OCFS2_XATTR_SIZE(name_len), 0, 4556 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3989 size - OCFS2_XATTR_SIZE(name_len)); 4557 size - OCFS2_XATTR_SIZE(name_len));
3990 if (OCFS2_XATTR_SIZE(xi->value_len) > 0) 4558 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
@@ -4062,8 +4630,7 @@ set_new_name_value:
4062 xh->xh_free_start = cpu_to_le16(offs); 4630 xh->xh_free_start = cpu_to_le16(offs);
4063 } 4631 }
4064 4632
4065 val = ocfs2_xattr_bucket_get_val(inode, 4633 val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4066 &xs->bucket, offs - size);
4067 xe->xe_name_offset = cpu_to_le16(offs - size); 4634 xe->xe_name_offset = cpu_to_le16(offs - size);
4068 4635
4069 memset(val, 0, size); 4636 memset(val, 0, size);
@@ -4079,125 +4646,45 @@ set_new_name_value:
4079 return; 4646 return;
4080} 4647}
4081 4648
4082static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4083 handle_t *handle,
4084 struct ocfs2_xattr_search *xs,
4085 struct buffer_head **bhs,
4086 u16 bh_num)
4087{
4088 int ret = 0, off, block_off;
4089 struct ocfs2_xattr_entry *xe = xs->here;
4090
4091 /*
4092 * First calculate all the blocks we should journal_access
4093 * and journal_dirty. The first block should always be touched.
4094 */
4095 ret = ocfs2_journal_dirty(handle, bhs[0]);
4096 if (ret)
4097 mlog_errno(ret);
4098
4099 /* calc the data. */
4100 off = le16_to_cpu(xe->xe_name_offset);
4101 block_off = off >> inode->i_sb->s_blocksize_bits;
4102 ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4103 if (ret)
4104 mlog_errno(ret);
4105
4106 return ret;
4107}
4108
4109/* 4649/*
4110 * Set the xattr entry in the specified bucket. 4650 * Set the xattr entry in the specified bucket.
4111 * The bucket is indicated by xs->bucket and it should have enough 4651 * The bucket is indicated by xs->bucket and it should have enough
4112 * space for the xattr insertion. 4652 * space for the xattr insertion.
4113 */ 4653 */
4114static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, 4654static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4655 handle_t *handle,
4115 struct ocfs2_xattr_info *xi, 4656 struct ocfs2_xattr_info *xi,
4116 struct ocfs2_xattr_search *xs, 4657 struct ocfs2_xattr_search *xs,
4117 u32 name_hash, 4658 u32 name_hash,
4118 int local) 4659 int local)
4119{ 4660{
4120 int i, ret; 4661 int ret;
4121 handle_t *handle = NULL; 4662 u64 blkno;
4122 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4123 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4124 4663
4125 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", 4664 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4126 (unsigned long)xi->value_len, xi->name_index, 4665 (unsigned long)xi->value_len, xi->name_index,
4127 (unsigned long long)xs->bucket.bhs[0]->b_blocknr); 4666 (unsigned long long)bucket_blkno(xs->bucket));
4128 4667
4129 if (!xs->bucket.bhs[1]) { 4668 if (!xs->bucket->bu_bhs[1]) {
4130 ret = ocfs2_read_blocks(inode, 4669 blkno = bucket_blkno(xs->bucket);
4131 xs->bucket.bhs[0]->b_blocknr + 1, 4670 ocfs2_xattr_bucket_relse(xs->bucket);
4132 blk_per_bucket - 1, &xs->bucket.bhs[1], 4671 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4133 0);
4134 if (ret) { 4672 if (ret) {
4135 mlog_errno(ret); 4673 mlog_errno(ret);
4136 goto out; 4674 goto out;
4137 } 4675 }
4138 } 4676 }
4139 4677
4140 handle = ocfs2_start_trans(osb, blk_per_bucket); 4678 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4141 if (IS_ERR(handle)) { 4679 OCFS2_JOURNAL_ACCESS_WRITE);
4142 ret = PTR_ERR(handle); 4680 if (ret < 0) {
4143 handle = NULL;
4144 mlog_errno(ret); 4681 mlog_errno(ret);
4145 goto out; 4682 goto out;
4146 } 4683 }
4147 4684
4148 for (i = 0; i < blk_per_bucket; i++) {
4149 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4150 OCFS2_JOURNAL_ACCESS_WRITE);
4151 if (ret < 0) {
4152 mlog_errno(ret);
4153 goto out;
4154 }
4155 }
4156
4157 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); 4685 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4686 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4158 4687
4159 /*Only dirty the blocks we have touched in set xattr. */
4160 ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4161 xs->bucket.bhs, blk_per_bucket);
4162 if (ret)
4163 mlog_errno(ret);
4164out:
4165 ocfs2_commit_trans(osb, handle);
4166
4167 return ret;
4168}
4169
4170static int ocfs2_xattr_value_update_size(struct inode *inode,
4171 struct buffer_head *xe_bh,
4172 struct ocfs2_xattr_entry *xe,
4173 u64 new_size)
4174{
4175 int ret;
4176 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4177 handle_t *handle = NULL;
4178
4179 handle = ocfs2_start_trans(osb, 1);
4180 if (IS_ERR(handle)) {
4181 ret = -ENOMEM;
4182 mlog_errno(ret);
4183 goto out;
4184 }
4185
4186 ret = ocfs2_journal_access(handle, inode, xe_bh,
4187 OCFS2_JOURNAL_ACCESS_WRITE);
4188 if (ret < 0) {
4189 mlog_errno(ret);
4190 goto out_commit;
4191 }
4192
4193 xe->xe_value_size = cpu_to_le64(new_size);
4194
4195 ret = ocfs2_journal_dirty(handle, xe_bh);
4196 if (ret < 0)
4197 mlog_errno(ret);
4198
4199out_commit:
4200 ocfs2_commit_trans(osb, handle);
4201out: 4688out:
4202 return ret; 4689 return ret;
4203} 4690}
@@ -4210,18 +4697,19 @@ out:
4210 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. 4697 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4211 */ 4698 */
4212static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, 4699static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4213 struct buffer_head *header_bh, 4700 struct ocfs2_xattr_bucket *bucket,
4214 int xe_off, 4701 int xe_off,
4215 int len) 4702 int len,
4703 struct ocfs2_xattr_set_ctxt *ctxt)
4216{ 4704{
4217 int ret, offset; 4705 int ret, offset;
4218 u64 value_blk; 4706 u64 value_blk;
4219 struct buffer_head *value_bh = NULL;
4220 struct ocfs2_xattr_value_root *xv;
4221 struct ocfs2_xattr_entry *xe; 4707 struct ocfs2_xattr_entry *xe;
4222 struct ocfs2_xattr_header *xh = 4708 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4223 (struct ocfs2_xattr_header *)header_bh->b_data;
4224 size_t blocksize = inode->i_sb->s_blocksize; 4709 size_t blocksize = inode->i_sb->s_blocksize;
4710 struct ocfs2_xattr_value_buf vb = {
4711 .vb_access = ocfs2_journal_access,
4712 };
4225 4713
4226 xe = &xh->xh_entries[xe_off]; 4714 xe = &xh->xh_entries[xe_off];
4227 4715
@@ -4234,49 +4722,58 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4234 4722
4235 /* We don't allow ocfs2_xattr_value to be stored in different block. */ 4723 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4236 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); 4724 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4237 value_blk += header_bh->b_blocknr;
4238 4725
4239 ret = ocfs2_read_block(inode, value_blk, &value_bh); 4726 vb.vb_bh = bucket->bu_bhs[value_blk];
4240 if (ret) { 4727 BUG_ON(!vb.vb_bh);
4241 mlog_errno(ret);
4242 goto out;
4243 }
4244 4728
4245 xv = (struct ocfs2_xattr_value_root *) 4729 vb.vb_xv = (struct ocfs2_xattr_value_root *)
4246 (value_bh->b_data + offset % blocksize); 4730 (vb.vb_bh->b_data + offset % blocksize);
4247 4731
4248 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", 4732 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4249 xe_off, (unsigned long long)header_bh->b_blocknr, len); 4733 OCFS2_JOURNAL_ACCESS_WRITE);
4250 ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4251 if (ret) { 4734 if (ret) {
4252 mlog_errno(ret); 4735 mlog_errno(ret);
4253 goto out; 4736 goto out;
4254 } 4737 }
4255 4738
4256 ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); 4739 /*
4740 * From here on out we have to dirty the bucket. The generic
4741 * value calls only modify one of the bucket's bhs, but we need
4742 * to send the bucket at once. So if they error, they *could* have
4743 * modified something. We have to assume they did, and dirty
4744 * the whole bucket. This leaves us in a consistent state.
4745 */
4746 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4747 xe_off, (unsigned long long)bucket_blkno(bucket), len);
4748 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4257 if (ret) { 4749 if (ret) {
4258 mlog_errno(ret); 4750 mlog_errno(ret);
4259 goto out; 4751 goto out_dirty;
4260 } 4752 }
4261 4753
4754 xe->xe_value_size = cpu_to_le64(len);
4755
4756out_dirty:
4757 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4758
4262out: 4759out:
4263 brelse(value_bh);
4264 return ret; 4760 return ret;
4265} 4761}
4266 4762
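The truncate path above no longer reads the value block separately; it picks vb.vb_bh by block index within the bucket and vb.vb_xv by the in-block remainder, with the BUG_ON asserting the value root does not straddle two blocks. A tiny worked example of that divide/modulo split, using invented numbers (this shows the general arithmetic, not the kernel helper itself):

#include <stdio.h>

struct bucket_pos { unsigned block; unsigned off; };

/* Map a byte offset inside an xattr bucket onto (block index within
 * bu_bhs[], offset within that block).  The kernel additionally checks
 * that the structure does not cross a block boundary. */
static struct bucket_pos locate(unsigned offset, unsigned blocksize)
{
    struct bucket_pos pos = {
        .block = offset / blocksize,
        .off   = offset % blocksize,
    };
    return pos;
}

int main(void)
{
    /* e.g. bucket built from 512-byte blocks, value root at byte 1800 */
    struct bucket_pos p = locate(1800, 512);

    printf("block %u, offset %u\n", p.block, p.off); /* block 3, offset 264 */
    return 0;
}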
4267static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, 4763static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4268 struct ocfs2_xattr_search *xs, 4764 struct ocfs2_xattr_search *xs,
4269 int len) 4765 int len,
4766 struct ocfs2_xattr_set_ctxt *ctxt)
4270{ 4767{
4271 int ret, offset; 4768 int ret, offset;
4272 struct ocfs2_xattr_entry *xe = xs->here; 4769 struct ocfs2_xattr_entry *xe = xs->here;
4273 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; 4770 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4274 4771
4275 BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); 4772 BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4276 4773
4277 offset = xe - xh->xh_entries; 4774 offset = xe - xh->xh_entries;
4278 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], 4775 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4279 offset, len); 4776 offset, len, ctxt);
4280 if (ret) 4777 if (ret)
4281 mlog_errno(ret); 4778 mlog_errno(ret);
4282 4779
@@ -4284,6 +4781,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4284} 4781}
4285 4782
4286static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, 4783static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4784 handle_t *handle,
4287 struct ocfs2_xattr_search *xs, 4785 struct ocfs2_xattr_search *xs,
4288 char *val, 4786 char *val,
4289 int value_len) 4787 int value_len)
@@ -4299,7 +4797,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4299 4797
4300 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); 4798 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4301 4799
4302 return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); 4800 return __ocfs2_xattr_set_value_outside(inode, handle,
4801 xv, val, value_len);
4303} 4802}
4304 4803
4305static int ocfs2_rm_xattr_cluster(struct inode *inode, 4804static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -4343,15 +4842,15 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4343 } 4842 }
4344 } 4843 }
4345 4844
4346 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 4845 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
4347 if (IS_ERR(handle)) { 4846 if (IS_ERR(handle)) {
4348 ret = -ENOMEM; 4847 ret = -ENOMEM;
4349 mlog_errno(ret); 4848 mlog_errno(ret);
4350 goto out; 4849 goto out;
4351 } 4850 }
4352 4851
4353 ret = ocfs2_journal_access(handle, inode, root_bh, 4852 ret = ocfs2_journal_access_xb(handle, inode, root_bh,
4354 OCFS2_JOURNAL_ACCESS_WRITE); 4853 OCFS2_JOURNAL_ACCESS_WRITE);
4355 if (ret) { 4854 if (ret) {
4356 mlog_errno(ret); 4855 mlog_errno(ret);
4357 goto out_commit; 4856 goto out_commit;
@@ -4392,26 +4891,19 @@ out:
4392} 4891}
4393 4892
4394static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, 4893static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4894 handle_t *handle,
4395 struct ocfs2_xattr_search *xs) 4895 struct ocfs2_xattr_search *xs)
4396{ 4896{
4397 handle_t *handle = NULL; 4897 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4398 struct ocfs2_xattr_header *xh = xs->bucket.xh;
4399 struct ocfs2_xattr_entry *last = &xh->xh_entries[ 4898 struct ocfs2_xattr_entry *last = &xh->xh_entries[
4400 le16_to_cpu(xh->xh_count) - 1]; 4899 le16_to_cpu(xh->xh_count) - 1];
4401 int ret = 0; 4900 int ret = 0;
4402 4901
4403 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); 4902 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4404 if (IS_ERR(handle)) { 4903 OCFS2_JOURNAL_ACCESS_WRITE);
4405 ret = PTR_ERR(handle);
4406 mlog_errno(ret);
4407 return;
4408 }
4409
4410 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4411 OCFS2_JOURNAL_ACCESS_WRITE);
4412 if (ret) { 4904 if (ret) {
4413 mlog_errno(ret); 4905 mlog_errno(ret);
4414 goto out_commit; 4906 return;
4415 } 4907 }
4416 4908
4417 /* Remove the old entry. */ 4909 /* Remove the old entry. */
@@ -4420,11 +4912,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4420 memset(last, 0, sizeof(struct ocfs2_xattr_entry)); 4912 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4421 le16_add_cpu(&xh->xh_count, -1); 4913 le16_add_cpu(&xh->xh_count, -1);
4422 4914
4423 ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); 4915 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4424 if (ret < 0)
4425 mlog_errno(ret);
4426out_commit:
4427 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4428} 4916}
4429 4917
4430/* 4918/*
@@ -4440,7 +4928,8 @@ out_commit:
4440 */ 4928 */
4441static int ocfs2_xattr_set_in_bucket(struct inode *inode, 4929static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4442 struct ocfs2_xattr_info *xi, 4930 struct ocfs2_xattr_info *xi,
4443 struct ocfs2_xattr_search *xs) 4931 struct ocfs2_xattr_search *xs,
4932 struct ocfs2_xattr_set_ctxt *ctxt)
4444{ 4933{
4445 int ret, local = 1; 4934 int ret, local = 1;
4446 size_t value_len; 4935 size_t value_len;
@@ -4468,7 +4957,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4468 value_len = 0; 4957 value_len = 0;
4469 4958
4470 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, 4959 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4471 value_len); 4960 value_len,
4961 ctxt);
4472 if (ret) 4962 if (ret)
4473 goto out; 4963 goto out;
4474 4964
@@ -4488,7 +4978,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4488 xi->value_len = OCFS2_XATTR_ROOT_SIZE; 4978 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4489 } 4979 }
4490 4980
4491 ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); 4981 ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
4982 name_hash, local);
4492 if (ret) { 4983 if (ret) {
4493 mlog_errno(ret); 4984 mlog_errno(ret);
4494 goto out; 4985 goto out;
@@ -4499,7 +4990,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4499 4990
4500 /* allocate the space now for the outside block storage. */ 4991 /* allocate the space now for the outside block storage. */
4501 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, 4992 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4502 value_len); 4993 value_len, ctxt);
4503 if (ret) { 4994 if (ret) {
4504 mlog_errno(ret); 4995 mlog_errno(ret);
4505 4996
@@ -4509,13 +5000,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4509 * storage and we have allocated xattr already, 5000 * storage and we have allocated xattr already,
4510 * so need to remove it. 5001 * so need to remove it.
4511 */ 5002 */
4512 ocfs2_xattr_bucket_remove_xs(inode, xs); 5003 ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
4513 } 5004 }
4514 goto out; 5005 goto out;
4515 } 5006 }
4516 5007
4517set_value_outside: 5008set_value_outside:
4518 ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); 5009 ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5010 xs, val, value_len);
4519out: 5011out:
4520 return ret; 5012 return ret;
4521} 5013}
@@ -4530,7 +5022,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4530 struct ocfs2_xattr_bucket *bucket, 5022 struct ocfs2_xattr_bucket *bucket,
4531 const char *name) 5023 const char *name)
4532{ 5024{
4533 struct ocfs2_xattr_header *xh = bucket->xh; 5025 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4534 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); 5026 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4535 5027
4536 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) 5028 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
@@ -4540,7 +5032,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4540 xh->xh_entries[0].xe_name_hash) { 5032 xh->xh_entries[0].xe_name_hash) {
4541 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " 5033 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4542 "hash = %u\n", 5034 "hash = %u\n",
4543 (unsigned long long)bucket->bhs[0]->b_blocknr, 5035 (unsigned long long)bucket_blkno(bucket),
4544 le32_to_cpu(xh->xh_entries[0].xe_name_hash)); 5036 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4545 return -ENOSPC; 5037 return -ENOSPC;
4546 } 5038 }
@@ -4550,16 +5042,16 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4550 5042
4551static int ocfs2_xattr_set_entry_index_block(struct inode *inode, 5043static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4552 struct ocfs2_xattr_info *xi, 5044 struct ocfs2_xattr_info *xi,
4553 struct ocfs2_xattr_search *xs) 5045 struct ocfs2_xattr_search *xs,
5046 struct ocfs2_xattr_set_ctxt *ctxt)
4554{ 5047{
4555 struct ocfs2_xattr_header *xh; 5048 struct ocfs2_xattr_header *xh;
4556 struct ocfs2_xattr_entry *xe; 5049 struct ocfs2_xattr_entry *xe;
4557 u16 count, header_size, xh_free_start; 5050 u16 count, header_size, xh_free_start;
4558 int i, free, max_free, need, old; 5051 int free, max_free, need, old;
4559 size_t value_size = 0, name_len = strlen(xi->name); 5052 size_t value_size = 0, name_len = strlen(xi->name);
4560 size_t blocksize = inode->i_sb->s_blocksize; 5053 size_t blocksize = inode->i_sb->s_blocksize;
4561 int ret, allocation = 0; 5054 int ret, allocation = 0;
4562 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4563 5055
4564 mlog_entry("Set xattr %s in xattr index block\n", xi->name); 5056 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4565 5057
@@ -4574,7 +5066,7 @@ try_again:
4574 5066
4575 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " 5067 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4576 "of %u which exceed block size\n", 5068 "of %u which exceed block size\n",
4577 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, 5069 (unsigned long long)bucket_blkno(xs->bucket),
4578 header_size); 5070 header_size);
4579 5071
4580 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) 5072 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4614,11 +5106,13 @@ try_again:
4614 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " 5106 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4615 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" 5107 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4616 " %u\n", xs->not_found, 5108 " %u\n", xs->not_found,
4617 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, 5109 (unsigned long long)bucket_blkno(xs->bucket),
4618 free, need, max_free, le16_to_cpu(xh->xh_free_start), 5110 free, need, max_free, le16_to_cpu(xh->xh_free_start),
4619 le16_to_cpu(xh->xh_name_value_len)); 5111 le16_to_cpu(xh->xh_name_value_len));
4620 5112
4621 if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { 5113 if (free < need ||
5114 (xs->not_found &&
5115 count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
4622 if (need <= max_free && 5116 if (need <= max_free &&
4623 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { 5117 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4624 /* 5118 /*
@@ -4626,7 +5120,8 @@ try_again:
4626 * name/value will be moved, the xe shouldn't be changed 5120 * name/value will be moved, the xe shouldn't be changed
4627 * in xs. 5121 * in xs.
4628 */ 5122 */
4629 ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); 5123 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5124 xs->bucket);
4630 if (ret) { 5125 if (ret) {
4631 mlog_errno(ret); 5126 mlog_errno(ret);
4632 goto out; 5127 goto out;
@@ -4658,7 +5153,7 @@ try_again:
4658 * add a new bucket for the insert. 5153 * add a new bucket for the insert.
4659 */ 5154 */
4660 ret = ocfs2_check_xattr_bucket_collision(inode, 5155 ret = ocfs2_check_xattr_bucket_collision(inode,
4661 &xs->bucket, 5156 xs->bucket,
4662 xi->name); 5157 xi->name);
4663 if (ret) { 5158 if (ret) {
4664 mlog_errno(ret); 5159 mlog_errno(ret);
@@ -4667,17 +5162,21 @@ try_again:
4667 5162
4668 ret = ocfs2_add_new_xattr_bucket(inode, 5163 ret = ocfs2_add_new_xattr_bucket(inode,
4669 xs->xattr_bh, 5164 xs->xattr_bh,
4670 xs->bucket.bhs[0]); 5165 xs->bucket,
5166 ctxt);
4671 if (ret) { 5167 if (ret) {
4672 mlog_errno(ret); 5168 mlog_errno(ret);
4673 goto out; 5169 goto out;
4674 } 5170 }
4675 5171
4676 for (i = 0; i < blk_per_bucket; i++) 5172 /*
4677 brelse(xs->bucket.bhs[i]); 5173 * ocfs2_add_new_xattr_bucket() will have updated
4678 5174 * xs->bucket if it moved, but it will not have updated
4679 memset(&xs->bucket, 0, sizeof(xs->bucket)); 5175 * any of the other search fields. Thus, we drop it and
4680 5176 * re-search. Everything should be cached, so it'll be
5177 * quick.
5178 */
5179 ocfs2_xattr_bucket_relse(xs->bucket);
4681 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, 5180 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4682 xi->name_index, 5181 xi->name_index,
4683 xi->name, xs); 5182 xi->name, xs);
@@ -4689,7 +5188,7 @@ try_again:
4689 } 5188 }
4690 5189
4691xattr_set: 5190xattr_set:
4692 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); 5191 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
4693out: 5192out:
4694 mlog_exit(ret); 5193 mlog_exit(ret);
4695 return ret; 5194 return ret;
@@ -4700,24 +5199,41 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4700 void *para) 5199 void *para)
4701{ 5200{
4702 int ret = 0; 5201 int ret = 0;
4703 struct ocfs2_xattr_header *xh = bucket->xh; 5202 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4704 u16 i; 5203 u16 i;
4705 struct ocfs2_xattr_entry *xe; 5204 struct ocfs2_xattr_entry *xe;
5205 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5206 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5207 int credits = ocfs2_remove_extent_credits(osb->sb) +
5208 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5209
5210
5211 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
4706 5212
4707 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { 5213 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4708 xe = &xh->xh_entries[i]; 5214 xe = &xh->xh_entries[i];
4709 if (ocfs2_xattr_is_local(xe)) 5215 if (ocfs2_xattr_is_local(xe))
4710 continue; 5216 continue;
4711 5217
4712 ret = ocfs2_xattr_bucket_value_truncate(inode, 5218 ctxt.handle = ocfs2_start_trans(osb, credits);
4713 bucket->bhs[0], 5219 if (IS_ERR(ctxt.handle)) {
4714 i, 0); 5220 ret = PTR_ERR(ctxt.handle);
5221 mlog_errno(ret);
5222 break;
5223 }
5224
5225 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5226 i, 0, &ctxt);
5227
5228 ocfs2_commit_trans(osb, ctxt.handle);
4715 if (ret) { 5229 if (ret) {
4716 mlog_errno(ret); 5230 mlog_errno(ret);
4717 break; 5231 break;
4718 } 5232 }
4719 } 5233 }
4720 5234
5235 ocfs2_schedule_truncate_log_flush(osb, 1);
5236 ocfs2_run_deallocs(osb, &ctxt.dealloc);
4721 return ret; 5237 return ret;
4722} 5238}
4723 5239
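ocfs2_delete_xattr_in_bucket() now opens and commits a small transaction per non-local entry, sized as one extent removal plus one bucket's worth of blocks, and only afterwards schedules the truncate-log flush and runs the deallocation context. The control flow, stripped of the journal details (every name below is invented scaffolding, not kernel API):

#include <stdio.h>

struct txn { int open; };

static int txn_start(struct txn *t)  { t->open = 1; return 0; }
static void txn_commit(struct txn *t) { t->open = 0; }

/* Fake worker: pretend entry 3 fails so the early break is visible. */
static int truncate_entry(int idx) { return idx == 3 ? -1 : 0; }

int main(void)
{
    struct txn t = { 0 };
    int ret = 0;

    for (int i = 0; i < 5; i++) {
        if (txn_start(&t))             /* like ocfs2_start_trans() */
            break;
        ret = truncate_entry(i);       /* do the work inside the txn */
        txn_commit(&t);                /* always commit what we did */
        if (ret) {
            fprintf(stderr, "entry %d failed\n", i);
            break;                     /* stop on the first error */
        }
    }
    return ret ? 1 : 0;
}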
@@ -4768,6 +5284,74 @@ out:
4768} 5284}
4769 5285
4770/* 5286/*
5287 * 'security' attributes support
5288 */
5289static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5290 size_t list_size, const char *name,
5291 size_t name_len)
5292{
5293 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5294 const size_t total_len = prefix_len + name_len + 1;
5295
5296 if (list && total_len <= list_size) {
5297 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5298 memcpy(list + prefix_len, name, name_len);
5299 list[prefix_len + name_len] = '\0';
5300 }
5301 return total_len;
5302}
5303
5304static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5305 void *buffer, size_t size)
5306{
5307 if (strcmp(name, "") == 0)
5308 return -EINVAL;
5309 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5310 buffer, size);
5311}
5312
5313static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5314 const void *value, size_t size, int flags)
5315{
5316 if (strcmp(name, "") == 0)
5317 return -EINVAL;
5318
5319 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5320 size, flags);
5321}
5322
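ocfs2_xattr_security_list() follows the usual ->list contract: always report the space the prefixed entry needs, and only copy "security." plus the name and a trailing NUL when the caller's buffer is big enough. A stand-alone sketch of how a caller sizes and then fills such a list; the helper mirrors the function above but is not ocfs2 code:

#include <stdio.h>
#include <string.h>

#define SECURITY_PREFIX "security."

/* Report total length; copy the prefixed name only if it fits. */
static size_t list_one(char *list, size_t list_size, const char *name)
{
    const size_t prefix_len = strlen(SECURITY_PREFIX);
    const size_t total_len = prefix_len + strlen(name) + 1;

    if (list && total_len <= list_size) {
        memcpy(list, SECURITY_PREFIX, prefix_len);
        strcpy(list + prefix_len, name);     /* name + trailing NUL */
    }
    return total_len;
}

int main(void)
{
    char buf[64];
    size_t need = list_one(NULL, 0, "selinux");           /* pass 1: size */
    size_t used = list_one(buf, sizeof(buf), "selinux");  /* pass 2: fill */

    printf("need=%zu used=%zu entry=%s\n", need, used, buf);
    return 0;
}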
5323int ocfs2_init_security_get(struct inode *inode,
5324 struct inode *dir,
5325 struct ocfs2_security_xattr_info *si)
5326{
5327 /* check whether ocfs2 supports the xattr feature */
5328 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
5329 return -EOPNOTSUPP;
5330 return security_inode_init_security(inode, dir, &si->name, &si->value,
5331 &si->value_len);
5332}
5333
5334int ocfs2_init_security_set(handle_t *handle,
5335 struct inode *inode,
5336 struct buffer_head *di_bh,
5337 struct ocfs2_security_xattr_info *si,
5338 struct ocfs2_alloc_context *xattr_ac,
5339 struct ocfs2_alloc_context *data_ac)
5340{
5341 return ocfs2_xattr_set_handle(handle, inode, di_bh,
5342 OCFS2_XATTR_INDEX_SECURITY,
5343 si->name, si->value, si->value_len, 0,
5344 xattr_ac, data_ac);
5345}
5346
5347struct xattr_handler ocfs2_xattr_security_handler = {
5348 .prefix = XATTR_SECURITY_PREFIX,
5349 .list = ocfs2_xattr_security_list,
5350 .get = ocfs2_xattr_security_get,
5351 .set = ocfs2_xattr_security_set,
5352};
5353
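With the handler registered, security.* attributes on ocfs2 become reachable through the ordinary VFS xattr system calls. For completeness, a small userspace probe using the generic Linux API; nothing here is ocfs2-specific, "security.example" is an arbitrary name chosen for illustration, and writing unknown security.* names usually requires CAP_SYS_ADMIN:

#include <stdio.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : "testfile";
    char value[256];

    /* Store and read back a security.* attribute on an existing file. */
    if (setxattr(path, "security.example", "hello", 5, 0) != 0) {
        perror("setxattr");
        return 1;
    }

    ssize_t len = getxattr(path, "security.example", value, sizeof(value));
    if (len < 0) {
        perror("getxattr");
        return 1;
    }

    printf("security.example = %.*s\n", (int)len, value);
    return 0;
}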
5354/*
4771 * 'trusted' attributes support 5355 * 'trusted' attributes support
4772 */ 5356 */
4773static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 5357static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1d8314c7656d..5a1ebc789f7e 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -30,13 +30,58 @@ enum ocfs2_xattr_type {
30 OCFS2_XATTR_MAX 30 OCFS2_XATTR_MAX
31}; 31};
32 32
33struct ocfs2_security_xattr_info {
34 int enable;
35 char *name;
36 void *value;
37 size_t value_len;
38};
39
33extern struct xattr_handler ocfs2_xattr_user_handler; 40extern struct xattr_handler ocfs2_xattr_user_handler;
34extern struct xattr_handler ocfs2_xattr_trusted_handler; 41extern struct xattr_handler ocfs2_xattr_trusted_handler;
42extern struct xattr_handler ocfs2_xattr_security_handler;
43#ifdef CONFIG_OCFS2_FS_POSIX_ACL
44extern struct xattr_handler ocfs2_xattr_acl_access_handler;
45extern struct xattr_handler ocfs2_xattr_acl_default_handler;
46#endif
35extern struct xattr_handler *ocfs2_xattr_handlers[]; 47extern struct xattr_handler *ocfs2_xattr_handlers[];
36 48
37ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); 49ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
50int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
51 const char *, void *, size_t);
38int ocfs2_xattr_set(struct inode *, int, const char *, const void *, 52int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
39 size_t, int); 53 size_t, int);
54int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
55 int, const char *, const void *, size_t, int,
56 struct ocfs2_alloc_context *,
57 struct ocfs2_alloc_context *);
40int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 58int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *,
60 struct ocfs2_security_xattr_info *);
61int ocfs2_init_security_set(handle_t *, struct inode *,
62 struct buffer_head *,
63 struct ocfs2_security_xattr_info *,
64 struct ocfs2_alloc_context *,
65 struct ocfs2_alloc_context *);
66int ocfs2_calc_security_init(struct inode *,
67 struct ocfs2_security_xattr_info *,
68 int *, int *, struct ocfs2_alloc_context **);
69int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
70 int, struct ocfs2_security_xattr_info *,
71 int *, int *, struct ocfs2_alloc_context **);
72
73/*
74 * xattrs can live inside an inode, as part of an external xattr block,
75 * or inside an xattr bucket, which is the leaf of a tree rooted in an
76 * xattr block. Some of the xattr calls, especially the value setting
77 * functions, want to treat each of these locations as equal. Let's wrap
78 * them in a structure that we can pass around instead of raw buffer_heads.
79 */
80struct ocfs2_xattr_value_buf {
81 struct buffer_head *vb_bh;
82 ocfs2_journal_access_func vb_access;
83 struct ocfs2_xattr_value_root *vb_xv;
84};
85
41 86
42#endif /* OCFS2_XATTR_H */ 87#endif /* OCFS2_XATTR_H */
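The ocfs2_xattr_value_buf comment is the design note behind much of this patch: one small descriptor (a buffer_head, a journal-access function, and the value root it holds) lets the value set/truncate paths stop caring whether the value root lives inline in the inode, in an external xattr block, or in a bucket. The shape of that abstraction reduced to portable C; the types and names below are stand-ins, not the kernel's:

#include <stdio.h>

/* Stand-in for "wherever the value root happens to live". */
struct blob { const char *where; int dirty; };

/* The wrapper: the data's location plus how to get write access to it. */
struct value_buf {
    struct blob *bh;                      /* like vb_bh */
    int (*access)(struct blob *);         /* like vb_access */
};

static int access_inode(struct blob *b)
{
    printf("journal access for %s\n", b->where);
    return 0;
}

static int access_bucket(struct blob *b)
{
    printf("journal access for %s\n", b->where);
    return 0;
}

/* Generic code only ever sees the wrapper, never the location. */
static void shrink_value(struct value_buf *vb)
{
    if (vb->access(vb->bh) == 0)
        vb->bh->dirty = 1;                /* pretend we truncated it */
}

int main(void)
{
    struct blob in_inode  = { "inline xattr area", 0 };
    struct blob in_bucket = { "bucket block 3", 0 };
    struct value_buf a = { &in_inode,  access_inode  };
    struct value_buf b = { &in_bucket, access_bucket };

    shrink_value(&a);
    shrink_value(&b);
    return 0;
}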