aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2')
-rw-r--r--fs/gfs2/Kconfig46
-rw-r--r--fs/gfs2/Makefile42
-rw-r--r--fs/gfs2/acl.c316
-rw-r--r--fs/gfs2/acl.h37
-rw-r--r--fs/gfs2/bits.c182
-rw-r--r--fs/gfs2/bits.h28
-rw-r--r--fs/gfs2/bmap.c1098
-rw-r--r--fs/gfs2/bmap.h35
-rw-r--r--fs/gfs2/daemon.c229
-rw-r--r--fs/gfs2/daemon.h20
-rw-r--r--fs/gfs2/dir.c1963
-rw-r--r--fs/gfs2/dir.h73
-rw-r--r--fs/gfs2/eaops.c189
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/eattr.c1568
-rw-r--r--fs/gfs2/eattr.h88
-rw-r--r--fs/gfs2/format.h21
-rw-r--r--fs/gfs2/gfs2.h31
-rw-r--r--fs/gfs2/glock.c2480
-rw-r--r--fs/gfs2/glock.h166
-rw-r--r--fs/gfs2/glops.c492
-rw-r--r--fs/gfs2/glops.h23
-rw-r--r--fs/gfs2/incore.h684
-rw-r--r--fs/gfs2/inode.c1854
-rw-r--r--fs/gfs2/inode.h72
-rw-r--r--fs/gfs2/lm.c243
-rw-r--r--fs/gfs2/lm.h42
-rw-r--r--fs/gfs2/lm_interface.h295
-rw-r--r--fs/gfs2/locking.c191
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c538
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h191
-rw-r--r--fs/gfs2/locking/dlm/main.c64
-rw-r--r--fs/gfs2/locking/dlm/mount.c255
-rw-r--r--fs/gfs2/locking/dlm/plock.c298
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c218
-rw-r--r--fs/gfs2/locking/dlm/thread.c352
-rw-r--r--fs/gfs2/locking/nolock/Makefile3
-rw-r--r--fs/gfs2/locking/nolock/main.c264
-rw-r--r--fs/gfs2/log.c592
-rw-r--r--fs/gfs2/log.h65
-rw-r--r--fs/gfs2/lops.c803
-rw-r--r--fs/gfs2/lops.h96
-rw-r--r--fs/gfs2/lvb.c53
-rw-r--r--fs/gfs2/lvb.h20
-rw-r--r--fs/gfs2/main.c114
-rw-r--r--fs/gfs2/meta_io.c887
-rw-r--r--fs/gfs2/meta_io.h89
-rw-r--r--fs/gfs2/mount.c215
-rw-r--r--fs/gfs2/mount.h15
-rw-r--r--fs/gfs2/ondisk.c517
-rw-r--r--fs/gfs2/ops_address.c636
-rw-r--r--fs/gfs2/ops_address.h17
-rw-r--r--fs/gfs2/ops_dentry.c124
-rw-r--r--fs/gfs2/ops_dentry.h15
-rw-r--r--fs/gfs2/ops_export.c298
-rw-r--r--fs/gfs2/ops_export.h15
-rw-r--r--fs/gfs2/ops_file.c997
-rw-r--r--fs/gfs2/ops_file.h20
-rw-r--r--fs/gfs2/ops_fstype.c905
-rw-r--r--fs/gfs2/ops_fstype.h16
-rw-r--r--fs/gfs2/ops_inode.c1197
-rw-r--r--fs/gfs2/ops_inode.h18
-rw-r--r--fs/gfs2/ops_super.c387
-rw-r--r--fs/gfs2/ops_super.h15
-rw-r--r--fs/gfs2/ops_vm.c198
-rw-r--r--fs/gfs2/ops_vm.h16
-rw-r--r--fs/gfs2/page.c283
-rw-r--r--fs/gfs2/page.h23
-rw-r--r--fs/gfs2/quota.c1303
-rw-r--r--fs/gfs2/quota.h34
-rw-r--r--fs/gfs2/recovery.c580
-rw-r--r--fs/gfs2/recovery.h32
-rw-r--r--fs/gfs2/rgrp.c1369
-rw-r--r--fs/gfs2/rgrp.h62
-rw-r--r--fs/gfs2/super.c950
-rw-r--r--fs/gfs2/super.h54
-rw-r--r--fs/gfs2/sys.c582
-rw-r--r--fs/gfs2/sys.h24
-rw-r--r--fs/gfs2/trans.c193
-rw-r--r--fs/gfs2/trans.h35
-rw-r--r--fs/gfs2/unlinked.c458
-rw-r--r--fs/gfs2/unlinked.h25
-rw-r--r--fs/gfs2/util.c245
-rw-r--r--fs/gfs2/util.h169
85 files changed, 29485 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
new file mode 100644
index 000000000000..17cb44bea1c0
--- /dev/null
+++ b/fs/gfs2/Kconfig
@@ -0,0 +1,46 @@
# Kconfig for the GFS2 cluster filesystem and its pluggable lock modules.
config GFS2_FS
	tristate "GFS2 file system support"
	default m
	depends on EXPERIMENTAL
	select FS_POSIX_ACL
	select SYSFS
	help
	  A cluster filesystem.

	  Allows a cluster of computers to simultaneously use a block device
	  that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
	  and writes to the block device like a local filesystem, but also uses
	  a lock module to allow the computers to coordinate their I/O so
	  filesystem consistency is maintained. One of the nifty features of
	  GFS is perfect consistency -- changes made to the filesystem on one
	  machine show up immediately on all other machines in the cluster.

	  To use the GFS2 filesystem, you will need to enable one or more of
	  the below locking modules. Documentation and utilities for GFS2 can
	  be found here: http://sources.redhat.com/cluster/gfs/

config GFS2_FS_LOCKING_NOLOCK
	tristate "GFS2 \"nolock\" locking module"
	depends on GFS2_FS
	help
	  Single node locking module for GFS2.

	  Use this module if you want to use GFS2 on a single node without
	  its clustering features. You can still take advantage of the
	  large file support, and upgrade to running a full cluster later on
	  if required.

	  If you will only be using GFS2 in cluster mode, you do not need this
	  module.

config GFS2_FS_LOCKING_DLM
	tristate "GFS2 DLM locking module"
	depends on GFS2_FS
	select DLM
	help
	  Multiple node locking module for GFS2

	  Most users of GFS2 will require this module. It provides the locking
	  interface between GFS2 and the DLM, which is required to use GFS2
	  in a cluster environment.
46
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
new file mode 100644
index 000000000000..88f927948113
--- /dev/null
+++ b/fs/gfs2/Makefile
@@ -0,0 +1,42 @@
# Makefile for the GFS2 filesystem: builds the core gfs2.o module from the
# objects below, then descends into whichever lock-module directories were
# enabled in Kconfig.
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := \
	acl.o \
	bits.o \
	bmap.o \
	daemon.o \
	dir.o \
	eaops.o \
	eattr.o \
	glock.o \
	glops.o \
	inode.o \
	lm.o \
	log.o \
	lops.o \
	locking.o \
	lvb.o \
	main.o \
	meta_io.o \
	mount.o \
	ondisk.o \
	ops_address.o \
	ops_dentry.o \
	ops_export.o \
	ops_file.o \
	ops_fstype.o \
	ops_inode.o \
	ops_super.o \
	ops_vm.o \
	page.o \
	quota.o \
	recovery.o \
	rgrp.o \
	super.o \
	sys.o \
	trans.o \
	unlinked.o \
	util.o

obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
42
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
new file mode 100644
index 000000000000..e9d05fe94357
--- /dev/null
+++ b/fs/gfs2/acl.c
@@ -0,0 +1,316 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h>
17#include <asm/semaphore.h>
18#include <linux/gfs2_ondisk.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "acl.h"
24#include "eaops.h"
25#include "eattr.h"
26#include "glock.h"
27#include "inode.h"
28#include "meta_io.h"
29#include "trans.h"
30#include "util.h"
31
32#define ACL_ACCESS 1
33#define ACL_DEFAULT 0
34
/**
 * gfs2_acl_validate_set - Validate a request to set a POSIX ACL xattr
 * @ip: the inode the ACL is being attached to
 * @access: nonzero for the access ACL, zero for the default ACL
 * @er: the extended-attribute request holding the raw xattr payload
 * @remove: set to 1 if the request should be treated as a removal
 * @mode: in/out; may be rewritten when the access ACL collapses to mode bits
 *
 * Returns: errno
 */
int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode)
{
	struct posix_acl *acl;
	int error;

	/* Permission/type checks are shared with the removal path. */
	error = gfs2_acl_validate_remove(ip, access);
	if (error)
		return error;

	if (!er->er_data)
		return -EINVAL;

	acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (!acl) {
		/* An empty ACL payload means "delete the ACL". */
		*remove = 1;
		return 0;
	}

	error = posix_acl_valid(acl);
	if (error)
		goto out;

	if (access) {
		/*
		 * posix_acl_equiv_mode() returns 0 when the ACL is fully
		 * representable as mode bits (so the xattr is redundant and
		 * can be removed), >0 when an extended ACL is still needed.
		 */
		error = posix_acl_equiv_mode(acl, mode);
		if (!error)
			*remove = 1;
		else if (error > 0)
			error = 0;
	}

out:
	posix_acl_release(acl);

	return error;
}
74
75int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
76{
77 if (!ip->i_sbd->sd_args.ar_posix_acl)
78 return -EOPNOTSUPP;
79 if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
80 return -EPERM;
81 if (S_ISLNK(ip->i_di.di_mode))
82 return -EOPNOTSUPP;
83 if (!access && !S_ISDIR(ip->i_di.di_mode))
84 return -EACCES;
85
86 return 0;
87}
88
/*
 * acl_get - Read an inode's access or default ACL xattr
 * @ip: the inode
 * @access: nonzero for the access ACL, zero for the default ACL
 * @acl: optional out; on success holds a parsed ACL (caller releases),
 *       or NULL if the inode has no such ACL
 * @el: optional out; on success with an EA found, holds its location and
 *      the caller must brelse(el->el_bh).  If NULL a local one is used
 *      and released here.
 * @data: optional out; on success holds the raw xattr bytes (caller kfrees)
 * @len: out; length of @data
 *
 * Returns: errno (0 with *acl == NULL when the inode simply has no ACL)
 */
static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
		   struct gfs2_ea_location *el, char **data, unsigned int *len)
{
	struct gfs2_ea_request er;
	struct gfs2_ea_location el_this;
	int error;

	/* No extended attributes at all, so no ACL. */
	if (!ip->i_di.di_eattr)
		return 0;

	memset(&er, 0, sizeof(struct gfs2_ea_request));
	if (access) {
		er.er_name = GFS2_POSIX_ACL_ACCESS;
		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
	} else {
		er.er_name = GFS2_POSIX_ACL_DEFAULT;
		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
	}
	er.er_type = GFS2_EATYPE_SYS;

	if (!el)
		el = &el_this;

	error = gfs2_ea_find(ip, &er, el);
	if (error)
		return error;
	if (!el->el_ea)
		return 0;
	if (!GFS2_EA_DATA_LEN(el->el_ea))
		goto out;

	er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
	er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
	error = -ENOMEM;
	if (!er.er_data)
		goto out;

	error = gfs2_ea_get_copy(ip, el, er.er_data);
	if (error)
		goto out_kfree;

	if (acl) {
		*acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
		if (IS_ERR(*acl))
			error = PTR_ERR(*acl);
	}

out_kfree:
	/* Hand the raw buffer to the caller only on full success. */
	if (error || !data)
		kfree(er.er_data);
	else {
		*data = er.er_data;
		*len = er.er_data_len;
	}

out:
	/* On error, or when the caller didn't ask for the location,
	   drop the buffer reference taken by gfs2_ea_find(). */
	if (error || el == &el_this)
		brelse(el->el_bh);

	return error;
}
150
151/**
152 * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
153 * @inode: the file we want to do something to
154 * @mask: what we want to do
155 *
156 * Returns: errno
157 */
158
159int gfs2_check_acl_locked(struct inode *inode, int mask)
160{
161 struct posix_acl *acl = NULL;
162 int error;
163
164 error = acl_get(inode->u.generic_ip, ACL_ACCESS, &acl, NULL, NULL, NULL);
165 if (error)
166 return error;
167
168 if (acl) {
169 error = posix_acl_permission(inode, acl, mask);
170 posix_acl_release(acl);
171 return error;
172 }
173
174 return -EAGAIN;
175}
176
177int gfs2_check_acl(struct inode *inode, int mask)
178{
179 struct gfs2_inode *ip = inode->u.generic_ip;
180 struct gfs2_holder i_gh;
181 int error;
182
183 error = gfs2_glock_nq_init(ip->i_gl,
184 LM_ST_SHARED, LM_FLAG_ANY,
185 &i_gh);
186 if (!error) {
187 error = gfs2_check_acl_locked(inode, mask);
188 gfs2_glock_dq_uninit(&i_gh);
189 }
190
191 return error;
192}
193
194static int munge_mode(struct gfs2_inode *ip, mode_t mode)
195{
196 struct gfs2_sbd *sdp = ip->i_sbd;
197 struct buffer_head *dibh;
198 int error;
199
200 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
201 if (error)
202 return error;
203
204 error = gfs2_meta_inode_buffer(ip, &dibh);
205 if (!error) {
206 gfs2_assert_withdraw(sdp,
207 (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
208 ip->i_di.di_mode = mode;
209 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
210 gfs2_dinode_out(&ip->i_di, dibh->b_data);
211 brelse(dibh);
212 }
213
214 gfs2_trans_end(sdp);
215
216 return 0;
217}
218
/**
 * gfs2_acl_create - Set up the ACLs of a newly created inode
 * @dip: the parent directory (source of the inheritable default ACL)
 * @ip: the new inode
 *
 * If the parent has no default ACL, the umask is applied to the new
 * inode's mode instead.  Otherwise the default ACL is inherited (and,
 * for directories, also stored as the child's default ACL), then masked
 * against the requested mode per posix_acl_create_masq().
 *
 * Returns: errno
 */
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct posix_acl *acl = NULL, *clone;
	struct gfs2_ea_request er;
	mode_t mode = ip->i_di.di_mode;
	int error;

	if (!sdp->sd_args.ar_posix_acl)
		return 0;
	if (S_ISLNK(ip->i_di.di_mode))
		return 0;

	memset(&er, 0, sizeof(struct gfs2_ea_request));
	er.er_type = GFS2_EATYPE_SYS;

	/* er.er_data gets the parent's raw default-ACL xattr; we own it
	   and must kfree it below. */
	error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
			&er.er_data, &er.er_data_len);
	if (error)
		return error;
	if (!acl) {
		/* No default ACL: classic umask handling. */
		mode &= ~current->fs->umask;
		if (mode != ip->i_di.di_mode)
			error = munge_mode(ip, mode);
		return error;
	}

	/* Clone before masquerading so the parent's copy is untouched. */
	clone = posix_acl_clone(acl, GFP_KERNEL);
	error = -ENOMEM;
	if (!clone)
		goto out;
	posix_acl_release(acl);
	acl = clone;

	if (S_ISDIR(ip->i_di.di_mode)) {
		/* Directories inherit the default ACL as their own default. */
		er.er_name = GFS2_POSIX_ACL_DEFAULT;
		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
		error = gfs2_system_eaops.eo_set(ip, &er);
		if (error)
			goto out;
	}

	error = posix_acl_create_masq(acl, &mode);
	if (error < 0)
		goto out;
	if (error > 0) {
		/* An extended access ACL is needed; reuse the xattr buffer
		   (the clone has the same entry count, so the same size —
		   presumably guaranteed by posix_acl_create_masq; verify). */
		er.er_name = GFS2_POSIX_ACL_ACCESS;
		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
		posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
		er.er_mode = mode;
		er.er_flags = GFS2_ERF_MODE;
		error = gfs2_system_eaops.eo_set(ip, &er);
		if (error)
			goto out;
	} else
		munge_mode(ip, mode);

out:
	posix_acl_release(acl);
	kfree(er.er_data);
	return error;
}
281
/**
 * gfs2_acl_chmod - Update the access ACL to reflect a chmod
 * @ip: the inode
 * @attr: the new attributes (ia_mode is folded into the ACL)
 *
 * If the inode has no access ACL, falls through to a plain attribute
 * update.  Otherwise the ACL is cloned, re-masked against the new mode,
 * and written back along with the mode change.
 *
 * Returns: errno
 */
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
{
	struct posix_acl *acl = NULL, *clone;
	struct gfs2_ea_location el;
	char *data;
	unsigned int len;
	int error;

	/* On success with acl != NULL we own el.el_bh and data. */
	error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
	if (error)
		return error;
	if (!acl)
		return gfs2_setattr_simple(ip, attr);

	clone = posix_acl_clone(acl, GFP_KERNEL);
	error = -ENOMEM;
	if (!clone)
		goto out;
	posix_acl_release(acl);
	acl = clone;

	error = posix_acl_chmod_masq(acl, attr->ia_mode);
	if (!error) {
		/* Serialize the updated ACL back into the same buffer and
		   write it out together with the mode change. */
		posix_acl_to_xattr(acl, data, len);
		error = gfs2_ea_acl_chmod(ip, &el, attr, data);
	}

out:
	posix_acl_release(acl);
	brelse(el.el_bh);
	kfree(data);

	return error;
}
316
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
new file mode 100644
index 000000000000..a174b4f6bcc2
--- /dev/null
+++ b/fs/gfs2/acl.h
@@ -0,0 +1,37 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __ACL_DOT_H__
#define __ACL_DOT_H__

/* xattr names for the two POSIX ACL types; the _LEN values are the
   strlen() of the corresponding name. */
#define GFS2_POSIX_ACL_ACCESS		"posix_acl_access"
#define GFS2_POSIX_ACL_ACCESS_LEN	16
#define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
#define GFS2_POSIX_ACL_DEFAULT_LEN	17

/* Nonzero when (name, len) identifies the access ACL xattr. */
#define GFS2_ACL_IS_ACCESS(name, len) \
         ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
         !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))

/* Nonzero when (name, len) identifies the default ACL xattr. */
#define GFS2_ACL_IS_DEFAULT(name, len) \
         ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
         !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))

struct gfs2_ea_request;

/* Validate a setxattr of an ACL; see acl.c for the full contract. */
int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode);
/* Validate that the caller may remove/modify the given ACL type. */
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
/* Permission check via the access ACL; caller holds the inode glock. */
int gfs2_check_acl_locked(struct inode *inode, int mask);
/* Permission check that acquires the inode glock itself. */
int gfs2_check_acl(struct inode *inode, int mask);
/* Inherit ACLs from @dip onto the newly created @ip. */
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
/* Fold a chmod into the inode's access ACL. */
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);

#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bits.c b/fs/gfs2/bits.c
new file mode 100644
index 000000000000..49585e3de095
--- /dev/null
+++ b/fs/gfs2/bits.c
@@ -0,0 +1,182 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * These routines are used by the resource group routines (rgrp.c)
12 * to keep track of block allocation. Each block is represented by two
13 * bits. One bit indicates whether or not the block is used. (1=used,
14 * 0=free) The other bit indicates whether or not the block contains a
15 * dinode or not. (1=dinode, 0=not-dinode) So, each byte represents
16 * GFS2_NBBY (i.e. 4) blocks.
17 */
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/spinlock.h>
22#include <linux/completion.h>
23#include <linux/buffer_head.h>
24#include <linux/gfs2_ondisk.h>
25#include <asm/semaphore.h>
26
27#include "gfs2.h"
28#include "lm_interface.h"
29#include "incore.h"
30#include "bits.h"
31#include "util.h"
32
/*
 * Legal block-state transitions, indexed valid_change[new_state * 4 + cur].
 * A nonzero entry means a bitmap pair may move from the current state
 * (column) to the new state (row); gfs2_setbit() treats a zero entry as
 * filesystem corruption.  States are the 2-bit on-disk GFS2_BLKST_*
 * values (presumably 0=free .. 3=dinode — see gfs2_ondisk.h to confirm).
 */
static const char valid_change[16] = {
	        /* current */
	/* n */ 0, 1, 0, 1,
	/* e */ 1, 0, 0, 0,
	/* w */ 0, 0, 0, 0,
	        1, 0, 0, 0
};
40
/**
 * gfs2_setbit - Set the state of a block in a bitmap buffer
 * @rgd: the resource group the bitmap belongs to (for error reporting)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @block: the block to set, as an index relative to the start of @buffer
 * @new_state: the new 2-bit state of the block
 *
 * If the transition from the current state to @new_state is not allowed
 * by valid_change[], the resource group is marked inconsistent and the
 * bitmap is left unchanged.
 */

void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		 unsigned int buflen, uint32_t block, unsigned char new_state)
{
	unsigned char *byte, *end, cur_state;
	unsigned int bit;

	/* Each byte holds GFS2_NBBY (4) two-bit block states. */
	byte = buffer + (block / GFS2_NBBY);
	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
	end = buffer + buflen;

	gfs2_assert(rgd->rd_sbd, byte < end);

	cur_state = (*byte >> bit) & GFS2_BIT_MASK;

	if (valid_change[new_state * 4 + cur_state]) {
		/* XOR clears the old pair, OR writes the new one. */
		*byte ^= cur_state << bit;
		*byte |= new_state << bit;
	} else
		gfs2_consist_rgrpd(rgd);
}
70
71/**
72 * gfs2_testbit - test a bit in the bitmaps
73 * @buffer: the buffer that holds the bitmaps
74 * @buflen: the length (in bytes) of the buffer
75 * @block: the block to read
76 *
77 */
78
79unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
80 unsigned int buflen, uint32_t block)
81{
82 unsigned char *byte, *end, cur_state;
83 unsigned int bit;
84
85 byte = buffer + (block / GFS2_NBBY);
86 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
87 end = buffer + buflen;
88
89 gfs2_assert(rgd->rd_sbd, byte < end);
90
91 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
92
93 return cur_state;
94}
95
/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 * a block in a given allocation state.
 * @rgd: the resource group the bitmap belongs to (currently unused here)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @goal: start search at this block's bit-pair (within @buffer)
 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
 *
 * Scope of @goal and returned block number is only within this bitmap buffer,
 * not entire rgrp or filesystem.  @buffer will be offset from the actual
 * beginning of a bitmap block buffer, skipping any header structures.
 *
 * Returns: the block number (bitmap buffer scope) that was found, or
 * BFITNOENT if no block in @old_state exists at or after @goal
 */

uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		     unsigned int buflen, uint32_t goal,
		     unsigned char old_state)
{
	unsigned char *byte, *end, alloc;
	uint32_t blk = goal;
	unsigned int bit;

	byte = buffer + (goal / GFS2_NBBY);
	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
	end = buffer + buflen;
	/*
	 * Fast-path mask: 0x55 selects the alloc bit of all four pairs in a
	 * byte.  When searching for an allocated state (odd old_state), a
	 * byte with no alloc bits set (== 0) cannot match; when searching
	 * for a free state, a byte with all alloc bits set (== 0x55) cannot
	 * match.  Such bytes are skipped whole.
	 */
	alloc = (old_state & 1) ? 0 : 0x55;

	while (byte < end) {
		if ((*byte & 0x55) == alloc) {
			/* Skip the remaining pairs in this byte. */
			blk += (8 - bit) >> 1;

			bit = 0;
			byte++;

			continue;
		}

		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
			return blk;

		bit += GFS2_BIT_SIZE;
		if (bit >= 8) {
			bit = 0;
			byte++;
		}

		blk++;
	}

	return BFITNOENT;
}
149
/**
 * gfs2_bitcount - count the number of bits in a certain state
 * @rgd: the resource group the bitmap belongs to (unused)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the 2-bit state of the blocks we're looking for
 *
 * Every byte packs four 2-bit block states; compare each pair against
 * @state through a per-position mask.
 *
 * Returns: The number of bits
 */

uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		       unsigned int buflen, unsigned char state)
{
	static const unsigned char masks[4] = { 0x03, 0x0C, 0x30, 0xC0 };
	const unsigned char wanted[4] = {
		state,
		(unsigned char)(state << 2),
		(unsigned char)(state << 4),
		(unsigned char)(state << 6),
	};
	uint32_t found = 0;
	unsigned int i, pair;

	for (i = 0; i < buflen; i++)
		for (pair = 0; pair < 4; pair++)
			if ((buffer[i] & masks[pair]) == wanted[pair])
				found++;

	return found;
}
182
diff --git a/fs/gfs2/bits.h b/fs/gfs2/bits.h
new file mode 100644
index 000000000000..36ccbdcb1eef
--- /dev/null
+++ b/fs/gfs2/bits.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __BITS_DOT_H__
11#define __BITS_DOT_H__
12
13#define BFITNOENT 0xFFFFFFFF
14
15void gfs2_setbit(struct gfs2_rgrpd *rgd,
16 unsigned char *buffer, unsigned int buflen,
17 uint32_t block, unsigned char new_state);
18unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
19 unsigned char *buffer, unsigned int buflen,
20 uint32_t block);
21uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd,
22 unsigned char *buffer, unsigned int buflen,
23 uint32_t goal, unsigned char old_state);
24uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd,
25 unsigned char *buffer, unsigned int buflen,
26 unsigned char state);
27
28#endif /* __BITS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
new file mode 100644
index 000000000000..c7723119acb6
--- /dev/null
+++ b/fs/gfs2/bmap.c
@@ -0,0 +1,1098 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "glock.h"
24#include "inode.h"
25#include "meta_io.h"
26#include "page.h"
27#include "quota.h"
28#include "rgrp.h"
29#include "trans.h"
30#include "dir.h"
31#include "util.h"
32
/* Path through the metadata tree: one pointer index per tree level.
 * This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that.  It saves stack space to
 * keep it small.
 */
struct metapath {
	__u16 mp_list[GFS2_MAX_META_HEIGHT];	/* index at each height */
};

/* Callback applied to a run of metadata pointers [top, bottom) in @bh at
 * a given tree @height; @dibh is the dinode buffer, @data is caller state. */
typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
			     struct buffer_head *bh, uint64_t *top,
			     uint64_t *bottom, unsigned int height,
			     void *data);

/* State for truncation's recursive strip of the metadata tree. */
struct strip_mine {
	int sm_first;		/* NOTE(review): appears to flag the first
				   pass of the strip — confirm in rgrp.c */
	unsigned int sm_height;	/* tree height currently being stripped */
};
50
/**
 * gfs2_unstuffer_sync - Synchronously unstuff a dinode
 * @ip: the inode being unstuffed
 * @dibh: the dinode's buffer
 * @block: the freshly allocated disk block that receives the data
 * @private: unused; present to match the gfs2_unstuffer_t signature
 *
 * Cheat and use a metadata buffer instead of a data page.
 *
 * Returns: errno
 */

int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
			uint64_t block, void *private)
{
	struct buffer_head *bh;
	int error;

	bh = gfs2_meta_new(ip->i_gl, block);

	/* Copy everything after the dinode header into the new block. */
	gfs2_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs2_dinode));

	/* Write synchronously; the caller needs the data on disk before
	   the dinode is rewritten to point at it. */
	set_buffer_dirty(bh);
	error = sync_dirty_buffer(bh);

	brelse(bh);

	return error;
}
80
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @unstuffer: the routine that handles unstuffing a non-zero length file
 * @private: private data for the unstuffer
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.  Directory data is
 * moved via the metadata path; regular-file data goes through @unstuffer.
 *
 * Returns: errno
 */

int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
			void *private)
{
	struct buffer_head *bh, *dibh;
	uint64_t block = 0;
	int isdir = gfs2_is_dir(ip);
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (ip->i_di.di_size) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

		if (isdir) {
			block = gfs2_alloc_meta(ip);

			error = gfs2_dir_get_buffer(ip, block, 1, &bh);
			if (error)
				goto out_brelse;
			/* Directory data carries a meta header in the new
			   block; skip past it when copying. */
			gfs2_buffer_copy_tail(bh,
					      sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			block = gfs2_alloc_data(ip);

			error = unstuffer(ip, dibh, block, private);
			if (error)
				goto out_brelse;
		}
	}

	/* Set up the pointer to the new block */

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);

	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (ip->i_di.di_size) {
		/* First (and only) pointer slot right after the header. */
		*(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) =
			cpu_to_be64(block);
		ip->i_di.di_blocks++;
	}

	ip->i_di.di_height = 1;

	gfs2_dinode_out(&ip->i_di, dibh->b_data);

out_brelse:
	brelse(dibh);

out:
	up_write(&ip->i_rw_mutex);

	return error;
}
154
155/**
156 * calc_tree_height - Calculate the height of a metadata tree
157 * @ip: The GFS2 inode
158 * @size: The proposed size of the file
159 *
160 * Work out how tall a metadata tree needs to be in order to accommodate a
161 * file of a particular size. If size is less than the current size of
162 * the inode, then the current size of the inode is used instead of the
163 * supplied one.
164 *
165 * Returns: the height the tree should be
166 */
167
168static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
169{
170 struct gfs2_sbd *sdp = ip->i_sbd;
171 uint64_t *arr;
172 unsigned int max, height;
173
174 if (ip->i_di.di_size > size)
175 size = ip->i_di.di_size;
176
177 if (gfs2_is_dir(ip)) {
178 arr = sdp->sd_jheightsize;
179 max = sdp->sd_max_jheight;
180 } else {
181 arr = sdp->sd_heightsize;
182 max = sdp->sd_max_height;
183 }
184
185 for (height = 0; height < max; height++)
186 if (arr[height] >= size)
187 break;
188
189 return height;
190}
191
/**
 * build_height - Build a metadata tree of the requested height
 * @ip: The GFS2 inode
 * @height: The height to build to
 *
 * This routine makes sure that the metadata tree is tall enough to hold
 * "size" bytes of data.  One level is added per loop iteration: if the
 * dinode currently holds any non-zero pointers, they are pushed down into
 * a newly allocated indirect block which becomes the dinode's single child.
 *
 * Returns: errno
 */

static int build_height(struct gfs2_inode *ip, int height)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *bh, *dibh;
	uint64_t block = 0, *bp;
	unsigned int x;
	int new_block;
	int error;

	while (ip->i_di.di_height < height) {
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error)
			return error;

		/* Only allocate an indirect block if the dinode actually
		   has pointers to push down. */
		new_block = 0;
		bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
		for (x = 0; x < sdp->sd_diptrs; x++, bp++)
			if (*bp) {
				new_block = 1;
				break;
			}

		if (new_block) {
			/* Get a new block, fill it with the old direct
			   pointers, and write it out */

			block = gfs2_alloc_meta(ip);

			bh = gfs2_meta_new(ip->i_gl, block);
			gfs2_trans_add_bh(ip->i_gl, bh, 1);
			gfs2_metatype_set(bh,
					  GFS2_METATYPE_IN,
					  GFS2_FORMAT_IN);
			gfs2_buffer_copy_tail(bh,
					      sizeof(struct gfs2_meta_header),
					      dibh, sizeof(struct gfs2_dinode));

			brelse(bh);
		}

		/* Set up the new direct pointer and write it out to disk */

		gfs2_trans_add_bh(ip->i_gl, dibh, 1);

		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

		if (new_block) {
			*(uint64_t *)(dibh->b_data +
				      sizeof(struct gfs2_dinode)) =
				cpu_to_be64(block);
			ip->i_di.di_blocks++;
		}

		ip->i_di.di_height++;

		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	return 0;
}
264
/**
 * find_metapath - Find path through the metadata tree
 * @ip: The inode pointer
 * @block: The logical (file-relative) block to look up
 * @mp: The metapath to return the result in
 *
 * This routine fills in a struct metapath defining a path through the
 * metadata of inode "ip" to get to block "block": mp_list[h] is the
 * pointer index to follow at tree height h.
 *
 * Example:
 * Given: "ip" is a height 3 file, "block" corresponds to byte offset
 * 101342453, and this is a filesystem with a blocksize of 4096.
 *
 * find_metapath() would return a struct metapath with
 * mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 * That means that in order to get to the block containing the byte at
 * offset 101342453, we would load the indirect block pointed to by pointer
 * 0 in the dinode.  We would then load the indirect block pointed to by
 * pointer 48 in that indirect block.  We would then load the data block
 * pointed to by pointer 165 in that indirect block.
 *
 *            ----------------------------------------
 *            | Dinode |                             |
 *            |        |                            4|
 *            |        |0 1 2 3 4 5                 9|
 *            |        |                            6|
 *            ----------------------------------------
 *                       |
 *                       |
 *                       V
 *            ----------------------------------------
 *            | Indirect Block                       |
 *            |                                     5|
 *            |            4 4 4 4 4 5 5            1|
 *            |0           5 6 7 8 9 0 1            2|
 *            ----------------------------------------
 *                                 |
 *                                 |
 *                                 V
 *            ----------------------------------------
 *            | Indirect Block                       |
 *            |                 1 1 1 1 1           5|
 *            |                 6 6 6 6 6           1|
 *            |0                3 4 5 6 7           2|
 *            ----------------------------------------
 *                                         |
 *                                         |
 *                                         V
 *            ----------------------------------------
 *            | Data block containing offset         |
 *            |            101342453                 |
 *            |                                      |
 *            |                                      |
 *            ----------------------------------------
 *
 */

static void find_metapath(struct gfs2_inode *ip, uint64_t block,
			  struct metapath *mp)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	uint64_t b = block;
	unsigned int i;

	/* Peel off the least-significant "digit" (base sd_inptrs) for the
	   deepest level first; do_div() both divides b and yields the
	   remainder. */
	for (i = ip->i_di.di_height; i--;)
		mp->mp_list[i] = (__u16)do_div(b, sdp->sd_inptrs);

}
335
336/**
337 * metapointer - Return pointer to start of metadata in a buffer
338 * @bh: The buffer
339 * @height: The metadata height (0 = dinode)
340 * @mp: The metapath
341 *
342 * Return a pointer to the block number of the next height of the metadata
343 * tree given a buffer containing the pointer to the current height of the
344 * metadata tree.
345 */
346
347static inline uint64_t *metapointer(struct buffer_head *bh,
348 unsigned int height, struct metapath *mp)
349{
350 unsigned int head_size = (height > 0) ?
351 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
352
353 return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height];
354}
355
/**
 * lookup_block - Get the next metadata block in metadata tree
 * @ip: The GFS2 inode
 * @bh: Buffer containing the pointers to metadata blocks
 * @height: The height of the tree (0 = dinode)
 * @mp: The metapath
 * @create: Non-zero if we may create a new metadata block
 * @new: Set to 1 if a new block was allocated here
 * @block: the returned disk block number (0 if absent and !@create)
 *
 * Given a metatree, complete to a particular height, checks to see if the
 * next height of the tree exists.  If not (and @create is set) the next
 * height of the tree is created: a data block when this is the last level
 * of a regular file, a metadata block otherwise.  The pointer in @bh is
 * updated in-place within the current transaction.
 */

static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
			 unsigned int height, struct metapath *mp, int create,
			 int *new, uint64_t *block)
{
	uint64_t *ptr = metapointer(bh, height, mp);

	if (*ptr) {
		*block = be64_to_cpu(*ptr);
		return;
	}

	*block = 0;

	if (!create)
		return;

	/* Bottom level of a regular file holds data; everything else
	   (including all directory levels) is metadata. */
	if (height == ip->i_di.di_height - 1 &&
	    !gfs2_is_dir(ip))
		*block = gfs2_alloc_data(ip);
	else
		*block = gfs2_alloc_meta(ip);

	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	*ptr = cpu_to_be64(*block);
	ip->i_di.di_blocks++;

	*new = 1;
}
401
/**
 * gfs2_block_map - Map a block from an inode to a disk block
 * @ip: The GFS2 inode
 * @lblock: The logical block number
 * @new: Value/Result argument (1 = may create/did create new blocks)
 * @dblock: the disk block number of the start of an extent
 * @extlen: if non-NULL, receives the size (in blocks) of the contiguous
 *          extent starting at *dblock
 *
 * Find the block number on the current device which corresponds to an
 * inode's block.  If the block had to be created, "new" will be set.
 * Takes i_rw_mutex for write when creating, read otherwise.
 *
 * Returns: errno (with *dblock == 0 when the block is unmapped)
 */

int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
		   uint64_t *dblock, uint32_t *extlen)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *bh;
	struct metapath mp;
	int create = *new;
	unsigned int bsize;
	unsigned int height;
	unsigned int end_of_metadata;
	unsigned int x;
	int error = 0;

	*new = 0;
	*dblock = 0;
	if (extlen)
		*extlen = 0;

	if (create)
		down_write(&ip->i_rw_mutex);
	else
		down_read(&ip->i_rw_mutex);

	/* Stuffed inodes have no metadata tree; callers must unstuff first. */
	if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
		goto out;

	bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;

	/* Grow the tree if the target offset needs more height. */
	height = calc_tree_height(ip, (lblock + 1) * bsize);
	if (ip->i_di.di_height < height) {
		if (!create)
			goto out;

		error = build_height(ip, height);
		if (error)
			goto out;
	}

	find_metapath(ip, lblock, &mp);
	end_of_metadata = ip->i_di.di_height - 1;

	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out;

	/* Walk (or build) the indirect levels down to the last one. */
	for (x = 0; x < end_of_metadata; x++) {
		lookup_block(ip, bh, x, &mp, create, new, dblock);
		brelse(bh);
		if (!*dblock)
			goto out;

		error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh);
		if (error)
			goto out;
	}

	lookup_block(ip, bh, end_of_metadata, &mp, create, new, dblock);

	if (extlen && *dblock) {
		*extlen = 1;

		if (!*new) {
			uint64_t tmp_dblock;
			int tmp_new;
			unsigned int nptrs;

			nptrs = (end_of_metadata) ? sdp->sd_inptrs :
						    sdp->sd_diptrs;

			/* Extend the extent while following pointers in the
			   same indirect block map to consecutive disk
			   blocks. */
			while (++mp.mp_list[end_of_metadata] < nptrs) {
				lookup_block(ip, bh, end_of_metadata, &mp,
					     0, &tmp_new, &tmp_dblock);

				if (*dblock + *extlen != tmp_dblock)
					break;

				(*extlen)++;
			}
		}
	}

	brelse(bh);

	/* New blocks changed di_blocks; write the dinode back. */
	if (*new) {
		error = gfs2_meta_inode_buffer(ip, &bh);
		if (!error) {
			gfs2_trans_add_bh(ip->i_gl, bh, 1);
			gfs2_dinode_out(&ip->i_di, bh->b_data);
			brelse(bh);
		}
	}

out:
	if (create)
		up_write(&ip->i_rw_mutex);
	else
		up_read(&ip->i_rw_mutex);

	return error;
}
516
/**
 * recursive_scan - recursively scan through the end of a file
 * @ip: the inode
 * @dibh: the dinode buffer
 * @mp: the path through the metadata to the point to start
 * @height: the height the recursion is at
 * @block: the indirect block to look at
 * @first: 1 if this is the first block
 * @bc: the call to make for each piece of metadata
 * @data: data opaque to this function to pass to @bc
 *
 * When this is first called @height and @block should be zero and
 * @first should be 1.
 *
 * Returns: errno
 */

static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
			  struct metapath *mp, unsigned int height,
			  uint64_t block, int first, block_call_t bc,
			  void *data)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *bh = NULL;
	uint64_t *top, *bottom;
	uint64_t bn;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (!height) {
		/* Height 0: the pointers live in the dinode itself.  Read it
		   here and pass it down the recursion as @dibh. */
		error = gfs2_meta_inode_buffer(ip, &bh);
		if (error)
			return error;
		dibh = bh;

		top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
			mp->mp_list[0];
		bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
			sdp->sd_diptrs;
	} else {
		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
		if (error)
			return error;

		/* Only the leftmost branch of the walk starts at the
		   metapath offset; all later siblings start at pointer 0. */
		top = (uint64_t *)(bh->b_data + mh_size) +
			((first) ? mp->mp_list[height] : 0);

		bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs;
	}

	/* Visit this buffer's pointer range before descending. */
	error = bc(ip, dibh, bh, top, bottom, height, data);
	if (error)
		goto out;

	/* Recurse into each non-null pointer unless we are already at the
	   last level of metadata. */
	if (height < ip->i_di.di_height - 1)
		for (; top < bottom; top++, first = 0) {
			if (!*top)
				continue;

			bn = be64_to_cpu(*top);

			error = recursive_scan(ip, dibh, mp, height + 1, bn,
					       first, bc, data);
			if (error)
				break;
		}

 out:
	brelse(bh);

	return error;
}
589
/**
 * do_strip - Strip off (deallocate) one particular layer of the file
 * @ip: the inode
 * @dibh: the dinode buffer
 * @bh: A buffer of pointers
 * @top: The first pointer in the buffer
 * @bottom: One more than the last pointer
 * @height: the height this buffer is at
 * @data: a pointer to a struct strip_mine
 *
 * Callback for recursive_scan(): frees every block referenced from
 * [@top, @bottom) when @height matches the layer being stripped.
 *
 * Returns: errno
 */

static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
		    struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
		    unsigned int height, void *data)
{
	struct strip_mine *sm = (struct strip_mine *)data;
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_rgrp_list rlist;
	uint64_t bn, bstart;
	uint32_t blen;
	uint64_t *p;
	unsigned int rg_blocks = 0;
	int metadata;
	unsigned int revokes = 0;
	int x;
	int error;

	if (!*top)
		sm->sm_first = 0;

	/* recursive_scan() calls us at every layer; only act on ours. */
	if (height != sm->sm_height)
		return 0;

	/* The very first pointer of the range is kept (it covers the new
	   end of file), so skip past it. */
	if (sm->sm_first) {
		top++;
		sm->sm_first = 0;
	}

	/* Blocks above the bottom layer are metadata; freeing metadata
	   requires journal revokes for each possible pointer. */
	metadata = (height != ip->i_di.di_height - 1);
	if (metadata)
		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;

	error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
	if (error)
		return error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
	bstart = 0;
	blen = 0;

	/* First pass: coalesce runs of contiguous blocks and collect the
	   set of resource groups they belong to (no locks held yet). */
	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(sdp, &rlist, bstart);

			bstart = bn;
			blen = 1;
		}
	}

	if (bstart)
		gfs2_rlist_add(sdp, &rlist, bstart);
	else
		goto out; /* Nothing to do */

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);

	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_ri.ri_length;
	}

	/* Lock all affected resource groups, then open the transaction. */
	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist;

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
				 revokes);
	if (error)
		goto out_rg_gunlock;

	down_write(&ip->i_rw_mutex);

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	bstart = 0;
	blen = 0;

	/* Second pass: actually free the runs and zero out the pointers. */
	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart) {
				if (metadata)
					gfs2_free_meta(ip, bstart, blen);
				else
					gfs2_free_data(ip, bstart, blen);
			}

			bstart = bn;
			blen = 1;
		}

		*p = 0;
		if (!ip->i_di.di_blocks)
			gfs2_consist_inode(ip);
		ip->i_di.di_blocks--;
	}
	if (bstart) {
		if (metadata)
			gfs2_free_meta(ip, bstart, blen);
		else
			gfs2_free_data(ip, bstart, blen);
	}

	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();

	gfs2_dinode_out(&ip->i_di, dibh->b_data);

	up_write(&ip->i_rw_mutex);

	gfs2_trans_end(sdp);

 out_rg_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);

 out_rlist:
	gfs2_rlist_free(&rlist);

 out:
	gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);

	return error;
}
741
/**
 * do_grow - Make a file look bigger than it is
 * @ip: the inode
 * @size: the size to set the file to
 *
 * Grows di_size without writing data blocks; builds additional tree
 * height and unstuffs the dinode if @size requires it.
 *
 * Called with an exclusive lock on @ip.
 *
 * Returns: errno
 */

static int do_grow(struct gfs2_inode *ip, uint64_t size)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al;
	struct buffer_head *dibh;
	unsigned int h;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
	if (error)
		goto out_gunlock_q;

	/* Worst case: one new block per level of the tree plus data. */
	al->al_requested = sdp->sd_max_height + RES_DATA;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(sdp,
			sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	/* If the new size no longer fits in the dinode, unstuff and make
	   sure the metadata tree is tall enough to address it. */
	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
		if (gfs2_is_stuffed(ip)) {
			error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
						    NULL);
			if (error)
				goto out_end_trans;
		}

		h = calc_tree_height(ip, size);
		if (ip->i_di.di_height < h) {
			down_write(&ip->i_rw_mutex);
			error = build_height(ip, h);
			up_write(&ip->i_rw_mutex);
			if (error)
				goto out_end_trans;
		}
	}

	ip->i_di.di_size = size;
	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out_end_trans;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

 out_end_trans:
	gfs2_trans_end(sdp);

 out_ipres:
	gfs2_inplace_release(ip);

 out_gunlock_q:
	gfs2_quota_unlock(ip);

 out:
	gfs2_alloc_put(ip);

	return error;
}
825
/**
 * trunc_start - begin a shrinking truncate
 * @ip: the inode
 * @size: the new (smaller) size
 *
 * Updates di_size and, for unstuffed files, zeroes the tail of the last
 * partial block and sets GFS2_DIF_TRUNC_IN_PROG so an interrupted
 * truncate can be resumed.
 *
 * Returns: 1 if the file was stuffed (truncation is complete),
 *          0 if deallocation must follow, or a negative errno.
 */

static int trunc_start(struct gfs2_inode *ip, uint64_t size)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *dibh;
	int journaled = gfs2_is_jdata(ip);
	int error;

	error = gfs2_trans_begin(sdp,
				 RES_DINODE + ((journaled) ? RES_JDATA : 0), 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (gfs2_is_stuffed(ip)) {
		/* Stuffed file: clear the dropped tail in the dinode and
		   report completion via the positive return value. */
		ip->i_di.di_size = size;
		ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
		error = 1;

	} else {
		/* Zero the tail of a partial final block, if any. */
		if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
			error = gfs2_block_truncate_page(ip->i_vnode->i_mapping);

		if (!error) {
			ip->i_di.di_size = size;
			ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			gfs2_dinode_out(&ip->i_di, dibh->b_data);
		}
	}

	brelse(dibh);

 out:
	gfs2_trans_end(sdp);

	return error;
}
870
871static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size)
872{
873 unsigned int height = ip->i_di.di_height;
874 uint64_t lblock;
875 struct metapath mp;
876 int error;
877
878 if (!size)
879 lblock = 0;
880 else
881 lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
882
883 find_metapath(ip, lblock, &mp);
884 gfs2_alloc_get(ip);
885
886 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
887 if (error)
888 goto out;
889
890 while (height--) {
891 struct strip_mine sm;
892 sm.sm_first = !!size;
893 sm.sm_height = height;
894
895 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
896 if (error)
897 break;
898 }
899
900 gfs2_quota_unhold(ip);
901
902 out:
903 gfs2_alloc_put(ip);
904 return error;
905}
906
/**
 * trunc_end - finish a truncate
 * @ip: the inode
 *
 * Clears GFS2_DIF_TRUNC_IN_PROG and, if the file is now empty, resets
 * the tree height and allocation goals and clears the dinode tail.
 *
 * Returns: errno
 */

static int trunc_end(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!ip->i_di.di_size) {
		/* Empty file: collapse the tree and point future
		   allocations back at the dinode's own block. */
		ip->i_di.di_height = 0;
		ip->i_di.di_goal_meta =
			ip->i_di.di_goal_data =
			ip->i_num.no_addr;
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	}
	ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

 out:
	up_write(&ip->i_rw_mutex);

	gfs2_trans_end(sdp);

	return error;
}
944
945/**
946 * do_shrink - make a file smaller
947 * @ip: the inode
948 * @size: the size to make the file
949 * @truncator: function to truncate the last partial block
950 *
951 * Called with an exclusive lock on @ip.
952 *
953 * Returns: errno
954 */
955
956static int do_shrink(struct gfs2_inode *ip, uint64_t size)
957{
958 int error;
959
960 error = trunc_start(ip, size);
961 if (error < 0)
962 return error;
963 if (error > 0)
964 return 0;
965
966 error = trunc_dealloc(ip, size);
967 if (!error)
968 error = trunc_end(ip);
969
970 return error;
971}
972
973/**
974 * gfs2_truncatei - make a file a given size
975 * @ip: the inode
976 * @size: the size to make the file
977 * @truncator: function to truncate the last partial block
978 *
979 * The file size can grow, shrink, or stay the same size.
980 *
981 * Returns: errno
982 */
983
984int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size)
985{
986 int error;
987
988 if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode)))
989 return -EINVAL;
990
991 if (size > ip->i_di.di_size)
992 error = do_grow(ip, size);
993 else
994 error = do_shrink(ip, size);
995
996 return error;
997}
998
999int gfs2_truncatei_resume(struct gfs2_inode *ip)
1000{
1001 int error;
1002 error = trunc_dealloc(ip, ip->i_di.di_size);
1003 if (!error)
1004 error = trunc_end(ip);
1005 return error;
1006}
1007
/**
 * gfs2_file_dealloc - free all of an inode's data and metadata blocks
 * @ip: the inode
 *
 * Returns: errno
 */

int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	/* Deallocating to size zero frees every block the inode owns. */
	return trunc_dealloc(ip, 0);
}
1012
1013/**
1014 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1015 * @ip: the file
1016 * @len: the number of bytes to be written to the file
1017 * @data_blocks: returns the number of data blocks required
1018 * @ind_blocks: returns the number of indirect blocks required
1019 *
1020 */
1021
1022void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1023 unsigned int *data_blocks, unsigned int *ind_blocks)
1024{
1025 struct gfs2_sbd *sdp = ip->i_sbd;
1026 unsigned int tmp;
1027
1028 if (gfs2_is_dir(ip)) {
1029 *data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
1030 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1031 } else {
1032 *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1033 *ind_blocks = 3 * (sdp->sd_max_height - 1);
1034 }
1035
1036 for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1037 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
1038 *ind_blocks += tmp;
1039 }
1040}
1041
1042/**
1043 * gfs2_write_alloc_required - figure out if a write will require an allocation
1044 * @ip: the file being written to
1045 * @offset: the offset to write to
1046 * @len: the number of bytes being written
1047 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1048 *
1049 * Returns: errno
1050 */
1051
1052int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
1053 unsigned int len, int *alloc_required)
1054{
1055 struct gfs2_sbd *sdp = ip->i_sbd;
1056 uint64_t lblock, lblock_stop, dblock;
1057 uint32_t extlen;
1058 int new = 0;
1059 int error = 0;
1060
1061 *alloc_required = 0;
1062
1063 if (!len)
1064 return 0;
1065
1066 if (gfs2_is_stuffed(ip)) {
1067 if (offset + len >
1068 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1069 *alloc_required = 1;
1070 return 0;
1071 }
1072
1073 if (gfs2_is_dir(ip)) {
1074 unsigned int bsize = sdp->sd_jbsize;
1075 lblock = offset;
1076 do_div(lblock, bsize);
1077 lblock_stop = offset + len + bsize - 1;
1078 do_div(lblock_stop, bsize);
1079 } else {
1080 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1081 lblock = offset >> shift;
1082 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1083 }
1084
1085 for (; lblock < lblock_stop; lblock += extlen) {
1086 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
1087 if (error)
1088 return error;
1089
1090 if (!dblock) {
1091 *alloc_required = 1;
1092 return 0;
1093 }
1094 }
1095
1096 return 0;
1097}
1098
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
new file mode 100644
index 000000000000..ee9ec8d7515c
--- /dev/null
+++ b/fs/gfs2/bmap.h
@@ -0,0 +1,35 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __BMAP_DOT_H__
#define __BMAP_DOT_H__

/* Callback invoked when a stuffed dinode is expanded; it copies the
   inline data into the newly allocated block. */
typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip,
				 struct buffer_head * dibh, uint64_t block,
				 void *private);
int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
			uint64_t block, void *private);
int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
			void *private);

/* Map a logical block to a disk block; *dblock == 0 means a hole. */
int gfs2_block_map(struct gfs2_inode *ip,
		   uint64_t lblock, int *new,
		   uint64_t *dblock, uint32_t *extlen);

/* Truncation: resize, resume an interrupted truncate, free everything. */
int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size);
int gfs2_truncatei_resume(struct gfs2_inode *ip);
int gfs2_file_dealloc(struct gfs2_inode *ip);

/* Worst-case block-count estimates used for write reservations. */
void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
			    unsigned int *data_blocks,
			    unsigned int *ind_blocks);
int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
			      unsigned int len, int *alloc_required);

#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
new file mode 100644
index 000000000000..94317dc7e42c
--- /dev/null
+++ b/fs/gfs2/daemon.c
@@ -0,0 +1,229 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/delay.h>
17#include <linux/gfs2_ondisk.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "daemon.h"
24#include "glock.h"
25#include "log.h"
26#include "quota.h"
27#include "recovery.h"
28#include "super.h"
29#include "unlinked.h"
30#include "util.h"
31
32/* This uses schedule_timeout() instead of msleep() because it's good for
33 the daemons to wake up more often than the timeout when unmounting so
34 the user's unmount doesn't sit there forever.
35
36 The kthread functions used to start these daemons block and flush signals. */
37
38/**
39 * gfs2_scand - Look for cached glocks and inodes to toss from memory
40 * @sdp: Pointer to GFS2 superblock
41 *
42 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
43 * See gfs2_glockd()
44 */
45
46int gfs2_scand(void *data)
47{
48 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
49 unsigned long t;
50
51 while (!kthread_should_stop()) {
52 gfs2_scand_internal(sdp);
53 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
54 schedule_timeout_interruptible(t);
55 }
56
57 return 0;
58}
59
/**
 * gfs2_glockd - Reclaim unused glock structures
 * @data: Pointer to GFS2 superblock
 *
 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
 * Number of daemons can be set by user, with num_glockd mount option.
 */

int gfs2_glockd(void *data)
{
	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
	DECLARE_WAITQUEUE(wait_chan, current);

	while (!kthread_should_stop()) {
		/* Drain the reclaim list completely... */
		while (atomic_read(&sdp->sd_reclaim_count))
			gfs2_reclaim_glock(sdp);

		/* ...then sleep on sd_reclaim_wq until more work arrives.
		   The state is set before the final re-check so a wakeup
		   between the check and schedule() is not lost. */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
		if (!atomic_read(&sdp->sd_reclaim_count) &&
		    !kthread_should_stop())
			schedule();
		remove_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
		set_current_state(TASK_RUNNING);
	}

	return 0;
}
88
89/**
90 * gfs2_recoverd - Recover dead machine's journals
91 * @sdp: Pointer to GFS2 superblock
92 *
93 */
94
95int gfs2_recoverd(void *data)
96{
97 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
98 unsigned long t;
99
100 while (!kthread_should_stop()) {
101 gfs2_check_journals(sdp);
102 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
103 schedule_timeout_interruptible(t);
104 }
105
106 return 0;
107}
108
/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @data: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */

int gfs2_logd(void *data)
{
	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
	struct gfs2_holder ji_gh;
	unsigned long t;

	while (!kthread_should_stop()) {
		/* Advance the log tail */

		t = sdp->sd_log_flush_time +
		    gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;

		gfs2_ail1_empty(sdp, DIO_ALL);

		/* Flush the log only once per gt_log_flush_secs interval. */
		if (time_after_eq(jiffies, t)) {
			gfs2_log_flush(sdp);
			sdp->sd_log_flush_time = jiffies;
		}

		/* Check for latest journal index */

		t = sdp->sd_jindex_refresh_time +
		    gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;

		if (time_after_eq(jiffies, t)) {
			/* Taking and dropping the jindex glock forces a
			   re-read of the journal index if it changed. */
			if (!gfs2_jindex_hold(sdp, &ji_gh))
				gfs2_glock_dq_uninit(&ji_gh);
			sdp->sd_jindex_refresh_time = jiffies;
		}

		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
		schedule_timeout_interruptible(t);
	}

	return 0;
}
153
/**
 * gfs2_quotad - Write cached quota changes into the quota file
 * @data: Pointer to GFS2 superblock
 *
 * Also periodically syncs the master statfs file and scans for stale
 * quota structures to reclaim.
 */

int gfs2_quotad(void *data)
{
	struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
	unsigned long t;
	int error;

	while (!kthread_should_stop()) {
		/* Update the master statfs file */

		t = sdp->sd_statfs_sync_time +
		    gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;

		if (time_after_eq(jiffies, t)) {
			error = gfs2_statfs_sync(sdp);
			/* -EROFS and shutdown are expected; don't spam
			   the log for those. */
			if (error &&
			    error != -EROFS &&
			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
				fs_err(sdp, "quotad: (1) error=%d\n", error);
			sdp->sd_statfs_sync_time = jiffies;
		}

		/* Update quota file */

		t = sdp->sd_quota_sync_time +
		    gfs2_tune_get(sdp, gt_quota_quantum) * HZ;

		if (time_after_eq(jiffies, t)) {
			error = gfs2_quota_sync(sdp);
			if (error &&
			    error != -EROFS &&
			    !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
				fs_err(sdp, "quotad: (2) error=%d\n", error);
			sdp->sd_quota_sync_time = jiffies;
		}

		gfs2_quota_scan(sdp);

		t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
		schedule_timeout_interruptible(t);
	}

	return 0;
}
203
204/**
205 * gfs2_inoded - Deallocate unlinked inodes
206 * @sdp: Pointer to GFS2 superblock
207 *
208 */
209
210int gfs2_inoded(void *data)
211{
212 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
213 unsigned long t;
214 int error;
215
216 while (!kthread_should_stop()) {
217 error = gfs2_unlinked_dealloc(sdp);
218 if (error &&
219 error != -EROFS &&
220 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
221 fs_err(sdp, "inoded: error = %d\n", error);
222
223 t = gfs2_tune_get(sdp, gt_inoded_secs) * HZ;
224 schedule_timeout_interruptible(t);
225 }
226
227 return 0;
228}
229
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
new file mode 100644
index 000000000000..a27fdeda5fbb
--- /dev/null
+++ b/fs/gfs2/daemon.h
@@ -0,0 +1,20 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__

/* kthread entry points; "data" is a struct gfs2_sbd pointer in each case */
int gfs2_scand(void *data);	/* find cached glocks/inodes to reclaim */
int gfs2_glockd(void *data);	/* reclaim glocks on sd_reclaim_list */
int gfs2_recoverd(void *data);	/* recover dead machines' journals */
int gfs2_logd(void *data);	/* advance the log tail, refresh jindex */
int gfs2_quotad(void *data);	/* sync quota and statfs changes */
int gfs2_inoded(void *data);	/* deallocate unlinked inodes */

#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
new file mode 100644
index 000000000000..66917f2c64aa
--- /dev/null
+++ b/fs/gfs2/dir.c
@@ -0,0 +1,1963 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11* Implements Extendible Hashing as described in:
12* "Extendible Hashing" by Fagin, et al in
13* __ACM Trans. on Database Systems__, Sept 1979.
14*
15*
16* Here's the layout of dirents which is essentially the same as that of ext2
17* within a single block. The field de_name_len is the number of bytes
18* actually required for the name (no null terminator). The field de_rec_len
19* is the number of bytes allocated to the dirent. The offset of the next
20* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21* deleted, the preceding dirent inherits its allocated space, ie
22* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23* by adding de_rec_len to the current dirent, this essentially causes the
24* deleted dirent to get jumped over when iterating through all the dirents.
25*
26* When deleting the first dirent in a block, there is no previous dirent so
27* the field de_ino is set to zero to designate it as deleted. When allocating
28* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30* dirent is allocated. Otherwise it must go through all the 'used' dirents
31* searching for one in which the amount of total space minus the amount of
32* used space will provide enough space for the new dirent.
33*
34* There are two types of blocks in which dirents reside. In a stuffed dinode,
35* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37* beginning of the leaf block. The dirents reside in leaves when
38*
39* dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40*
41* Otherwise, the dirents are "linear", within a single stuffed dinode block.
42*
43* When the dirents are in leaves, the actual contents of the directory file are
44* used as an array of 64-bit block pointers pointing to the leaf blocks. The
45* dirents are NOT in the directory file itself. There can be more than one block
46* pointer in the array that points to the same leaf. In fact, when a directory
47* is first converted from linear to exhash, all of the pointers point to the
48* same leaf.
49*
50* When a leaf is completely full, the size of the hash table can be
51* doubled unless it is already at the maximum size which is hard coded into
52* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53* but never before the maximum hash table size has been reached.
54*/
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/completion.h>
60#include <linux/buffer_head.h>
61#include <linux/sort.h>
62#include <linux/gfs2_ondisk.h>
63#include <linux/crc32.h>
64#include <asm/semaphore.h>
65
66#include "gfs2.h"
67#include "lm_interface.h"
68#include "incore.h"
69#include "dir.h"
70#include "glock.h"
71#include "inode.h"
72#include "meta_io.h"
73#include "quota.h"
74#include "rgrp.h"
75#include "trans.h"
76#include "bmap.h"
77#include "util.h"
78
79#define IS_LEAF 1 /* Hashed (leaf) directory */
80#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
81
82#if 1
83#define gfs2_disk_hash2offset(h) (((uint64_t)(h)) >> 1)
84#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1))
85#else
86#define gfs2_disk_hash2offset(h) (((uint64_t)(h)))
87#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p))))
88#endif
89
90typedef int (*leaf_call_t) (struct gfs2_inode *dip,
91 uint32_t index, uint32_t len, uint64_t leaf_no,
92 void *data);
93
94int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
95 struct buffer_head **bhp)
96{
97 struct buffer_head *bh;
98 int error = 0;
99
100 if (new) {
101 bh = gfs2_meta_new(ip->i_gl, block);
102 gfs2_trans_add_bh(ip->i_gl, bh, 1);
103 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
104 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
105 } else {
106 error = gfs2_meta_read(ip->i_gl, block, DIO_START | DIO_WAIT,
107 &bh);
108 if (error)
109 return error;
110 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
111 brelse(bh);
112 return -EIO;
113 }
114 }
115
116 *bhp = bh;
117 return 0;
118}
119
120
121
122static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
123 unsigned int offset, unsigned int size)
124
125{
126 struct buffer_head *dibh;
127 int error;
128
129 error = gfs2_meta_inode_buffer(ip, &dibh);
130 if (error)
131 return error;
132
133 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
134 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
135 if (ip->i_di.di_size < offset + size)
136 ip->i_di.di_size = offset + size;
137 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
138 gfs2_dinode_out(&ip->i_di, dibh->b_data);
139
140 brelse(dibh);
141
142 return size;
143}
144
145
146
147/**
148 * gfs2_dir_write_data - Write directory information to the inode
149 * @ip: The GFS2 inode
150 * @buf: The buffer containing information to be written
151 * @offset: The file offset to start writing at
152 * @size: The amount of data to write
153 *
154 * Returns: The number of bytes correctly written or error code
155 */
156static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
157 uint64_t offset, unsigned int size)
158{
159 struct gfs2_sbd *sdp = ip->i_sbd;
160 struct buffer_head *dibh;
161 uint64_t lblock, dblock;
162 uint32_t extlen = 0;
163 unsigned int o;
164 int copied = 0;
165 int error = 0;
166
167 if (!size)
168 return 0;
169
170 if (gfs2_is_stuffed(ip) &&
171 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
172 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
173 size);
174
175 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
176 return -EINVAL;
177
178 if (gfs2_is_stuffed(ip)) {
179 error = gfs2_unstuff_dinode(ip, NULL, NULL);
180 if (error)
181 return error;
182 }
183
184 lblock = offset;
185 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
186
187 while (copied < size) {
188 unsigned int amount;
189 struct buffer_head *bh;
190 int new;
191
192 amount = size - copied;
193 if (amount > sdp->sd_sb.sb_bsize - o)
194 amount = sdp->sd_sb.sb_bsize - o;
195
196 if (!extlen) {
197 new = 1;
198 error = gfs2_block_map(ip, lblock, &new, &dblock,
199 &extlen);
200 if (error)
201 goto fail;
202 error = -EIO;
203 if (gfs2_assert_withdraw(sdp, dblock))
204 goto fail;
205 }
206
207 error = gfs2_dir_get_buffer(ip, dblock,
208 (amount == sdp->sd_jbsize) ?
209 1 : new, &bh);
210 if (error)
211 goto fail;
212
213 gfs2_trans_add_bh(ip->i_gl, bh, 1);
214 memcpy(bh->b_data + o, buf, amount);
215 brelse(bh);
216 if (error)
217 goto fail;
218
219 copied += amount;
220 lblock++;
221 dblock++;
222 extlen--;
223
224 o = sizeof(struct gfs2_meta_header);
225 }
226
227out:
228 error = gfs2_meta_inode_buffer(ip, &dibh);
229 if (error)
230 return error;
231
232 if (ip->i_di.di_size < offset + copied)
233 ip->i_di.di_size = offset + copied;
234 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
235
236 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
237 gfs2_dinode_out(&ip->i_di, dibh->b_data);
238 brelse(dibh);
239
240 return copied;
241fail:
242 if (copied)
243 goto out;
244 return error;
245}
246
247static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
248 unsigned int offset, unsigned int size)
249{
250 struct buffer_head *dibh;
251 int error;
252
253 error = gfs2_meta_inode_buffer(ip, &dibh);
254 if (!error) {
255 offset += sizeof(struct gfs2_dinode);
256 memcpy(buf, dibh->b_data + offset, size);
257 brelse(dibh);
258 }
259
260 return (error) ? error : size;
261}
262
263
264/**
265 * gfs2_dir_read_data - Read a data from a directory inode
266 * @ip: The GFS2 Inode
267 * @buf: The buffer to place result into
268 * @offset: File offset to begin jdata_readng from
269 * @size: Amount of data to transfer
270 *
271 * Returns: The amount of data actually copied or the error
272 */
273static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf,
274 uint64_t offset, unsigned int size)
275{
276 struct gfs2_sbd *sdp = ip->i_sbd;
277 uint64_t lblock, dblock;
278 uint32_t extlen = 0;
279 unsigned int o;
280 int copied = 0;
281 int error = 0;
282
283 if (offset >= ip->i_di.di_size)
284 return 0;
285
286 if ((offset + size) > ip->i_di.di_size)
287 size = ip->i_di.di_size - offset;
288
289 if (!size)
290 return 0;
291
292 if (gfs2_is_stuffed(ip))
293 return gfs2_dir_read_stuffed(ip, buf, (unsigned int)offset,
294 size);
295
296 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
297 return -EINVAL;
298
299 lblock = offset;
300 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
301
302 while (copied < size) {
303 unsigned int amount;
304 struct buffer_head *bh;
305 int new;
306
307 amount = size - copied;
308 if (amount > sdp->sd_sb.sb_bsize - o)
309 amount = sdp->sd_sb.sb_bsize - o;
310
311 if (!extlen) {
312 new = 0;
313 error = gfs2_block_map(ip, lblock, &new, &dblock,
314 &extlen);
315 if (error)
316 goto fail;
317 }
318
319 if (extlen > 1)
320 gfs2_meta_ra(ip->i_gl, dblock, extlen);
321
322 if (dblock) {
323 error = gfs2_dir_get_buffer(ip, dblock, new, &bh);
324 if (error)
325 goto fail;
326 dblock++;
327 extlen--;
328 } else
329 bh = NULL;
330
331 memcpy(buf, bh->b_data + o, amount);
332 brelse(bh);
333 if (error)
334 goto fail;
335
336 copied += amount;
337 lblock++;
338
339 o = sizeof(struct gfs2_meta_header);
340 }
341
342 return copied;
343fail:
344 return (copied) ? copied : error;
345}
346
/* Callback type used by gfs2_dirent_scan(): invoked on each dirent in a
   block; a non-zero return stops the scan (the value selects what the
   scan returns — see gfs2_dirent_scan()). */
typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque);
350
351static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
352 const struct qstr *name, int ret)
353{
354 if (dent->de_inum.no_addr != 0 &&
355 be32_to_cpu(dent->de_hash) == name->hash &&
356 be16_to_cpu(dent->de_name_len) == name->len &&
357 memcmp((char *)(dent+1), name->name, name->len) == 0)
358 return ret;
359 return 0;
360}
361
/* Scan callback: stop on the matching entry itself (scan result 1). */
static int gfs2_dirent_find(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	return __gfs2_dirent_find(dent, name, 1);
}
368
/* Scan callback: stop on a match but have the scan return the entry
   BEFORE it (scan result 2), as needed for deletion. */
static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
			    const struct qstr *name,
			    void *opaque)
{
	return __gfs2_dirent_find(dent, name, 2);
}
375
376/*
377 * name->name holds ptr to start of block.
378 * name->len holds size of block.
379 */
380static int gfs2_dirent_last(const struct gfs2_dirent *dent,
381 const struct qstr *name,
382 void *opaque)
383{
384 const char *start = name->name;
385 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
386 if (name->len == (end - start))
387 return 1;
388 return 0;
389}
390
391static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
392 const struct qstr *name,
393 void *opaque)
394{
395 unsigned required = GFS2_DIRENT_SIZE(name->len);
396 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
397 unsigned totlen = be16_to_cpu(dent->de_rec_len);
398
399 if (!dent->de_inum.no_addr)
400 actual = GFS2_DIRENT_SIZE(0);
401 if ((totlen - actual) >= required)
402 return 1;
403 return 0;
404}
405
/* State for gfs2_dirent_gather(): accumulates pointers to every live
   dirent visited by a scan into a caller-supplied array. */
struct dirent_gather {
	const struct gfs2_dirent **pdent;	/* destination array */
	unsigned offset;	/* number of entries stored so far */
};
410
411static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
412 const struct qstr *name,
413 void *opaque)
414{
415 struct dirent_gather *g = opaque;
416 if (dent->de_inum.no_addr) {
417 g->pdent[g->offset++] = dent;
418 }
419 return 0;
420}
421
422/*
423 * Other possible things to check:
424 * - Inode located within filesystem size (and on valid block)
425 * - Valid directory entry type
426 * Not sure how heavy-weight we want to make this... could also check
427 * hash is correct for example, but that would take a lot of extra time.
428 * For now the most important thing is to check that the various sizes
429 * are correct.
430 */
431static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
432 unsigned int size, unsigned int len, int first)
433{
434 const char *msg = "gfs2_dirent too small";
435 if (unlikely(size < sizeof(struct gfs2_dirent)))
436 goto error;
437 msg = "gfs2_dirent misaligned";
438 if (unlikely(offset & 0x7))
439 goto error;
440 msg = "gfs2_dirent points beyond end of block";
441 if (unlikely(offset + size > len))
442 goto error;
443 msg = "zero inode number";
444 if (unlikely(!first && !dent->de_inum.no_addr))
445 goto error;
446 msg = "name length is greater than space in dirent";
447 if (dent->de_inum.no_addr &&
448 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
449 size))
450 goto error;
451 return 0;
452error:
453 printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg,
454 first ? "first in block" : "not first in block");
455 return -EIO;
456}
457
458static int gfs2_dirent_offset(const void *buf)
459{
460 const struct gfs2_meta_header *h = buf;
461 int offset;
462
463 BUG_ON(buf == NULL);
464
465 switch(be32_to_cpu(h->mh_type)) {
466 case GFS2_METATYPE_LF:
467 offset = sizeof(struct gfs2_leaf);
468 break;
469 case GFS2_METATYPE_DI:
470 offset = sizeof(struct gfs2_dinode);
471 break;
472 default:
473 goto wrong_type;
474 }
475 return offset;
476wrong_type:
477 printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
478 be32_to_cpu(h->mh_type));
479 return -1;
480}
481
/*
 * Walk every dirent in @buf, applying @scan until it returns non-zero.
 *
 * Returns: NULL if every entry was visited without a hit; the current
 * dirent when scan returned 1; the previous dirent (or the current one
 * if there is no previous) when scan returned 2; ERR_PTR(ret) for a
 * negative scan result; or ERR_PTR(-EIO) after marking the inode
 * inconsistent when the block fails validation.
 */
static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode,
					    void *buf,
					    unsigned int len, gfs2_dscan_t scan,
					    const struct qstr *name,
					    void *opaque)
{
	struct gfs2_dirent *dent, *prev;
	unsigned offset;
	unsigned size;
	int ret = 0;

	ret = gfs2_dirent_offset(buf);
	if (ret < 0)
		goto consist_inode;

	offset = ret;
	prev = NULL;
	dent = (struct gfs2_dirent *)(buf + offset);
	size = be16_to_cpu(dent->de_rec_len);
	/* first == 1: the first entry may legitimately be unused. */
	if (gfs2_check_dirent(dent, offset, size, len, 1))
		goto consist_inode;
	do {
		ret = scan(dent, name, opaque);
		if (ret)
			break;
		offset += size;
		/* Records tile the block exactly; reaching len is the end. */
		if (offset == len)
			break;
		prev = dent;
		dent = (struct gfs2_dirent *)(buf + offset);
		size = be16_to_cpu(dent->de_rec_len);
		if (gfs2_check_dirent(dent, offset, size, len, 0))
			goto consist_inode;
	} while(1);

	switch(ret) {
	case 0:
		return NULL;
	case 1:
		return dent;
	case 2:
		return prev ? prev : dent;
	default:
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}

consist_inode:
	gfs2_consist_inode(inode->u.generic_ip);
	return ERR_PTR(-EIO);
}
533
534
535/**
536 * dirent_first - Return the first dirent
537 * @dip: the directory
538 * @bh: The buffer
539 * @dent: Pointer to list of dirents
540 *
541 * return first dirent whether bh points to leaf or stuffed dinode
542 *
543 * Returns: IS_LEAF, IS_DINODE, or -errno
544 */
545
546static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
547 struct gfs2_dirent **dent)
548{
549 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
550
551 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
552 if (gfs2_meta_check(dip->i_sbd, bh))
553 return -EIO;
554 *dent = (struct gfs2_dirent *)(bh->b_data +
555 sizeof(struct gfs2_leaf));
556 return IS_LEAF;
557 } else {
558 if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
559 return -EIO;
560 *dent = (struct gfs2_dirent *)(bh->b_data +
561 sizeof(struct gfs2_dinode));
562 return IS_DINODE;
563 }
564}
565
566/**
567 * dirent_next - Next dirent
568 * @dip: the directory
569 * @bh: The buffer
570 * @dent: Pointer to list of dirents
571 *
572 * Returns: 0 on success, error code otherwise
573 */
574
575static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
576 struct gfs2_dirent **dent)
577{
578 struct gfs2_dirent *tmp, *cur;
579 char *bh_end;
580 uint16_t cur_rec_len;
581
582 cur = *dent;
583 bh_end = bh->b_data + bh->b_size;
584 cur_rec_len = be16_to_cpu(cur->de_rec_len);
585
586 if ((char *)cur + cur_rec_len >= bh_end) {
587 if ((char *)cur + cur_rec_len > bh_end) {
588 gfs2_consist_inode(dip);
589 return -EIO;
590 }
591 return -ENOENT;
592 }
593
594 tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
595
596 if ((char *)tmp + be16_to_cpu(tmp->de_rec_len) > bh_end) {
597 gfs2_consist_inode(dip);
598 return -EIO;
599 }
600
601 if (cur_rec_len == 0) {
602 gfs2_consist_inode(dip);
603 return -EIO;
604 }
605
606 /* Only the first dent could ever have de_inum.no_addr == 0 */
607 if (!tmp->de_inum.no_addr) {
608 gfs2_consist_inode(dip);
609 return -EIO;
610 }
611
612 *dent = tmp;
613
614 return 0;
615}
616
617/**
618 * dirent_del - Delete a dirent
619 * @dip: The GFS2 inode
620 * @bh: The buffer
621 * @prev: The previous dirent
622 * @cur: The current dirent
623 *
624 */
625
626static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
627 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
628{
629 uint16_t cur_rec_len, prev_rec_len;
630
631 if (!cur->de_inum.no_addr) {
632 gfs2_consist_inode(dip);
633 return;
634 }
635
636 gfs2_trans_add_bh(dip->i_gl, bh, 1);
637
638 /* If there is no prev entry, this is the first entry in the block.
639 The de_rec_len is already as big as it needs to be. Just zero
640 out the inode number and return. */
641
642 if (!prev) {
643 cur->de_inum.no_addr = 0; /* No endianess worries */
644 return;
645 }
646
647 /* Combine this dentry with the previous one. */
648
649 prev_rec_len = be16_to_cpu(prev->de_rec_len);
650 cur_rec_len = be16_to_cpu(cur->de_rec_len);
651
652 if ((char *)prev + prev_rec_len != (char *)cur)
653 gfs2_consist_inode(dip);
654 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
655 gfs2_consist_inode(dip);
656
657 prev_rec_len += cur_rec_len;
658 prev->de_rec_len = cpu_to_be16(prev_rec_len);
659}
660
661/*
662 * Takes a dent from which to grab space as an argument. Returns the
663 * newly created dent.
664 */
665struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
666 struct gfs2_dirent *dent,
667 const struct qstr *name,
668 struct buffer_head *bh)
669{
670 struct gfs2_inode *ip = inode->u.generic_ip;
671 struct gfs2_dirent *ndent;
672 unsigned offset = 0, totlen;
673
674 if (dent->de_inum.no_addr)
675 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
676 totlen = be16_to_cpu(dent->de_rec_len);
677 BUG_ON(offset + name->len > totlen);
678 gfs2_trans_add_bh(ip->i_gl, bh, 1);
679 ndent = (struct gfs2_dirent *)((char *)dent + offset);
680 dent->de_rec_len = cpu_to_be16(offset);
681 gfs2_qstr2dirent(name, totlen - offset, ndent);
682 return ndent;
683}
684
685static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
686 struct buffer_head *bh,
687 const struct qstr *name)
688{
689 struct gfs2_dirent *dent;
690 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
691 gfs2_dirent_find_space, name, NULL);
692 if (!dent || IS_ERR(dent))
693 return dent;
694 return gfs2_init_dirent(inode, dent, name, bh);
695}
696
697static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
698 struct buffer_head **bhp)
699{
700 int error;
701
702 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
703 if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
704 error = -EIO;
705
706 return error;
707}
708
709/**
710 * get_leaf_nr - Get a leaf number associated with the index
711 * @dip: The GFS2 inode
712 * @index:
713 * @leaf_out:
714 *
715 * Returns: 0 on success, error code otherwise
716 */
717
718static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
719 uint64_t *leaf_out)
720{
721 uint64_t leaf_no;
722 int error;
723
724 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
725 index * sizeof(uint64_t),
726 sizeof(uint64_t));
727 if (error != sizeof(uint64_t))
728 return (error < 0) ? error : -EIO;
729
730 *leaf_out = be64_to_cpu(leaf_no);
731
732 return 0;
733}
734
735static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
736 struct buffer_head **bh_out)
737{
738 uint64_t leaf_no;
739 int error;
740
741 error = get_leaf_nr(dip, index, &leaf_no);
742 if (!error)
743 error = get_leaf(dip, leaf_no, bh_out);
744
745 return error;
746}
747
748static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
749 const struct qstr *name,
750 gfs2_dscan_t scan,
751 struct buffer_head **pbh)
752{
753 struct buffer_head *bh;
754 struct gfs2_dirent *dent;
755 struct gfs2_inode *ip = inode->u.generic_ip;
756 int error;
757
758 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
759 struct gfs2_leaf *leaf;
760 unsigned hsize = 1 << ip->i_di.di_depth;
761 unsigned index;
762 u64 ln;
763 if (hsize * sizeof(u64) != ip->i_di.di_size) {
764 gfs2_consist_inode(ip);
765 return ERR_PTR(-EIO);
766 }
767
768 index = name->hash >> (32 - ip->i_di.di_depth);
769 error = get_first_leaf(ip, index, &bh);
770 if (error)
771 return ERR_PTR(error);
772 do {
773 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
774 scan, name, NULL);
775 if (dent)
776 goto got_dent;
777 leaf = (struct gfs2_leaf *)bh->b_data;
778 ln = be64_to_cpu(leaf->lf_next);
779 brelse(bh);
780 if (!ln)
781 break;
782 error = get_leaf(ip, ln, &bh);
783 } while(!error);
784
785 return error ? ERR_PTR(error) : NULL;
786 }
787
788 error = gfs2_meta_inode_buffer(ip, &bh);
789 if (error)
790 return ERR_PTR(error);
791 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
792got_dent:
793 *pbh = bh;
794 return dent;
795}
796
797static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
798{
799 struct gfs2_inode *ip = inode->u.generic_ip;
800 u64 bn = gfs2_alloc_meta(ip);
801 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
802 struct gfs2_leaf *leaf;
803 struct gfs2_dirent *dent;
804 struct qstr name = { .name = "", .len = 0, .hash = 0 };
805 if (!bh)
806 return NULL;
807 gfs2_trans_add_bh(ip->i_gl, bh, 1);
808 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
809 leaf = (struct gfs2_leaf *)bh->b_data;
810 leaf->lf_depth = cpu_to_be16(depth);
811 leaf->lf_entries = cpu_to_be16(0);
812 leaf->lf_dirent_format = cpu_to_be16(GFS2_FORMAT_DE);
813 leaf->lf_next = cpu_to_be64(0);
814 memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
815 dent = (struct gfs2_dirent *)(leaf+1);
816 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
817 *pbh = bh;
818 return leaf;
819}
820
821/**
822 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
823 * @dip: The GFS2 inode
824 *
825 * Returns: 0 on success, error code otherwise
826 */
827
828static int dir_make_exhash(struct inode *inode)
829{
830 struct gfs2_inode *dip = inode->u.generic_ip;
831 struct gfs2_sbd *sdp = dip->i_sbd;
832 struct gfs2_dirent *dent;
833 struct qstr args;
834 struct buffer_head *bh, *dibh;
835 struct gfs2_leaf *leaf;
836 int y;
837 uint32_t x;
838 uint64_t *lp, bn;
839 int error;
840
841 error = gfs2_meta_inode_buffer(dip, &dibh);
842 if (error)
843 return error;
844
845 /* Turn over a new leaf */
846
847 leaf = new_leaf(inode, &bh, 0);
848 if (!leaf)
849 return -ENOSPC;
850 bn = bh->b_blocknr;
851
852 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
853 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
854
855 /* Copy dirents */
856
857 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
858 sizeof(struct gfs2_dinode));
859
860 /* Find last entry */
861
862 x = 0;
863 args.len = bh->b_size - sizeof(struct gfs2_dinode) +
864 sizeof(struct gfs2_leaf);
865 args.name = bh->b_data;
866 dent = gfs2_dirent_scan(dip->i_vnode, bh->b_data, bh->b_size,
867 gfs2_dirent_last, &args, NULL);
868 if (!dent) {
869 brelse(bh);
870 brelse(dibh);
871 return -EIO;
872 }
873 if (IS_ERR(dent)) {
874 brelse(bh);
875 brelse(dibh);
876 return PTR_ERR(dent);
877 }
878
879 /* Adjust the last dirent's record length
880 (Remember that dent still points to the last entry.) */
881
882 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
883 sizeof(struct gfs2_dinode) -
884 sizeof(struct gfs2_leaf));
885
886 brelse(bh);
887
888 /* We're done with the new leaf block, now setup the new
889 hash table. */
890
891 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
892 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
893
894 lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
895
896 for (x = sdp->sd_hash_ptrs; x--; lp++)
897 *lp = cpu_to_be64(bn);
898
899 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
900 dip->i_di.di_blocks++;
901 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
902 dip->i_di.di_payload_format = 0;
903
904 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
905 dip->i_di.di_depth = y;
906
907 gfs2_dinode_out(&dip->i_di, dibh->b_data);
908
909 brelse(dibh);
910
911 return 0;
912}
913
914/**
915 * dir_split_leaf - Split a leaf block into two
916 * @dip: The GFS2 inode
917 * @index:
918 * @leaf_no:
919 *
920 * Returns: 0 on success, error code on failure
921 */
922
923static int dir_split_leaf(struct inode *inode, const struct qstr *name)
924{
925 struct gfs2_inode *dip = inode->u.generic_ip;
926 struct buffer_head *nbh, *obh, *dibh;
927 struct gfs2_leaf *nleaf, *oleaf;
928 struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
929 uint32_t start, len, half_len, divider;
930 uint64_t bn, *lp, leaf_no;
931 uint32_t index;
932 int x, moved = 0;
933 int error;
934
935 index = name->hash >> (32 - dip->i_di.di_depth);
936 error = get_leaf_nr(dip, index, &leaf_no);
937 if (error)
938 return error;
939
940 /* Get the old leaf block */
941 error = get_leaf(dip, leaf_no, &obh);
942 if (error)
943 return error;
944
945 oleaf = (struct gfs2_leaf *)obh->b_data;
946 if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) {
947 brelse(obh);
948 return 1; /* can't split */
949 }
950
951 gfs2_trans_add_bh(dip->i_gl, obh, 1);
952
953 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
954 if (!nleaf) {
955 brelse(obh);
956 return -ENOSPC;
957 }
958 bn = nbh->b_blocknr;
959
960 /* Compute the start and len of leaf pointers in the hash table. */
961 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
962 half_len = len >> 1;
963 if (!half_len) {
964 printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index);
965 gfs2_consist_inode(dip);
966 error = -EIO;
967 goto fail_brelse;
968 }
969
970 start = (index & ~(len - 1));
971
972 /* Change the pointers.
973 Don't bother distinguishing stuffed from non-stuffed.
974 This code is complicated enough already. */
975 lp = kmalloc(half_len * sizeof(uint64_t), GFP_NOFS | __GFP_NOFAIL);
976 /* Change the pointers */
977 for (x = 0; x < half_len; x++)
978 lp[x] = cpu_to_be64(bn);
979
980 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(uint64_t),
981 half_len * sizeof(uint64_t));
982 if (error != half_len * sizeof(uint64_t)) {
983 if (error >= 0)
984 error = -EIO;
985 goto fail_lpfree;
986 }
987
988 kfree(lp);
989
990 /* Compute the divider */
991 divider = (start + half_len) << (32 - dip->i_di.di_depth);
992
993 /* Copy the entries */
994 dirent_first(dip, obh, &dent);
995
996 do {
997 next = dent;
998 if (dirent_next(dip, obh, &next))
999 next = NULL;
1000
1001 if (dent->de_inum.no_addr &&
1002 be32_to_cpu(dent->de_hash) < divider) {
1003 struct qstr str;
1004 str.name = (char*)(dent+1);
1005 str.len = be16_to_cpu(dent->de_name_len);
1006 str.hash = be32_to_cpu(dent->de_hash);
1007 new = gfs2_dirent_alloc(inode, nbh, &str);
1008 if (IS_ERR(new)) {
1009 error = PTR_ERR(new);
1010 break;
1011 }
1012
1013 new->de_inum = dent->de_inum; /* No endian worries */
1014 new->de_type = dent->de_type; /* No endian worries */
1015 nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1);
1016
1017 dirent_del(dip, obh, prev, dent);
1018
1019 if (!oleaf->lf_entries)
1020 gfs2_consist_inode(dip);
1021 oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1);
1022
1023 if (!prev)
1024 prev = dent;
1025
1026 moved = 1;
1027 } else {
1028 prev = dent;
1029 }
1030 dent = next;
1031 } while (dent);
1032
1033 oleaf->lf_depth = nleaf->lf_depth;
1034
1035 error = gfs2_meta_inode_buffer(dip, &dibh);
1036 if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
1037 dip->i_di.di_blocks++;
1038 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1039 brelse(dibh);
1040 }
1041
1042 brelse(obh);
1043 brelse(nbh);
1044
1045 return error;
1046
1047fail_lpfree:
1048 kfree(lp);
1049
1050fail_brelse:
1051 brelse(obh);
1052 brelse(nbh);
1053 return error;
1054}
1055
1056/**
1057 * dir_double_exhash - Double size of ExHash table
1058 * @dip: The GFS2 dinode
1059 *
1060 * Returns: 0 on success, error code on failure
1061 */
1062
1063static int dir_double_exhash(struct gfs2_inode *dip)
1064{
1065 struct gfs2_sbd *sdp = dip->i_sbd;
1066 struct buffer_head *dibh;
1067 uint32_t hsize;
1068 uint64_t *buf;
1069 uint64_t *from, *to;
1070 uint64_t block;
1071 int x;
1072 int error = 0;
1073
1074 hsize = 1 << dip->i_di.di_depth;
1075 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1076 gfs2_consist_inode(dip);
1077 return -EIO;
1078 }
1079
1080 /* Allocate both the "from" and "to" buffers in one big chunk */
1081
1082 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
1083
1084 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1085 error = gfs2_dir_read_data(dip, (char *)buf,
1086 block * sdp->sd_hash_bsize,
1087 sdp->sd_hash_bsize);
1088 if (error != sdp->sd_hash_bsize) {
1089 if (error >= 0)
1090 error = -EIO;
1091 goto fail;
1092 }
1093
1094 from = buf;
1095 to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
1096
1097 for (x = sdp->sd_hash_ptrs; x--; from++) {
1098 *to++ = *from; /* No endianess worries */
1099 *to++ = *from;
1100 }
1101
1102 error = gfs2_dir_write_data(dip,
1103 (char *)buf + sdp->sd_hash_bsize,
1104 block * sdp->sd_sb.sb_bsize,
1105 sdp->sd_sb.sb_bsize);
1106 if (error != sdp->sd_sb.sb_bsize) {
1107 if (error >= 0)
1108 error = -EIO;
1109 goto fail;
1110 }
1111 }
1112
1113 kfree(buf);
1114
1115 error = gfs2_meta_inode_buffer(dip, &dibh);
1116 if (!gfs2_assert_withdraw(sdp, !error)) {
1117 dip->i_di.di_depth++;
1118 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1119 brelse(dibh);
1120 }
1121
1122 return error;
1123
1124 fail:
1125 kfree(buf);
1126
1127 return error;
1128}
1129
1130/**
1131 * compare_dents - compare directory entries by hash value
1132 * @a: first dent
1133 * @b: second dent
1134 *
1135 * When comparing the hash entries of @a to @b:
1136 * gt: returns 1
1137 * lt: returns -1
1138 * eq: returns 0
1139 */
1140
1141static int compare_dents(const void *a, const void *b)
1142{
1143 struct gfs2_dirent *dent_a, *dent_b;
1144 uint32_t hash_a, hash_b;
1145 int ret = 0;
1146
1147 dent_a = *(struct gfs2_dirent **)a;
1148 hash_a = be32_to_cpu(dent_a->de_hash);
1149
1150 dent_b = *(struct gfs2_dirent **)b;
1151 hash_b = be32_to_cpu(dent_b->de_hash);
1152
1153 if (hash_a > hash_b)
1154 ret = 1;
1155 else if (hash_a < hash_b)
1156 ret = -1;
1157 else {
1158 unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1159 unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1160
1161 if (len_a > len_b)
1162 ret = 1;
1163 else if (len_a < len_b)
1164 ret = -1;
1165 else
1166 ret = memcmp((char *)(dent_a + 1),
1167 (char *)(dent_b + 1),
1168 len_a);
1169 }
1170
1171 return ret;
1172}
1173
1174/**
1175 * do_filldir_main - read out directory entries
1176 * @dip: The GFS2 inode
1177 * @offset: The offset in the file to read from
1178 * @opaque: opaque data to pass to filldir
1179 * @filldir: The function to pass entries to
1180 * @darr: an array of struct gfs2_dirent pointers to read
1181 * @entries: the number of entries in darr
1182 * @copied: pointer to int that's non-zero if a entry has been copied out
1183 *
1184 * Jump through some hoops to make sure that if there are hash collsions,
1185 * they are read out at the beginning of a buffer. We want to minimize
1186 * the possibility that they will fall into different readdir buffers or
1187 * that someone will want to seek to that location.
1188 *
1189 * Returns: errno, >0 on exception from filldir
1190 */
1191
1192static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
1193 void *opaque, gfs2_filldir_t filldir,
1194 const struct gfs2_dirent **darr, uint32_t entries,
1195 int *copied)
1196{
1197 const struct gfs2_dirent *dent, *dent_next;
1198 struct gfs2_inum inum;
1199 uint64_t off, off_next;
1200 unsigned int x, y;
1201 int run = 0;
1202 int error = 0;
1203
1204 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1205
1206 dent_next = darr[0];
1207 off_next = be32_to_cpu(dent_next->de_hash);
1208 off_next = gfs2_disk_hash2offset(off_next);
1209
1210 for (x = 0, y = 1; x < entries; x++, y++) {
1211 dent = dent_next;
1212 off = off_next;
1213
1214 if (y < entries) {
1215 dent_next = darr[y];
1216 off_next = be32_to_cpu(dent_next->de_hash);
1217 off_next = gfs2_disk_hash2offset(off_next);
1218
1219 if (off < *offset)
1220 continue;
1221 *offset = off;
1222
1223 if (off_next == off) {
1224 if (*copied && !run)
1225 return 1;
1226 run = 1;
1227 } else
1228 run = 0;
1229 } else {
1230 if (off < *offset)
1231 continue;
1232 *offset = off;
1233 }
1234
1235 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1236
1237 error = filldir(opaque, (char *)(dent + 1),
1238 be16_to_cpu(dent->de_name_len),
1239 off, &inum,
1240 be16_to_cpu(dent->de_type));
1241 if (error)
1242 return 1;
1243
1244 *copied = 1;
1245 }
1246
1247 /* Increment the *offset by one, so the next time we come into the
1248 do_filldir fxn, we get the next entry instead of the last one in the
1249 current leaf */
1250
1251 (*offset)++;
1252
1253 return 0;
1254}
1255
/*
 * gfs2_dir_read_leaf - feed all entries of one leaf chain to filldir
 * @inode: the directory
 * @offset: current readdir position (updated)
 * @opaque: passed through to @filldir
 * @filldir: entry callback
 * @copied: set non-zero once an entry has been emitted
 * @depth: set to the chain's lf_depth so the caller can step the
 *         hash-table index past the slots this chain covers
 * @leaf_no: block number of the first leaf in the chain
 *
 * Makes two passes over the chain: the first counts leaves and entries
 * so one allocation can hold both the buffer-head and dirent pointer
 * arrays, the second gathers the dirents (keeping the buffers pinned
 * until do_filldir_main() has consumed them).
 *
 * Returns: errno, >0 on exception from filldir
 */
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
			      gfs2_filldir_t filldir, int *copied,
			      unsigned *depth, u64 leaf_no)
{
	struct gfs2_inode *ip = inode->u.generic_ip;
	struct buffer_head *bh;
	struct gfs2_leaf *lf;
	unsigned entries = 0;
	unsigned leaves = 0;
	const struct gfs2_dirent **darr, *dent;
	struct dirent_gather g;
	struct buffer_head **larr;
	int leaf = 0;	/* number of buffers held in larr */
	int error, i;
	u64 lfn = leaf_no;

	/* Pass 1: size the arrays. */
	do {
		error = get_leaf(ip, lfn, &bh);
		if (error)
			goto out;
		lf = (struct gfs2_leaf *)bh->b_data;
		if (leaves == 0)
			*depth = be16_to_cpu(lf->lf_depth);
		entries += be16_to_cpu(lf->lf_entries);
		leaves++;
		lfn = be64_to_cpu(lf->lf_next);
		brelse(bh);
	} while(lfn);

	if (!entries)
		return 0;

	error = -ENOMEM;
	larr = kmalloc((leaves + entries) * sizeof(void*), GFP_KERNEL);
	if (!larr)
		goto out;
	darr = (const struct gfs2_dirent **)(larr + leaves);
	g.pdent = darr;
	g.offset = 0;
	lfn = leaf_no;

	/* Pass 2: gather dirent pointers, keeping their buffers pinned. */
	do {
		error = get_leaf(ip, lfn, &bh);
		if (error)
			goto out_kfree;
		lf = (struct gfs2_leaf *)bh->b_data;
		lfn = be64_to_cpu(lf->lf_next);
		if (lf->lf_entries) {
			dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
						gfs2_dirent_gather, NULL, &g);
			error = PTR_ERR(dent);
			if (IS_ERR(dent)) {
				goto out_kfree;
			}
			error = 0;
			larr[leaf++] = bh;
		} else {
			brelse(bh);
		}
	} while(lfn);

	error = do_filldir_main(ip, offset, opaque, filldir, darr,
				entries, copied);
out_kfree:
	for(i = 0; i < leaf; i++)
		brelse(larr[i]);
	kfree(larr);
out:
	return error;
}
1326
1327/**
1328 * dir_e_read - Reads the entries from a directory into a filldir buffer
1329 * @dip: dinode pointer
1330 * @offset: the hash of the last entry read shifted to the right once
1331 * @opaque: buffer for the filldir function to fill
1332 * @filldir: points to the filldir function to use
1333 *
1334 * Returns: errno
1335 */
1336
1337static int dir_e_read(struct inode *inode, uint64_t *offset, void *opaque,
1338 gfs2_filldir_t filldir)
1339{
1340 struct gfs2_inode *dip = inode->u.generic_ip;
1341 struct gfs2_sbd *sdp = dip->i_sbd;
1342 uint32_t hsize, len = 0;
1343 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1344 uint32_t hash, index;
1345 uint64_t *lp;
1346 int copied = 0;
1347 int error = 0;
1348 unsigned depth;
1349
1350 hsize = 1 << dip->i_di.di_depth;
1351 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1352 gfs2_consist_inode(dip);
1353 return -EIO;
1354 }
1355
1356 hash = gfs2_dir_offset2hash(*offset);
1357 index = hash >> (32 - dip->i_di.di_depth);
1358
1359 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1360 if (!lp)
1361 return -ENOMEM;
1362
1363 while (index < hsize) {
1364 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1365 ht_offset = index - lp_offset;
1366
1367 if (ht_offset_cur != ht_offset) {
1368 error = gfs2_dir_read_data(dip, (char *)lp,
1369 ht_offset * sizeof(uint64_t),
1370 sdp->sd_hash_bsize);
1371 if (error != sdp->sd_hash_bsize) {
1372 if (error >= 0)
1373 error = -EIO;
1374 goto out;
1375 }
1376 ht_offset_cur = ht_offset;
1377 }
1378
1379 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1380 &copied, &depth,
1381 be64_to_cpu(lp[lp_offset]));
1382 if (error)
1383 break;
1384
1385 len = 1 << (dip->i_di.di_depth - depth);
1386 index = (index & ~(len - 1)) + len;
1387 }
1388
1389out:
1390 kfree(lp);
1391 if (error > 0)
1392 error = 0;
1393 return error;
1394}
1395
/*
 * gfs2_dir_read - top-level readdir for a GFS2 directory
 * @inode: the directory
 * @offset: readdir position (updated)
 * @opaque: passed through to @filldir
 * @filldir: entry callback
 *
 * Dispatches to dir_e_read() for exhash directories; otherwise the
 * directory must be stuffed and its entries are gathered straight from
 * the dinode block.
 *
 * Returns: errno (a positive filldir stop is translated to 0)
 */
int gfs2_dir_read(struct inode *inode, uint64_t *offset, void *opaque,
		  gfs2_filldir_t filldir)
{
	struct gfs2_inode *dip = inode->u.generic_ip;
	struct dirent_gather g;
	const struct gfs2_dirent **darr, *dent;
	struct buffer_head *dibh;
	int copied = 0;
	int error;

	if (!dip->i_di.di_entries)
		return 0;

	if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
		return dir_e_read(inode, offset, opaque, filldir);

	/* A non-exhash directory with entries must be stuffed. */
	if (!gfs2_is_stuffed(dip)) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	error = gfs2_meta_inode_buffer(dip, &dibh);
	if (error)
		return error;

	error = -ENOMEM;
	darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *),
		       GFP_KERNEL);
	if (darr) {
		g.pdent = darr;
		g.offset = 0;
		dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
					gfs2_dirent_gather, NULL, &g);
		if (IS_ERR(dent)) {
			error = PTR_ERR(dent);
			goto out;
		}
		error = do_filldir_main(dip, offset, opaque, filldir, darr,
					dip->i_di.di_entries, &copied);
out:
		kfree(darr);
	}

	if (error > 0)
		error = 0;

	brelse(dibh);

	return error;
}
1446
1447/**
1448 * gfs2_dir_search - Search a directory
1449 * @dip: The GFS2 inode
1450 * @filename:
1451 * @inode:
1452 *
1453 * This routine searches a directory for a file or another directory.
1454 * Assumes a glock is held on dip.
1455 *
1456 * Returns: errno
1457 */
1458
1459int gfs2_dir_search(struct inode *dir, const struct qstr *name,
1460 struct gfs2_inum *inum, unsigned int *type)
1461{
1462 struct buffer_head *bh;
1463 struct gfs2_dirent *dent;
1464
1465 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1466 if (dent) {
1467 if (IS_ERR(dent))
1468 return PTR_ERR(dent);
1469 if (inum)
1470 gfs2_inum_in(inum, (char *)&dent->de_inum);
1471 if (type)
1472 *type = be16_to_cpu(dent->de_type);
1473 brelse(bh);
1474 return 0;
1475 }
1476 return -ENOENT;
1477}
1478
/*
 * dir_new_leaf - chain a fresh leaf onto the end of the leaf list that
 * serves @name's hash-table slot (used when the existing leaves are
 * full but the slot can't be split any further).
 *
 * Returns: 0 on success, errno on failure
 */
static int dir_new_leaf(struct inode *inode, const struct qstr *name)
{
	struct buffer_head *bh, *obh;
	struct gfs2_inode *ip = inode->u.generic_ip;
	struct gfs2_leaf *leaf, *oleaf;
	int error;
	u32 index;
	u64 bn;

	index = name->hash >> (32 - ip->i_di.di_depth);
	error = get_first_leaf(ip, index, &obh);
	if (error)
		return error;
	/* Walk to the last leaf in the chain. */
	do {
		oleaf = (struct gfs2_leaf *)obh->b_data;
		bn = be64_to_cpu(oleaf->lf_next);
		if (!bn)
			break;
		brelse(obh);
		error = get_leaf(ip, bn, &obh);
		if (error)
			return error;
	} while(1);

	gfs2_trans_add_bh(ip->i_gl, obh, 1);

	leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
	if (!leaf) {
		brelse(obh);
		return -ENOSPC;
	}
	/* new_leaf() left bn holding the new block's number via the
	   allocation; link it onto the chain. */
	oleaf->lf_next = cpu_to_be64(bn);
	brelse(bh);
	brelse(obh);

	/* Account for the new block in the dinode. */
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		return error;
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	ip->i_di.di_blocks++;
	gfs2_dinode_out(&ip->i_di, bh->b_data);
	brelse(bh);
	return 0;
}
1523
1524/**
1525 * gfs2_dir_add - Add new filename into directory
1526 * @dip: The GFS2 inode
1527 * @filename: The new name
1528 * @inode: The inode number of the entry
1529 * @type: The type of the entry
1530 *
1531 * Returns: 0 on success, error code on failure
1532 */
1533
1534int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1535 const struct gfs2_inum *inum, unsigned type)
1536{
1537 struct gfs2_inode *ip = inode->u.generic_ip;
1538 struct buffer_head *bh;
1539 struct gfs2_dirent *dent;
1540 struct gfs2_leaf *leaf;
1541 int error;
1542
1543 while(1) {
1544 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
1545 &bh);
1546 if (dent) {
1547 if (IS_ERR(dent))
1548 return PTR_ERR(dent);
1549 dent = gfs2_init_dirent(inode, dent, name, bh);
1550 gfs2_inum_out(inum, (char *)&dent->de_inum);
1551 dent->de_type = cpu_to_be16(type);
1552 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1553 leaf = (struct gfs2_leaf *)bh->b_data;
1554 leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1);
1555 }
1556 brelse(bh);
1557 error = gfs2_meta_inode_buffer(ip, &bh);
1558 if (error)
1559 break;
1560 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1561 ip->i_di.di_entries++;
1562 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1563 gfs2_dinode_out(&ip->i_di, bh->b_data);
1564 brelse(bh);
1565 error = 0;
1566 break;
1567 }
1568 if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
1569 error = dir_make_exhash(inode);
1570 if (error)
1571 break;
1572 continue;
1573 }
1574 error = dir_split_leaf(inode, name);
1575 if (error == 0)
1576 continue;
1577 if (error < 0)
1578 break;
1579 if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1580 error = dir_double_exhash(ip);
1581 if (error)
1582 break;
1583 error = dir_split_leaf(inode, name);
1584 if (error < 0)
1585 break;
1586 if (error == 0)
1587 continue;
1588 }
1589 error = dir_new_leaf(inode, name);
1590 if (!error)
1591 continue;
1592 error = -ENOSPC;
1593 break;
1594 }
1595 return error;
1596}
1597
1598
1599/**
1600 * gfs2_dir_del - Delete a directory entry
1601 * @dip: The GFS2 inode
1602 * @filename: The filename
1603 *
1604 * Returns: 0 on success, error code on failure
1605 */
1606
1607int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1608{
1609 struct gfs2_dirent *dent, *prev = NULL;
1610 struct buffer_head *bh;
1611 int error;
1612
1613 /* Returns _either_ the entry (if its first in block) or the
1614 previous entry otherwise */
1615 dent = gfs2_dirent_search(dip->i_vnode, name, gfs2_dirent_prev, &bh);
1616 if (!dent) {
1617 gfs2_consist_inode(dip);
1618 return -EIO;
1619 }
1620 if (IS_ERR(dent)) {
1621 gfs2_consist_inode(dip);
1622 return PTR_ERR(dent);
1623 }
1624 /* If not first in block, adjust pointers accordingly */
1625 if (gfs2_dirent_find(dent, name, NULL) == 0) {
1626 prev = dent;
1627 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1628 }
1629
1630 dirent_del(dip, bh, prev, dent);
1631 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1632 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1633 u16 entries = be16_to_cpu(leaf->lf_entries);
1634 if (!entries)
1635 gfs2_consist_inode(dip);
1636 leaf->lf_entries = cpu_to_be16(--entries);
1637 brelse(bh);
1638 }
1639
1640 error = gfs2_meta_inode_buffer(dip, &bh);
1641 if (error)
1642 return error;
1643
1644 if (!dip->i_di.di_entries)
1645 gfs2_consist_inode(dip);
1646 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1647 dip->i_di.di_entries--;
1648 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1649 gfs2_dinode_out(&dip->i_di, bh->b_data);
1650 brelse(bh);
1651
1652 return error;
1653}
1654
1655/**
1656 * gfs2_dir_mvino - Change inode number of directory entry
1657 * @dip: The GFS2 inode
1658 * @filename:
1659 * @new_inode:
1660 *
1661 * This routine changes the inode number of a directory entry. It's used
1662 * by rename to change ".." when a directory is moved.
1663 * Assumes a glock is held on dvp.
1664 *
1665 * Returns: errno
1666 */
1667
1668int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1669 struct gfs2_inum *inum, unsigned int new_type)
1670{
1671 struct buffer_head *bh;
1672 struct gfs2_dirent *dent;
1673 int error;
1674
1675 dent = gfs2_dirent_search(dip->i_vnode, filename, gfs2_dirent_find, &bh);
1676 if (!dent) {
1677 gfs2_consist_inode(dip);
1678 return -EIO;
1679 }
1680 if (IS_ERR(dent))
1681 return PTR_ERR(dent);
1682
1683 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1684 gfs2_inum_out(inum, (char *)&dent->de_inum);
1685 dent->de_type = cpu_to_be16(new_type);
1686
1687 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1688 brelse(bh);
1689 error = gfs2_meta_inode_buffer(dip, &bh);
1690 if (error)
1691 return error;
1692 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1693 }
1694
1695 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1696 gfs2_dinode_out(&dip->i_di, bh->b_data);
1697 brelse(bh);
1698 return 0;
1699}
1700
1701/**
1702 * foreach_leaf - call a function for each leaf in a directory
1703 * @dip: the directory
1704 * @lc: the function to call for each each
1705 * @data: private data to pass to it
1706 *
1707 * Returns: errno
1708 */
1709
1710static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1711{
1712 struct gfs2_sbd *sdp = dip->i_sbd;
1713 struct buffer_head *bh;
1714 struct gfs2_leaf *leaf;
1715 uint32_t hsize, len;
1716 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1717 uint32_t index = 0;
1718 uint64_t *lp;
1719 uint64_t leaf_no;
1720 int error = 0;
1721
1722 hsize = 1 << dip->i_di.di_depth;
1723 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1724 gfs2_consist_inode(dip);
1725 return -EIO;
1726 }
1727
1728 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1729 if (!lp)
1730 return -ENOMEM;
1731
1732 while (index < hsize) {
1733 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1734 ht_offset = index - lp_offset;
1735
1736 if (ht_offset_cur != ht_offset) {
1737 error = gfs2_dir_read_data(dip, (char *)lp,
1738 ht_offset * sizeof(uint64_t),
1739 sdp->sd_hash_bsize);
1740 if (error != sdp->sd_hash_bsize) {
1741 if (error >= 0)
1742 error = -EIO;
1743 goto out;
1744 }
1745 ht_offset_cur = ht_offset;
1746 }
1747
1748 leaf_no = be64_to_cpu(lp[lp_offset]);
1749 if (leaf_no) {
1750 error = get_leaf(dip, leaf_no, &bh);
1751 if (error)
1752 goto out;
1753 leaf = (struct gfs2_leaf *)bh->b_data;
1754 brelse(bh);
1755
1756 len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth));
1757
1758 error = lc(dip, index, len, leaf_no, data);
1759 if (error)
1760 goto out;
1761
1762 index = (index & ~(len - 1)) + len;
1763 } else
1764 index++;
1765 }
1766
1767 if (index != hsize) {
1768 gfs2_consist_inode(dip);
1769 error = -EIO;
1770 }
1771
1772 out:
1773 kfree(lp);
1774
1775 return error;
1776}
1777
1778/**
1779 * leaf_dealloc - Deallocate a directory leaf
1780 * @dip: the directory
1781 * @index: the hash table offset in the directory
1782 * @len: the number of pointers to this leaf
1783 * @leaf_no: the leaf number
1784 * @data: not used
1785 *
1786 * Returns: errno
1787 */
1788
1789static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
1790 uint64_t leaf_no, void *data)
1791{
1792 struct gfs2_sbd *sdp = dip->i_sbd;
1793 struct gfs2_leaf *tmp_leaf;
1794 struct gfs2_rgrp_list rlist;
1795 struct buffer_head *bh, *dibh;
1796 uint64_t blk, nblk;
1797 unsigned int rg_blocks = 0, l_blocks = 0;
1798 char *ht;
1799 unsigned int x, size = len * sizeof(uint64_t);
1800 int error;
1801
1802 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1803
1804 ht = kzalloc(size, GFP_KERNEL);
1805 if (!ht)
1806 return -ENOMEM;
1807
1808 gfs2_alloc_get(dip);
1809
1810 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1811 if (error)
1812 goto out;
1813
1814 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
1815 if (error)
1816 goto out_qs;
1817
1818 /* Count the number of leaves */
1819
1820 for (blk = leaf_no; blk; blk = nblk) {
1821 error = get_leaf(dip, blk, &bh);
1822 if (error)
1823 goto out_rlist;
1824 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1825 nblk = be64_to_cpu(tmp_leaf->lf_next);
1826 brelse(bh);
1827
1828 gfs2_rlist_add(sdp, &rlist, blk);
1829 l_blocks++;
1830 }
1831
1832 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1833
1834 for (x = 0; x < rlist.rl_rgrps; x++) {
1835 struct gfs2_rgrpd *rgd;
1836 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1837 rg_blocks += rgd->rd_ri.ri_length;
1838 }
1839
1840 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1841 if (error)
1842 goto out_rlist;
1843
1844 error = gfs2_trans_begin(sdp,
1845 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
1846 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1847 if (error)
1848 goto out_rg_gunlock;
1849
1850 for (blk = leaf_no; blk; blk = nblk) {
1851 error = get_leaf(dip, blk, &bh);
1852 if (error)
1853 goto out_end_trans;
1854 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1855 nblk = be64_to_cpu(tmp_leaf->lf_next);
1856 brelse(bh);
1857
1858 gfs2_free_meta(dip, blk, 1);
1859
1860 if (!dip->i_di.di_blocks)
1861 gfs2_consist_inode(dip);
1862 dip->i_di.di_blocks--;
1863 }
1864
1865 error = gfs2_dir_write_data(dip, ht, index * sizeof(uint64_t), size);
1866 if (error != size) {
1867 if (error >= 0)
1868 error = -EIO;
1869 goto out_end_trans;
1870 }
1871
1872 error = gfs2_meta_inode_buffer(dip, &dibh);
1873 if (error)
1874 goto out_end_trans;
1875
1876 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1877 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1878 brelse(dibh);
1879
1880 out_end_trans:
1881 gfs2_trans_end(sdp);
1882
1883 out_rg_gunlock:
1884 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1885
1886 out_rlist:
1887 gfs2_rlist_free(&rlist);
1888 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
1889
1890 out_qs:
1891 gfs2_quota_unhold(dip);
1892
1893 out:
1894 gfs2_alloc_put(dip);
1895 kfree(ht);
1896
1897 return error;
1898}
1899
1900/**
1901 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
1902 * @dip: the directory
1903 *
1904 * Dealloc all on-disk directory leaves to FREEMETA state
1905 * Change on-disk inode type to "regular file"
1906 *
1907 * Returns: errno
1908 */
1909
1910int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1911{
1912 struct gfs2_sbd *sdp = dip->i_sbd;
1913 struct buffer_head *bh;
1914 int error;
1915
1916 /* Dealloc on-disk leaves to FREEMETA state */
1917 error = foreach_leaf(dip, leaf_dealloc, NULL);
1918 if (error)
1919 return error;
1920
1921 /* Make this a regular file in case we crash.
1922 (We don't want to free these blocks a second time.) */
1923
1924 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1925 if (error)
1926 return error;
1927
1928 error = gfs2_meta_inode_buffer(dip, &bh);
1929 if (!error) {
1930 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1931 ((struct gfs2_dinode *)bh->b_data)->di_mode =
1932 cpu_to_be32(S_IFREG);
1933 brelse(bh);
1934 }
1935
1936 gfs2_trans_end(sdp);
1937
1938 return error;
1939}
1940
1941/**
1942 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
1943 * @ip: the file being written to
1944 * @filname: the filename that's going to be added
1945 *
1946 * Returns: 1 if alloc required, 0 if not, -ve on error
1947 */
1948
1949int gfs2_diradd_alloc_required(struct inode *inode,
1950 const struct qstr *name)
1951{
1952 struct gfs2_dirent *dent;
1953 struct buffer_head *bh;
1954
1955 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
1956 if (!dent)
1957 return 1;
1958 if (IS_ERR(dent))
1959 return PTR_ERR(dent);
1960 brelse(bh);
1961 return 0;
1962}
1963
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
new file mode 100644
index 000000000000..42b3a1f34deb
--- /dev/null
+++ b/fs/gfs2/dir.h
@@ -0,0 +1,73 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIR_DOT_H__
11#define __DIR_DOT_H__
12
13/**
14 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
15 * @opaque: opaque data used by the function
16 * @name: the name of the directory entry
17 * @length: the length of the name
18 * @offset: the entry's offset in the directory
19 * @inum: the inode number the entry points to
20 * @type: the type of inode the entry points to
21 *
22 * Returns: 0 on success, 1 if buffer full
23 */
24
25typedef int (*gfs2_filldir_t) (void *opaque,
26 const char *name, unsigned int length,
27 uint64_t offset,
28 struct gfs2_inum *inum, unsigned int type);
29
30int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
31 struct gfs2_inum *inum, unsigned int *type);
32int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
33 const struct gfs2_inum *inum, unsigned int type);
34int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
35int gfs2_dir_read(struct inode *inode, uint64_t * offset, void *opaque,
36 gfs2_filldir_t filldir);
37int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
38 struct gfs2_inum *new_inum, unsigned int new_type);
39
40int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
41
42int gfs2_diradd_alloc_required(struct inode *dir,
43 const struct qstr *filename);
44int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
45 struct buffer_head **bhp);
46
47static inline uint32_t gfs2_disk_hash(const char *data, int len)
48{
49 return crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF;
50}
51
52
53static inline void gfs2_str2qstr(struct qstr *name, const char *fname)
54{
55 name->name = fname;
56 name->len = strlen(fname);
57 name->hash = gfs2_disk_hash(name->name, name->len);
58}
59
60/* N.B. This probably ought to take inum & type as args as well */
61static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent)
62{
63 dent->de_inum.no_addr = cpu_to_be64(0);
64 dent->de_inum.no_formal_ino = cpu_to_be64(0);
65 dent->de_hash = cpu_to_be32(name->hash);
66 dent->de_rec_len = cpu_to_be16(reclen);
67 dent->de_name_len = cpu_to_be16(name->len);
68 dent->de_type = cpu_to_be16(0);
69 memset(dent->__pad, 0, sizeof(dent->__pad));
70 memcpy((char*)(dent+1), name->name, name->len);
71}
72
73#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
new file mode 100644
index 000000000000..4b9f6cff7a34
--- /dev/null
+++ b/fs/gfs2/eaops.c
@@ -0,0 +1,189 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "acl.h"
24#include "eaops.h"
25#include "eattr.h"
26#include "util.h"
27
28/**
29 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
30 * @namep: ea name, possibly with type appended
31 *
32 * Returns: GFS2_EATYPE_XXX
33 */
34
35unsigned int gfs2_ea_name2type(const char *name, char **truncated_name)
36{
37 unsigned int type;
38
39 if (strncmp(name, "system.", 7) == 0) {
40 type = GFS2_EATYPE_SYS;
41 if (truncated_name)
42 *truncated_name = strchr(name, '.') + 1;
43 } else if (strncmp(name, "user.", 5) == 0) {
44 type = GFS2_EATYPE_USR;
45 if (truncated_name)
46 *truncated_name = strchr(name, '.') + 1;
47 } else {
48 type = GFS2_EATYPE_UNUSED;
49 if (truncated_name)
50 *truncated_name = NULL;
51 }
52
53 return type;
54}
55
56static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
57{
58 struct inode *inode = ip->i_vnode;
59 int error = permission(inode, MAY_READ, NULL);
60 if (error)
61 return error;
62
63 return gfs2_ea_get_i(ip, er);
64}
65
66static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
67{
68 struct inode *inode = ip->i_vnode;
69
70 if (S_ISREG(inode->i_mode) ||
71 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
72 int error = permission(inode, MAY_WRITE, NULL);
73 if (error)
74 return error;
75 } else
76 return -EPERM;
77
78 return gfs2_ea_set_i(ip, er);
79}
80
81static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
82{
83 struct inode *inode = ip->i_vnode;
84
85 if (S_ISREG(inode->i_mode) ||
86 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
87 int error = permission(inode, MAY_WRITE, NULL);
88 if (error)
89 return error;
90 } else
91 return -EPERM;
92
93 return gfs2_ea_remove_i(ip, er);
94}
95
96static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
97{
98 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
99 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
100 !capable(CAP_SYS_ADMIN))
101 return -EPERM;
102
103 if (ip->i_sbd->sd_args.ar_posix_acl == 0 &&
104 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
105 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
106 return -EOPNOTSUPP;
107
108
109
110 return gfs2_ea_get_i(ip, er);
111}
112
113static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
114{
115 int remove = 0;
116 int error;
117
118 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
119 if (!(er->er_flags & GFS2_ERF_MODE)) {
120 er->er_mode = ip->i_di.di_mode;
121 er->er_flags |= GFS2_ERF_MODE;
122 }
123 error = gfs2_acl_validate_set(ip, 1, er,
124 &remove, &er->er_mode);
125 if (error)
126 return error;
127 error = gfs2_ea_set_i(ip, er);
128 if (error)
129 return error;
130 if (remove)
131 gfs2_ea_remove_i(ip, er);
132 return 0;
133
134 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
135 error = gfs2_acl_validate_set(ip, 0, er,
136 &remove, NULL);
137 if (error)
138 return error;
139 if (!remove)
140 error = gfs2_ea_set_i(ip, er);
141 else {
142 error = gfs2_ea_remove_i(ip, er);
143 if (error == -ENODATA)
144 error = 0;
145 }
146 return error;
147 }
148
149 return -EPERM;
150}
151
152static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
153{
154 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
155 int error = gfs2_acl_validate_remove(ip, 1);
156 if (error)
157 return error;
158
159 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
160 int error = gfs2_acl_validate_remove(ip, 0);
161 if (error)
162 return error;
163
164 } else
165 return -EPERM;
166
167 return gfs2_ea_remove_i(ip, er);
168}
169
170struct gfs2_eattr_operations gfs2_user_eaops = {
171 .eo_get = user_eo_get,
172 .eo_set = user_eo_set,
173 .eo_remove = user_eo_remove,
174 .eo_name = "user",
175};
176
177struct gfs2_eattr_operations gfs2_system_eaops = {
178 .eo_get = system_eo_get,
179 .eo_set = system_eo_set,
180 .eo_remove = system_eo_remove,
181 .eo_name = "system",
182};
183
184struct gfs2_eattr_operations *gfs2_ea_ops[] = {
185 NULL,
186 &gfs2_user_eaops,
187 &gfs2_system_eaops,
188};
189
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
new file mode 100644
index 000000000000..f83c497eddca
--- /dev/null
+++ b/fs/gfs2/eaops.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14
15struct gfs2_eattr_operations {
16 int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
17 int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
18 int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
19 char *eo_name;
20};
21
22unsigned int gfs2_ea_name2type(const char *name, char **truncated_name);
23
24extern struct gfs2_eattr_operations gfs2_user_eaops;
25extern struct gfs2_eattr_operations gfs2_system_eaops;
26
27extern struct gfs2_eattr_operations *gfs2_ea_ops[];
28
29#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
new file mode 100644
index 000000000000..8219d471f06c
--- /dev/null
+++ b/fs/gfs2/eattr.c
@@ -0,0 +1,1568 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "acl.h"
24#include "eaops.h"
25#include "eattr.h"
26#include "glock.h"
27#include "inode.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "rgrp.h"
31#include "trans.h"
32#include "util.h"
33
34/**
35 * ea_calc_size - returns the acutal number of bytes the request will take up
36 * (not counting any unstuffed data blocks)
37 * @sdp:
38 * @er:
39 * @size:
40 *
41 * Returns: 1 if the EA should be stuffed
42 */
43
44static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
45 unsigned int *size)
46{
47 *size = GFS2_EAREQ_SIZE_STUFFED(er);
48 if (*size <= sdp->sd_jbsize)
49 return 1;
50
51 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
52
53 return 0;
54}
55
56static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
57{
58 unsigned int size;
59
60 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
61 return -ERANGE;
62
63 ea_calc_size(sdp, er, &size);
64
65 /* This can only happen with 512 byte blocks */
66 if (size > sdp->sd_jbsize)
67 return -ERANGE;
68
69 return 0;
70}
71
72typedef int (*ea_call_t) (struct gfs2_inode *ip,
73 struct buffer_head *bh,
74 struct gfs2_ea_header *ea,
75 struct gfs2_ea_header *prev,
76 void *private);
77
78static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
79 ea_call_t ea_call, void *data)
80{
81 struct gfs2_ea_header *ea, *prev = NULL;
82 int error = 0;
83
84 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_EA))
85 return -EIO;
86
87 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
88 if (!GFS2_EA_REC_LEN(ea))
89 goto fail;
90 if (!(bh->b_data <= (char *)ea &&
91 (char *)GFS2_EA2NEXT(ea) <=
92 bh->b_data + bh->b_size))
93 goto fail;
94 if (!GFS2_EATYPE_VALID(ea->ea_type))
95 goto fail;
96
97 error = ea_call(ip, bh, ea, prev, data);
98 if (error)
99 return error;
100
101 if (GFS2_EA_IS_LAST(ea)) {
102 if ((char *)GFS2_EA2NEXT(ea) !=
103 bh->b_data + bh->b_size)
104 goto fail;
105 break;
106 }
107 }
108
109 return error;
110
111 fail:
112 gfs2_consist_inode(ip);
113 return -EIO;
114}
115
116static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
117{
118 struct buffer_head *bh, *eabh;
119 uint64_t *eablk, *end;
120 int error;
121
122 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
123 DIO_START | DIO_WAIT, &bh);
124 if (error)
125 return error;
126
127 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
128 error = ea_foreach_i(ip, bh, ea_call, data);
129 goto out;
130 }
131
132 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
133 error = -EIO;
134 goto out;
135 }
136
137 eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs2_meta_header));
138 end = eablk + ip->i_sbd->sd_inptrs;
139
140 for (; eablk < end; eablk++) {
141 uint64_t bn;
142
143 if (!*eablk)
144 break;
145 bn = be64_to_cpu(*eablk);
146
147 error = gfs2_meta_read(ip->i_gl, bn, DIO_START | DIO_WAIT,
148 &eabh);
149 if (error)
150 break;
151 error = ea_foreach_i(ip, eabh, ea_call, data);
152 brelse(eabh);
153 if (error)
154 break;
155 }
156 out:
157 brelse(bh);
158
159 return error;
160}
161
162struct ea_find {
163 struct gfs2_ea_request *ef_er;
164 struct gfs2_ea_location *ef_el;
165};
166
167static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
168 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
169 void *private)
170{
171 struct ea_find *ef = private;
172 struct gfs2_ea_request *er = ef->ef_er;
173
174 if (ea->ea_type == GFS2_EATYPE_UNUSED)
175 return 0;
176
177 if (ea->ea_type == er->er_type) {
178 if (ea->ea_name_len == er->er_name_len &&
179 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
180 struct gfs2_ea_location *el = ef->ef_el;
181 get_bh(bh);
182 el->el_bh = bh;
183 el->el_ea = ea;
184 el->el_prev = prev;
185 return 1;
186 }
187 }
188
189#if 0
190 else if ((ip->i_di.di_flags & GFS2_DIF_EA_PACKED) &&
191 er->er_type == GFS2_EATYPE_SYS)
192 return 1;
193#endif
194
195 return 0;
196}
197
198int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
199 struct gfs2_ea_location *el)
200{
201 struct ea_find ef;
202 int error;
203
204 ef.ef_er = er;
205 ef.ef_el = el;
206
207 memset(el, 0, sizeof(struct gfs2_ea_location));
208
209 error = ea_foreach(ip, ea_find_i, &ef);
210 if (error > 0)
211 return 0;
212
213 return error;
214}
215
216/**
217 * ea_dealloc_unstuffed -
218 * @ip:
219 * @bh:
220 * @ea:
221 * @prev:
222 * @private:
223 *
224 * Take advantage of the fact that all unstuffed blocks are
225 * allocated from the same RG. But watch, this may not always
226 * be true.
227 *
228 * Returns: errno
229 */
230
231static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
232 struct gfs2_ea_header *ea,
233 struct gfs2_ea_header *prev, void *private)
234{
235 int *leave = private;
236 struct gfs2_sbd *sdp = ip->i_sbd;
237 struct gfs2_rgrpd *rgd;
238 struct gfs2_holder rg_gh;
239 struct buffer_head *dibh;
240 uint64_t *dataptrs, bn = 0;
241 uint64_t bstart = 0;
242 unsigned int blen = 0;
243 unsigned int blks = 0;
244 unsigned int x;
245 int error;
246
247 if (GFS2_EA_IS_STUFFED(ea))
248 return 0;
249
250 dataptrs = GFS2_EA2DATAPTRS(ea);
251 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++)
252 if (*dataptrs) {
253 blks++;
254 bn = be64_to_cpu(*dataptrs);
255 }
256 if (!blks)
257 return 0;
258
259 rgd = gfs2_blk2rgrpd(sdp, bn);
260 if (!rgd) {
261 gfs2_consist_inode(ip);
262 return -EIO;
263 }
264
265 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
266 if (error)
267 return error;
268
269 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length +
270 RES_DINODE + RES_EATTR + RES_STATFS +
271 RES_QUOTA, blks);
272 if (error)
273 goto out_gunlock;
274
275 gfs2_trans_add_bh(ip->i_gl, bh, 1);
276
277 dataptrs = GFS2_EA2DATAPTRS(ea);
278 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
279 if (!*dataptrs)
280 break;
281 bn = be64_to_cpu(*dataptrs);
282
283 if (bstart + blen == bn)
284 blen++;
285 else {
286 if (bstart)
287 gfs2_free_meta(ip, bstart, blen);
288 bstart = bn;
289 blen = 1;
290 }
291
292 *dataptrs = 0;
293 if (!ip->i_di.di_blocks)
294 gfs2_consist_inode(ip);
295 ip->i_di.di_blocks--;
296 }
297 if (bstart)
298 gfs2_free_meta(ip, bstart, blen);
299
300 if (prev && !leave) {
301 uint32_t len;
302
303 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
304 prev->ea_rec_len = cpu_to_be32(len);
305
306 if (GFS2_EA_IS_LAST(ea))
307 prev->ea_flags |= GFS2_EAFLAG_LAST;
308 } else {
309 ea->ea_type = GFS2_EATYPE_UNUSED;
310 ea->ea_num_ptrs = 0;
311 }
312
313 error = gfs2_meta_inode_buffer(ip, &dibh);
314 if (!error) {
315 ip->i_di.di_ctime = get_seconds();
316 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
317 gfs2_dinode_out(&ip->i_di, dibh->b_data);
318 brelse(dibh);
319 }
320
321 gfs2_trans_end(sdp);
322
323 out_gunlock:
324 gfs2_glock_dq_uninit(&rg_gh);
325
326 return error;
327}
328
329static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
330 struct gfs2_ea_header *ea,
331 struct gfs2_ea_header *prev, int leave)
332{
333 struct gfs2_alloc *al;
334 int error;
335
336 al = gfs2_alloc_get(ip);
337
338 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
339 if (error)
340 goto out_alloc;
341
342 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
343 if (error)
344 goto out_quota;
345
346 error = ea_dealloc_unstuffed(ip,
347 bh, ea, prev,
348 (leave) ? &error : NULL);
349
350 gfs2_glock_dq_uninit(&al->al_ri_gh);
351
352 out_quota:
353 gfs2_quota_unhold(ip);
354
355 out_alloc:
356 gfs2_alloc_put(ip);
357
358 return error;
359}
360
361
362static int gfs2_ea_repack_i(struct gfs2_inode *ip)
363{
364 return -EOPNOTSUPP;
365}
366
367int gfs2_ea_repack(struct gfs2_inode *ip)
368{
369 struct gfs2_holder gh;
370 int error;
371
372 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
373 if (error)
374 return error;
375
376 /* Some sort of permissions checking would be nice */
377
378 error = gfs2_ea_repack_i(ip);
379
380 gfs2_glock_dq_uninit(&gh);
381
382 return error;
383}
384
385struct ea_list {
386 struct gfs2_ea_request *ei_er;
387 unsigned int ei_size;
388};
389
390static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
391 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
392 void *private)
393{
394 struct ea_list *ei = private;
395 struct gfs2_ea_request *er = ei->ei_er;
396 unsigned int ea_size = GFS2_EA_STRLEN(ea);
397
398 if (ea->ea_type == GFS2_EATYPE_UNUSED)
399 return 0;
400
401 if (er->er_data_len) {
402 char *prefix;
403 unsigned int l;
404 char c = 0;
405
406 if (ei->ei_size + ea_size > er->er_data_len)
407 return -ERANGE;
408
409 if (ea->ea_type == GFS2_EATYPE_USR) {
410 prefix = "user.";
411 l = 5;
412 } else {
413 prefix = "system.";
414 l = 7;
415 }
416
417 memcpy(er->er_data + ei->ei_size,
418 prefix, l);
419 memcpy(er->er_data + ei->ei_size + l,
420 GFS2_EA2NAME(ea),
421 ea->ea_name_len);
422 memcpy(er->er_data + ei->ei_size +
423 ea_size - 1,
424 &c, 1);
425 }
426
427 ei->ei_size += ea_size;
428
429 return 0;
430}
431
432/**
433 * gfs2_ea_list -
434 * @ip:
435 * @er:
436 *
437 * Returns: actual size of data on success, -errno on error
438 */
439
440int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
441{
442 struct gfs2_holder i_gh;
443 int error;
444
445 if (!er->er_data || !er->er_data_len) {
446 er->er_data = NULL;
447 er->er_data_len = 0;
448 }
449
450 error = gfs2_glock_nq_init(ip->i_gl,
451 LM_ST_SHARED, LM_FLAG_ANY,
452 &i_gh);
453 if (error)
454 return error;
455
456 if (ip->i_di.di_eattr) {
457 struct ea_list ei = { .ei_er = er, .ei_size = 0 };
458
459 error = ea_foreach(ip, ea_list_i, &ei);
460 if (!error)
461 error = ei.ei_size;
462 }
463
464 gfs2_glock_dq_uninit(&i_gh);
465
466 return error;
467}
468
/**
 * ea_get_unstuffed - actually copies the unstuffed data into the
 *                    request buffer
 * @ip: the inode the EA belongs to
 * @ea: an unstuffed EA header whose data lives in separate blocks
 * @data: destination buffer, at least GFS2_EA_DATA_LEN(ea) bytes
 *
 * Starts reads on all of the EA's data blocks first, then waits for
 * each in turn and copies its payload, so the I/O overlaps.
 *
 * Returns: errno
 */

static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
			    char *data)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head **bh;
	unsigned int amount = GFS2_EA_DATA_LEN(ea);
	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
	uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
	unsigned int x;
	int error = 0;

	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
	if (!bh)
		return -ENOMEM;

	/* Phase 1: kick off the read for every data block. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
				       DIO_START, bh + x);
		if (error) {
			/* Drop only the buffers acquired so far. */
			while (x--)
				brelse(bh[x]);
			goto out;
		}
		dataptrs++;
	}

	/* Phase 2: wait for each block, verify it, and copy its payload. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
		if (error) {
			/* Buffers before x were already released below. */
			for (; x < nptrs; x++)
				brelse(bh[x]);
			goto out;
		}
		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			error = -EIO;
			goto out;
		}

		/* The last block may hold less than a full journaled block. */
		memcpy(data,
		       bh[x]->b_data + sizeof(struct gfs2_meta_header),
		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);

		amount -= sdp->sd_jbsize;
		data += sdp->sd_jbsize;

		brelse(bh[x]);
	}

 out:
	kfree(bh);

	return error;
}
534
535int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
536 char *data)
537{
538 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
539 memcpy(data,
540 GFS2_EA2DATA(el->el_ea),
541 GFS2_EA_DATA_LEN(el->el_ea));
542 return 0;
543 } else
544 return ea_get_unstuffed(ip, el->el_ea, data);
545}
546
547/**
548 * gfs2_ea_get_i -
549 * @ip:
550 * @er:
551 *
552 * Returns: actual size of data on success, -errno on error
553 */
554
555int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
556{
557 struct gfs2_ea_location el;
558 int error;
559
560 if (!ip->i_di.di_eattr)
561 return -ENODATA;
562
563 error = gfs2_ea_find(ip, er, &el);
564 if (error)
565 return error;
566 if (!el.el_ea)
567 return -ENODATA;
568
569 if (er->er_data_len) {
570 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
571 error = -ERANGE;
572 else
573 error = gfs2_ea_get_copy(ip, &el, er->er_data);
574 }
575 if (!error)
576 error = GFS2_EA_DATA_LEN(el.el_ea);
577
578 brelse(el.el_bh);
579
580 return error;
581}
582
583/**
584 * gfs2_ea_get -
585 * @ip:
586 * @er:
587 *
588 * Returns: actual size of data on success, -errno on error
589 */
590
591int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
592{
593 struct gfs2_holder i_gh;
594 int error;
595
596 if (!er->er_name_len ||
597 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
598 return -EINVAL;
599 if (!er->er_data || !er->er_data_len) {
600 er->er_data = NULL;
601 er->er_data_len = 0;
602 }
603
604 error = gfs2_glock_nq_init(ip->i_gl,
605 LM_ST_SHARED, LM_FLAG_ANY,
606 &i_gh);
607 if (error)
608 return error;
609
610 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
611
612 gfs2_glock_dq_uninit(&i_gh);
613
614 return error;
615}
616
/**
 * ea_alloc_blk - allocates a new block for extended attributes.
 * @ip: A pointer to the inode that's getting extended attributes
 * @bhp: set to the new block's buffer, already added to the transaction
 *
 * The fresh block is formatted as an EA block containing one record
 * that spans the whole journaled area, marked unused and last.  The
 * caller must already be inside a transaction.
 *
 * Returns: errno
 */

static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_ea_header *ea;
	uint64_t block;

	block = gfs2_alloc_meta(ip);

	*bhp = gfs2_meta_new(ip->i_gl, block);
	gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
	gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
	gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));

	/* Single free record covering the whole journaled block. */
	ea = GFS2_EA_BH2FIRST(*bhp);
	ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
	ea->ea_type = GFS2_EATYPE_UNUSED;
	ea->ea_flags = GFS2_EAFLAG_LAST;
	ea->ea_num_ptrs = 0;

	ip->i_di.di_blocks++;

	return 0;
}
648
/**
 * ea_write - writes the request info to an ea, creating new blocks if
 *            necessary
 * @ip: inode that is being modified
 * @ea: the location of the new ea in a block
 * @er: the write request
 *
 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
 *
 * returns : errno
 */

static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
		    struct gfs2_ea_request *er)
{
	struct gfs2_sbd *sdp = ip->i_sbd;

	ea->ea_data_len = cpu_to_be32(er->er_data_len);
	ea->ea_name_len = er->er_name_len;
	ea->ea_type = er->er_type;
	ea->__pad = 0;

	memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);

	if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
		/* Small value: stuff the data right after the name. */
		ea->ea_num_ptrs = 0;
		memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
	} else {
		/* Large value: allocate data blocks and store pointers to
		   them after the name instead. */
		uint64_t *dataptr = GFS2_EA2DATAPTRS(ea);
		const char *data = er->er_data;
		unsigned int data_len = er->er_data_len;
		unsigned int copy;
		unsigned int x;

		ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize);
		for (x = 0; x < ea->ea_num_ptrs; x++) {
			struct buffer_head *bh;
			uint64_t block;
			int mh_size = sizeof(struct gfs2_meta_header);

			block = gfs2_alloc_meta(ip);

			bh = gfs2_meta_new(ip->i_gl, block);
			gfs2_trans_add_bh(ip->i_gl, bh, 1);
			gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);

			ip->i_di.di_blocks++;

			/* The final block may be partially filled;
			   zero its tail so no stale data hits disk. */
			copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize :
							     data_len;
			memcpy(bh->b_data + mh_size, data, copy);
			if (copy < sdp->sd_jbsize)
				memset(bh->b_data + mh_size + copy, 0,
				       sdp->sd_jbsize - copy);

			*dataptr++ = cpu_to_be64((uint64_t)bh->b_blocknr);
			data += copy;
			data_len -= copy;

			brelse(bh);
		}

		/* All of the value must have been consumed. */
		gfs2_assert_withdraw(sdp, !data_len);
	}

	return 0;
}
716
/* Callback invoked by ea_alloc_skeleton() once quota, block reservation
   and a transaction are all in place. */
typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip,
				   struct gfs2_ea_request *er,
				   void *private);

/*
 * ea_alloc_skeleton - common scaffolding for EA operations that
 * allocate blocks: lock quota, check it, reserve @blks blocks, start a
 * transaction, run @skeleton_call, then update the dinode (ctime and,
 * if GFS2_ERF_MODE is set, the mode) and tear everything down in
 * reverse order.  Returns errno.
 */
static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
			     unsigned int blks,
			     ea_skeleton_call_t skeleton_call,
			     void *private)
{
	struct gfs2_alloc *al;
	struct buffer_head *dibh;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
	if (error)
		goto out_gunlock_q;

	al->al_requested = blks;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(ip->i_sbd,
				 blks + al->al_rgd->rd_ri.ri_length +
				 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	error = skeleton_call(ip, er, private);
	if (error)
		goto out_end_trans;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		if (er->er_flags & GFS2_ERF_MODE) {
			/* Mode update may change permission bits only,
			   never the file type. */
			gfs2_assert_withdraw(ip->i_sbd,
					     (ip->i_di.di_mode & S_IFMT) ==
					     (er->er_mode & S_IFMT));
			ip->i_di.di_mode = er->er_mode;
		}
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

 out_end_trans:
	gfs2_trans_end(ip->i_sbd);

 out_ipres:
	gfs2_inplace_release(ip);

 out_gunlock_q:
	gfs2_quota_unlock(ip);

 out:
	gfs2_alloc_put(ip);

	return error;
}
784
785static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
786 void *private)
787{
788 struct buffer_head *bh;
789 int error;
790
791 error = ea_alloc_blk(ip, &bh);
792 if (error)
793 return error;
794
795 ip->i_di.di_eattr = bh->b_blocknr;
796 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
797
798 brelse(bh);
799
800 return error;
801}
802
803/**
804 * ea_init - initializes a new eattr block
805 * @ip:
806 * @er:
807 *
808 * Returns: errno
809 */
810
811static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
812{
813 unsigned int jbsize = ip->i_sbd->sd_jbsize;
814 unsigned int blks = 1;
815
816 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
817 blks += DIV_ROUND_UP(er->er_data_len, jbsize);
818
819 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
820}
821
822static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
823{
824 uint32_t ea_size = GFS2_EA_SIZE(ea);
825 struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea +
826 ea_size);
827 uint32_t new_size = GFS2_EA_REC_LEN(ea) - ea_size;
828 int last = ea->ea_flags & GFS2_EAFLAG_LAST;
829
830 ea->ea_rec_len = cpu_to_be32(ea_size);
831 ea->ea_flags ^= last;
832
833 new->ea_rec_len = cpu_to_be32(new_size);
834 new->ea_flags = last;
835
836 return new;
837}
838
/*
 * ea_set_remove_stuffed - unlink the old stuffed EA record once its
 * replacement has been written.  If the record has a same-block
 * predecessor, the record is merged into it; otherwise it is just
 * marked unused.
 */
static void ea_set_remove_stuffed(struct gfs2_inode *ip,
				  struct gfs2_ea_location *el)
{
	struct gfs2_ea_header *ea = el->el_ea;
	struct gfs2_ea_header *prev = el->el_prev;
	uint32_t len;

	gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);

	if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
		ea->ea_type = GFS2_EATYPE_UNUSED;
		return;
	} else if (GFS2_EA2NEXT(prev) != ea) {
		/* An earlier split inserted a record between prev and ea;
		   advance to ea's true predecessor. */
		prev = GFS2_EA2NEXT(prev);
		gfs2_assert_withdraw(ip->i_sbd, GFS2_EA2NEXT(prev) == ea);
	}

	/* Fold ea's space into prev, and inherit "last" if needed. */
	len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
	prev->ea_rec_len = cpu_to_be32(len);

	if (GFS2_EA_IS_LAST(ea))
		prev->ea_flags |= GFS2_EAFLAG_LAST;
}
862
/* State shared by the ea_set_simple()/ea_set_simple_alloc() callbacks
   while inserting or replacing an extended attribute. */
struct ea_set {
	int ea_split;			/* chosen record must be split first */

	struct gfs2_ea_request *es_er;	/* the set request being serviced */
	struct gfs2_ea_location *es_el;	/* old stuffed EA to unlink, or NULL */

	struct buffer_head *es_bh;	/* block holding the chosen record */
	struct gfs2_ea_header *es_ea;	/* the chosen record itself */
};
872
/*
 * ea_set_simple_noalloc - write a stuffed EA into an existing record
 * slot; no block allocation is needed, so this runs its own small
 * transaction.  Also unlinks the old stuffed EA (if any) and updates
 * the dinode.  Returns errno.
 */
static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
				 struct gfs2_ea_header *ea, struct ea_set *es)
{
	struct gfs2_ea_request *er = es->es_er;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + 2 * RES_EATTR, 0);
	if (error)
		return error;

	gfs2_trans_add_bh(ip->i_gl, bh, 1);

	/* Carve the request's record out of the slot's free tail. */
	if (es->ea_split)
		ea = ea_split_ea(ea);

	ea_write(ip, ea, er);

	if (es->es_el)
		ea_set_remove_stuffed(ip, es->es_el);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (er->er_flags & GFS2_ERF_MODE) {
		/* Mode update may not change the file type. */
		gfs2_assert_withdraw(ip->i_sbd,
			(ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
		ip->i_di.di_mode = er->er_mode;
	}
	ip->i_di.di_ctime = get_seconds();
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);
 out:
	gfs2_trans_end(ip->i_sbd);

	return error;
}
912
913static int ea_set_simple_alloc(struct gfs2_inode *ip,
914 struct gfs2_ea_request *er, void *private)
915{
916 struct ea_set *es = private;
917 struct gfs2_ea_header *ea = es->es_ea;
918 int error;
919
920 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);
921
922 if (es->ea_split)
923 ea = ea_split_ea(ea);
924
925 error = ea_write(ip, ea, er);
926 if (error)
927 return error;
928
929 if (es->es_el)
930 ea_set_remove_stuffed(ip, es->es_el);
931
932 return 0;
933}
934
/*
 * ea_set_simple - ea_foreach() callback that tries to place the
 * request into the record being visited: either an unused record big
 * enough for it, or a live record with enough free tail space (which
 * is then split).
 *
 * Returns: 1 if the EA was placed here (stops the walk), 0 to keep
 * scanning, -errno on error.
 */
static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
			 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
			 void *private)
{
	struct ea_set *es = private;
	unsigned int size;
	int stuffed;
	int error;

	stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size);

	if (ea->ea_type == GFS2_EATYPE_UNUSED) {
		if (GFS2_EA_REC_LEN(ea) < size)
			return 0;
		if (!GFS2_EA_IS_STUFFED(ea)) {
			/* Reclaim the dead record's data blocks first. */
			error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
			if (error)
				return error;
		}
		es->ea_split = 0;
	} else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
		es->ea_split = 1;
	else
		return 0;

	if (stuffed) {
		/* Stuffed write needs no allocation. */
		error = ea_set_simple_noalloc(ip, bh, ea, es);
		if (error)
			return error;
	} else {
		unsigned int blks;

		es->es_bh = bh;
		es->es_ea = ea;
		blks = 2 + DIV_ROUND_UP(es->es_er->er_data_len,
					ip->i_sbd->sd_jbsize);

		error = ea_alloc_skeleton(ip, es->es_er, blks,
					  ea_set_simple_alloc, es);
		if (error)
			return error;
	}

	return 1;
}
980
/*
 * ea_set_block - place the request in a brand new EA block, used when
 * no existing block has room.  If the inode already uses an indirect
 * EA block, a free pointer slot in it is claimed; otherwise the
 * current direct EA block is demoted behind a new indirect block.
 * @private, if non-NULL, is the old stuffed EA (struct
 * gfs2_ea_location *) to unlink afterwards.  Runs as an
 * ea_alloc_skeleton() callback, inside a transaction.  Returns errno.
 */
static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
			void *private)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *indbh, *newbh;
	uint64_t *eablk;
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
		uint64_t *end;

		error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
				       DIO_START | DIO_WAIT, &indbh);
		if (error)
			return error;

		if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
			error = -EIO;
			goto out;
		}

		eablk = (uint64_t *)(indbh->b_data + mh_size);
		end = eablk + sdp->sd_inptrs;

		/* Find the first unused pointer slot. */
		for (; eablk < end; eablk++)
			if (!*eablk)
				break;

		if (eablk == end) {
			error = -ENOSPC;
			goto out;
		}

		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
	} else {
		uint64_t blk;

		/* Create the indirect block; its first pointer takes
		   over the old direct EA block. */
		blk = gfs2_alloc_meta(ip);

		indbh = gfs2_meta_new(ip->i_gl, blk);
		gfs2_trans_add_bh(ip->i_gl, indbh, 1);
		gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
		gfs2_buffer_clear_tail(indbh, mh_size);

		eablk = (uint64_t *)(indbh->b_data + mh_size);
		*eablk = cpu_to_be64(ip->i_di.di_eattr);
		ip->i_di.di_eattr = blk;
		ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
		ip->i_di.di_blocks++;

		eablk++;
	}

	error = ea_alloc_blk(ip, &newbh);
	if (error)
		goto out;

	*eablk = cpu_to_be64((uint64_t)newbh->b_blocknr);
	error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
	brelse(newbh);
	if (error)
		goto out;

	if (private)
		ea_set_remove_stuffed(ip, (struct gfs2_ea_location *)private);

 out:
	brelse(indbh);

	return error;
}
1053
/*
 * ea_set_i - write the EA in @er, first trying to reuse space in the
 * existing EA blocks (ea_set_simple), then falling back to allocating
 * a whole new EA block (ea_set_block).  @el, if non-NULL, is the old
 * stuffed EA being replaced.  Returns errno.
 */
static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
		    struct gfs2_ea_location *el)
{
	struct ea_set es;
	unsigned int blks = 2;
	int error;

	memset(&es, 0, sizeof(struct ea_set));
	es.es_er = er;
	es.es_el = el;

	/* ea_set_simple returns 1 once it has placed the EA. */
	error = ea_foreach(ip, ea_set_simple, &es);
	if (error > 0)
		return 0;
	if (error)
		return error;

	/* No room anywhere: budget for a new EA block, possibly an
	   indirect block, and the data blocks if unstuffed. */
	if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
		blks++;
	if (GFS2_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize)
		blks += DIV_ROUND_UP(er->er_data_len, ip->i_sbd->sd_jbsize);

	return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
}
1078
1079static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1080 struct gfs2_ea_location *el)
1081{
1082 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1083 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1084 gfs2_assert_withdraw(ip->i_sbd,
1085 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1086 }
1087
1088 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
1089}
1090
/*
 * gfs2_ea_set_i - create, replace, or create-if-absent an EA,
 * honouring the XATTR_CREATE / XATTR_REPLACE flags in er_flags.
 * Returns errno.
 */
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct gfs2_ea_location el;
	int error;

	/* No EA fork yet: only a plain create can succeed. */
	if (!ip->i_di.di_eattr) {
		if (er->er_flags & XATTR_REPLACE)
			return -ENODATA;
		return ea_init(ip, er);
	}

	error = gfs2_ea_find(ip, er, &el);
	if (error)
		return error;

	if (el.el_ea) {
		/* Attribute exists: replace it (unless XATTR_CREATE). */
		if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
			brelse(el.el_bh);
			return -EPERM;
		}

		error = -EEXIST;
		if (!(er->er_flags & XATTR_CREATE)) {
			/* An unstuffed original isn't unlinked by
			   ea_set_i(); do it separately on success. */
			int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
			error = ea_set_i(ip, er, &el);
			if (!error && unstuffed)
				ea_set_remove_unstuffed(ip, &el);
		}

		brelse(el.el_bh);
	} else {
		/* Attribute absent: create it (unless XATTR_REPLACE). */
		error = -ENODATA;
		if (!(er->er_flags & XATTR_REPLACE))
			error = ea_set_i(ip, er, NULL);
	}

	return error;
}
1129
1130int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1131{
1132 struct gfs2_holder i_gh;
1133 int error;
1134
1135 if (!er->er_name_len ||
1136 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1137 return -EINVAL;
1138 if (!er->er_data || !er->er_data_len) {
1139 er->er_data = NULL;
1140 er->er_data_len = 0;
1141 }
1142 error = ea_check_size(ip->i_sbd, er);
1143 if (error)
1144 return error;
1145
1146 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1147 if (error)
1148 return error;
1149
1150 if (IS_IMMUTABLE(ip->i_vnode))
1151 error = -EPERM;
1152 else
1153 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1154
1155 gfs2_glock_dq_uninit(&i_gh);
1156
1157 return error;
1158}
1159
/*
 * ea_remove_stuffed - remove a stuffed EA: merge its record into its
 * predecessor, or mark it unused if it has none, then bump the
 * dinode's ctime.  Runs its own transaction.  Returns errno.
 */
static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
{
	struct gfs2_ea_header *ea = el->el_ea;
	struct gfs2_ea_header *prev = el->el_prev;
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
	if (error)
		return error;

	gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);

	if (prev) {
		uint32_t len;

		/* Fold this record's space into the previous one. */
		len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
		prev->ea_rec_len = cpu_to_be32(len);

		if (GFS2_EA_IS_LAST(ea))
			prev->ea_flags |= GFS2_EAFLAG_LAST;
	} else
		ea->ea_type = GFS2_EATYPE_UNUSED;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(ip->i_sbd);

	return error;
}
1196
1197int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1198{
1199 struct gfs2_ea_location el;
1200 int error;
1201
1202 if (!ip->i_di.di_eattr)
1203 return -ENODATA;
1204
1205 error = gfs2_ea_find(ip, er, &el);
1206 if (error)
1207 return error;
1208 if (!el.el_ea)
1209 return -ENODATA;
1210
1211 if (GFS2_EA_IS_STUFFED(el.el_ea))
1212 error = ea_remove_stuffed(ip, &el);
1213 else
1214 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
1215 0);
1216
1217 brelse(el.el_bh);
1218
1219 return error;
1220}
1221
/**
 * gfs2_ea_remove - removes an extended attribute from a file
 * @ip: pointer to the inode of the target file
 * @er: request information
 *
 * (The old description, "sets (or creates or replaces)", was a
 * copy-paste from gfs2_ea_set(); this function removes an attribute.)
 *
 * Returns: errno
 */

int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct gfs2_holder i_gh;
	int error;

	if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
		return -EINVAL;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return error;

	/* Removal is forbidden on immutable and append-only files. */
	if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
		error = -EPERM;
	else
		error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);

	gfs2_glock_dq_uninit(&i_gh);

	return error;
}
1251
/*
 * ea_acl_chmod_unstuffed - overwrite an unstuffed EA's data blocks
 * with @data (same length as the existing value).  Starts its own
 * transaction; NOTE the asymmetric cleanup: on the failure paths the
 * transaction is ended here (label "fail"), but on success it is left
 * open for the caller (gfs2_ea_acl_chmod) to end.  Returns errno.
 */
static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
				  struct gfs2_ea_header *ea, char *data)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head **bh;
	unsigned int amount = GFS2_EA_DATA_LEN(ea);
	unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
	uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
	unsigned int x;
	int error;

	bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
	if (!bh)
		return -ENOMEM;

	error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
	if (error)
		goto out;

	/* Phase 1: kick off reads of all data blocks. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
				       DIO_START, bh + x);
		if (error) {
			while (x--)
				brelse(bh[x]);
			goto fail;
		}
		dataptrs++;
	}

	/* Phase 2: wait for each block, verify, and overwrite payload. */
	for (x = 0; x < nptrs; x++) {
		error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
		if (error) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			goto fail;
		}
		if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
			for (; x < nptrs; x++)
				brelse(bh[x]);
			error = -EIO;
			goto fail;
		}

		gfs2_trans_add_bh(ip->i_gl, bh[x], 1);

		/* Final block may carry less than a full journaled block. */
		memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header),
		       data,
		       (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);

		amount -= sdp->sd_jbsize;
		data += sdp->sd_jbsize;

		brelse(bh[x]);
	}

 out:
	kfree(bh);

	return error;

 fail:
	gfs2_trans_end(sdp);
	kfree(bh);

	return error;
}
1319
/*
 * gfs2_ea_acl_chmod - rewrite an ACL EA's data in place (same length)
 * and apply @attr to the inode, as part of a chmod.  For the stuffed
 * case the transaction is begun here; for the unstuffed case
 * ea_acl_chmod_unstuffed() begins it (and ends it itself on failure).
 * Either way, on success the transaction is ended here.  Returns errno.
 */
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		      struct iattr *attr, char *data)
{
	struct buffer_head *dibh;
	int error;

	if (GFS2_EA_IS_STUFFED(el->el_ea)) {
		error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
		if (error)
			return error;

		gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
		memcpy(GFS2_EA2DATA(el->el_ea),
		       data,
		       GFS2_EA_DATA_LEN(el->el_ea));
	} else
		error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);

	/* On error no transaction is open at this point. */
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		error = inode_setattr(ip->i_vnode, attr);
		gfs2_assert_warn(ip->i_sbd, !error);
		gfs2_inode_attr_out(ip);
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(ip->i_sbd);

	return error;
}
1355
/*
 * ea_dealloc_indirect - free the inode's indirect EA block's pointees.
 * Two passes over the pointer array: the first collects the resource
 * groups the blocks live in (coalescing runs of contiguous blocks),
 * the second - with the rgrps locked and a transaction open - actually
 * frees the blocks, zeroes the pointers, and updates the dinode.
 * Returns errno.
 */
static int ea_dealloc_indirect(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_rgrp_list rlist;
	struct buffer_head *indbh, *dibh;
	uint64_t *eablk, *end;
	unsigned int rg_blocks = 0;
	uint64_t bstart = 0;
	unsigned int blen = 0;
	unsigned int blks = 0;
	unsigned int x;
	int error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));

	error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
			       DIO_START | DIO_WAIT, &indbh);
	if (error)
		return error;

	if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
		error = -EIO;
		goto out;
	}

	eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
	end = eablk + sdp->sd_inptrs;

	/* Pass 1: record which rgrps hold the EA blocks. */
	for (; eablk < end; eablk++) {
		uint64_t bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		/* Extend the current contiguous run, or start a new one. */
		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(sdp, &rlist, bstart);
			bstart = bn;
			blen = 1;
		}
		blks++;
	}
	if (bstart)
		gfs2_rlist_add(sdp, &rlist, bstart);
	else
		goto out;	/* pointer array was empty */

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);

	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
		rg_blocks += rgd->rd_ri.ri_length;
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist_free;

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
				 RES_INDIRECT + RES_STATFS +
				 RES_QUOTA, blks);
	if (error)
		goto out_gunlock;

	gfs2_trans_add_bh(ip->i_gl, indbh, 1);

	/* Pass 2: free the blocks and clear the pointers. */
	eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
	bstart = 0;
	blen = 0;

	for (; eablk < end; eablk++) {
		uint64_t bn;

		if (!*eablk)
			break;
		bn = be64_to_cpu(*eablk);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_free_meta(ip, bstart, blen);
			bstart = bn;
			blen = 1;
		}

		*eablk = 0;
		if (!ip->i_di.di_blocks)
			gfs2_consist_inode(ip);
		ip->i_di.di_blocks--;
	}
	if (bstart)
		gfs2_free_meta(ip, bstart, blen);

	ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

 out_gunlock:
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);

 out_rlist_free:
	gfs2_rlist_free(&rlist);

 out:
	brelse(indbh);

	return error;
}
1476
/*
 * ea_dealloc_block - free the inode's last remaining EA block (direct
 * or former indirect) and clear di_eattr.  Takes the block's resource
 * group exclusively for the duration.  Returns errno.
 */
static int ea_dealloc_block(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_rgrpd *rgd;
	struct buffer_head *dibh;
	int error;

	rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
	if (!rgd) {
		/* di_eattr points outside every resource group. */
		gfs2_consist_inode(ip);
		return -EIO;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
				   &al->al_rgd_gh);
	if (error)
		return error;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE +
				 RES_STATFS + RES_QUOTA, 1);
	if (error)
		goto out_gunlock;

	gfs2_free_meta(ip, ip->i_di.di_eattr, 1);

	ip->i_di.di_eattr = 0;
	if (!ip->i_di.di_blocks)
		gfs2_consist_inode(ip);
	ip->i_di.di_blocks--;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	gfs2_trans_end(sdp);

 out_gunlock:
	gfs2_glock_dq_uninit(&al->al_rgd_gh);

	return error;
}
1522
/**
 * gfs2_ea_dealloc - deallocate the extended attribute fork
 * @ip: the inode
 *
 * Frees everything in order: first each unstuffed EA's data blocks,
 * then the indirect block's pointees (if any), and finally the last
 * EA block itself.
 *
 * Returns: errno
 */

int gfs2_ea_dealloc(struct gfs2_inode *ip)
{
	struct gfs2_alloc *al;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out_alloc;

	error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
	if (error)
		goto out_quota;

	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
	if (error)
		goto out_rindex;

	if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
		error = ea_dealloc_indirect(ip);
		if (error)
			goto out_rindex;
	}

	error = ea_dealloc_block(ip);

 out_rindex:
	gfs2_glock_dq_uninit(&al->al_ri_gh);

 out_quota:
	gfs2_quota_unhold(ip);

 out_alloc:
	gfs2_alloc_put(ip);

	return error;
}
1568
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
new file mode 100644
index 000000000000..2b4152b1fcbe
--- /dev/null
+++ b/fs/gfs2/eattr.h
@@ -0,0 +1,88 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EATTR_DOT_H__
11#define __EATTR_DOT_H__
12
13#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
14#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)
15
16#define GFS2_EA_SIZE(ea) \
17ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
18 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
19 (sizeof(uint64_t) * (ea)->ea_num_ptrs)), 8)
20
21#define GFS2_EA_STRLEN(ea) \
22((((ea)->ea_type == GFS2_EATYPE_USR) ? 5 : 7) + (ea)->ea_name_len + 1)
23
24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
26
27#define GFS2_EAREQ_SIZE_STUFFED(er) \
28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
29
30#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
31ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
32 sizeof(uint64_t) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
33
34#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
35#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
36
37#define GFS2_EA2DATAPTRS(ea) \
38((uint64_t *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
39
40#define GFS2_EA2NEXT(ea) \
41((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
42
43#define GFS2_EA_BH2FIRST(bh) \
44((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
45
46#define GFS2_ERF_MODE 0x80000000
47
/* A single extended-attribute operation (get/set/remove/list). */
struct gfs2_ea_request {
	char *er_name;			/* attribute name (no prefix) */
	char *er_data;			/* value buffer, or NULL */
	unsigned int er_name_len;	/* length of er_name */
	unsigned int er_data_len;	/* length of er_data; 0 = no buffer */
	unsigned int er_type; /* GFS2_EATYPE_... */
	int er_flags;			/* XATTR_CREATE/REPLACE, GFS2_ERF_MODE */
	mode_t er_mode;			/* new mode when GFS2_ERF_MODE is set */
};

/* Where an EA record was found: its buffer, the record, and the
   record immediately before it in the same block (or NULL). */
struct gfs2_ea_location {
	struct buffer_head *el_bh;
	struct gfs2_ea_header *el_ea;
	struct gfs2_ea_header *el_prev;
};
63
64int gfs2_ea_repack(struct gfs2_inode *ip);
65
66int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
67int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
68int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
69
70int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
71int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
72int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
73int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
74
75int gfs2_ea_dealloc(struct gfs2_inode *ip);
76
77/* Exported to acl.c */
78
79int gfs2_ea_find(struct gfs2_inode *ip,
80 struct gfs2_ea_request *er,
81 struct gfs2_ea_location *el);
82int gfs2_ea_get_copy(struct gfs2_inode *ip,
83 struct gfs2_ea_location *el,
84 char *data);
85int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
86 struct iattr *attr, char *data);
87
88#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/format.h b/fs/gfs2/format.h
new file mode 100644
index 000000000000..c7bf32ce3eca
--- /dev/null
+++ b/fs/gfs2/format.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __FORMAT_DOT_H__
11#define __FORMAT_DOT_H__
12
/* Older on-disk format versions this code can upgrade from;
   zero-terminated list (currently empty). */
static const uint32_t gfs2_old_fs_formats[] = {
	0
};

/* Older multihost (locking protocol) formats this code can upgrade
   from; zero-terminated list (currently empty). */
static const uint32_t gfs2_old_multihost_formats[] = {
	0
};
20
21#endif /* __FORMAT_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
new file mode 100644
index 000000000000..57175f70e2bd
--- /dev/null
+++ b/fs/gfs2/gfs2.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GFS2_DOT_H__
11#define __GFS2_DOT_H__
12
/* Boolean argument values, named for readability at call sites. */
enum {
	NO_CREATE = 0,
	CREATE = 1,
};

enum {
	NO_WAIT = 0,
	WAIT = 1,
};

enum {
	NO_FORCE = 0,
	FORCE = 1,
};

#define GFS2_FAST_NAME_SIZE 8
29
30#endif /* __GFS2_DOT_H__ */
31
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
new file mode 100644
index 000000000000..6a1b42cf4df4
--- /dev/null
+++ b/fs/gfs2/glock.c
@@ -0,0 +1,2480 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/sort.h>
17#include <linux/jhash.h>
18#include <linux/kref.h>
19#include <linux/kallsyms.h>
20#include <linux/gfs2_ondisk.h>
21#include <asm/semaphore.h>
22#include <asm/uaccess.h>
23
24#include "gfs2.h"
25#include "lm_interface.h"
26#include "incore.h"
27#include "glock.h"
28#include "glops.h"
29#include "inode.h"
30#include "lm.h"
31#include "lops.h"
32#include "meta_io.h"
33#include "quota.h"
34#include "super.h"
35#include "util.h"
36
/* Must be kept in sync with the beginning of struct gfs2_glock.
   A "plug" is a dummy list entry (marked GLF_PLUG) inserted into a hash
   bucket so a scan can remember its position while dropping the lock. */
struct glock_plug {
	struct list_head gl_list;
	unsigned long gl_flags;
};

/* Holder plus the deferred work item used to drop a glock's "greedy"
   state some time after it was set. */
struct greedy {
	struct gfs2_holder gr_gh;
	struct work_struct gr_work;
};

/* Callback applied to each glock when walking the glock hash table. */
typedef void (*glock_examiner) (struct gfs2_glock * gl);
49
50/**
51 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
52 * @actual: the current state of the lock
53 * @requested: the lock state that was requested by the caller
54 * @flags: the modifier flags passed in by the caller
55 *
56 * Returns: 1 if the locks are compatible, 0 otherwise
57 */
58
59static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
60 int flags)
61{
62 if (actual == requested)
63 return 1;
64
65 if (flags & GL_EXACT)
66 return 0;
67
68 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
69 return 1;
70
71 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
72 return 1;
73
74 return 0;
75}
76
77/**
78 * gl_hash() - Turn glock number into hash bucket number
79 * @lock: The glock number
80 *
81 * Returns: The number of the corresponding hash bucket
82 */
83
84static unsigned int gl_hash(struct lm_lockname *name)
85{
86 unsigned int h;
87
88 h = jhash(&name->ln_number, sizeof(uint64_t), 0);
89 h = jhash(&name->ln_type, sizeof(unsigned int), h);
90 h &= GFS2_GL_HASH_MASK;
91
92 return h;
93}
94
95/**
96 * glock_free() - Perform a few checks and then release struct gfs2_glock
97 * @gl: The glock to release
98 *
99 * Also calls lock module to release its internal structure for this glock.
100 *
101 */
102
103static void glock_free(struct gfs2_glock *gl)
104{
105 struct gfs2_sbd *sdp = gl->gl_sbd;
106 struct inode *aspace = gl->gl_aspace;
107
108 gfs2_lm_put_lock(sdp, gl->gl_lock);
109
110 if (aspace)
111 gfs2_aspace_put(aspace);
112
113 kmem_cache_free(gfs2_glock_cachep, gl);
114}
115
/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 * The caller must already hold a valid reference; this just takes an
 * additional one via the embedded kref.
 */

void gfs2_glock_hold(struct gfs2_glock *gl)
{
	kref_get(&gl->gl_ref);
}
126
/* kref release callback for gl_ref.  All real work (unhashing and
   freeing) is done by the caller after kref_put() returns, so that the
   bucket write_lock can be released before the free.  Here we only
   sanity-check that the glock is fully idle: unlocked, not queued for
   reclaim, and with no holders or waiters of any kind. */

static void kill_glock(struct kref *kref)
{
	struct gfs2_glock *gl = container_of(kref, struct gfs2_glock, gl_ref);
	struct gfs2_sbd *sdp = gl->gl_sbd;

	gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
	gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
	gfs2_assert(sdp, list_empty(&gl->gl_holders));
	gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
	gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
	gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
}
142
/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 * If this was the last reference, the glock is removed from its hash
 * bucket and freed.  The bucket write_lock is held across kref_put() so
 * that a concurrent search_bucket() cannot resurrect a dying glock.
 * sd_invalidate_inodes_mutex serializes against invalidate_inodes()
 * (the aspace inode is released inside glock_free()).
 *
 * Returns: 1 if the glock was freed, 0 if references remain
 */

int gfs2_glock_put(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_gl_hash_bucket *bucket = gl->gl_bucket;
	int rv = 0;

	mutex_lock(&sdp->sd_invalidate_inodes_mutex);

	write_lock(&bucket->hb_lock);
	if (kref_put(&gl->gl_ref, kill_glock)) {
		/* Last reference: unhash, then free outside the bucket lock. */
		list_del_init(&gl->gl_list);
		write_unlock(&bucket->hb_lock);
		glock_free(gl);
		rv = 1;
		goto out;
	}
	write_unlock(&bucket->hb_lock);
 out:
	mutex_unlock(&sdp->sd_invalidate_inodes_mutex);
	return rv;
}
170
171/**
172 * queue_empty - check to see if a glock's queue is empty
173 * @gl: the glock
174 * @head: the head of the queue to check
175 *
176 * This function protects the list in the event that a process already
177 * has a holder on the list and is adding a second holder for itself.
178 * The glmutex lock is what generally prevents processes from working
179 * on the same glock at once, but the special case of adding a second
180 * holder for yourself ("recursive" locking) doesn't involve locking
181 * glmutex, making the spin lock necessary.
182 *
183 * Returns: 1 if the queue is empty
184 */
185
186static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
187{
188 int empty;
189 spin_lock(&gl->gl_spin);
190 empty = list_empty(head);
191 spin_unlock(&gl->gl_spin);
192 return empty;
193}
194
195/**
196 * search_bucket() - Find struct gfs2_glock by lock number
197 * @bucket: the bucket to search
198 * @name: The lock name
199 *
200 * Returns: NULL, or the struct gfs2_glock with the requested number
201 */
202
203static struct gfs2_glock *search_bucket(struct gfs2_gl_hash_bucket *bucket,
204 struct lm_lockname *name)
205{
206 struct gfs2_glock *gl;
207
208 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
209 if (test_bit(GLF_PLUG, &gl->gl_flags))
210 continue;
211 if (!lm_name_equal(&gl->gl_name, name))
212 continue;
213
214 kref_get(&gl->gl_ref);
215
216 return gl;
217 }
218
219 return NULL;
220}
221
222/**
223 * gfs2_glock_find() - Find glock by lock number
224 * @sdp: The GFS2 superblock
225 * @name: The lock name
226 *
227 * Returns: NULL, or the struct gfs2_glock with the requested number
228 */
229
230struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
231 struct lm_lockname *name)
232{
233 struct gfs2_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
234 struct gfs2_glock *gl;
235
236 read_lock(&bucket->hb_lock);
237 gl = search_bucket(bucket, name);
238 read_unlock(&bucket->hb_lock);
239
240 return gl;
241}
242
/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 * A reference is held on the returned glock (taken by search_bucket()
 * or by the kref_init() of a freshly built one).
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, uint64_t number,
		   struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct lm_lockname name;
	struct gfs2_glock *gl, *tmp;
	struct gfs2_gl_hash_bucket *bucket;
	int error;

	name.ln_number = number;
	name.ln_type = glops->go_type;
	bucket = &sdp->sd_gl_hash[gl_hash(&name)];

	/* Fast path: the glock may already be hashed. */
	read_lock(&bucket->hb_lock);
	gl = search_bucket(bucket, &name);
	read_unlock(&bucket->hb_lock);

	if (gl || !create) {
		*glp = gl;
		return 0;
	}

	/* Build a new glock outside any locks; we may race another
	   creator and resolve that below. */
	gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
	if (!gl)
		return -ENOMEM;

	memset(gl, 0, sizeof(struct gfs2_glock));

	INIT_LIST_HEAD(&gl->gl_list);
	gl->gl_name = name;
	kref_init(&gl->gl_ref);

	spin_lock_init(&gl->gl_spin);

	gl->gl_state = LM_ST_UNLOCKED;
	INIT_LIST_HEAD(&gl->gl_holders);
	INIT_LIST_HEAD(&gl->gl_waiters1);
	INIT_LIST_HEAD(&gl->gl_waiters2);
	INIT_LIST_HEAD(&gl->gl_waiters3);

	gl->gl_ops = glops;

	gl->gl_bucket = bucket;
	INIT_LIST_HEAD(&gl->gl_reclaim);

	gl->gl_sbd = sdp;

	lops_init_le(&gl->gl_le, &gfs2_glock_lops);
	INIT_LIST_HEAD(&gl->gl_ail_list);

	/* If this glock protects actual on-disk data or metadata blocks,
	   create a VFS inode to manage the pages/buffers holding them. */
	if (glops == &gfs2_inode_glops ||
	    glops == &gfs2_rgrp_glops ||
	    glops == &gfs2_meta_glops) {
		gl->gl_aspace = gfs2_aspace_get(sdp);
		if (!gl->gl_aspace) {
			error = -ENOMEM;
			goto fail;
		}
	}

	/* Ask the lock module for its per-lock structure. */
	error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
	if (error)
		goto fail_aspace;

	/* Re-check under the write lock: if someone beat us to it, throw
	   away our copy and use theirs (search_bucket took a ref). */
	write_lock(&bucket->hb_lock);
	tmp = search_bucket(bucket, &name);
	if (tmp) {
		write_unlock(&bucket->hb_lock);
		glock_free(gl);
		gl = tmp;
	} else {
		list_add_tail(&gl->gl_list, &bucket->hb_list);
		write_unlock(&bucket->hb_lock);
	}

	*glp = gl;

	return 0;

 fail_aspace:
	if (gl->gl_aspace)
		gfs2_aspace_put(gl->gl_aspace);

 fail:
	kmem_cache_free(gfs2_glock_cachep, gl);

	return error;
}
346
/**
 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Takes a reference on the glock; gfs2_holder_uninit() drops it.
 */

void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
		      struct gfs2_holder *gh)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gl;
	/* Record the caller's address for debugging lock problems. */
	gh->gh_ip = (unsigned long)__builtin_return_address(0);
	/* A NULL owner opts this holder out of recursive-lock detection. */
	gh->gh_owner = (flags & GL_NEVER_RECURSE) ? NULL : current;
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_error = 0;
	gh->gh_iflags = 0;
	init_completion(&gh->gh_wait);

	/* An exclusive request is implicitly locally exclusive too. */
	if (gh->gh_state == LM_ST_EXCLUSIVE)
		gh->gh_flags |= GL_LOCAL_EXCL;

	gfs2_glock_hold(gl);
}
374
/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock (no extra reference is taken).
 *
 */

void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	/* As in gfs2_holder_init(): exclusive implies locally exclusive. */
	if (gh->gh_state == LM_ST_EXCLUSIVE)
		gh->gh_flags |= GL_LOCAL_EXCL;

	/* Wipe all internal flags except HIF_ALLOCED, which records that
	   this holder was kmalloc'ed and must be kfree'd on release. */
	gh->gh_iflags &= 1 << HIF_ALLOCED;
	gh->gh_ip = (unsigned long)__builtin_return_address(0);
}
395
/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 * Pairs with gfs2_holder_init(); the holder memory itself is not freed.
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_put(gh->gh_gl);
	gh->gh_gl = NULL;
	gh->gh_ip = 0;
}
408
/**
 * gfs2_holder_get - allocate and initialize a struct gfs2_holder
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gfp_flags: allocation flags, e.g. __GFP_NOFAIL
 *
 * Figure out how big an impact this function has. Either:
 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
 * 2) Leave it like it is
 *
 * Returns: the holder structure, NULL on ENOMEM
 */

struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
				    int flags, gfp_t gfp_flags)
{
	struct gfs2_holder *gh;

	gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
	if (!gh)
		return NULL;

	gfs2_holder_init(gl, state, flags, gh);
	/* HIF_ALLOCED tells gfs2_holder_put()/reinit that gh was kmalloc'ed. */
	set_bit(HIF_ALLOCED, &gh->gh_iflags);
	/* Overwrite the gh_ip that gfs2_holder_init() recorded so it points
	   at our caller, not at this wrapper. */
	gh->gh_ip = (unsigned long)__builtin_return_address(0);
	return gh;
}
437
/**
 * gfs2_holder_put - release a holder obtained from gfs2_holder_get()
 * @gh: the holder structure
 *
 * Drops the glock reference and frees the kmalloc'ed holder.
 */

void gfs2_holder_put(struct gfs2_holder *gh)
{
	gfs2_holder_uninit(gh);
	kfree(gh);
}
449
/**
 * handle_recurse - put other holder structures (marked recursive)
 * into the holders list
 * @gh: the holder structure that was just granted
 *
 * Called with gl_spin held.  When @gh (owned by some task) is granted,
 * every other waiting holder with the same owner and HIF_RECURSE set is
 * granted too: moved onto gl_holders, marked HIF_HOLDER, and its waiter
 * completed.  At least one such holder must exist, or the RECURSE flag
 * was set in error.
 */

static void handle_recurse(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_holder *tmp_gh, *safe;
	int found = 0;

	/* Recursive grants only make sense for owned (task) holders. */
	if (gfs2_assert_warn(sdp, gh->gh_owner))
		return;

	list_for_each_entry_safe(tmp_gh, safe, &gl->gl_waiters3, gh_list) {
		if (tmp_gh->gh_owner != gh->gh_owner)
			continue;

		gfs2_assert_warn(sdp,
				 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));

		list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
		tmp_gh->gh_error = 0;
		set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);

		complete(&tmp_gh->gh_wait);

		found = 1;
	}

	gfs2_assert_warn(sdp, found);
}
485
/**
 * do_unrecurse - a recursive holder was just dropped off the waiters3 list
 * @gh: the holder that was removed
 *
 * Called with gl_spin held.  If exactly one other holder with the same
 * owner remains queued, it is no longer part of a recursive pair, so its
 * HIF_RECURSE bit is cleared.  If more than one remains, they are left
 * alone.  If none remains, the assert fires (RECURSE implied a partner).
 */

static void do_unrecurse(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_holder *tmp_gh, *last_gh = NULL;
	int found = 0;

	if (gfs2_assert_warn(sdp, gh->gh_owner))
		return;

	list_for_each_entry(tmp_gh, &gl->gl_waiters3, gh_list) {
		if (tmp_gh->gh_owner != gh->gh_owner)
			continue;

		gfs2_assert_warn(sdp,
				 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));

		/* Second same-owner holder found: leave flags as they are. */
		if (found)
			return;

		found = 1;
		last_gh = tmp_gh;
	}

	/* Exactly one left: it is no longer recursive. */
	if (!gfs2_assert_warn(sdp, found))
		clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
}
522
/**
 * rq_mutex - process a glmutex request in the queue
 * @gh: the glock holder (queued by gfs2_glmutex_lock())
 *
 * Called with gl_spin held.  Hands GLF_LOCK to the waiter and wakes it.
 *
 * Returns: 1 if the queue is blocked (it always is: the waiter now owns
 * the glock exclusively until gfs2_glmutex_unlock())
 */

static int rq_mutex(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;

	list_del_init(&gh->gh_list);
	/* gh->gh_error never examined. */
	set_bit(GLF_LOCK, &gl->gl_flags);
	complete(&gh->gh_wait);

	return 1;
}
541
/**
 * rq_promote - process a promote request in the queue
 * @gh: the glock holder
 *
 * Called with gl_spin held; may drop and retake it around the lock
 * module call.  Acquire a new inter-node lock, or change a lock state
 * to a more restrictive one.
 *
 * Returns: 1 if the queue is blocked
 */

static int rq_promote(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_glock_operations *glops = gl->gl_ops;
	int recurse;

	/* The current inter-node state can't satisfy this request:
	   ask the lock module for a state change (only once there are
	   no local holders left). */
	if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
		if (list_empty(&gl->gl_holders)) {
			gl->gl_req_gh = gh;
			set_bit(GLF_LOCK, &gl->gl_flags);
			spin_unlock(&gl->gl_spin);

			/* Opportunistically trim the reclaim backlog before
			   adding yet another held lock (skip for PRIORITY
			   requests, which shouldn't be delayed). */
			if (atomic_read(&sdp->sd_reclaim_count) >
			    gfs2_tune_get(sdp, gt_reclaim_limit) &&
			    !(gh->gh_flags & LM_FLAG_PRIORITY)) {
				gfs2_reclaim_glock(sdp);
				gfs2_reclaim_glock(sdp);
			}

			glops->go_xmote_th(gl, gh->gh_state,
					   gh->gh_flags);

			spin_lock(&gl->gl_spin);
		}
		return 1;
	}

	/* The inter-node state already covers the request; decide whether
	   it can also be granted locally. */
	if (list_empty(&gl->gl_holders)) {
		set_bit(HIF_FIRST, &gh->gh_iflags);
		set_bit(GLF_LOCK, &gl->gl_flags);
		recurse = 0;
	} else {
		struct gfs2_holder *next_gh;
		/* Locally-exclusive holders can't share with anyone. */
		if (gh->gh_flags & GL_LOCAL_EXCL)
			return 1;
		next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
				     gh_list);
		if (next_gh->gh_flags & GL_LOCAL_EXCL)
			return 1;
		recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
	}

	list_move_tail(&gh->gh_list, &gl->gl_holders);
	gh->gh_error = 0;
	set_bit(HIF_HOLDER, &gh->gh_iflags);

	/* Grant any queued same-owner recursive holders along with us. */
	if (recurse)
		handle_recurse(gh);

	complete(&gh->gh_wait);

	return 0;
}
605
/**
 * rq_demote - process a demote request in the queue
 * @gh: the glock holder
 *
 * Called with gl_spin held; may drop and retake it around the lock
 * module call.  Demotes are deferred while local holders remain.
 *
 * Returns: 1 if the queue is blocked
 */

static int rq_demote(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_glock_operations *glops = gl->gl_ops;

	if (!list_empty(&gl->gl_holders))
		return 1;

	if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
		/* Already in the requested (or weaker) state: nothing to do
		   but complete or free the request. */
		list_del_init(&gh->gh_list);
		gh->gh_error = 0;
		spin_unlock(&gl->gl_spin);
		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
			gfs2_holder_put(gh);
		else
			complete(&gh->gh_wait);
		spin_lock(&gl->gl_spin);
	} else {
		gl->gl_req_gh = gh;
		set_bit(GLF_LOCK, &gl->gl_flags);
		spin_unlock(&gl->gl_spin);

		/* Full unlock, or a conversion between held states; only
		   EXCLUSIVE can convert directly to another held state. */
		if (gh->gh_state == LM_ST_UNLOCKED ||
		    gl->gl_state != LM_ST_EXCLUSIVE)
			glops->go_drop_th(gl);
		else
			glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);

		spin_lock(&gl->gl_spin);
	}

	return 0;
}
646
/**
 * rq_greedy - process a queued request to drop greedy status
 * @gh: the glock holder (embedded in a struct greedy)
 *
 * Called with gl_spin held; drops and retakes it around the free.
 * Clears GLF_GREEDY and frees the containing struct greedy.
 *
 * Returns: 1 if the queue is blocked (always 0 here)
 */

static int rq_greedy(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;

	list_del_init(&gh->gh_list);
	/* gh->gh_error never examined. */
	clear_bit(GLF_GREEDY, &gl->gl_flags);
	spin_unlock(&gl->gl_spin);

	gfs2_holder_uninit(gh);
	kfree(container_of(gh, struct greedy, gr_gh));

	spin_lock(&gl->gl_spin);

	return 0;
}
670
/**
 * run_queue - process holder structures on a glock
 * @gl: the glock
 *
 * Called with gl_spin held.  Dispatches queued requests in priority
 * order until one blocks or GLF_LOCK is taken: waiters1 (glmutex)
 * first, then waiters2 (demote/greedy, unless temporarily skipped),
 * then waiters3 (promote).
 */

static void run_queue(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;
	int blocked = 1;

	for (;;) {
		/* Someone else owns the glock structure; they will rerun
		   the queue when they release it. */
		if (test_bit(GLF_LOCK, &gl->gl_flags))
			break;

		if (!list_empty(&gl->gl_waiters1)) {
			gh = list_entry(gl->gl_waiters1.next,
					struct gfs2_holder, gh_list);

			if (test_bit(HIF_MUTEX, &gh->gh_iflags))
				blocked = rq_mutex(gh);
			else
				gfs2_assert_warn(gl->gl_sbd, 0);

		} else if (!list_empty(&gl->gl_waiters2) &&
			   !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
			gh = list_entry(gl->gl_waiters2.next,
					struct gfs2_holder, gh_list);

			if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
				blocked = rq_demote(gh);
			else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
				blocked = rq_greedy(gh);
			else
				gfs2_assert_warn(gl->gl_sbd, 0);

		} else if (!list_empty(&gl->gl_waiters3)) {
			gh = list_entry(gl->gl_waiters3.next,
					struct gfs2_holder, gh_list);

			if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
				blocked = rq_promote(gh);
			else
				gfs2_assert_warn(gl->gl_sbd, 0);

		} else
			break;

		if (blocked)
			break;
	}
}
723
/**
 * gfs2_glmutex_lock - acquire a local lock on a glock
 * @gl: the glock
 *
 * Gives the caller exclusive access to manipulate the glock structure
 * (sets GLF_LOCK).  Implemented with an on-stack holder queued on
 * gl_waiters1; may sleep until run_queue() grants it.
 */

void gfs2_glmutex_lock(struct gfs2_glock *gl)
{
	struct gfs2_holder gh;

	gfs2_holder_init(gl, 0, 0, &gh);
	set_bit(HIF_MUTEX, &gh.gh_iflags);

	spin_lock(&gl->gl_spin);
	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
		list_add_tail(&gh.gh_list, &gl->gl_waiters1);
	else
		/* Uncontended: we took GLF_LOCK ourselves. */
		complete(&gh.gh_wait);
	spin_unlock(&gl->gl_spin);

	wait_for_completion(&gh.gh_wait);
	gfs2_holder_uninit(&gh);
}
748
749/**
750 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
751 * @gl: the glock
752 *
753 * Returns: 1 if the glock is acquired
754 */
755
756int gfs2_glmutex_trylock(struct gfs2_glock *gl)
757{
758 int acquired = 1;
759
760 spin_lock(&gl->gl_spin);
761 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
762 acquired = 0;
763 spin_unlock(&gl->gl_spin);
764
765 return acquired;
766}
767
/**
 * gfs2_glmutex_unlock - release a local lock on a glock
 * @gl: the glock
 *
 * Clears GLF_LOCK and reruns the request queue so the next waiter
 * (if any) can proceed.
 */

void gfs2_glmutex_unlock(struct gfs2_glock *gl)
{
	spin_lock(&gl->gl_spin);
	clear_bit(GLF_LOCK, &gl->gl_flags);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);
}
781
782/**
783 * handle_callback - add a demote request to a lock's queue
784 * @gl: the glock
785 * @state: the state the caller wants us to change to
786 *
787 */
788
789static void handle_callback(struct gfs2_glock *gl, unsigned int state)
790{
791 struct gfs2_holder *gh, *new_gh = NULL;
792
793 restart:
794 spin_lock(&gl->gl_spin);
795
796 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
797 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
798 gl->gl_req_gh != gh) {
799 if (gh->gh_state != state)
800 gh->gh_state = LM_ST_UNLOCKED;
801 goto out;
802 }
803 }
804
805 if (new_gh) {
806 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
807 new_gh = NULL;
808 } else {
809 spin_unlock(&gl->gl_spin);
810
811 new_gh = gfs2_holder_get(gl, state,
812 LM_FLAG_TRY | GL_NEVER_RECURSE,
813 GFP_KERNEL | __GFP_NOFAIL),
814 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
815 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
816
817 goto restart;
818 }
819
820 out:
821 spin_unlock(&gl->gl_spin);
822
823 if (new_gh)
824 gfs2_holder_put(new_gh);
825}
826
/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 *
 * A glock holds a reference on itself for as long as it is in any
 * locked state, so the refcount is adjusted on transitions to or from
 * LM_ST_UNLOCKED.
 */

static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	int held1, held2;

	held1 = (gl->gl_state != LM_ST_UNLOCKED);
	held2 = (new_state != LM_ST_UNLOCKED);

	if (held1 != held2) {
		if (held2)
			gfs2_glock_hold(gl);
		else
			gfs2_glock_put(gl);
	}

	gl->gl_state = new_state;
}
850
/**
 * xmote_bh - Called after the lock module is done acquiring a lock
 * @gl: The glock in question
 * @ret: the int returned from the lock module (new state + LM_OUT_* flags)
 *
 * Completion ("bottom half") of gfs2_glock_xmote_th().  Records the new
 * state, invalidates cached data when the transition requires it, then
 * resolves the request holder (gl_req_gh) according to how the lock
 * module finished: shutdown, demote, cancel, success, or try-failure.
 */

static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh = gl->gl_req_gh;
	int prev_state = gl->gl_state;
	int op_done = 1;

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));

	state_change(gl, ret & LM_OUT_ST_MASK);

	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
		/* We lost caching rights for the previous state. */
		if (glops->go_inval)
			glops->go_inval(gl, DIO_METADATA | DIO_DATA);
	} else if (gl->gl_state == LM_ST_DEFERRED) {
		/* We might not want to do this here.
		   Look at moving to the inode glops. */
		if (glops->go_inval)
			glops->go_inval(gl, DIO_DATA);
	}

	/* Deal with each possible exit condition */

	if (!gh)
		/* No requester (e.g. prefetch); just note when the state
		   last changed, for the demote heuristics. */
		gl->gl_stamp = jiffies;

	else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = -EIO;
		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
			do_unrecurse(gh);
		spin_unlock(&gl->gl_spin);

	} else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		if (gl->gl_state == gh->gh_state ||
		    gl->gl_state == LM_ST_UNLOCKED)
			gh->gh_error = 0;
		else {
			/* A demote may only fail if it was a TRY request. */
			if (gfs2_assert_warn(sdp, gh->gh_flags &
					(LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
				fs_warn(sdp, "ret = 0x%.8X\n", ret);
			gh->gh_error = GLR_TRYFAILED;
		}
		spin_unlock(&gl->gl_spin);

		if (ret & LM_OUT_CANCELED)
			handle_callback(gl, LM_ST_UNLOCKED); /* Lame */

	} else if (ret & LM_OUT_CANCELED) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = GLR_CANCELED;
		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
			do_unrecurse(gh);
		spin_unlock(&gl->gl_spin);

	} else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
		/* Success: promote the requester to holder.  GLF_LOCK is
		   kept (op_done = 0) until glock_wait_internal() has run
		   the go_lock callback. */
		spin_lock(&gl->gl_spin);
		list_move_tail(&gh->gh_list, &gl->gl_holders);
		gh->gh_error = 0;
		set_bit(HIF_HOLDER, &gh->gh_iflags);
		spin_unlock(&gl->gl_spin);

		set_bit(HIF_FIRST, &gh->gh_iflags);

		op_done = 0;

	} else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = GLR_TRYFAILED;
		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
			do_unrecurse(gh);
		spin_unlock(&gl->gl_spin);

	} else {
		/* The lock module returned something inexplicable. */
		if (gfs2_assert_withdraw(sdp, 0) == -1)
			fs_err(sdp, "ret = 0x%.8X\n", ret);
	}

	if (glops->go_xmote_bh)
		glops->go_xmote_bh(gl);

	if (op_done) {
		spin_lock(&gl->gl_spin);
		gl->gl_req_gh = NULL;
		gl->gl_req_bh = NULL;
		clear_bit(GLF_LOCK, &gl->gl_flags);
		run_queue(gl);
		spin_unlock(&gl->gl_spin);
	}

	/* Drop the reference gfs2_glock_xmote_th() took for the request. */
	gfs2_glock_put(gl);

	if (gh) {
		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
			gfs2_holder_put(gh);
		else
			complete(&gh->gh_wait);
	}
}
965
/**
 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
 * @gl: The glock in question
 * @state: the requested state
 * @flags: modifier flags to the lock call
 *
 * Caller must own the glock (GLF_LOCK) with no local holders.  A glock
 * reference is taken for the in-flight request; xmote_bh() (called
 * synchronously below or later by the lock module) releases it.
 */

void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_glock_operations *glops = gl->gl_ops;
	/* Pass through only the flags the lock module understands. */
	int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
				 LM_FLAG_NOEXP | LM_FLAG_ANY |
				 LM_FLAG_PRIORITY);
	unsigned int lck_ret;

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
	gfs2_assert_warn(sdp, state != gl->gl_state);

	/* Leaving EXCLUSIVE: flush dirty data/metadata first, since other
	   nodes may read it once we demote. */
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		if (glops->go_sync)
			glops->go_sync(gl,
				       DIO_METADATA | DIO_DATA | DIO_RELEASE);
	}

	gfs2_glock_hold(gl);
	gl->gl_req_bh = xmote_bh;

	lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state,
			       lck_flags);

	if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
		return;

	if (lck_ret & LM_OUT_ASYNC)
		/* Completion will arrive via gl_req_bh; ASYNC must be the
		   only bit set. */
		gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
	else
		xmote_bh(gl, lck_ret);
}
1008
/**
 * drop_bh - Called after a lock module unlock completes
 * @gl: the glock
 * @ret: the return status (expected to be 0)
 *
 * Completion ("bottom half") of gfs2_glock_drop_th().  Marks the glock
 * unlocked, invalidates all cached data/metadata, resolves the request
 * holder (if any), and reruns the queue.
 *
 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
 * Doesn't drop the reference on the glock the top half took out
 * (gfs2_glock_put() below pairs with the hold in gfs2_glock_drop_th();
 * state_change() drops the "held state" reference separately).
 */

static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh = gl->gl_req_gh;

	clear_bit(GLF_PREFETCH, &gl->gl_flags);

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, !ret);

	state_change(gl, LM_ST_UNLOCKED);

	/* Unlocked means no caching rights at all. */
	if (glops->go_inval)
		glops->go_inval(gl, DIO_METADATA | DIO_DATA);

	if (gh) {
		spin_lock(&gl->gl_spin);
		list_del_init(&gh->gh_list);
		gh->gh_error = 0;
		spin_unlock(&gl->gl_spin);
	}

	if (glops->go_drop_bh)
		glops->go_drop_bh(gl);

	spin_lock(&gl->gl_spin);
	gl->gl_req_gh = NULL;
	gl->gl_req_bh = NULL;
	clear_bit(GLF_LOCK, &gl->gl_flags);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);

	gfs2_glock_put(gl);

	if (gh) {
		if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
			gfs2_holder_put(gh);
		else
			complete(&gh->gh_wait);
	}
}
1062
/**
 * gfs2_glock_drop_th - call into the lock module to unlock a lock
 * @gl: the glock
 *
 * Caller must own the glock (GLF_LOCK) with no local holders and the
 * glock must be in some locked state.  A reference is taken for the
 * in-flight request; drop_bh() (synchronous or async) releases it.
 */

void gfs2_glock_drop_th(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned int ret;

	gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
	gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
	gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);

	/* Flush dirty data/metadata before giving up EXCLUSIVE. */
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		if (glops->go_sync)
			glops->go_sync(gl,
				       DIO_METADATA | DIO_DATA | DIO_RELEASE);
	}

	gfs2_glock_hold(gl);
	gl->gl_req_bh = drop_bh;

	ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);

	if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
		return;

	if (!ret)
		/* Unlock completed synchronously. */
		drop_bh(gl, ret);
	else
		gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
}
1098
1099/**
1100 * do_cancels - cancel requests for locks stuck waiting on an expire flag
1101 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
1102 *
1103 * Don't cancel GL_NOCANCEL requests.
1104 */
1105
1106static void do_cancels(struct gfs2_holder *gh)
1107{
1108 struct gfs2_glock *gl = gh->gh_gl;
1109
1110 spin_lock(&gl->gl_spin);
1111
1112 while (gl->gl_req_gh != gh &&
1113 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1114 !list_empty(&gh->gh_list)) {
1115 if (gl->gl_req_bh &&
1116 !(gl->gl_req_gh &&
1117 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
1118 spin_unlock(&gl->gl_spin);
1119 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
1120 msleep(100);
1121 spin_lock(&gl->gl_spin);
1122 } else {
1123 spin_unlock(&gl->gl_spin);
1124 msleep(100);
1125 spin_lock(&gl->gl_spin);
1126 }
1127 }
1128
1129 spin_unlock(&gl->gl_spin);
1130}
1131
/**
 * glock_wait_internal - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Sleeps until the request queued by gfs2_glock_nq() completes.  TRY
 * requests that are still queued (not yet being serviced) are failed
 * immediately rather than waited on.  On a first-grant (HIF_FIRST) the
 * go_lock callback runs here, after which GLF_LOCK is released and the
 * queue rerun.
 *
 * Returns: 0 on success, GLR_TRYFAILED/GLR_CANCELED/-errno otherwise
 */

static int glock_wait_internal(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_glock_operations *glops = gl->gl_ops;

	if (test_bit(HIF_ABORTED, &gh->gh_iflags))
		return -EIO;

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		spin_lock(&gl->gl_spin);
		/* Still queued and not the request in flight: a TRY must
		   not block, so withdraw it now. */
		if (gl->gl_req_gh != gh &&
		    !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
		    !list_empty(&gh->gh_list)) {
			list_del_init(&gh->gh_list);
			gh->gh_error = GLR_TRYFAILED;
			if (test_bit(HIF_RECURSE, &gh->gh_iflags))
				do_unrecurse(gh);
			run_queue(gl);
			spin_unlock(&gl->gl_spin);
			return gh->gh_error;
		}
		spin_unlock(&gl->gl_spin);
	}

	/* Priority requests try to cancel whatever is in their way. */
	if (gh->gh_flags & LM_FLAG_PRIORITY)
		do_cancels(gh);

	wait_for_completion(&gh->gh_wait);

	if (gh->gh_error)
		return gh->gh_error;

	gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
	gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state,
						   gh->gh_state,
						   gh->gh_flags));

	if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
		/* First holder after a state change: GLF_LOCK is still held
		   for us (xmote_bh left op_done = 0). */
		gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));

		if (glops->go_lock) {
			gh->gh_error = glops->go_lock(gh);
			if (gh->gh_error) {
				spin_lock(&gl->gl_spin);
				list_del_init(&gh->gh_list);
				if (test_and_clear_bit(HIF_RECURSE,
						       &gh->gh_iflags))
					do_unrecurse(gh);
				spin_unlock(&gl->gl_spin);
			}
		}

		spin_lock(&gl->gl_spin);
		gl->gl_req_gh = NULL;
		gl->gl_req_bh = NULL;
		clear_bit(GLF_LOCK, &gl->gl_flags);
		/* Grant same-owner recursive holders queued behind us. */
		if (test_bit(HIF_RECURSE, &gh->gh_iflags))
			handle_recurse(gh);
		run_queue(gl);
		spin_unlock(&gl->gl_spin);
	}

	return gh->gh_error;
}
1204
1205static inline struct gfs2_holder *
1206find_holder_by_owner(struct list_head *head, struct task_struct *owner)
1207{
1208 struct gfs2_holder *gh;
1209
1210 list_for_each_entry(gh, head, gh_list) {
1211 if (gh->gh_owner == owner)
1212 return gh;
1213 }
1214
1215 return NULL;
1216}
1217
/**
 * recurse_check - validate a recursive (same-owner) lock request
 * @existing: the holder the owner already has queued or granted
 * @new: the holder being added
 * @state: the state to check @new's request against
 *
 * Make sure the new holder is compatible with the pre-existing one:
 * no LM_FLAG_ANY/GL_LOCAL_EXCL combinations that could deadlock the
 * task against itself, and a state the existing grant can satisfy.
 * On failure the new holder is marked HIF_ABORTED and both holders'
 * call sites are printed for debugging.
 *
 * Returns: 0 if compatible, -EINVAL otherwise
 */

static int recurse_check(struct gfs2_holder *existing, struct gfs2_holder *new,
			 unsigned int state)
{
	struct gfs2_sbd *sdp = existing->gh_gl->gl_sbd;

	if (gfs2_assert_warn(sdp, (new->gh_flags & LM_FLAG_ANY) ||
				  !(existing->gh_flags & LM_FLAG_ANY)))
		goto fail;

	if (gfs2_assert_warn(sdp, (existing->gh_flags & GL_LOCAL_EXCL) ||
				  !(new->gh_flags & GL_LOCAL_EXCL)))
		goto fail;

	if (gfs2_assert_warn(sdp, relaxed_state_ok(state, new->gh_state,
						   new->gh_flags)))
		goto fail;

	return 0;

 fail:
	print_symbol(KERN_WARNING "GFS2: Existing holder from %s\n",
		     existing->gh_ip);
	print_symbol(KERN_WARNING "GFS2: New holder from %s\n", new->gh_ip);
	set_bit(HIF_ABORTED, &new->gh_iflags);
	return -EINVAL;
}
1251
/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Called with gl_spin held.  If this task already holds the glock, the
 * new holder (if compatible) is granted immediately.  If this task
 * already has a holder waiting, the two are paired via HIF_RECURSE so
 * they will be granted together.  Otherwise the holder is queued on
 * gl_waiters3 — at the head for LM_FLAG_PRIORITY requests.
 */

static void add_to_queue(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_holder *existing;

	/* Ownerless holders never participate in recursion detection. */
	if (!gh->gh_owner)
		goto out;

	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
	if (existing) {
		/* Already a holder: grant the recursive request in place
		   (recurse_check aborts it on incompatibility). */
		if (recurse_check(existing, gh, gl->gl_state))
			return;

		list_add_tail(&gh->gh_list, &gl->gl_holders);
		set_bit(HIF_HOLDER, &gh->gh_iflags);

		gh->gh_error = 0;
		complete(&gh->gh_wait);

		return;
	}

	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
	if (existing) {
		/* Already waiting: mark both so they're granted together. */
		if (recurse_check(existing, gh, existing->gh_state))
			return;

		set_bit(HIF_RECURSE, &gh->gh_iflags);
		set_bit(HIF_RECURSE, &existing->gh_iflags);

		list_add_tail(&gh->gh_list, &gl->gl_waiters3);

		return;
	}

 out:
	if (gh->gh_flags & LM_FLAG_PRIORITY)
		list_add(&gh->gh_list, &gl->gl_waiters3);
	else
		list_add_tail(&gh->gh_list, &gl->gl_waiters3);
}
1299
1300/**
1301 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1302 * @gh: the holder structure
1303 *
1304 * if (gh->gh_flags & GL_ASYNC), this never returns an error
1305 *
1306 * Returns: 0, GLR_TRYFAILED, or errno on failure
1307 */
1308
1309int gfs2_glock_nq(struct gfs2_holder *gh)
1310{
1311 struct gfs2_glock *gl = gh->gh_gl;
1312 struct gfs2_sbd *sdp = gl->gl_sbd;
1313 int error = 0;
1314
1315 restart:
1316 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1317 set_bit(HIF_ABORTED, &gh->gh_iflags);
1318 return -EIO;
1319 }
1320
1321 set_bit(HIF_PROMOTE, &gh->gh_iflags);
1322
1323 spin_lock(&gl->gl_spin);
1324 add_to_queue(gh);
1325 run_queue(gl);
1326 spin_unlock(&gl->gl_spin);
1327
1328 if (!(gh->gh_flags & GL_ASYNC)) {
1329 error = glock_wait_internal(gh);
1330 if (error == GLR_CANCELED) {
1331 msleep(1000);
1332 goto restart;
1333 }
1334 }
1335
1336 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1337
1338 return error;
1339}
1340
1341/**
1342 * gfs2_glock_poll - poll to see if an async request has been completed
1343 * @gh: the holder
1344 *
1345 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1346 */
1347
1348int gfs2_glock_poll(struct gfs2_holder *gh)
1349{
1350 struct gfs2_glock *gl = gh->gh_gl;
1351 int ready = 0;
1352
1353 spin_lock(&gl->gl_spin);
1354
1355 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1356 ready = 1;
1357 else if (list_empty(&gh->gh_list)) {
1358 if (gh->gh_error == GLR_CANCELED) {
1359 spin_unlock(&gl->gl_spin);
1360 msleep(1000);
1361 if (gfs2_glock_nq(gh))
1362 return 1;
1363 return 0;
1364 } else
1365 ready = 1;
1366 }
1367
1368 spin_unlock(&gl->gl_spin);
1369
1370 return ready;
1371}
1372
1373/**
1374 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1375 * @gh: the holder structure
1376 *
1377 * Returns: 0, GLR_TRYFAILED, or errno on failure
1378 */
1379
1380int gfs2_glock_wait(struct gfs2_holder *gh)
1381{
1382 int error;
1383
1384 error = glock_wait_internal(gh);
1385 if (error == GLR_CANCELED) {
1386 msleep(1000);
1387 gh->gh_flags &= ~GL_ASYNC;
1388 error = gfs2_glock_nq(gh);
1389 }
1390
1391 return error;
1392}
1393
1394/**
1395 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1396 * @gh: the glock holder
1397 *
1398 */
1399
1400void gfs2_glock_dq(struct gfs2_holder *gh)
1401{
1402 struct gfs2_glock *gl = gh->gh_gl;
1403 struct gfs2_glock_operations *glops = gl->gl_ops;
1404
1405 if (gh->gh_flags & GL_SYNC)
1406 set_bit(GLF_SYNC, &gl->gl_flags);
1407
1408 if (gh->gh_flags & GL_NOCACHE)
1409 handle_callback(gl, LM_ST_UNLOCKED);
1410
1411 gfs2_glmutex_lock(gl);
1412
1413 spin_lock(&gl->gl_spin);
1414 list_del_init(&gh->gh_list);
1415
1416 if (list_empty(&gl->gl_holders)) {
1417 spin_unlock(&gl->gl_spin);
1418
1419 if (glops->go_unlock)
1420 glops->go_unlock(gh);
1421
1422 if (test_bit(GLF_SYNC, &gl->gl_flags)) {
1423 if (glops->go_sync)
1424 glops->go_sync(gl, DIO_METADATA | DIO_DATA);
1425 }
1426
1427 gl->gl_stamp = jiffies;
1428
1429 spin_lock(&gl->gl_spin);
1430 }
1431
1432 clear_bit(GLF_LOCK, &gl->gl_flags);
1433 run_queue(gl);
1434 spin_unlock(&gl->gl_spin);
1435}
1436
1437/**
1438 * gfs2_glock_prefetch - Try to prefetch a glock
1439 * @gl: the glock
1440 * @state: the state to prefetch in
1441 * @flags: flags passed to go_xmote_th()
1442 *
1443 */
1444
1445void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags)
1446{
1447 struct gfs2_glock_operations *glops = gl->gl_ops;
1448
1449 spin_lock(&gl->gl_spin);
1450
1451 if (test_bit(GLF_LOCK, &gl->gl_flags) ||
1452 !list_empty(&gl->gl_holders) ||
1453 !list_empty(&gl->gl_waiters1) ||
1454 !list_empty(&gl->gl_waiters2) ||
1455 !list_empty(&gl->gl_waiters3) ||
1456 relaxed_state_ok(gl->gl_state, state, flags)) {
1457 spin_unlock(&gl->gl_spin);
1458 return;
1459 }
1460
1461 set_bit(GLF_PREFETCH, &gl->gl_flags);
1462 set_bit(GLF_LOCK, &gl->gl_flags);
1463 spin_unlock(&gl->gl_spin);
1464
1465 glops->go_xmote_th(gl, state, flags);
1466}
1467
1468/**
1469 * gfs2_glock_force_drop - Force a glock to be uncached
1470 * @gl: the glock
1471 *
1472 */
1473
1474void gfs2_glock_force_drop(struct gfs2_glock *gl)
1475{
1476 struct gfs2_holder gh;
1477
1478 gfs2_holder_init(gl, LM_ST_UNLOCKED, GL_NEVER_RECURSE, &gh);
1479 set_bit(HIF_DEMOTE, &gh.gh_iflags);
1480
1481 spin_lock(&gl->gl_spin);
1482 list_add_tail(&gh.gh_list, &gl->gl_waiters2);
1483 run_queue(gl);
1484 spin_unlock(&gl->gl_spin);
1485
1486 wait_for_completion(&gh.gh_wait);
1487 gfs2_holder_uninit(&gh);
1488}
1489
/*
 * greedy_work - deferred end of a "greedy" hold period
 * @data: the struct greedy allocated by gfs2_glock_be_greedy()
 *
 * Runs from the delayed workqueue once the greedy interval expires.
 * Re-enables the waiters2 (demote) queue, gives the glock type a
 * chance to react via go_greedy, then either tears the greedy state
 * down (no demote requests pending) or queues the embedded holder so
 * run_queue() processes the pending demotions.
 */
static void greedy_work(void *data)
{
	struct greedy *gr = (struct greedy *)data;
	struct gfs2_holder *gh = &gr->gr_gh;
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_glock_operations *glops = gl->gl_ops;

	/* Demote requests were deferred while we were greedy */
	clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);

	if (glops->go_greedy)
		glops->go_greedy(gl);

	spin_lock(&gl->gl_spin);

	if (list_empty(&gl->gl_waiters2)) {
		/* Nothing was blocked on us: done being greedy */
		clear_bit(GLF_GREEDY, &gl->gl_flags);
		spin_unlock(&gl->gl_spin);
		gfs2_holder_uninit(gh);
		kfree(gr);
	} else {
		/* Pin the glock across run_queue(); the greedy holder
		   itself is freed later when it is processed */
		gfs2_glock_hold(gl);
		list_add_tail(&gh->gh_list, &gl->gl_waiters2);
		run_queue(gl);
		spin_unlock(&gl->gl_spin);
		gfs2_glock_put(gl);
	}
}
1517
1518/**
1519 * gfs2_glock_be_greedy -
1520 * @gl:
1521 * @time:
1522 *
1523 * Returns: 0 if go_greedy will be called, 1 otherwise
1524 */
1525
1526int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1527{
1528 struct greedy *gr;
1529 struct gfs2_holder *gh;
1530
1531 if (!time ||
1532 gl->gl_sbd->sd_args.ar_localcaching ||
1533 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1534 return 1;
1535
1536 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1537 if (!gr) {
1538 clear_bit(GLF_GREEDY, &gl->gl_flags);
1539 return 1;
1540 }
1541 gh = &gr->gr_gh;
1542
1543 gfs2_holder_init(gl, 0, GL_NEVER_RECURSE, gh);
1544 set_bit(HIF_GREEDY, &gh->gh_iflags);
1545 INIT_WORK(&gr->gr_work, greedy_work, gr);
1546
1547 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1548 schedule_delayed_work(&gr->gr_work, time);
1549
1550 return 0;
1551}
1552
1553/**
1554 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1555 * @gh: the holder structure
1556 *
1557 */
1558
1559void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1560{
1561 gfs2_glock_dq(gh);
1562 gfs2_holder_uninit(gh);
1563}
1564
1565/**
1566 * gfs2_glock_nq_num - acquire a glock based on lock number
1567 * @sdp: the filesystem
1568 * @number: the lock number
1569 * @glops: the glock operations for the type of glock
1570 * @state: the state to acquire the glock in
1571 * @flags: modifier flags for the aquisition
1572 * @gh: the struct gfs2_holder
1573 *
1574 * Returns: errno
1575 */
1576
1577int gfs2_glock_nq_num(struct gfs2_sbd *sdp, uint64_t number,
1578 struct gfs2_glock_operations *glops, unsigned int state,
1579 int flags, struct gfs2_holder *gh)
1580{
1581 struct gfs2_glock *gl;
1582 int error;
1583
1584 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1585 if (!error) {
1586 error = gfs2_glock_nq_init(gl, state, flags, gh);
1587 gfs2_glock_put(gl);
1588 }
1589
1590 return error;
1591}
1592
1593/**
1594 * glock_compare - Compare two struct gfs2_glock structures for sorting
1595 * @arg_a: the first structure
1596 * @arg_b: the second structure
1597 *
1598 */
1599
1600static int glock_compare(const void *arg_a, const void *arg_b)
1601{
1602 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1603 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1604 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1605 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1606 int ret = 0;
1607
1608 if (a->ln_number > b->ln_number)
1609 ret = 1;
1610 else if (a->ln_number < b->ln_number)
1611 ret = -1;
1612 else {
1613 if (gh_a->gh_state == LM_ST_SHARED &&
1614 gh_b->gh_state == LM_ST_EXCLUSIVE)
1615 ret = 1;
1616 else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
1617 (gh_b->gh_flags & GL_LOCAL_EXCL))
1618 ret = 1;
1619 }
1620
1621 return ret;
1622}
1623
1624/**
1625 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
1626 * @num_gh: the number of structures
1627 * @ghs: an array of struct gfs2_holder structures
1628 *
1629 * Returns: 0 on success (all glocks acquired),
1630 * errno on failure (no glocks acquired)
1631 */
1632
1633static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1634 struct gfs2_holder **p)
1635{
1636 unsigned int x;
1637 int error = 0;
1638
1639 for (x = 0; x < num_gh; x++)
1640 p[x] = &ghs[x];
1641
1642 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1643
1644 for (x = 0; x < num_gh; x++) {
1645 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1646
1647 error = gfs2_glock_nq(p[x]);
1648 if (error) {
1649 while (x--)
1650 gfs2_glock_dq(p[x]);
1651 break;
1652 }
1653 }
1654
1655 return error;
1656}
1657
1658/**
1659 * gfs2_glock_nq_m - acquire multiple glocks
1660 * @num_gh: the number of structures
1661 * @ghs: an array of struct gfs2_holder structures
1662 *
1663 * Figure out how big an impact this function has. Either:
1664 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1665 * 2) Forget async stuff and just call nq_m_sync()
1666 * 3) Leave it like it is
1667 *
1668 * Returns: 0 on success (all glocks acquired),
1669 * errno on failure (no glocks acquired)
1670 */
1671
1672int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1673{
1674 int *e;
1675 unsigned int x;
1676 int borked = 0, serious = 0;
1677 int error = 0;
1678
1679 if (!num_gh)
1680 return 0;
1681
1682 if (num_gh == 1) {
1683 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1684 return gfs2_glock_nq(ghs);
1685 }
1686
1687 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1688 if (!e)
1689 return -ENOMEM;
1690
1691 for (x = 0; x < num_gh; x++) {
1692 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1693 error = gfs2_glock_nq(&ghs[x]);
1694 if (error) {
1695 borked = 1;
1696 serious = error;
1697 num_gh = x;
1698 break;
1699 }
1700 }
1701
1702 for (x = 0; x < num_gh; x++) {
1703 error = e[x] = glock_wait_internal(&ghs[x]);
1704 if (error) {
1705 borked = 1;
1706 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1707 serious = error;
1708 }
1709 }
1710
1711 if (!borked) {
1712 kfree(e);
1713 return 0;
1714 }
1715
1716 for (x = 0; x < num_gh; x++)
1717 if (!e[x])
1718 gfs2_glock_dq(&ghs[x]);
1719
1720 if (serious)
1721 error = serious;
1722 else {
1723 for (x = 0; x < num_gh; x++)
1724 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1725 &ghs[x]);
1726 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1727 }
1728
1729 kfree(e);
1730
1731 return error;
1732}
1733
1734/**
1735 * gfs2_glock_dq_m - release multiple glocks
1736 * @num_gh: the number of structures
1737 * @ghs: an array of struct gfs2_holder structures
1738 *
1739 */
1740
1741void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1742{
1743 unsigned int x;
1744
1745 for (x = 0; x < num_gh; x++)
1746 gfs2_glock_dq(&ghs[x]);
1747}
1748
1749/**
1750 * gfs2_glock_dq_uninit_m - release multiple glocks
1751 * @num_gh: the number of structures
1752 * @ghs: an array of struct gfs2_holder structures
1753 *
1754 */
1755
1756void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1757{
1758 unsigned int x;
1759
1760 for (x = 0; x < num_gh; x++)
1761 gfs2_glock_dq_uninit(&ghs[x]);
1762}
1763
1764/**
1765 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1766 * @sdp: the filesystem
1767 * @number: the lock number
1768 * @glops: the glock operations for the type of glock
1769 * @state: the state to acquire the glock in
1770 * @flags: modifier flags for the aquisition
1771 *
1772 * Returns: errno
1773 */
1774
1775void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
1776 struct gfs2_glock_operations *glops,
1777 unsigned int state, int flags)
1778{
1779 struct gfs2_glock *gl;
1780 int error;
1781
1782 if (atomic_read(&sdp->sd_reclaim_count) <
1783 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1784 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1785 if (!error) {
1786 gfs2_glock_prefetch(gl, state, flags);
1787 gfs2_glock_put(gl);
1788 }
1789 }
1790}
1791
1792/**
1793 * gfs2_lvb_hold - attach a LVB from a glock
1794 * @gl: The glock in question
1795 *
1796 */
1797
1798int gfs2_lvb_hold(struct gfs2_glock *gl)
1799{
1800 int error;
1801
1802 gfs2_glmutex_lock(gl);
1803
1804 if (!atomic_read(&gl->gl_lvb_count)) {
1805 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1806 if (error) {
1807 gfs2_glmutex_unlock(gl);
1808 return error;
1809 }
1810 gfs2_glock_hold(gl);
1811 }
1812 atomic_inc(&gl->gl_lvb_count);
1813
1814 gfs2_glmutex_unlock(gl);
1815
1816 return 0;
1817}
1818
1819/**
1820 * gfs2_lvb_unhold - detach a LVB from a glock
1821 * @gl: The glock in question
1822 *
1823 */
1824
1825void gfs2_lvb_unhold(struct gfs2_glock *gl)
1826{
1827 gfs2_glock_hold(gl);
1828 gfs2_glmutex_lock(gl);
1829
1830 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1831 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1832 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1833 gl->gl_lvb = NULL;
1834 gfs2_glock_put(gl);
1835 }
1836
1837 gfs2_glmutex_unlock(gl);
1838 gfs2_glock_put(gl);
1839}
1840
/*
 * gfs2_lvb_sync - push the glock's LVB contents to the lock module
 * @gl: the glock (must have an LVB attached and be held EXCLUSIVE)
 */
void gfs2_lvb_sync(struct gfs2_glock *gl)
{
	gfs2_glmutex_lock(gl);

	gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count));
	/* Only an exclusive holder may write the LVB back */
	if (!gfs2_assert_warn(gl->gl_sbd, gfs2_glock_is_held_excl(gl)))
		gfs2_lm_sync_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);

	gfs2_glmutex_unlock(gl);
}
1851
/*
 * blocking_cb - handle a "please demote" callback from the lock module
 * @sdp: the filesystem
 * @name: the name of the glock another node wants
 * @state: the maximum state the remote node can tolerate
 *
 * Looks up the glock (taking a reference), notifies the glock type via
 * go_callback, queues the demote, and kicks the state machine. A miss
 * in the hash table is ignored — the lock is no longer cached here.
 */
static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
			unsigned int state)
{
	struct gfs2_glock *gl;

	gl = gfs2_glock_find(sdp, name);
	if (!gl)
		return;

	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, state);
	handle_callback(gl, state);

	spin_lock(&gl->gl_spin);
	run_queue(gl);
	spin_unlock(&gl->gl_spin);

	gfs2_glock_put(gl);	/* drop gfs2_glock_find()'s reference */
}
1871
1872/**
1873 * gfs2_glock_cb - Callback used by locking module
1874 * @fsdata: Pointer to the superblock
1875 * @type: Type of callback
1876 * @data: Type dependent data pointer
1877 *
1878 * Called by the locking module when it wants to tell us something.
1879 * Either we need to drop a lock, one of our ASYNC requests completed, or
1880 * a journal from another client needs to be recovered.
1881 */
1882
1883void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data)
1884{
1885 struct gfs2_sbd *sdp = (struct gfs2_sbd *)fsdata;
1886
1887 switch (type) {
1888 case LM_CB_NEED_E:
1889 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_UNLOCKED);
1890 return;
1891
1892 case LM_CB_NEED_D:
1893 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_DEFERRED);
1894 return;
1895
1896 case LM_CB_NEED_S:
1897 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_SHARED);
1898 return;
1899
1900 case LM_CB_ASYNC: {
1901 struct lm_async_cb *async = (struct lm_async_cb *)data;
1902 struct gfs2_glock *gl;
1903
1904 gl = gfs2_glock_find(sdp, &async->lc_name);
1905 if (gfs2_assert_warn(sdp, gl))
1906 return;
1907 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1908 gl->gl_req_bh(gl, async->lc_ret);
1909 gfs2_glock_put(gl);
1910
1911 return;
1912 }
1913
1914 case LM_CB_NEED_RECOVERY:
1915 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1916 if (sdp->sd_recoverd_process)
1917 wake_up_process(sdp->sd_recoverd_process);
1918 return;
1919
1920 case LM_CB_DROPLOCKS:
1921 gfs2_gl_hash_clear(sdp, NO_WAIT);
1922 gfs2_quota_scan(sdp);
1923 return;
1924
1925 default:
1926 gfs2_assert_warn(sdp, 0);
1927 return;
1928 }
1929}
1930
1931/**
1932 * gfs2_try_toss_inode - try to remove a particular inode struct from cache
1933 * sdp: the filesystem
1934 * inum: the inode number
1935 *
1936 */
1937
1938void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum)
1939{
1940 struct gfs2_glock *gl;
1941 struct gfs2_inode *ip;
1942 int error;
1943
1944 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops,
1945 NO_CREATE, &gl);
1946 if (error || !gl)
1947 return;
1948
1949 if (!gfs2_glmutex_trylock(gl))
1950 goto out;
1951
1952 ip = gl->gl_object;
1953 if (!ip)
1954 goto out_unlock;
1955
1956 if (atomic_read(&ip->i_count))
1957 goto out_unlock;
1958
1959 gfs2_inode_destroy(ip);
1960
1961 out_unlock:
1962 gfs2_glmutex_unlock(gl);
1963
1964 out:
1965 gfs2_glock_put(gl);
1966}
1967
1968/**
1969 * gfs2_iopen_go_callback - Try to kick the inode/vnode associated with an
1970 * iopen glock from memory
1971 * @io_gl: the iopen glock
1972 * @state: the state into which the glock should be put
1973 *
1974 */
1975
1976void gfs2_iopen_go_callback(struct gfs2_glock *io_gl, unsigned int state)
1977{
1978 struct gfs2_glock *i_gl;
1979
1980 if (state != LM_ST_UNLOCKED)
1981 return;
1982
1983 spin_lock(&io_gl->gl_spin);
1984 i_gl = io_gl->gl_object;
1985 if (i_gl) {
1986 gfs2_glock_hold(i_gl);
1987 spin_unlock(&io_gl->gl_spin);
1988 } else {
1989 spin_unlock(&io_gl->gl_spin);
1990 return;
1991 }
1992
1993 if (gfs2_glmutex_trylock(i_gl)) {
1994 struct gfs2_inode *ip = i_gl->gl_object;
1995 if (ip) {
1996 gfs2_try_toss_vnode(ip);
1997 gfs2_glmutex_unlock(i_gl);
1998 gfs2_glock_schedule_for_reclaim(i_gl);
1999 goto out;
2000 }
2001 gfs2_glmutex_unlock(i_gl);
2002 }
2003
2004 out:
2005 gfs2_glock_put(i_gl);
2006}
2007
2008/**
2009 * demote_ok - Check to see if it's ok to unlock a glock
2010 * @gl: the glock
2011 *
2012 * Returns: 1 if it's ok
2013 */
2014
2015static int demote_ok(struct gfs2_glock *gl)
2016{
2017 struct gfs2_sbd *sdp = gl->gl_sbd;
2018 struct gfs2_glock_operations *glops = gl->gl_ops;
2019 int demote = 1;
2020
2021 if (test_bit(GLF_STICKY, &gl->gl_flags))
2022 demote = 0;
2023 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
2024 demote = time_after_eq(jiffies,
2025 gl->gl_stamp +
2026 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
2027 else if (glops->go_demote_ok)
2028 demote = glops->go_demote_ok(gl);
2029
2030 return demote;
2031}
2032
2033/**
2034 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
2035 * @gl: the glock
2036 *
2037 */
2038
2039void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
2040{
2041 struct gfs2_sbd *sdp = gl->gl_sbd;
2042
2043 spin_lock(&sdp->sd_reclaim_lock);
2044 if (list_empty(&gl->gl_reclaim)) {
2045 gfs2_glock_hold(gl);
2046 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
2047 atomic_inc(&sdp->sd_reclaim_count);
2048 }
2049 spin_unlock(&sdp->sd_reclaim_lock);
2050
2051 wake_up(&sdp->sd_reclaim_wq);
2052}
2053
2054/**
2055 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
2056 * @sdp: the filesystem
2057 *
2058 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
2059 * different glock and we notice that there are a lot of glocks in the
2060 * reclaim list.
2061 *
2062 */
2063
2064void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
2065{
2066 struct gfs2_glock *gl;
2067
2068 spin_lock(&sdp->sd_reclaim_lock);
2069 if (list_empty(&sdp->sd_reclaim_list)) {
2070 spin_unlock(&sdp->sd_reclaim_lock);
2071 return;
2072 }
2073 gl = list_entry(sdp->sd_reclaim_list.next,
2074 struct gfs2_glock, gl_reclaim);
2075 list_del_init(&gl->gl_reclaim);
2076 spin_unlock(&sdp->sd_reclaim_lock);
2077
2078 atomic_dec(&sdp->sd_reclaim_count);
2079 atomic_inc(&sdp->sd_reclaimed);
2080
2081 if (gfs2_glmutex_trylock(gl)) {
2082 if (gl->gl_ops == &gfs2_inode_glops) {
2083 struct gfs2_inode *ip = gl->gl_object;
2084 if (ip && !atomic_read(&ip->i_count))
2085 gfs2_inode_destroy(ip);
2086 }
2087 if (queue_empty(gl, &gl->gl_holders) &&
2088 gl->gl_state != LM_ST_UNLOCKED &&
2089 demote_ok(gl))
2090 handle_callback(gl, LM_ST_UNLOCKED);
2091 gfs2_glmutex_unlock(gl);
2092 }
2093
2094 gfs2_glock_put(gl);
2095}
2096
2097/**
2098 * examine_bucket - Call a function for glock in a hash bucket
2099 * @examiner: the function
2100 * @sdp: the filesystem
2101 * @bucket: the bucket
2102 *
2103 * Returns: 1 if the bucket has entries
2104 */
2105
2106static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
2107 struct gfs2_gl_hash_bucket *bucket)
2108{
2109 struct glock_plug plug;
2110 struct list_head *tmp;
2111 struct gfs2_glock *gl;
2112 int entries;
2113
2114 /* Add "plug" to end of bucket list, work back up list from there */
2115 memset(&plug.gl_flags, 0, sizeof(unsigned long));
2116 set_bit(GLF_PLUG, &plug.gl_flags);
2117
2118 write_lock(&bucket->hb_lock);
2119 list_add(&plug.gl_list, &bucket->hb_list);
2120 write_unlock(&bucket->hb_lock);
2121
2122 for (;;) {
2123 write_lock(&bucket->hb_lock);
2124
2125 for (;;) {
2126 tmp = plug.gl_list.next;
2127
2128 if (tmp == &bucket->hb_list) {
2129 list_del(&plug.gl_list);
2130 entries = !list_empty(&bucket->hb_list);
2131 write_unlock(&bucket->hb_lock);
2132 return entries;
2133 }
2134 gl = list_entry(tmp, struct gfs2_glock, gl_list);
2135
2136 /* Move plug up list */
2137 list_move(&plug.gl_list, &gl->gl_list);
2138
2139 if (test_bit(GLF_PLUG, &gl->gl_flags))
2140 continue;
2141
2142 /* examiner() must glock_put() */
2143 gfs2_glock_hold(gl);
2144
2145 break;
2146 }
2147
2148 write_unlock(&bucket->hb_lock);
2149
2150 examiner(gl);
2151 }
2152}
2153
2154/**
2155 * scan_glock - look at a glock and see if we can reclaim it
2156 * @gl: the glock to look at
2157 *
2158 */
2159
2160static void scan_glock(struct gfs2_glock *gl)
2161{
2162 if (gfs2_glmutex_trylock(gl)) {
2163 if (gl->gl_ops == &gfs2_inode_glops) {
2164 struct gfs2_inode *ip = gl->gl_object;
2165 if (ip && !atomic_read(&ip->i_count))
2166 goto out_schedule;
2167 }
2168 if (queue_empty(gl, &gl->gl_holders) &&
2169 gl->gl_state != LM_ST_UNLOCKED &&
2170 demote_ok(gl))
2171 goto out_schedule;
2172
2173 gfs2_glmutex_unlock(gl);
2174 }
2175
2176 gfs2_glock_put(gl);
2177
2178 return;
2179
2180 out_schedule:
2181 gfs2_glmutex_unlock(gl);
2182 gfs2_glock_schedule_for_reclaim(gl);
2183 gfs2_glock_put(gl);
2184}
2185
2186/**
2187 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
2188 * @sdp: the filesystem
2189 *
2190 */
2191
2192void gfs2_scand_internal(struct gfs2_sbd *sdp)
2193{
2194 unsigned int x;
2195
2196 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2197 examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
2198 cond_resched();
2199 }
2200}
2201
2202/**
2203 * clear_glock - look at a glock and see if we can free it from glock cache
2204 * @gl: the glock to look at
2205 *
2206 */
2207
2208static void clear_glock(struct gfs2_glock *gl)
2209{
2210 struct gfs2_sbd *sdp = gl->gl_sbd;
2211 int released;
2212
2213 spin_lock(&sdp->sd_reclaim_lock);
2214 if (!list_empty(&gl->gl_reclaim)) {
2215 list_del_init(&gl->gl_reclaim);
2216 atomic_dec(&sdp->sd_reclaim_count);
2217 released = gfs2_glock_put(gl);
2218 gfs2_assert(sdp, !released);
2219 }
2220 spin_unlock(&sdp->sd_reclaim_lock);
2221
2222 if (gfs2_glmutex_trylock(gl)) {
2223 if (gl->gl_ops == &gfs2_inode_glops) {
2224 struct gfs2_inode *ip = gl->gl_object;
2225 if (ip && !atomic_read(&ip->i_count))
2226 gfs2_inode_destroy(ip);
2227 }
2228 if (queue_empty(gl, &gl->gl_holders) &&
2229 gl->gl_state != LM_ST_UNLOCKED)
2230 handle_callback(gl, LM_ST_UNLOCKED);
2231
2232 gfs2_glmutex_unlock(gl);
2233 }
2234
2235 gfs2_glock_put(gl);
2236}
2237
2238/**
2239 * gfs2_gl_hash_clear - Empty out the glock hash table
2240 * @sdp: the filesystem
2241 * @wait: wait until it's all gone
2242 *
2243 * Called when unmounting the filesystem, or when inter-node lock manager
2244 * requests DROPLOCKS because it is running out of capacity.
2245 */
2246
2247void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
2248{
2249 unsigned long t;
2250 unsigned int x;
2251 int cont;
2252
2253 t = jiffies;
2254
2255 for (;;) {
2256 cont = 0;
2257
2258 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
2259 if (examine_bucket(clear_glock, sdp,
2260 &sdp->sd_gl_hash[x]))
2261 cont = 1;
2262
2263 if (!wait || !cont)
2264 break;
2265
2266 if (time_after_eq(jiffies,
2267 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
2268 fs_warn(sdp, "Unmount seems to be stalled. "
2269 "Dumping lock state...\n");
2270 gfs2_dump_lockstate(sdp);
2271 t = jiffies;
2272 }
2273
2274 /* invalidate_inodes() requires that the sb inodes list
2275 not change, but an async completion callback for an
2276 unlock can occur which does glock_put() which
2277 can call iput() which will change the sb inodes list.
2278 invalidate_inodes_mutex prevents glock_put()'s during
2279 an invalidate_inodes() */
2280
2281 mutex_lock(&sdp->sd_invalidate_inodes_mutex);
2282 invalidate_inodes(sdp->sd_vfs);
2283 mutex_unlock(&sdp->sd_invalidate_inodes_mutex);
2284 yield();
2285 }
2286}
2287
2288/*
2289 * Diagnostic routines to help debug distributed deadlock
2290 */
2291
2292/**
2293 * dump_holder - print information about a glock holder
2294 * @str: a string naming the type of holder
2295 * @gh: the glock holder
2296 *
2297 * Returns: 0 on success, -ENOBUFS when we run out of space
2298 */
2299
2300static int dump_holder(char *str, struct gfs2_holder *gh)
2301{
2302 unsigned int x;
2303 int error = -ENOBUFS;
2304
2305 printk(KERN_INFO " %s\n", str);
2306 printk(KERN_INFO " owner = %ld\n",
2307 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
2308 printk(KERN_INFO " gh_state = %u\n", gh->gh_state);
2309 printk(KERN_INFO " gh_flags =");
2310 for (x = 0; x < 32; x++)
2311 if (gh->gh_flags & (1 << x))
2312 printk(" %u", x);
2313 printk(" \n");
2314 printk(KERN_INFO " error = %d\n", gh->gh_error);
2315 printk(KERN_INFO " gh_iflags =");
2316 for (x = 0; x < 32; x++)
2317 if (test_bit(x, &gh->gh_iflags))
2318 printk(" %u", x);
2319 printk(" \n");
2320 print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip);
2321
2322 error = 0;
2323
2324 return error;
2325}
2326
2327/**
2328 * dump_inode - print information about an inode
2329 * @ip: the inode
2330 *
2331 * Returns: 0 on success, -ENOBUFS when we run out of space
2332 */
2333
2334static int dump_inode(struct gfs2_inode *ip)
2335{
2336 unsigned int x;
2337 int error = -ENOBUFS;
2338
2339 printk(KERN_INFO " Inode:\n");
2340 printk(KERN_INFO " num = %llu %llu\n",
2341 ip->i_num.no_formal_ino, ip->i_num.no_addr);
2342 printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode));
2343 printk(KERN_INFO " i_count = %d\n", atomic_read(&ip->i_count));
2344 printk(KERN_INFO " i_flags =");
2345 for (x = 0; x < 32; x++)
2346 if (test_bit(x, &ip->i_flags))
2347 printk(" %u", x);
2348 printk(" \n");
2349 printk(KERN_INFO " vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
2350
2351 error = 0;
2352
2353 return error;
2354}
2355
2356/**
2357 * dump_glock - print information about a glock
2358 * @gl: the glock
2359 * @count: where we are in the buffer
2360 *
2361 * Returns: 0 on success, -ENOBUFS when we run out of space
2362 */
2363
2364static int dump_glock(struct gfs2_glock *gl)
2365{
2366 struct gfs2_holder *gh;
2367 unsigned int x;
2368 int error = -ENOBUFS;
2369
2370 spin_lock(&gl->gl_spin);
2371
2372 printk(KERN_INFO "Glock (%u, %llu)\n",
2373 gl->gl_name.ln_type,
2374 gl->gl_name.ln_number);
2375 printk(KERN_INFO " gl_flags =");
2376 for (x = 0; x < 32; x++)
2377 if (test_bit(x, &gl->gl_flags))
2378 printk(" %u", x);
2379 printk(" \n");
2380 printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref.refcount));
2381 printk(KERN_INFO " gl_state = %u\n", gl->gl_state);
2382 printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
2383 printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
2384 printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
2385 printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no");
2386 printk(KERN_INFO " le = %s\n",
2387 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
2388 printk(KERN_INFO " reclaim = %s\n",
2389 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
2390 if (gl->gl_aspace)
2391 printk(KERN_INFO " aspace = %lu\n",
2392 gl->gl_aspace->i_mapping->nrpages);
2393 else
2394 printk(KERN_INFO " aspace = no\n");
2395 printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count));
2396 if (gl->gl_req_gh) {
2397 error = dump_holder("Request", gl->gl_req_gh);
2398 if (error)
2399 goto out;
2400 }
2401 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
2402 error = dump_holder("Holder", gh);
2403 if (error)
2404 goto out;
2405 }
2406 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
2407 error = dump_holder("Waiter1", gh);
2408 if (error)
2409 goto out;
2410 }
2411 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
2412 error = dump_holder("Waiter2", gh);
2413 if (error)
2414 goto out;
2415 }
2416 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
2417 error = dump_holder("Waiter3", gh);
2418 if (error)
2419 goto out;
2420 }
2421 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
2422 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
2423 list_empty(&gl->gl_holders)) {
2424 error = dump_inode(gl->gl_object);
2425 if (error)
2426 goto out;
2427 } else {
2428 error = -ENOBUFS;
2429 printk(KERN_INFO " Inode: busy\n");
2430 }
2431 }
2432
2433 error = 0;
2434
2435 out:
2436 spin_unlock(&gl->gl_spin);
2437
2438 return error;
2439}
2440
2441/**
2442 * gfs2_dump_lockstate - print out the current lockstate
2443 * @sdp: the filesystem
2444 * @ub: the buffer to copy the information into
2445 *
2446 * If @ub is NULL, dump the lockstate to the console.
2447 *
2448 */
2449
2450int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2451{
2452 struct gfs2_gl_hash_bucket *bucket;
2453 struct gfs2_glock *gl;
2454 unsigned int x;
2455 int error = 0;
2456
2457 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2458 bucket = &sdp->sd_gl_hash[x];
2459
2460 read_lock(&bucket->hb_lock);
2461
2462 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
2463 if (test_bit(GLF_PLUG, &gl->gl_flags))
2464 continue;
2465
2466 error = dump_glock(gl);
2467 if (error)
2468 break;
2469 }
2470
2471 read_unlock(&bucket->hb_lock);
2472
2473 if (error)
2474 break;
2475 }
2476
2477
2478 return error;
2479}
2480
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
new file mode 100644
index 000000000000..560029de8d07
--- /dev/null
+++ b/fs/gfs2/glock.h
@@ -0,0 +1,166 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__
12
13/* Flags for lock requests; used in gfs2_holder gh_flag field.
14 From lm_interface.h:
15#define LM_FLAG_TRY 0x00000001
16#define LM_FLAG_TRY_1CB 0x00000002
17#define LM_FLAG_NOEXP 0x00000004
18#define LM_FLAG_ANY 0x00000008
19#define LM_FLAG_PRIORITY 0x00000010 */
20
21#define GL_LOCAL_EXCL 0x00000020
22#define GL_ASYNC 0x00000040
23#define GL_EXACT 0x00000080
24#define GL_SKIP 0x00000100
25#define GL_ATIME 0x00000200
26#define GL_NOCACHE 0x00000400
27#define GL_SYNC 0x00000800
28#define GL_NOCANCEL 0x00001000
29#define GL_NEVER_RECURSE 0x00002000
30
31#define GLR_TRYFAILED 13
32#define GLR_CANCELED 14
33
34static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
35{
36 struct gfs2_holder *gh;
37 int locked = 0;
38
39 /* Look in glock's list of holders for one with current task as owner */
40 spin_lock(&gl->gl_spin);
41 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
42 if (gh->gh_owner == current) {
43 locked = 1;
44 break;
45 }
46 }
47 spin_unlock(&gl->gl_spin);
48
49 return locked;
50}
51
52static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
53{
54 return (gl->gl_state == LM_ST_EXCLUSIVE);
55}
56
57static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
58{
59 return (gl->gl_state == LM_ST_DEFERRED);
60}
61
62static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
63{
64 return (gl->gl_state == LM_ST_SHARED);
65}
66
67static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
68{
69 int ret;
70 spin_lock(&gl->gl_spin);
71 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
72 spin_unlock(&gl->gl_spin);
73 return ret;
74}
75
76struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
77 struct lm_lockname *name);
78int gfs2_glock_get(struct gfs2_sbd *sdp,
79 uint64_t number, struct gfs2_glock_operations *glops,
80 int create, struct gfs2_glock **glp);
81void gfs2_glock_hold(struct gfs2_glock *gl);
82int gfs2_glock_put(struct gfs2_glock *gl);
83
84void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
85 struct gfs2_holder *gh);
86void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh);
87void gfs2_holder_uninit(struct gfs2_holder *gh);
88struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
89 int flags, gfp_t gfp_flags);
90void gfs2_holder_put(struct gfs2_holder *gh);
91
92void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
93void gfs2_glock_drop_th(struct gfs2_glock *gl);
94
95void gfs2_glmutex_lock(struct gfs2_glock *gl);
96int gfs2_glmutex_trylock(struct gfs2_glock *gl);
97void gfs2_glmutex_unlock(struct gfs2_glock *gl);
98
99int gfs2_glock_nq(struct gfs2_holder *gh);
100int gfs2_glock_poll(struct gfs2_holder *gh);
101int gfs2_glock_wait(struct gfs2_holder *gh);
102void gfs2_glock_dq(struct gfs2_holder *gh);
103
104void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags);
105void gfs2_glock_force_drop(struct gfs2_glock *gl);
106
107int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
108
109void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
110int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
111 uint64_t number, struct gfs2_glock_operations *glops,
112 unsigned int state, int flags, struct gfs2_holder *gh);
113
114int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
115void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
116void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
117
118void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
119 struct gfs2_glock_operations *glops,
120 unsigned int state, int flags);
121
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * On failure the holder is uninitialized again, so the caller only
 * has to clean up after a successful call.
 *
 * Returns: 0, GLR_*, or errno
 */

static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
				     unsigned int state, int flags,
				     struct gfs2_holder *gh)
{
	int ret;

	gfs2_holder_init(gl, state, flags, gh);

	ret = gfs2_glock_nq(gh);
	if (ret)
		gfs2_holder_uninit(gh);

	return ret;
}
146
147/* Lock Value Block functions */
148
149int gfs2_lvb_hold(struct gfs2_glock *gl);
150void gfs2_lvb_unhold(struct gfs2_glock *gl);
151void gfs2_lvb_sync(struct gfs2_glock *gl);
152
153void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data);
154
155void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum);
156void gfs2_iopen_go_callback(struct gfs2_glock *gl, unsigned int state);
157
158void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
159void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
160
161void gfs2_scand_internal(struct gfs2_sbd *sdp);
162void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
163
164int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
165
166#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
new file mode 100644
index 000000000000..d9334eb72df8
--- /dev/null
+++ b/fs/gfs2/glops.c
@@ -0,0 +1,492 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "glops.h"
24#include "inode.h"
25#include "log.h"
26#include "meta_io.h"
27#include "page.h"
28#include "recovery.h"
29#include "rgrp.h"
30#include "util.h"
31
/**
 * meta_go_sync - sync out the metadata for this glock
 * @gl: the glock
 * @flags: DIO_*; only acts when DIO_METADATA is set
 *
 * Called when demoting or unlocking an EX glock. We must flush
 * to disk all dirty buffers/pages relating to this glock, and must
 * not return to caller to demote/unlock the glock until I/O is
 * complete.
 */

static void meta_go_sync(struct gfs2_glock *gl, int flags)
{
	if (!(flags & DIO_METADATA))
		return;

	if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
		/* Flush the log first, then write and wait on the
		   metadata itself; DIO_RELEASE also drains the AIL */
		gfs2_log_flush_glock(gl);
		gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
		if (flags & DIO_RELEASE)
			gfs2_ail_empty_gl(gl);
	}

	clear_bit(GLF_SYNC, &gl->gl_flags);
}
56
/**
 * meta_go_inval - invalidate the metadata for this glock
 * @gl: the glock
 * @flags: DIO_*; only acts when DIO_METADATA is set
 *
 * Drops the cached metadata and bumps the glock's version number so
 * code that compares against gl_vn (e.g. inode_go_lock) can tell its
 * copy is stale.
 */

static void meta_go_inval(struct gfs2_glock *gl, int flags)
{
	if (!(flags & DIO_METADATA))
		return;

	gfs2_meta_inval(gl);
	gl->gl_vn++;
}
72
73/**
74 * meta_go_demote_ok - Check to see if it's ok to unlock a glock
75 * @gl: the glock
76 *
77 * Returns: 1 if we have no cached data; ok to demote meta glock
78 */
79
80static int meta_go_demote_ok(struct gfs2_glock *gl)
81{
82 return !gl->gl_aspace->i_mapping->nrpages;
83}
84
/**
 * inode_go_xmote_th - promote/demote a glock
 * @gl: the glock
 * @state: the requested state
 * @flags: LM_FLAG_*/GL_* modifier flags
 *
 * Invalidates PTE mappings under this glock before performing the
 * default state change, unless the lock is currently unlocked (in
 * which case nothing can be mapped).
 */

static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
			      int flags)
{
	if (gl->gl_state != LM_ST_UNLOCKED)
		gfs2_pte_inval(gl);
	gfs2_glock_xmote_th(gl, state, flags);
}
100
/**
 * inode_go_xmote_bh - After promoting/demoting a glock
 * @gl: the glock
 *
 * Unless the requesting holder asked to skip it (GL_SKIP), start a
 * read (DIO_START) of the block named by the lock number and drop the
 * buffer reference immediately — presumably to warm the metadata
 * cache while the lock is held (TODO confirm).  Read errors are
 * deliberately ignored.
 */

static void inode_go_xmote_bh(struct gfs2_glock *gl)
{
	struct gfs2_holder *gh = gl->gl_req_gh;
	struct buffer_head *bh;
	int error;

	if (gl->gl_state != LM_ST_UNLOCKED &&
	    (!gh || !(gh->gh_flags & GL_SKIP))) {
		error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START,
				       &bh);
		if (!error)
			brelse(bh);
	}
}
121
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
 * is being purged from our node's glock cache; we're dropping lock.
 * PTE mappings must be invalidated before the default drop runs.
 */

static void inode_go_drop_th(struct gfs2_glock *gl)
{
	gfs2_pte_inval(gl);
	gfs2_glock_drop_th(gl);
}
136
/**
 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
 * @gl: the glock protecting the inode
 * @flags: DIO_METADATA and/or DIO_DATA select what is flushed;
 *         DIO_RELEASE additionally empties the glock's AIL list
 *
 * In the combined meta+data case the syncs are interleaved: data
 * writeback is started, the log is flushed, metadata is written and
 * waited on, then the data writeback is waited on.  Note GLF_DIRTY is
 * only cleared when BOTH kinds were synced; GLF_SYNC is always
 * cleared on exit.
 */

static void inode_go_sync(struct gfs2_glock *gl, int flags)
{
	int meta = (flags & DIO_METADATA);
	int data = (flags & DIO_DATA);

	if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
		if (meta && data) {
			gfs2_page_sync(gl, flags | DIO_START);
			gfs2_log_flush_glock(gl);
			gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
			gfs2_page_sync(gl, flags | DIO_WAIT);
			clear_bit(GLF_DIRTY, &gl->gl_flags);
		} else if (meta) {
			gfs2_log_flush_glock(gl);
			gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
		} else if (data)
			gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT);
		if (flags & DIO_RELEASE)
			gfs2_ail_empty_gl(gl);
	}

	clear_bit(GLF_SYNC, &gl->gl_flags);
}
167
168/**
169 * inode_go_inval - prepare a inode glock to be released
170 * @gl: the glock
171 * @flags:
172 *
173 */
174
175static void inode_go_inval(struct gfs2_glock *gl, int flags)
176{
177 int meta = (flags & DIO_METADATA);
178 int data = (flags & DIO_DATA);
179
180 if (meta) {
181 gfs2_meta_inval(gl);
182 gl->gl_vn++;
183 }
184 if (data)
185 gfs2_page_inval(gl);
186}
187
188/**
189 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
190 * @gl: the glock
191 *
192 * Returns: 1 if it's ok
193 */
194
195static int inode_go_demote_ok(struct gfs2_glock *gl)
196{
197 struct gfs2_sbd *sdp = gl->gl_sbd;
198 int demote = 0;
199
200 if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
201 demote = 1;
202 else if (!sdp->sd_args.ar_localcaching &&
203 time_after_eq(jiffies, gl->gl_stamp +
204 gfs2_tune_get(sdp, gt_demote_secs) * HZ))
205 demote = 1;
206
207 return demote;
208}
209
/**
 * inode_go_lock - operation done after an inode lock is locked by a process
 * @gh: the holder that was just granted
 *
 * Refreshes the in-core inode from disk if the glock's version number
 * says our copy is stale, and resumes an interrupted truncate when
 * the dinode is flagged TRUNC_IN_PROG and we hold the lock in
 * exclusive, node-local mode.
 *
 * Returns: errno
 */

static int inode_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_inode *ip = gl->gl_object;
	int error = 0;

	/* No inode attached to this glock yet: nothing to refresh */
	if (!ip)
		return 0;

	if (ip->i_vn != gl->gl_vn) {
		error = gfs2_inode_refresh(ip);
		if (error)
			return error;
		gfs2_inode_attr_in(ip);
	}

	if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
	    (gh->gh_flags & GL_LOCAL_EXCL))
		error = gfs2_truncatei_resume(ip);

	return error;
}
241
242/**
243 * inode_go_unlock - operation done before an inode lock is unlocked by a
244 * process
245 * @gl: the glock
246 * @flags:
247 *
248 */
249
250static void inode_go_unlock(struct gfs2_holder *gh)
251{
252 struct gfs2_glock *gl = gh->gh_gl;
253 struct gfs2_inode *ip = gl->gl_object;
254
255 if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
256 gfs2_inode_attr_in(ip);
257
258 if (ip)
259 gfs2_meta_cache_flush(ip);
260}
261
/**
 * inode_greedy - adapt an inode's greedy (lock retention) period
 * @gl: the glock
 *
 * If the last page fault happened within the current quantum, grow
 * i_greedy (capped at gt_greedy_max); otherwise shrink it.  Drops a
 * reference on the inode via gfs2_inode_put() — presumably the one
 * taken when the greedy callback was queued (TODO confirm).
 */

static void inode_greedy(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
	unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
	unsigned int new_time;

	spin_lock(&ip->i_spin);

	if (time_after(ip->i_last_pfault + quantum, jiffies)) {
		new_time = ip->i_greedy + quantum;
		if (new_time > max)
			new_time = max;
	} else {
		/* Unsigned subtraction may wrap; the "> max" test also
		   catches the wraparound and clamps to the minimum */
		new_time = ip->i_greedy - quantum;
		if (!new_time || new_time > max)
			new_time = 1;
	}

	ip->i_greedy = new_time;

	spin_unlock(&ip->i_spin);

	gfs2_inode_put(ip);
}
294
295/**
296 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
297 * @gl: the glock
298 *
299 * Returns: 1 if it's ok
300 */
301
302static int rgrp_go_demote_ok(struct gfs2_glock *gl)
303{
304 return !gl->gl_aspace->i_mapping->nrpages;
305}
306
307/**
308 * rgrp_go_lock - operation done after an rgrp lock is locked by
309 * a first holder on this node.
310 * @gl: the glock
311 * @flags:
312 *
313 * Returns: errno
314 */
315
316static int rgrp_go_lock(struct gfs2_holder *gh)
317{
318 return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
319}
320
321/**
322 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
323 * a last holder on this node.
324 * @gl: the glock
325 * @flags:
326 *
327 */
328
329static void rgrp_go_unlock(struct gfs2_holder *gh)
330{
331 gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
332}
333
/**
 * trans_go_xmote_th - promote/demote the transaction glock
 * @gl: the glock
 * @state: the requested state
 * @flags: modifier flags
 *
 * If we currently hold the lock and the journal is live, sync all
 * filesystem metadata and shut the log down before the default state
 * change runs.
 */

static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
			      int flags)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (gl->gl_state != LM_ST_UNLOCKED &&
	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		gfs2_meta_syncfs(sdp);
		gfs2_log_shutdown(sdp);
	}

	gfs2_glock_xmote_th(gl, state, flags);
}
355
/**
 * trans_go_xmote_bh - After promoting/demoting the transaction glock
 * @gl: the glock
 *
 * On (re)acquiring the lock with a live journal: flush and invalidate
 * the journal inode's cached metadata/data, then re-read the log
 * head.  The journal must have been cleanly shut down
 * (GFS2_LOG_HEAD_UNMOUNT), otherwise the filesystem is flagged
 * inconsistent via gfs2_consist().
 */

static void trans_go_xmote_bh(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = sdp->sd_jdesc->jd_inode->u.generic_ip;
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header head;
	int error;

	if (gl->gl_state != LM_ST_UNLOCKED &&
	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode->u.generic_ip);
		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);

		error = gfs2_find_jhead(sdp->sd_jdesc, &head);
		if (error)
			gfs2_consist(sdp);
		if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
			gfs2_consist(sdp);

		/* Initialize some head of the log stuff */
		if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
			sdp->sd_log_sequence = head.lh_sequence + 1;
			gfs2_log_pointers_init(sdp, head.lh_blkno);
		}
	}
}
388
/**
 * trans_go_drop_th - unlock the transaction glock
 * @gl: the glock
 *
 * We want to sync the device even with localcaching. Remember
 * that localcaching journal replay only marks buffers dirty.
 * The metadata sync and log shutdown happen only while the journal
 * is still live.
 */

static void trans_go_drop_th(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		gfs2_meta_syncfs(sdp);
		gfs2_log_shutdown(sdp);
	}

	gfs2_glock_drop_th(gl);
}
408
409/**
410 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
411 * @gl: the glock
412 *
413 * Returns: 1 if it's ok
414 */
415
416static int quota_go_demote_ok(struct gfs2_glock *gl)
417{
418 return !atomic_read(&gl->gl_lvb_count);
419}
420
/* Plain metadata glocks */
struct gfs2_glock_operations gfs2_meta_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_sync = meta_go_sync,
	.go_inval = meta_go_inval,
	.go_demote_ok = meta_go_demote_ok,
	.go_type = LM_TYPE_META
};

/* Inode glocks: add PTE invalidation, refresh-on-lock, greedy hold */
struct gfs2_glock_operations gfs2_inode_glops = {
	.go_xmote_th = inode_go_xmote_th,
	.go_xmote_bh = inode_go_xmote_bh,
	.go_drop_th = inode_go_drop_th,
	.go_sync = inode_go_sync,
	.go_inval = inode_go_inval,
	.go_demote_ok = inode_go_demote_ok,
	.go_lock = inode_go_lock,
	.go_unlock = inode_go_unlock,
	.go_greedy = inode_greedy,
	.go_type = LM_TYPE_INODE
};

/* Resource group glocks: manage the rgrp's buffer heads on lock/unlock */
struct gfs2_glock_operations gfs2_rgrp_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_sync = meta_go_sync,
	.go_inval = meta_go_inval,
	.go_demote_ok = rgrp_go_demote_ok,
	.go_lock = rgrp_go_lock,
	.go_unlock = rgrp_go_unlock,
	.go_type = LM_TYPE_RGRP
};

/* The (single) transaction glock: log shutdown/replay on transitions */
struct gfs2_glock_operations gfs2_trans_glops = {
	.go_xmote_th = trans_go_xmote_th,
	.go_xmote_bh = trans_go_xmote_bh,
	.go_drop_th = trans_go_drop_th,
	.go_type = LM_TYPE_NONDISK
};

/* Inode-open glocks */
struct gfs2_glock_operations gfs2_iopen_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_callback = gfs2_iopen_go_callback,
	.go_type = LM_TYPE_IOPEN
};

/* flock() glocks */
struct gfs2_glock_operations gfs2_flock_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_FLOCK
};

/* Miscellaneous non-disk glocks */
struct gfs2_glock_operations gfs2_nondisk_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_NONDISK
};

/* Quota glocks: hold while the LVB is referenced */
struct gfs2_glock_operations gfs2_quota_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_demote_ok = quota_go_demote_ok,
	.go_type = LM_TYPE_QUOTA
};

/* Journal glocks */
struct gfs2_glock_operations gfs2_journal_glops = {
	.go_xmote_th = gfs2_glock_xmote_th,
	.go_drop_th = gfs2_glock_drop_th,
	.go_type = LM_TYPE_JOURNAL
};
492
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
new file mode 100644
index 000000000000..94f2d264aa64
--- /dev/null
+++ b/fs/gfs2/glops.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOPS_DOT_H__
11#define __GLOPS_DOT_H__
12
13extern struct gfs2_glock_operations gfs2_meta_glops;
14extern struct gfs2_glock_operations gfs2_inode_glops;
15extern struct gfs2_glock_operations gfs2_rgrp_glops;
16extern struct gfs2_glock_operations gfs2_trans_glops;
17extern struct gfs2_glock_operations gfs2_iopen_glops;
18extern struct gfs2_glock_operations gfs2_flock_glops;
19extern struct gfs2_glock_operations gfs2_nondisk_glops;
20extern struct gfs2_glock_operations gfs2_quota_glops;
21extern struct gfs2_glock_operations gfs2_journal_glops;
22
23#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
new file mode 100644
index 000000000000..b5a994d1b5f7
--- /dev/null
+++ b/fs/gfs2/incore.h
@@ -0,0 +1,684 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
/* DIO_* flags: select what the sync/invalidate operations act on
   (DIO_METADATA / DIO_DATA) and how (DIO_START begins I/O, DIO_WAIT
   waits for it; DIO_RELEASE additionally empties the glock's AIL). */
#define DIO_FORCE	0x00000001
#define DIO_CLEAN	0x00000002
#define DIO_DIRTY	0x00000004
#define DIO_START	0x00000008
#define DIO_WAIT	0x00000010
#define DIO_METADATA	0x00000020
#define DIO_DATA	0x00000040
#define DIO_RELEASE	0x00000080
#define DIO_ALL		0x00000100
22
23struct gfs2_log_operations;
24struct gfs2_log_element;
25struct gfs2_bitmap;
26struct gfs2_rgrpd;
27struct gfs2_bufdata;
28struct gfs2_glock_operations;
29struct gfs2_holder;
30struct gfs2_glock;
31struct gfs2_alloc;
32struct gfs2_inode;
33struct gfs2_file;
34struct gfs2_revoke;
35struct gfs2_revoke_replay;
36struct gfs2_unlinked;
37struct gfs2_quota_data;
38struct gfs2_log_buf;
39struct gfs2_trans;
40struct gfs2_ail;
41struct gfs2_jdesc;
42struct gfs2_args;
43struct gfs2_tune;
44struct gfs2_gl_hash_bucket;
45struct gfs2_sbd;
46
47typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
48
/*
 * Structure of operations that are associated with each
 * type of element in the log.
 */

struct gfs2_log_operations {
	void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
	void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
	void (*lo_before_commit) (struct gfs2_sbd *sdp);
	void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
	void (*lo_before_scan) (struct gfs2_jdesc *jd,
				struct gfs2_log_header *head, int pass);
	int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
				 struct gfs2_log_descriptor *ld, __be64 *ptr,
				 int pass);
	void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
	char *lo_name;		/* For identification in messages */
};

/* One entry on a log element list, bound to its operations vector */
struct gfs2_log_element {
	struct list_head le_list;
	struct gfs2_log_operations *le_ops;
};

/* One bitmap buffer within a resource group */
struct gfs2_bitmap {
	struct buffer_head *bi_bh;
	char *bi_clone;
	uint32_t bi_offset;
	uint32_t bi_start;
	uint32_t bi_len;
};

/* In-core resource group descriptor */
struct gfs2_rgrpd {
	struct list_head rd_list;	/* Link with superblock */
	struct list_head rd_list_mru;
	struct list_head rd_recent;	/* Recently used rgrps */
	struct gfs2_glock *rd_gl;	/* Glock for this rgrp */
	struct gfs2_rindex rd_ri;
	struct gfs2_rgrp rd_rg;
	uint64_t rd_rg_vn;
	struct gfs2_bitmap *rd_bits;	/* Array of rd_ri bitmap buffers */
	unsigned int rd_bh_count;
	struct mutex rd_mutex;
	uint32_t rd_free_clone;
	struct gfs2_log_element rd_le;
	uint32_t rd_last_alloc_data;
	uint32_t rd_last_alloc_meta;
	struct gfs2_sbd *rd_sbd;
};

/* GFS2-private buffer_head state bits */
enum gfs2_state_bits {
	BH_Pinned = BH_PrivateStart,
	BH_Escaped = BH_PrivateStart + 1,
};

BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)

/* Per-buffer journaling bookkeeping: links a buffer_head to its
   glock, transaction list, log element and AIL lists */
struct gfs2_bufdata {
	struct buffer_head *bd_bh;
	struct gfs2_glock *bd_gl;

	struct list_head bd_list_tr;
	struct gfs2_log_element bd_le;

	struct gfs2_ail *bd_ail;
	struct list_head bd_ail_st_list;
	struct list_head bd_ail_gl_list;
};
120
/* Per-lock-type operations vector; the instances live in glops.c */
struct gfs2_glock_operations {
	void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
			     int flags);
	void (*go_xmote_bh) (struct gfs2_glock * gl);
	void (*go_drop_th) (struct gfs2_glock * gl);
	void (*go_drop_bh) (struct gfs2_glock * gl);
	void (*go_sync) (struct gfs2_glock * gl, int flags);
	void (*go_inval) (struct gfs2_glock * gl, int flags);
	int (*go_demote_ok) (struct gfs2_glock * gl);	/* 1 == ok to unlock */
	int (*go_lock) (struct gfs2_holder * gh);
	void (*go_unlock) (struct gfs2_holder * gh);
	void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
	void (*go_greedy) (struct gfs2_glock * gl);
	int go_type;		/* LM_TYPE_... */
};

/* Bit numbers for gfs2_holder.gh_iflags */
enum {
	/* Actions */
	HIF_MUTEX = 0,
	HIF_PROMOTE = 1,
	HIF_DEMOTE = 2,
	HIF_GREEDY = 3,

	/* States */
	HIF_ALLOCED = 4,
	HIF_DEALLOC = 5,
	HIF_HOLDER = 6,
	HIF_FIRST = 7,
	HIF_RECURSE = 8,
	HIF_ABORTED = 9,
};

/* One (actual or requested) holder of a glock */
struct gfs2_holder {
	struct list_head gh_list;

	struct gfs2_glock *gh_gl;
	struct task_struct *gh_owner;	/* Task that owns this holder */
	unsigned int gh_state;		/* LM_ST_... state requested */
	int gh_flags;			/* LM_FLAG_... | GL_... */

	int gh_error;
	unsigned long gh_iflags;	/* HIF_... */
	struct completion gh_wait;
	unsigned long gh_ip;
};

/* Bit numbers for gfs2_glock.gl_flags */
enum {
	GLF_PLUG = 0,
	GLF_LOCK = 1,
	GLF_STICKY = 2,
	GLF_PREFETCH = 3,
	GLF_SYNC = 4,
	GLF_DIRTY = 5,
	GLF_SKIP_WAITERS2 = 6,
	GLF_GREEDY = 7,
};
177
/* The core glock structure */
struct gfs2_glock {
	struct list_head gl_list;
	unsigned long gl_flags;		/* GLF_... */
	struct lm_lockname gl_name;
	struct kref gl_ref;

	spinlock_t gl_spin;

	unsigned int gl_state;		/* LM_ST_... currently held state */
	struct list_head gl_holders;
	struct list_head gl_waiters1;	/* HIF_MUTEX */
	struct list_head gl_waiters2;	/* HIF_DEMOTE, HIF_GREEDY */
	struct list_head gl_waiters3;	/* HIF_PROMOTE */

	struct gfs2_glock_operations *gl_ops;

	struct gfs2_holder *gl_req_gh;
	gfs2_glop_bh_t gl_req_bh;

	lm_lock_t *gl_lock;
	char *gl_lvb;			/* Lock Value Block */
	atomic_t gl_lvb_count;

	uint64_t gl_vn;			/* Version; bumped on invalidate */
	unsigned long gl_stamp;
	void *gl_object;		/* The protected thing (inode, rgrp, ...) */

	struct gfs2_gl_hash_bucket *gl_bucket;
	struct list_head gl_reclaim;

	struct gfs2_sbd *gl_sbd;

	struct inode *gl_aspace;
	struct gfs2_log_element gl_le;
	struct list_head gl_ail_list;
	atomic_t gl_ail_count;
};

/* Block/quota reservation state for a single allocation operation */
struct gfs2_alloc {
	/* Quota stuff */

	unsigned int al_qd_num;
	struct gfs2_quota_data *al_qd[4];
	struct gfs2_holder al_qd_ghs[4];

	/* Filled in by the caller to gfs2_inplace_reserve() */

	uint32_t al_requested;

	/* Filled in by gfs2_inplace_reserve() */

	char *al_file;
	unsigned int al_line;
	struct gfs2_holder al_ri_gh;
	struct gfs2_holder al_rgd_gh;
	struct gfs2_rgrpd *al_rgd;

	/* Filled in by gfs2_alloc_*() */

	uint32_t al_alloced;
};

/* Bit numbers for gfs2_inode.i_flags */
enum {
	GIF_MIN_INIT = 0,
	GIF_QD_LOCKED = 1,
	GIF_PAGED = 2,
	GIF_SW_PAGED = 3,
};
246
/* In-core GFS2 inode */
struct gfs2_inode {
	struct gfs2_inum i_num;

	atomic_t i_count;
	unsigned long i_flags;		/* GIF_... */

	uint64_t i_vn;			/* Compared to gl_vn to detect staleness */
	struct gfs2_dinode i_di;

	struct gfs2_glock *i_gl;
	struct gfs2_sbd *i_sbd;
	struct inode *i_vnode;		/* The VFS inode */

	struct gfs2_holder i_iopen_gh;
	struct gfs2_holder i_gh;	/* for prepare/commit_write only */
	struct gfs2_alloc i_alloc;
	uint64_t i_last_rg_alloc;

	spinlock_t i_spin;		/* Protects i_greedy, i_last_pfault */
	struct rw_semaphore i_rw_mutex;

	unsigned int i_greedy;		/* Lock retention period */
	unsigned long i_last_pfault;

	struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
};

/* Bit numbers for gfs2_file.f_flags */
enum {
	GFF_DID_DIRECT_ALLOC = 0,
};

/* Per-open-file private state */
struct gfs2_file {
	unsigned long f_flags;		/* GFF_... */

	struct mutex f_fl_mutex;
	struct gfs2_holder f_fl_gh;

	struct gfs2_inode *f_inode;
	struct file *f_vfile;
};

/* A block revocation queued into the log */
struct gfs2_revoke {
	struct gfs2_log_element rv_le;
	uint64_t rv_blkno;
};

/* A revocation seen during journal replay */
struct gfs2_revoke_replay {
	struct list_head rr_list;
	uint64_t rr_blkno;
	unsigned int rr_where;
};

/* Bit numbers for gfs2_unlinked.ul_flags */
enum {
	ULF_LOCKED = 0,
};

/* In-core tracking of an unlinked-but-open inode */
struct gfs2_unlinked {
	struct list_head ul_list;
	unsigned int ul_count;
	struct gfs2_unlinked_tag ul_ut;
	unsigned long ul_flags;		/* ULF_... */
	unsigned int ul_slot;
};

/* Bit numbers for gfs2_quota_data.qd_flags */
enum {
	QDF_USER = 0,
	QDF_CHANGE = 1,
	QDF_LOCKED = 2,
};

/* Quota limits/usage as carried in a glock's Lock Value Block */
struct gfs2_quota_lvb {
	uint32_t qb_magic;
	uint32_t __pad;
	uint64_t qb_limit;	/* Hard limit of # blocks to alloc */
	uint64_t qb_warn;	/* Warn user when alloc is above this # */
	int64_t qb_value;	/* Current # blocks allocated */
};

/* In-core state for one user/group quota */
struct gfs2_quota_data {
	struct list_head qd_list;
	unsigned int qd_count;

	uint32_t qd_id;
	unsigned long qd_flags;		/* QDF_... */

	int64_t qd_change;
	int64_t qd_change_sync;

	unsigned int qd_slot;
	unsigned int qd_slot_count;

	struct buffer_head *qd_bh;
	struct gfs2_quota_change *qd_bh_qc;
	unsigned int qd_bh_count;

	struct gfs2_glock *qd_gl;
	struct gfs2_quota_lvb qd_qb;

	uint64_t qd_sync_gen;
	unsigned long qd_last_warn;
	unsigned long qd_last_touched;
};
349
/* A buffer written as part of the log */
struct gfs2_log_buf {
	struct list_head lb_list;
	struct buffer_head *lb_bh;
	struct buffer_head *lb_real;
};

/* An in-progress transaction */
struct gfs2_trans {
	unsigned long tr_ip;		/* Caller's return address */

	unsigned int tr_blocks;
	unsigned int tr_revokes;
	unsigned int tr_reserved;

	struct gfs2_holder tr_t_gh;	/* Transaction glock holder */

	int tr_touched;

	unsigned int tr_num_buf;
	unsigned int tr_num_buf_new;
	unsigned int tr_num_buf_rm;
	struct list_head tr_list_buf;

	unsigned int tr_num_revoke;
	unsigned int tr_num_revoke_rm;
};

/* Active Items List: buffers committed to the log but not yet in place */
struct gfs2_ail {
	struct list_head ai_list;

	unsigned int ai_first;
	struct list_head ai_ail1_list;
	struct list_head ai_ail2_list;

	uint64_t ai_sync_gen;
};

/* Journal descriptor */
struct gfs2_jdesc {
	struct list_head jd_list;

	struct inode *jd_inode;
	unsigned int jd_jid;		/* Journal id */
	int jd_dirty;

	unsigned int jd_blocks;
};
395
/* Limits for the number of glockd daemons */
#define GFS2_GLOCKD_DEFAULT	1
#define GFS2_GLOCKD_MAX		16

/* Quota enforcement modes (ar_quota) */
#define GFS2_QUOTA_DEFAULT	GFS2_QUOTA_OFF
#define GFS2_QUOTA_OFF		0
#define GFS2_QUOTA_ACCOUNT	1
#define GFS2_QUOTA_ON		2

/* Data journaling modes (ar_data) */
#define GFS2_DATA_DEFAULT	GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK	1
#define GFS2_DATA_ORDERED	2

/* Parsed mount options */
struct gfs2_args {
	char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
	char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
	char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
	int ar_spectator; /* Don't get a journal because we're always RO */
	int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
	int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
	int ar_localcaching; /* Local-style caching (dangerous on multihost) */
	int ar_debug; /* Oops on errors instead of trying to be graceful */
	int ar_upgrade; /* Upgrade ondisk/multihost format */
	unsigned int ar_num_glockd; /* Number of glockd threads */
	int ar_posix_acl; /* Enable posix acls */
	int ar_quota; /* off/account/on */
	int ar_suiddir; /* suiddir support */
	int ar_data; /* ordered/writeback */
};
424
/* Runtime-tunable parameters, read via gfs2_tune_get() */
struct gfs2_tune {
	spinlock_t gt_spin;

	unsigned int gt_ilimit;
	unsigned int gt_ilimit_tries;
	unsigned int gt_ilimit_min;
	unsigned int gt_demote_secs; /* Cache retention for unheld glock */
	unsigned int gt_incore_log_blocks;
	unsigned int gt_log_flush_secs;
	unsigned int gt_jindex_refresh_secs; /* Check for new journal index */

	unsigned int gt_scand_secs;
	unsigned int gt_recoverd_secs;
	unsigned int gt_logd_secs;
	unsigned int gt_quotad_secs;
	unsigned int gt_inoded_secs;

	unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
	unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
	unsigned int gt_quota_scale_num; /* Numerator */
	unsigned int gt_quota_scale_den; /* Denominator */
	unsigned int gt_quota_cache_secs;
	unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
	unsigned int gt_atime_quantum; /* Min secs between atime updates */
	unsigned int gt_new_files_jdata;
	unsigned int gt_new_files_directio;
	unsigned int gt_max_atomic_write; /* Split big writes into this size */
	unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
	unsigned int gt_lockdump_size;
	unsigned int gt_stall_secs; /* Detects trouble! */
	unsigned int gt_complain_secs;
	unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
	unsigned int gt_entries_per_readdir;
	unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
	unsigned int gt_greedy_default;
	unsigned int gt_greedy_quantum;
	unsigned int gt_greedy_max;
	unsigned int gt_statfs_quantum;
	unsigned int gt_statfs_slow;
};

/* One bucket of the glock hash table */
struct gfs2_gl_hash_bucket {
	rwlock_t hb_lock;
	struct list_head hb_list;
};

/* Bit numbers for gfs2_sbd.sd_flags */
enum {
	SDF_JOURNAL_CHECKED = 0,
	SDF_JOURNAL_LIVE = 1,
	SDF_SHUTDOWN = 2,
	SDF_NOATIME = 3,
};

/* Glock hash table geometry */
#define GFS2_GL_HASH_SHIFT	13
#define GFS2_GL_HASH_SIZE	(1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK	(GFS2_GL_HASH_SIZE - 1)
#define GFS2_FSNAME_LEN		256
482
/*
 * struct gfs2_sbd - per-mount (superblock) data for a GFS2 filesystem.
 *
 * Gathers the mount-time constants, lock-module state, system inodes,
 * daemon task pointers, and the quota/statfs/journal machinery for one
 * mounted filesystem.  Field order is relied upon only for readability,
 * but do not reorder without checking all initialization sites.
 */
struct gfs2_sbd {
	struct super_block *sd_vfs;	/* owning VFS superblock */
	struct kobject sd_kobj;		/* sysfs representation of the mount */
	unsigned long sd_flags;	/* SDF_... */
	struct gfs2_sb sd_sb;		/* in-core copy of on-disk superblock */

	/* Constants computed on mount */

	uint32_t sd_fsb2bb;		/* FS blocks per basic (512B) block */
	uint32_t sd_fsb2bb_shift;
	uint32_t sd_diptrs;	/* Number of pointers in a dinode */
	uint32_t sd_inptrs;	/* Number of pointers in a indirect block */
	uint32_t sd_jbsize;	/* Size of a journaled data block */
	uint32_t sd_hash_bsize;	/* sizeof(exhash block) */
	uint32_t sd_hash_bsize_shift;
	uint32_t sd_hash_ptrs;	/* Number of pointers in a hash block */
	uint32_t sd_ut_per_block;	/* unlinked tags per block */
	uint32_t sd_qc_per_block;	/* quota changes per block */
	uint32_t sd_max_dirres;	/* Max blocks needed to add a directory entry */
	uint32_t sd_max_height;	/* Max height of a file's metadata tree */
	uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT];
	uint32_t sd_max_jheight; /* Max height of journaled file's meta tree */
	uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT];

	struct gfs2_args sd_args;	/* Mount arguments */
	struct gfs2_tune sd_tune;	/* Filesystem tuning structure */

	/* Lock Stuff */

	struct lm_lockstruct sd_lockstruct;	/* lock-module interface state */
	struct gfs2_gl_hash_bucket sd_gl_hash[GFS2_GL_HASH_SIZE];
	struct list_head sd_reclaim_list;	/* glocks queued for reclaim */
	spinlock_t sd_reclaim_lock;		/* protects the reclaim list */
	wait_queue_head_t sd_reclaim_wq;
	atomic_t sd_reclaim_count;
	struct gfs2_holder sd_live_gh;
	struct gfs2_glock *sd_rename_gl;
	struct gfs2_glock *sd_trans_gl;
	struct mutex sd_invalidate_inodes_mutex;

	/* Inode Stuff -- the hidden system inodes of the filesystem */

	struct inode *sd_master_dir;
	struct inode *sd_jindex;
	struct inode *sd_inum_inode;
	struct inode *sd_statfs_inode;
	struct inode *sd_ir_inode;
	struct inode *sd_sc_inode;
	struct inode *sd_ut_inode;
	struct inode *sd_qc_inode;
	struct inode *sd_rindex;
	struct inode *sd_quota_inode;

	/* Inum stuff -- serializes inode-number range allocation */

	struct mutex sd_inum_mutex;

	/* StatFS stuff */

	spinlock_t sd_statfs_spin;
	struct mutex sd_statfs_mutex;
	struct gfs2_statfs_change sd_statfs_master;
	struct gfs2_statfs_change sd_statfs_local;
	unsigned long sd_statfs_sync_time;

	/* Resource group stuff */

	uint64_t sd_rindex_vn;		/* version of the cached rindex */
	spinlock_t sd_rindex_spin;
	struct mutex sd_rindex_mutex;
	struct list_head sd_rindex_list;
	struct list_head sd_rindex_mru_list;
	struct list_head sd_rindex_recent_list;
	struct gfs2_rgrpd *sd_rindex_forward;
	unsigned int sd_rgrps;		/* number of resource groups */

	/* Journal index stuff */

	struct list_head sd_jindex_list;
	spinlock_t sd_jindex_spin;
	struct mutex sd_jindex_mutex;
	unsigned int sd_journals;
	unsigned long sd_jindex_refresh_time;

	struct gfs2_jdesc *sd_jdesc;	/* this node's journal */
	struct gfs2_holder sd_journal_gh;
	struct gfs2_holder sd_jinode_gh;

	struct gfs2_holder sd_ir_gh;
	struct gfs2_holder sd_sc_gh;
	struct gfs2_holder sd_ut_gh;
	struct gfs2_holder sd_qc_gh;

	/* Daemon stuff -- kernel threads belonging to this mount */

	struct task_struct *sd_scand_process;
	struct task_struct *sd_recoverd_process;
	struct task_struct *sd_logd_process;
	struct task_struct *sd_quotad_process;
	struct task_struct *sd_inoded_process;
	struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
	unsigned int sd_glockd_num;

	/* Unlinked inode stuff */

	struct list_head sd_unlinked_list;
	atomic_t sd_unlinked_count;
	spinlock_t sd_unlinked_spin;
	struct mutex sd_unlinked_mutex;

	unsigned int sd_unlinked_slots;
	unsigned int sd_unlinked_chunks;
	unsigned char **sd_unlinked_bitmap;

	/* Quota stuff */

	struct list_head sd_quota_list;
	atomic_t sd_quota_count;
	spinlock_t sd_quota_spin;
	struct mutex sd_quota_mutex;

	unsigned int sd_quota_slots;
	unsigned int sd_quota_chunks;
	unsigned char **sd_quota_bitmap;

	uint64_t sd_quota_sync_gen;
	unsigned long sd_quota_sync_time;

	/* Log stuff */

	spinlock_t sd_log_lock;		/* protects the counters/lists below */

	unsigned int sd_log_blks_reserved;
	unsigned int sd_log_commited_buf;
	unsigned int sd_log_commited_revoke;

	unsigned int sd_log_num_gl;
	unsigned int sd_log_num_buf;
	unsigned int sd_log_num_revoke;
	unsigned int sd_log_num_rg;
	unsigned int sd_log_num_databuf;
	unsigned int sd_log_num_jdata;

	struct list_head sd_log_le_gl;
	struct list_head sd_log_le_buf;
	struct list_head sd_log_le_revoke;
	struct list_head sd_log_le_rg;
	struct list_head sd_log_le_databuf;

	unsigned int sd_log_blks_free;
	struct mutex sd_log_reserve_mutex;

	uint64_t sd_log_sequence;
	unsigned int sd_log_head;
	unsigned int sd_log_tail;
	uint64_t sd_log_wraps;
	int sd_log_idle;

	unsigned long sd_log_flush_time;
	struct rw_semaphore sd_log_flush_lock;
	struct list_head sd_log_flush_list;

	unsigned int sd_log_flush_head;
	uint64_t sd_log_flush_wrapped;

	struct list_head sd_ail1_list;	/* active items list, stage 1 */
	struct list_head sd_ail2_list;	/* active items list, stage 2 */
	uint64_t sd_ail_sync_gen;

	/* Replay stuff -- state used during journal recovery */

	struct list_head sd_revoke_list;
	unsigned int sd_replay_tail;

	unsigned int sd_found_blocks;
	unsigned int sd_found_revokes;
	unsigned int sd_replayed_blocks;

	/* For quiescing the filesystem */

	struct gfs2_holder sd_freeze_gh;
	struct mutex sd_freeze_lock;
	unsigned int sd_freeze_count;

	/* Counters */

	atomic_t sd_glock_count;
	atomic_t sd_glock_held_count;
	atomic_t sd_inode_count;
	atomic_t sd_reclaimed;

	char sd_fsname[GFS2_FSNAME_LEN];	/* "table.jid" style name */
	char sd_table_name[GFS2_FSNAME_LEN];
	char sd_proto_name[GFS2_FSNAME_LEN];

	/* Debugging crud */

	unsigned long sd_last_warning;	/* rate-limits warning messages */
};
682
683#endif /* __INCORE_DOT_H__ */
684
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
new file mode 100644
index 000000000000..6140c2434e85
--- /dev/null
+++ b/fs/gfs2/inode.c
@@ -0,0 +1,1854 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h>
19#include <asm/semaphore.h>
20
21#include "gfs2.h"
22#include "lm_interface.h"
23#include "incore.h"
24#include "acl.h"
25#include "bmap.h"
26#include "dir.h"
27#include "eattr.h"
28#include "glock.h"
29#include "glops.h"
30#include "inode.h"
31#include "log.h"
32#include "meta_io.h"
33#include "ops_address.h"
34#include "ops_file.h"
35#include "ops_inode.h"
36#include "quota.h"
37#include "rgrp.h"
38#include "trans.h"
39#include "unlinked.h"
40#include "util.h"
41
42/**
43 * inode_attr_in - Copy attributes from the dinode into the VFS inode
44 * @ip: The GFS2 inode (with embedded disk inode data)
45 * @inode: The Linux VFS inode
46 *
47 */
48
49static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode)
50{
51 inode->i_ino = ip->i_num.no_formal_ino;
52
53 switch (ip->i_di.di_mode & S_IFMT) {
54 case S_IFBLK:
55 case S_IFCHR:
56 inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
57 break;
58 default:
59 inode->i_rdev = 0;
60 break;
61 };
62
63 inode->i_mode = ip->i_di.di_mode;
64 inode->i_nlink = ip->i_di.di_nlink;
65 inode->i_uid = ip->i_di.di_uid;
66 inode->i_gid = ip->i_di.di_gid;
67 i_size_write(inode, ip->i_di.di_size);
68 inode->i_atime.tv_sec = ip->i_di.di_atime;
69 inode->i_mtime.tv_sec = ip->i_di.di_mtime;
70 inode->i_ctime.tv_sec = ip->i_di.di_ctime;
71 inode->i_atime.tv_nsec = 0;
72 inode->i_mtime.tv_nsec = 0;
73 inode->i_ctime.tv_nsec = 0;
74 inode->i_blksize = PAGE_SIZE;
75 inode->i_blocks = ip->i_di.di_blocks <<
76 (ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
77
78 if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE)
79 inode->i_flags |= S_IMMUTABLE;
80 else
81 inode->i_flags &= ~S_IMMUTABLE;
82
83 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY)
84 inode->i_flags |= S_APPEND;
85 else
86 inode->i_flags &= ~S_APPEND;
87}
88
/**
 * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
 * @ip: The GFS2 inode (with embedded disk inode data)
 *
 * If a VFS inode is currently attached to @ip, refresh its attributes
 * from the dinode and drop the reference taken by the lookup.
 */

void gfs2_inode_attr_in(struct gfs2_inode *ip)
{
	struct inode *inode = gfs2_ip2v_lookup(ip);

	if (!inode)
		return;

	inode_attr_in(ip, inode);
	iput(inode);
}
105
/**
 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
 * @ip: The GFS2 inode
 *
 * Only copy out the attributes that we want the VFS layer
 * to be able to modify.
 */

void gfs2_inode_attr_out(struct gfs2_inode *ip)
{
	struct inode *inode = ip->i_vnode;

	/* The file type bits must never change through this path; only
	   the permission portion of i_mode may legitimately differ */
	gfs2_assert_withdraw(ip->i_sbd,
		(ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
	ip->i_di.di_mode = inode->i_mode;
	ip->i_di.di_uid = inode->i_uid;
	ip->i_di.di_gid = inode->i_gid;
	/* Timestamps are stored with second resolution on disk */
	ip->i_di.di_atime = inode->i_atime.tv_sec;
	ip->i_di.di_mtime = inode->i_mtime.tv_sec;
	ip->i_di.di_ctime = inode->i_ctime.tv_sec;
}
127
128/**
129 * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode
130 * @ip: the struct gfs2_inode to get the struct inode for
131 *
132 * Returns: A VFS inode, or NULL if none
133 */
134
135struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip)
136{
137 struct inode *inode = NULL;
138
139 gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags));
140
141 spin_lock(&ip->i_spin);
142 if (ip->i_vnode)
143 inode = igrab(ip->i_vnode);
144 spin_unlock(&ip->i_spin);
145
146 return inode;
147}
148
/**
 * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode
 * @ip: the struct gfs2_inode to get the struct inode for
 *
 * Returns: A VFS inode, or NULL if no mem
 */

struct inode *gfs2_ip2v(struct gfs2_inode *ip)
{
	struct inode *inode, *tmp;

	/* Fast path: a VFS inode is already attached */
	inode = gfs2_ip2v_lookup(ip);
	if (inode)
		return inode;

	/* Build a candidate VFS inode before taking i_spin */
	tmp = new_inode(ip->i_sbd->sd_vfs);
	if (!tmp)
		return NULL;

	inode_attr_in(ip, tmp);

	if (S_ISREG(ip->i_di.di_mode)) {
		tmp->i_op = &gfs2_file_iops;
		tmp->i_fop = &gfs2_file_fops;
		tmp->i_mapping->a_ops = &gfs2_file_aops;
	} else if (S_ISDIR(ip->i_di.di_mode)) {
		tmp->i_op = &gfs2_dir_iops;
		tmp->i_fop = &gfs2_dir_fops;
	} else if (S_ISLNK(ip->i_di.di_mode)) {
		tmp->i_op = &gfs2_symlink_iops;
	} else {
		tmp->i_op = &gfs2_dev_iops;
		init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
	}

	tmp->u.generic_ip = NULL;

	/* Race with other tasks attaching an inode at the same time.
	   If i_vnode appears, try to grab a reference to it; igrab()
	   can fail while that inode is being torn down, in which case
	   we yield and retry.  NOTE: when the loop breaks, i_spin is
	   still held -- it is released after the attach below. */
	for (;;) {
		spin_lock(&ip->i_spin);
		if (!ip->i_vnode)
			break;
		inode = igrab(ip->i_vnode);
		spin_unlock(&ip->i_spin);

		if (inode) {
			/* Lost the race; discard our candidate */
			iput(tmp);
			return inode;
		}
		yield();
	}

	inode = tmp;

	/* Attach under i_spin (taken just before the break above) */
	gfs2_inode_hold(ip);
	ip->i_vnode = inode;
	inode->u.generic_ip = ip;

	spin_unlock(&ip->i_spin);

	insert_inode_hash(inode);

	return inode;
}
212
213static int iget_test(struct inode *inode, void *opaque)
214{
215 struct gfs2_inode *ip = inode->u.generic_ip;
216 struct gfs2_inum *inum = (struct gfs2_inum *)opaque;
217
218 if (ip && ip->i_num.no_addr == inum->no_addr)
219 return 1;
220
221 return 0;
222}
223
224struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
225{
226 return ilookup5(sb, (unsigned long)inum->no_formal_ino,
227 iget_test, inum);
228}
229
230void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type)
231{
232 if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) {
233 ip->i_di.di_nlink = 1;
234 ip->i_di.di_mode = DT2IF(type);
235 }
236}
237
/**
 * gfs2_inode_refresh - Refresh the incore copy of the dinode
 * @ip: The GFS2 inode
 *
 * Re-reads the dinode block and refreshes ip->i_di from it, then
 * sanity-checks that the block really belongs to this inode.
 *
 * Returns: errno; -EIO on metadata inconsistency, -ESTALE if the
 *          formal inode number no longer matches
 */

int gfs2_inode_refresh(struct gfs2_inode *ip)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	/* The block must carry a dinode metatype header */
	if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) {
		brelse(dibh);
		return -EIO;
	}

	gfs2_dinode_in(&ip->i_di, dibh->b_data);
	set_bit(GIF_MIN_INIT, &ip->i_flags);

	brelse(dibh);

	/* A dinode whose self-reference doesn't match its own block
	   address is filesystem corruption */
	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}
	/* Same address but different formal number: stale handle */
	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
		return -ESTALE;

	/* Incore copy is now current with respect to the glock */
	ip->i_vn = ip->i_gl->gl_vn;

	return 0;
}
276
/**
 * inode_create - create a struct gfs2_inode
 * @i_gl: The glock covering the inode
 * @inum: The inode number
 * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode)
 * @io_state: the state the iopen glock should be acquired in
 * @ipp: pointer to put the returned inode in
 *
 * Allocates and initializes the in-core inode, acquires the iopen
 * glock for its lifetime, and cross-links the two glocks via their
 * gl_object pointers.  The caller receives the initial reference.
 *
 * Returns: errno
 */

static int inode_create(struct gfs2_glock *i_gl, const struct gfs2_inum *inum,
			struct gfs2_glock *io_gl, unsigned int io_state,
			struct gfs2_inode **ipp)
{
	struct gfs2_sbd *sdp = i_gl->gl_sbd;
	struct gfs2_inode *ip;
	int error = 0;

	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
	if (!ip)
		return -ENOMEM;
	memset(ip, 0, sizeof(struct gfs2_inode));

	ip->i_num = *inum;

	/* The caller gets the first reference */
	atomic_set(&ip->i_count, 1);

	/* Start one version behind the glock so the first access forces
	   a gfs2_inode_refresh() */
	ip->i_vn = i_gl->gl_vn - 1;

	ip->i_gl = i_gl;
	ip->i_sbd = sdp;

	spin_lock_init(&ip->i_spin);
	init_rwsem(&ip->i_rw_mutex);

	ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);

	/* Hold the iopen glock for the life of the in-core inode */
	error = gfs2_glock_nq_init(io_gl,
				   io_state, GL_LOCAL_EXCL | GL_EXACT,
				   &ip->i_iopen_gh);
	if (error)
		goto fail;
	ip->i_iopen_gh.gh_owner = NULL;

	/* Point the iopen glock back at the inode glock... */
	spin_lock(&io_gl->gl_spin);
	gfs2_glock_hold(i_gl);
	io_gl->gl_object = i_gl;
	spin_unlock(&io_gl->gl_spin);

	/* ...and the inode glock at the in-core inode */
	gfs2_glock_hold(i_gl);
	i_gl->gl_object = ip;

	atomic_inc(&sdp->sd_inode_count);

	*ipp = ip;

	return 0;

 fail:
	gfs2_meta_cache_flush(ip);
	kmem_cache_free(gfs2_inode_cachep, ip);
	*ipp = NULL;

	return error;
}
343
/**
 * gfs2_inode_get - Create or get a reference on an inode
 * @i_gl: The glock covering the inode
 * @inum: The inode number
 * @create: if set, create the in-core inode when none is attached yet
 * @ipp: pointer to put the returned inode in
 *
 * Serialized against concurrent lookups/creates on the same inode by
 * the glock's glmutex.
 *
 * Returns: errno (-ESTALE if the attached inode's formal number
 *          doesn't match @inum)
 */

int gfs2_inode_get(struct gfs2_glock *i_gl, const struct gfs2_inum *inum,
		   int create, struct gfs2_inode **ipp)
{
	struct gfs2_sbd *sdp = i_gl->gl_sbd;
	struct gfs2_glock *io_gl;
	int error = 0;

	gfs2_glmutex_lock(i_gl);

	*ipp = i_gl->gl_object;
	if (*ipp) {
		/* An inode is already attached; it must be the same
		   generation or the caller's reference is stale */
		error = -ESTALE;
		if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino)
			goto out;
		atomic_inc(&(*ipp)->i_count);
		error = 0;
		goto out;
	}

	if (!create)
		goto out;

	/* inode_create() takes its own hold on io_gl, so the local
	   reference is dropped right after */
	error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops,
			       CREATE, &io_gl);
	if (!error) {
		error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp);
		gfs2_glock_put(io_gl);
	}

 out:
	gfs2_glmutex_unlock(i_gl);

	return error;
}
388
389void gfs2_inode_hold(struct gfs2_inode *ip)
390{
391 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
392 atomic_inc(&ip->i_count);
393}
394
395void gfs2_inode_put(struct gfs2_inode *ip)
396{
397 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
398 atomic_dec(&ip->i_count);
399}
400
/* Tear down an in-core inode whose reference count has dropped to
   zero: undo the glock cross-links made by inode_create(), release
   the iopen glock, and free the structure. */
void gfs2_inode_destroy(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
	struct gfs2_glock *i_gl = ip->i_gl;

	gfs2_assert_warn(sdp, !atomic_read(&ip->i_count));
	gfs2_assert(sdp, io_gl->gl_object == i_gl);

	/* Detach the iopen glock from the inode glock (drops the hold
	   taken when the link was made) */
	spin_lock(&io_gl->gl_spin);
	io_gl->gl_object = NULL;
	gfs2_glock_put(i_gl);
	spin_unlock(&io_gl->gl_spin);

	gfs2_glock_dq_uninit(&ip->i_iopen_gh);

	gfs2_meta_cache_flush(ip);
	kmem_cache_free(gfs2_inode_cachep, ip);

	/* Detach the inode glock from the (now freed) inode */
	i_gl->gl_object = NULL;
	gfs2_glock_put(i_gl);

	atomic_dec(&sdp->sd_inode_count);
}
425
/* Free the dinode block itself and remove the inode's entry from the
   on-disk unlinked list.  All data/metadata blocks must already be
   gone (di_blocks == 1).  Resources are released via the standard
   goto-cleanup chain in reverse acquisition order. */
static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al;
	struct gfs2_rgrpd *rgd;
	int error;

	/* Only the dinode block itself may remain at this point */
	if (ip->i_di.di_blocks != 1) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
	if (error)
		goto out_qs;

	/* Find the resource group containing the dinode block */
	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
	if (!rgd) {
		gfs2_consist_inode(ip);
		error = -EIO;
		goto out_rindex_relse;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
				   &al->al_rgd_gh);
	if (error)
		goto out_rindex_relse;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
				 RES_STATFS + RES_QUOTA, 1);
	if (error)
		goto out_rg_gunlock;

	gfs2_trans_add_gl(ip->i_gl);

	/* Return the dinode block to the resource group... */
	gfs2_free_di(rgd, ip);

	/* ...and drop the on-disk unlinked-list entry */
	error = gfs2_unlinked_ondisk_rm(sdp, ul);

	gfs2_trans_end(sdp);
	clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);

 out_rg_gunlock:
	gfs2_glock_dq_uninit(&al->al_rgd_gh);

 out_rindex_relse:
	gfs2_glock_dq_uninit(&al->al_ri_gh);

 out_qs:
	gfs2_quota_unhold(ip);

 out:
	gfs2_alloc_put(ip);

	return error;
}
489
/**
 * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode)
 * @sdp: the filesystem
 * @ul: the unlinked-list entry naming the inode to deallocate
 * @io_gh: a holder for the iopen glock for this inode
 *
 * Builds a temporary in-core inode for the victim, then frees its
 * directory/extended-attribute/data blocks and finally the dinode
 * itself via dinode_dealloc().
 *
 * Returns: 0 on success, 1 if the iopen glock was busy, -errno on error
 */

static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul,
			 struct gfs2_holder *io_gh)
{
	struct gfs2_inode *ip;
	struct gfs2_holder i_gh;
	int error;

	error = gfs2_glock_nq_num(sdp,
				  ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops,
				  LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return error;

	/* We reacquire the iopen lock here to avoid a race with the NFS server
	   calling gfs2_read_inode() with the inode number of a inode we're in
	   the process of deallocating.  And we can't keep our hold on the lock
	   from inode_dealloc_init() for deadlock reasons. */

	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
	error = gfs2_glock_nq(io_gh);
	switch (error) {
	case 0:
		break;
	case GLR_TRYFAILED:
		/* Someone else holds the iopen lock: the inode is still
		   open somewhere; report busy (1) */
		error = 1;
		/* fallthrough */
	default:
		goto out;
	}

	gfs2_assert_warn(sdp, !i_gh.gh_gl->gl_object);
	error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl,
			     LM_ST_EXCLUSIVE, &ip);

	gfs2_glock_dq(io_gh);

	if (error)
		goto out;

	error = gfs2_inode_refresh(ip);
	if (error)
		goto out_iput;

	/* A linked inode on the unlinked list is corruption */
	if (ip->i_di.di_nlink) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		error = -EIO;
		goto out_iput;
	}

	/* Free the exhash directory structure, if any */
	if (S_ISDIR(ip->i_di.di_mode) &&
	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
		error = gfs2_dir_exhash_dealloc(ip);
		if (error)
			goto out_iput;
	}

	/* Free extended attribute blocks, if any */
	if (ip->i_di.di_eattr) {
		error = gfs2_ea_dealloc(ip);
		if (error)
			goto out_iput;
	}

	/* Free the file data blocks unless they live in the dinode */
	if (!gfs2_is_stuffed(ip)) {
		error = gfs2_file_dealloc(ip);
		if (error)
			goto out_iput;
	}

	error = dinode_dealloc(ip, ul);
	if (error)
		goto out_iput;

 out_iput:
	/* Temporary in-core inode is always torn down, success or not */
	gfs2_glmutex_lock(i_gh.gh_gl);
	gfs2_inode_put(ip);
	gfs2_inode_destroy(ip);
	gfs2_glmutex_unlock(i_gh.gh_gl);

 out:
	gfs2_glock_dq_uninit(&i_gh);

	return error;
}
582
/**
 * try_inode_dealloc - Try to deallocate an inode and all its blocks
 * @sdp: the filesystem
 * @ul: the unlinked-list entry naming the inode
 *
 * Uses a TRY acquisition of the iopen glock to detect whether the
 * inode is still open elsewhere in the cluster.
 *
 * Returns: 0 on success, -errno on error, 1 on busy (inode open)
 */

static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	struct gfs2_holder io_gh;
	int error = 0;

	/* Evict any cached in-core copy of the inode first */
	gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum);

	error = gfs2_glock_nq_num(sdp,
				  ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops,
				  LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
	switch (error) {
	case 0:
		break;
	case GLR_TRYFAILED:
		/* Inode is held open somewhere; come back later */
		return 1;
	default:
		return error;
	}

	/* Drop the lock but keep the holder; inode_dealloc() reacquires
	   it with its own TRY semantics */
	gfs2_glock_dq(&io_gh);
	error = inode_dealloc(sdp, ul, &io_gh);
	gfs2_holder_uninit(&io_gh);

	return error;
}
615
/* Deallocate an inode that was reserved on disk but never initialized
   (GFS2_UTF_UNINIT): just give the block back to its resource group
   and drop the unlinked-list entry.  No dinode contents exist yet. */
static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_holder ri_gh, rgd_gh;
	int error;

	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		return error;

	rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr);
	if (!rgd) {
		gfs2_consist(sdp);
		error = -EIO;
		goto out;
	}

	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
	if (error)
		goto out;

	error = gfs2_trans_begin(sdp,
				 RES_RG_BIT + RES_UNLINKED + RES_STATFS,
				 0);
	if (error)
		goto out_gunlock;

	gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr);
	/* NOTE(review): the return value of gfs2_unlinked_ondisk_rm()
	   is discarded here, unlike in dinode_dealloc() -- confirm
	   whether that is intentional */
	gfs2_unlinked_ondisk_rm(sdp, ul);

	gfs2_trans_end(sdp);

 out_gunlock:
	gfs2_glock_dq_uninit(&rgd_gh);
 out:
	gfs2_glock_dq_uninit(&ri_gh);

	return error;
}
655
656int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
657{
658 if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT)
659 return inode_dealloc_uninit(sdp, ul);
660 else
661 return try_inode_dealloc(sdp, ul);
662}
663
/**
 * gfs2_change_nlink - Change nlink count on inode
 * @ip: The GFS2 inode
 * @diff: The change in the nlink count required
 *
 * Updates di_nlink and di_ctime in the dinode buffer inside the
 * caller's transaction.
 *
 * Returns: errno
 */

int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
{
	struct buffer_head *dibh;
	uint32_t nlink;
	int error;

	nlink = ip->i_di.di_nlink + diff;

	/* If we are reducing the nlink count, but the new value ends up being
	   bigger than the old one, we must have underflowed. */
	if (diff < 0 && nlink > ip->i_di.di_nlink) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		return -EIO;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	ip->i_di.di_nlink = nlink;
	ip->i_di.di_ctime = get_seconds();

	/* Journal the dinode buffer and write the new incore state out */
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

	return 0;
}
701
702struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
703{
704 struct qstr qstr;
705 gfs2_str2qstr(&qstr, name);
706 return gfs2_lookupi(dip, &qstr, 1, NULL);
707}
708
709
/**
 * gfs2_lookupi - Look up a filename in a directory and return its inode
 * @dir: The VFS inode of the directory to search
 * @name: The name of the inode to look for
 * @is_root: If 1, ignore the caller's permissions
 * @nd: passed through from the VFS; unused here
 *
 * "." and ".." at the filesystem root resolve to the directory itself.
 *
 * Returns: the VFS inode on success, NULL if the name does not exist
 * (-ENOENT is mapped to NULL), or ERR_PTR(errno) on failure
 */

struct inode *gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
			   struct nameidata *nd)

{
	struct super_block *sb = dir->i_sb;
	struct gfs2_inode *ipp;
	struct gfs2_inode *dip = dir->u.generic_ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_holder d_gh;
	struct gfs2_inum inum;
	unsigned int type;
	struct gfs2_glock *gl;
	int error = 0;
	struct inode *inode = NULL;

	if (!name->len || name->len > GFS2_FNAMESIZE)
		return ERR_PTR(-ENAMETOOLONG);

	/* "." anywhere, and ".." at the root, are the directory itself;
	   no directory glock or search needed */
	if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
	    (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
	     dir == sb->s_root->d_inode)) {
		gfs2_inode_hold(dip);
		ipp = dip;
		goto done;
	}

	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
	if (error)
		return ERR_PTR(error);

	if (!is_root) {
		error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL);
		if (error)
			goto out;
	}

	error = gfs2_dir_search(dir, name, &inum, &type);
	if (error)
		goto out;

	error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops,
			       CREATE, &gl);
	if (error)
		goto out;

	/* The dirent gives us the file type, enough for a minimal init
	   without reading the dinode */
	error = gfs2_inode_get(gl, &inum, CREATE, &ipp);
	if (!error)
		gfs2_inode_min_init(ipp, type);

	gfs2_glock_put(gl);

out:
	gfs2_glock_dq_uninit(&d_gh);
done:
	if (error == -ENOENT)
		return NULL;
	if (error == 0) {
		/* Exchange the gfs2_inode reference for a VFS inode one */
		inode = gfs2_ip2v(ipp);
		gfs2_inode_put(ipp);
		if (!inode)
			return ERR_PTR(-ENOMEM);
		return inode;
	}
	return ERR_PTR(error);
}
788
/* Fast path of formal-inode-number allocation: take the next number
   from the node-local inum range (ir) stored in sd_ir_inode, under
   sd_inum_mutex and a one-dinode transaction.
   Returns 0 on success, 1 if the local range is exhausted (caller
   falls back to pick_formal_ino_2), or -errno on error. */
static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino)
{
	struct gfs2_inode *ip = sdp->sd_ir_inode->u.generic_ip;
	struct buffer_head *bh;
	struct gfs2_inum_range ir;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;
	mutex_lock(&sdp->sd_inum_mutex);

	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error) {
		mutex_unlock(&sdp->sd_inum_mutex);
		gfs2_trans_end(sdp);
		return error;
	}

	/* The range lives just past the dinode header in the block */
	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));

	if (ir.ir_length) {
		/* Consume one number from the front of the range and
		   write the shrunken range back */
		*formal_ino = ir.ir_start++;
		ir.ir_length--;
		gfs2_trans_add_bh(ip->i_gl, bh, 1);
		gfs2_inum_range_out(&ir,
				    bh->b_data + sizeof(struct gfs2_dinode));
		brelse(bh);
		mutex_unlock(&sdp->sd_inum_mutex);
		gfs2_trans_end(sdp);
		return 0;
	}

	brelse(bh);

	mutex_unlock(&sdp->sd_inum_mutex);
	gfs2_trans_end(sdp);

	/* Local range empty: signal the caller to refill */
	return 1;
}
829
/* Slow path of formal-inode-number allocation: with the master inum
   inode (sd_inum_inode) locked exclusively, refill the node-local
   range from the global counter when it is empty, then hand out one
   number from the local range.  Returns errno. */
static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino)
{
	struct gfs2_inode *ip = sdp->sd_ir_inode->u.generic_ip;
	struct gfs2_inode *m_ip = sdp->sd_inum_inode->u.generic_ip;
	struct gfs2_holder gh;
	struct buffer_head *bh;
	struct gfs2_inum_range ir;
	int error;

	/* Exclusive lock on the cluster-wide master counter */
	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	if (error)
		return error;

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out;
	mutex_lock(&sdp->sd_inum_mutex);

	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
		goto out_end_trans;

	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));

	if (!ir.ir_length) {
		struct buffer_head *m_bh;
		uint64_t x, y;

		/* Carve GFS2_INUM_QUANTUM numbers off the big-endian
		   master counter stored after the dinode header */
		error = gfs2_meta_inode_buffer(m_ip, &m_bh);
		if (error)
			goto out_brelse;

		x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode));
		x = y = be64_to_cpu(x);
		ir.ir_start = x;
		ir.ir_length = GFS2_INUM_QUANTUM;
		x += GFS2_INUM_QUANTUM;
		/* Wraparound of the 64-bit counter is corruption */
		if (x < y)
			gfs2_consist_inode(m_ip);
		x = cpu_to_be64(x);
		gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
		*(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;

		brelse(m_bh);
	}

	/* Hand out one number from the (now non-empty) local range */
	*formal_ino = ir.ir_start++;
	ir.ir_length--;

	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));

 out_brelse:
	brelse(bh);

 out_end_trans:
	mutex_unlock(&sdp->sd_inum_mutex);
	gfs2_trans_end(sdp);

 out:
	gfs2_glock_dq_uninit(&gh);

	return error;
}
894
895static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum)
896{
897 int error;
898
899 error = pick_formal_ino_1(sdp, inum);
900 if (error <= 0)
901 return error;
902
903 error = pick_formal_ino_2(sdp, inum);
904
905 return error;
906}
907
908/**
909 * create_ok - OK to create a new on-disk inode here?
910 * @dip: Directory in which dinode is to be created
911 * @name: Name of new dinode
912 * @mode:
913 *
914 * Returns: errno
915 */
916
917static int create_ok(struct gfs2_inode *dip, struct qstr *name,
918 unsigned int mode)
919{
920 int error;
921
922 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
923 if (error)
924 return error;
925
926 /* Don't create entries in an unlinked directory */
927 if (!dip->i_di.di_nlink)
928 return -EPERM;
929
930 error = gfs2_dir_search(dip->i_vnode, name, NULL, NULL);
931 switch (error) {
932 case -ENOENT:
933 error = 0;
934 break;
935 case 0:
936 return -EEXIST;
937 default:
938 return error;
939 }
940
941 if (dip->i_di.di_entries == (uint32_t)-1)
942 return -EFBIG;
943 if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1)
944 return -EMLINK;
945
946 return 0;
947}
948
/* Choose mode/uid/gid for a new inode from the creating process and
   its parent directory, honouring the "suiddir" mount option and the
   usual setgid-directory inheritance. */
static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
			       unsigned int *uid, unsigned int *gid)
{
	if (dip->i_sbd->sd_args.ar_suiddir &&
	    (dip->i_di.di_mode & S_ISUID) &&
	    dip->i_di.di_uid) {
		/* suiddir: children inherit the directory owner; a
		   subdirectory also gets S_ISUID, while a file created
		   by someone else loses set-id and execute bits
		   (07111 = S_ISUID|S_ISGID|S_ISVTX exec-related bits) */
		if (S_ISDIR(*mode))
			*mode |= S_ISUID;
		else if (dip->i_di.di_uid != current->fsuid)
			*mode &= ~07111;
		*uid = dip->i_di.di_uid;
	} else
		*uid = current->fsuid;

	/* Setgid directory: inherit the group, and propagate S_ISGID
	   to subdirectories */
	if (dip->i_di.di_mode & S_ISGID) {
		if (S_ISDIR(*mode))
			*mode |= S_ISGID;
		*gid = dip->i_di.di_gid;
	} else
		*gid = current->fsgid;
}
970
/* Reserve and allocate the on-disk block for a new dinode, recording
   it on the unlinked list with GFS2_UTF_UNINIT so it can be reclaimed
   if the node crashes before the dinode is initialized. */
static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	int error;

	gfs2_alloc_get(dip);

	dip->i_alloc.al_requested = RES_DINODE;
	error = gfs2_inplace_reserve(dip);
	if (error)
		goto out;

	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
				 RES_STATFS, 0);
	if (error)
		goto out_ipreserv;

	ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip);

	/* Mark as uninitialized until make_dinode() clears the flag */
	ul->ul_ut.ut_flags = GFS2_UTF_UNINIT;
	error = gfs2_unlinked_ondisk_add(sdp, ul);

	gfs2_trans_end(sdp);

 out_ipreserv:
	gfs2_inplace_release(dip);

 out:
	gfs2_alloc_put(dip);

	return error;
}
1003
/**
 * init_dinode - Fill in a new dinode structure
 * @dip: the directory this inode is being created in
 * @gl: The glock covering the new inode
 * @inum: the inode number
 * @mode: the file permissions
 * @uid: owner user id for the new dinode
 * @gid: owner group id for the new dinode
 *
 * Writes a complete, zero-content dinode (big-endian on-disk form)
 * into a fresh metadata buffer inside the caller's transaction.
 */

static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
			struct gfs2_inum *inum, unsigned int mode,
			unsigned int uid, unsigned int gid)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_dinode *di;
	struct buffer_head *dibh;

	dibh = gfs2_meta_new(gl, inum->no_addr);
	gfs2_trans_add_bh(gl, dibh, 1);
	gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	di = (struct gfs2_dinode *)dibh->b_data;

	di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
	di->di_num.no_addr = cpu_to_be64(inum->no_addr);
	di->di_mode = cpu_to_be32(mode);
	di->di_uid = cpu_to_be32(uid);
	di->di_gid = cpu_to_be32(gid);
	/* nlink stays 0 until link_dinode() adds the directory entry */
	di->di_nlink = cpu_to_be32(0);
	di->di_size = cpu_to_be64(0);
	di->di_blocks = cpu_to_be64(1);
	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
	di->di_major = di->di_minor = cpu_to_be32(0);
	/* Start block allocation goals at the dinode itself */
	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
	di->__pad[0] = di->__pad[1] = 0;
	di->di_flags = cpu_to_be32(0);

	/* Inherit jdata/directio behavior from the parent directory or
	   the filesystem-wide tunables */
	if (S_ISREG(mode)) {
		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
		    gfs2_tune_get(sdp, gt_new_files_jdata))
			di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
		    gfs2_tune_get(sdp, gt_new_files_directio))
			di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
	} else if (S_ISDIR(mode)) {
		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
					    GFS2_DIF_INHERIT_DIRECTIO);
		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
					    GFS2_DIF_INHERIT_JDATA);
	}

	di->__pad1 = 0;
	di->di_height = cpu_to_be32(0);
	di->__pad2 = 0;
	di->__pad3 = 0;
	di->di_depth = cpu_to_be16(0);
	di->di_entries = cpu_to_be32(0);
	memset(&di->__pad4, 0, sizeof(di->__pad4));
	di->di_eattr = cpu_to_be64(0);
	memset(&di->di_reserved, 0, sizeof(di->di_reserved));

	brelse(dibh);
}
1069
/* Initialize the previously reserved dinode block: pick the final
   mode/uid/gid, clear the GFS2_UTF_UNINIT flag on the unlinked entry,
   write the dinode, and charge the quota.  Returns errno. */
static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
		       unsigned int mode, struct gfs2_unlinked *ul)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	unsigned int uid, gid;
	int error;

	munge_mode_uid_gid(dip, &mode, &uid, &gid);

	gfs2_alloc_get(dip);

	error = gfs2_quota_lock(dip, uid, gid);
	if (error)
		goto out;

	error = gfs2_quota_check(dip, uid, gid);
	if (error)
		goto out_quota;

	error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED +
				 RES_QUOTA, 0);
	if (error)
		goto out_quota;

	/* Drop the UNINIT marker; the dinode is about to become real */
	ul->ul_ut.ut_flags = 0;
	error = gfs2_unlinked_ondisk_munge(sdp, ul);
	/* NOTE(review): a munge error does not abort before
	   init_dinode() below; the error is only propagated at
	   return -- confirm this is intended */

	init_dinode(dip, gl, &ul->ul_ut.ut_inum,
		    mode, uid, gid);

	gfs2_quota_change(dip, +1, uid, gid);

	gfs2_trans_end(sdp);

 out_quota:
	gfs2_quota_unlock(dip);

 out:
	gfs2_alloc_put(dip);

	return error;
}
1112
/**
 * link_dinode - link a freshly made dinode into its parent directory
 * @dip: the directory inode
 * @name: the name of the new directory entry
 * @ip: the new inode
 * @ul: the unlinked-list entry that has been tracking the new inode
 *
 * Adds @name to @dip, gives the new inode its first link (nlink = 1),
 * and removes it from the on-disk unlinked list.  If the directory must
 * grow to hold the entry, quota is checked and a block reservation is
 * taken first.
 *
 * Returns: errno
 */

static int link_dinode(struct gfs2_inode *dip, struct qstr *name,
		       struct gfs2_inode *ip, struct gfs2_unlinked *ul)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_alloc *al;
	int alloc_required;
	struct buffer_head *dibh;
	int error;

	al = gfs2_alloc_get(dip);

	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto fail;

	/* Negative result is an error; positive means the directory needs
	   new blocks to hold the entry. */
	error = alloc_required = gfs2_diradd_alloc_required(dip->i_vnode, name);
	if (alloc_required < 0)
		goto fail;
	if (alloc_required) {
		error = gfs2_quota_check(dip, dip->i_di.di_uid,
					 dip->i_di.di_gid);
		if (error)
			goto fail_quota_locks;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve(dip);
		if (error)
			goto fail_quota_locks;

		/* Larger transaction: directory growth also touches the
		   rgrp bitmaps, statfs and quota. */
		error = gfs2_trans_begin(sdp,
					 sdp->sd_max_dirres +
					 al->al_rgd->rd_ri.ri_length +
					 2 * RES_DINODE + RES_UNLINKED +
					 RES_STATFS + RES_QUOTA, 0);
		if (error)
			goto fail_ipreserv;
	} else {
		error = gfs2_trans_begin(sdp,
					 RES_LEAF +
					 2 * RES_DINODE +
					 RES_UNLINKED, 0);
		if (error)
			goto fail_quota_locks;
	}

	error = gfs2_dir_add(dip->i_vnode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
	if (error)
		goto fail_end_trans;

	/* The new directory entry gives the inode its first link. */
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto fail_end_trans;
	ip->i_di.di_nlink = 1;
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

	/* The inode is reachable now, so drop it from the unlinked list. */
	error = gfs2_unlinked_ondisk_rm(sdp, ul);
	if (error)
		goto fail_end_trans;

	return 0;

 fail_end_trans:
	gfs2_trans_end(sdp);

 fail_ipreserv:
	if (dip->i_alloc.al_rgd)
		gfs2_inplace_release(dip);

 fail_quota_locks:
	gfs2_quota_unlock(dip);

 fail:
	gfs2_alloc_put(dip);

	return error;
}
1192
/**
 * gfs2_createi - Create a new inode
 * @ghs: An array of two holders
 * @name: The name of the new file
 * @mode: the permissions on the new inode
 *
 * @ghs[0] is an initialized holder for the directory
 * @ghs[1] is the holder for the inode lock
 *
 * On success, the glocks on both the directory and the new file are
 * held.  A transaction has been started and an inplace reservation
 * is held, as well.
 *
 * Returns: the new inode, or an ERR_PTR on failure
 */

struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
			   unsigned int mode)
{
	struct inode *inode;
	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_unlinked *ul;
	struct gfs2_inode *ip;
	int error;

	if (!name->len || name->len > GFS2_FNAMESIZE)
		return ERR_PTR(-ENAMETOOLONG);

	/* Reserve an unlinked-list slot to track the inode until it is
	   linked into the directory. */
	error = gfs2_unlinked_get(sdp, &ul);
	if (error)
		return ERR_PTR(error);

	gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
	error = gfs2_glock_nq(ghs);
	if (error)
		goto fail;

	error = create_ok(dip, name, mode);
	if (error)
		goto fail_gunlock;

	error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino);
	if (error)
		goto fail_gunlock;

	error = alloc_dinode(dip, ul);
	if (error)
		goto fail_gunlock;

	if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) {
		/* The new inode's block address sorts before the
		   directory's, so drop the directory lock and take the
		   two glocks in ascending address order, then revalidate
		   the create since the directory was briefly unlocked. */
		gfs2_glock_dq(ghs);

		error = gfs2_glock_nq_num(sdp,
					  ul->ul_ut.ut_inum.no_addr,
					  &gfs2_inode_glops,
					  LM_ST_EXCLUSIVE, GL_SKIP,
					  ghs + 1);
		if (error) {
			gfs2_unlinked_put(sdp, ul);
			return ERR_PTR(error);
		}

		gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
		error = gfs2_glock_nq(ghs);
		if (error) {
			gfs2_glock_dq_uninit(ghs + 1);
			gfs2_unlinked_put(sdp, ul);
			return ERR_PTR(error);
		}

		error = create_ok(dip, name, mode);
		if (error)
			goto fail_gunlock2;
	} else {
		/* Already in ascending order; just add the new inode's
		   glock after the directory's. */
		error = gfs2_glock_nq_num(sdp,
					  ul->ul_ut.ut_inum.no_addr,
					  &gfs2_inode_glops,
					  LM_ST_EXCLUSIVE, GL_SKIP,
					  ghs + 1);
		if (error)
			goto fail_gunlock;
	}

	error = make_dinode(dip, ghs[1].gh_gl, mode, ul);
	if (error)
		goto fail_gunlock2;

	error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip);
	if (error)
		goto fail_gunlock2;

	error = gfs2_inode_refresh(ip);
	if (error)
		goto fail_iput;

	error = gfs2_acl_create(dip, ip);
	if (error)
		goto fail_iput;

	error = link_dinode(dip, name, ip, ul);
	if (error)
		goto fail_iput;

	gfs2_unlinked_put(sdp, ul);

	inode = gfs2_ip2v(ip);
	gfs2_inode_put(ip);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	return inode;

 fail_iput:
	gfs2_inode_put(ip);

 fail_gunlock2:
	gfs2_glock_dq_uninit(ghs + 1);

 fail_gunlock:
	gfs2_glock_dq(ghs);

 fail:
	gfs2_unlinked_put(sdp, ul);

	return ERR_PTR(error);
}
1319
/**
 * gfs2_unlinki - Unlink a file
 * @dip: The inode of the directory
 * @name: The name of the file to be unlinked
 * @ip: The inode of the file to be removed
 * @ul: unlinked-list entry used to track @ip if this drops its last link
 *
 * Assumes Glocks on both dip and ip are held.
 *
 * Returns: errno
 */

int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
		 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	int error;

	error = gfs2_dir_del(dip, name);
	if (error)
		return error;

	error = gfs2_change_nlink(ip, -1);
	if (error)
		return error;

	/* If this inode is being unlinked from the directory structure,
	   we need to mark that in the log so that it isn't lost during
	   a crash. */

	if (!ip->i_di.di_nlink) {
		ul->ul_ut.ut_inum = ip->i_num;
		error = gfs2_unlinked_ondisk_add(sdp, ul);
		if (!error)
			set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
	}

	return error;
}
1358
1359/**
1360 * gfs2_rmdiri - Remove a directory
1361 * @dip: The parent directory of the directory to be removed
1362 * @name: The name of the directory to be removed
1363 * @ip: The GFS2 inode of the directory to be removed
1364 *
1365 * Assumes Glocks on dip and ip are held
1366 *
1367 * Returns: errno
1368 */
1369
1370int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
1371 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1372{
1373 struct gfs2_sbd *sdp = dip->i_sbd;
1374 struct qstr dotname;
1375 int error;
1376
1377 if (ip->i_di.di_entries != 2) {
1378 if (gfs2_consist_inode(ip))
1379 gfs2_dinode_print(&ip->i_di);
1380 return -EIO;
1381 }
1382
1383 error = gfs2_dir_del(dip, name);
1384 if (error)
1385 return error;
1386
1387 error = gfs2_change_nlink(dip, -1);
1388 if (error)
1389 return error;
1390
1391 gfs2_str2qstr(&dotname, ".");
1392 error = gfs2_dir_del(ip, &dotname);
1393 if (error)
1394 return error;
1395
1396 dotname.len = 2;
1397 dotname.name = "..";
1398 dotname.hash = gfs2_disk_hash(dotname.name, dotname.len);
1399 error = gfs2_dir_del(ip, &dotname);
1400 if (error)
1401 return error;
1402
1403 error = gfs2_change_nlink(ip, -2);
1404 if (error)
1405 return error;
1406
1407 /* This inode is being unlinked from the directory structure and
1408 we need to mark that in the log so that it isn't lost during
1409 a crash. */
1410
1411 ul->ul_ut.ut_inum = ip->i_num;
1412 error = gfs2_unlinked_ondisk_add(sdp, ul);
1413 if (!error)
1414 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1415
1416 return error;
1417}
1418
/*
 * gfs2_unlink_ok - check to see that an inode is still in a directory
 * @dip: the directory
 * @name: the name of the file
 * @ip: the inode
 *
 * Assumes that the lock on (at least) @dip is held.
 *
 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
 */

int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
		   struct gfs2_inode *ip)
{
	struct gfs2_inum inum;
	unsigned int type;
	int error;

	if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
		return -EPERM;

	/* Sticky directory: only the owner of the directory or of the
	   victim (or a suitably privileged user) may unlink. */
	if ((dip->i_di.di_mode & S_ISVTX) &&
	    dip->i_di.di_uid != current->fsuid &&
	    ip->i_di.di_uid != current->fsuid &&
	    !capable(CAP_FOWNER))
		return -EPERM;

	if (IS_APPEND(dip->i_vnode))
		return -EPERM;

	error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
	if (error)
		return error;

	/* Confirm that @name still refers to @ip. */
	error = gfs2_dir_search(dip->i_vnode, name, &inum, &type);
	if (error)
		return error;

	if (!gfs2_inum_equal(&inum, &ip->i_num))
		return -ENOENT;

	/* Entry type disagreeing with the inode's mode indicates on-disk
	   inconsistency. */
	if (IF2DT(ip->i_di.di_mode) != type) {
		gfs2_consist_inode(dip);
		return -EIO;
	}

	return 0;
}
1467
1468/*
1469 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1470 * @this: move this
1471 * @to: to here
1472 *
1473 * Follow @to back to the root and make sure we don't encounter @this
1474 * Assumes we already hold the rename lock.
1475 *
1476 * Returns: errno
1477 */
1478
1479int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1480{
1481 struct inode *dir = to->i_vnode;
1482 struct super_block *sb = dir->i_sb;
1483 struct inode *tmp;
1484 struct qstr dotdot;
1485 int error = 0;
1486
1487 gfs2_str2qstr(&dotdot, "..");
1488
1489 igrab(dir);
1490
1491 for (;;) {
1492 if (dir == this->i_vnode) {
1493 error = -EINVAL;
1494 break;
1495 }
1496 if (dir == sb->s_root->d_inode) {
1497 error = 0;
1498 break;
1499 }
1500
1501 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
1502 if (IS_ERR(tmp)) {
1503 error = PTR_ERR(tmp);
1504 break;
1505 }
1506
1507 iput(dir);
1508 dir = tmp;
1509 }
1510
1511 iput(dir);
1512
1513 return error;
1514}
1515
/**
 * gfs2_readlinki - return the contents of a symlink
 * @ip: the symlink's inode
 * @buf: a pointer to the buffer to be filled
 * @len: a pointer to the length of @buf
 *
 * If @buf is too small, a piece of memory is kmalloc()ed and needs
 * to be freed by the caller.  On success, *buf holds the target
 * (including its NUL terminator) and *len is set to di_size + 1.
 *
 * Returns: errno
 */

int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
{
	struct gfs2_holder i_gh;
	struct buffer_head *dibh;
	unsigned int x;
	int error;

	/* Shared lock suffices for reading; GL_ATIME lets the acquire
	   update atime as a side effect if it is stale. */
	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
	error = gfs2_glock_nq_atime(&i_gh);
	if (error) {
		gfs2_holder_uninit(&i_gh);
		return error;
	}

	/* A zero-length symlink is corrupt. */
	if (!ip->i_di.di_size) {
		gfs2_consist_inode(ip);
		error = -EIO;
		goto out;
	}

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	/* The target string is stuffed in the dinode block, right after
	   the dinode header.
	   NOTE(review): di_size is trusted here -- an oversized value in
	   a corrupt dinode would make the memcpy read past the stuffed
	   data; consider validating it against the block size. */
	x = ip->i_di.di_size + 1;
	if (x > *len) {
		*buf = kmalloc(x, GFP_KERNEL);
		if (!*buf) {
			error = -ENOMEM;
			goto out_brelse;
		}
	}

	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
	*len = x;

 out_brelse:
	brelse(dibh);

 out:
	gfs2_glock_dq_uninit(&i_gh);

	return error;
}
1572
/**
 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
 *                       conditionally update the inode's atime
 * @gh: the holder to acquire
 *
 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
 * Update if the difference between the current time and the inode's current
 * atime is greater than an interval specified at mount.
 *
 * Returns: errno
 */

int gfs2_glock_nq_atime(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
	unsigned int state;
	int flags;
	int error;

	/* Caller must request an atime-capable, synchronous acquire on an
	   inode glock. */
	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
	    gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
		return -EINVAL;

	/* Remember the originally requested state/flags so they can be
	   restored after a temporary exclusive upgrade. */
	state = gh->gh_state;
	flags = gh->gh_flags;

	error = gfs2_glock_nq(gh);
	if (error)
		return error;

	/* No atime updates on noatime or read-only mounts. */
	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
	    (sdp->sd_vfs->s_flags & MS_RDONLY))
		return 0;

	curtime = get_seconds();
	if (curtime - ip->i_di.di_atime >= quantum) {
		/* Upgrade to an exclusive hold so the dinode can be
		   written; LM_FLAG_ANY is cleared so we really get
		   EXCLUSIVE and not some compatible state. */
		gfs2_glock_dq(gh);
		gfs2_holder_reinit(LM_ST_EXCLUSIVE,
				   gh->gh_flags & ~LM_FLAG_ANY,
				   gh);
		error = gfs2_glock_nq(gh);
		if (error)
			return error;

		/* Verify that atime hasn't been updated while we were
		   trying to get exclusive lock. */

		curtime = get_seconds();
		if (curtime - ip->i_di.di_atime >= quantum) {
			struct buffer_head *dibh;

			error = gfs2_trans_begin(sdp, RES_DINODE, 0);
			if (error == -EROFS)
				return 0; /* went read-only: not an error */
			if (error)
				goto fail;

			error = gfs2_meta_inode_buffer(ip, &dibh);
			if (error)
				goto fail_end_trans;

			ip->i_di.di_atime = curtime;

			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			gfs2_dinode_out(&ip->i_di, dibh->b_data);
			brelse(dibh);

			gfs2_trans_end(sdp);
		}

		/* If someone else has asked for the glock,
		   unlock and let them have it.  Then reacquire
		   in the original state. */
		if (gfs2_glock_is_blocking(gl)) {
			gfs2_glock_dq(gh);
			gfs2_holder_reinit(state, flags, gh);
			return gfs2_glock_nq(gh);
		}
	}

	return 0;

 fail_end_trans:
	gfs2_trans_end(sdp);

 fail:
	gfs2_glock_dq(gh);

	return error;
}
1667
1668/**
1669 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1670 * @arg_a: the first structure
1671 * @arg_b: the second structure
1672 *
1673 * Returns: 1 if A > B
1674 * -1 if A < B
1675 * 0 if A = B
1676 */
1677
1678static int glock_compare_atime(const void *arg_a, const void *arg_b)
1679{
1680 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1681 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1682 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1683 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1684 int ret = 0;
1685
1686 if (a->ln_number > b->ln_number)
1687 ret = 1;
1688 else if (a->ln_number < b->ln_number)
1689 ret = -1;
1690 else {
1691 if (gh_a->gh_state == LM_ST_SHARED &&
1692 gh_b->gh_state == LM_ST_EXCLUSIVE)
1693 ret = 1;
1694 else if (gh_a->gh_state == LM_ST_SHARED &&
1695 (gh_b->gh_flags & GL_ATIME))
1696 ret = 1;
1697 }
1698
1699 return ret;
1700}
1701
/**
 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
 *                         atime update
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder **p;
	unsigned int x;
	int error = 0;

	if (!num_gh)
		return 0;

	/* Single holder: no ordering needed, acquire synchronously. */
	if (num_gh == 1) {
		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
		if (ghs->gh_flags & GL_ATIME)
			error = gfs2_glock_nq_atime(ghs);
		else
			error = gfs2_glock_nq(ghs);
		return error;
	}

	p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	/* Sort requests into a consistent global order (by lock number)
	   so that concurrent multi-lock acquires cannot deadlock. */
	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);

	for (x = 0; x < num_gh; x++) {
		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);

		if (p[x]->gh_flags & GL_ATIME)
			error = gfs2_glock_nq_atime(p[x]);
		else
			error = gfs2_glock_nq(p[x]);

		/* On failure, release everything acquired so far. */
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	kfree(p);

	return error;
}
1758
/**
 * gfs2_try_toss_vnode - See if we can toss a vnode from memory
 * @ip: the inode
 *
 * Prunes the dentries referencing the inode (and, for a directory,
 * shrinks its child dentries first) so the final iput() can evict it.
 * (The function is void; the old "Returns: 1 if tossed" doc was stale.)
 */

void gfs2_try_toss_vnode(struct gfs2_inode *ip)
{
	struct inode *inode;

	inode = gfs2_ip2v_lookup(ip);
	if (!inode)
		return;

	d_prune_aliases(inode);

	if (S_ISDIR(ip->i_di.di_mode)) {
		struct list_head *head = &inode->i_dentry;
		struct dentry *d = NULL;

		spin_lock(&dcache_lock);
		if (list_empty(head))
			spin_unlock(&dcache_lock);
		else {
			/* Grab the first alias and try to shrink its
			   subtree -- unless something is mounted on it. */
			d = list_entry(head->next, struct dentry, d_alias);
			dget_locked(d);
			spin_unlock(&dcache_lock);

			if (have_submounts(d))
				dput(d);
			else {
				shrink_dcache_parent(d);
				dput(d);
				d_prune_aliases(inode);
			}
		}
	}

	/* NOTE(review): forcing i_nlink to 0 makes the final iput()
	   treat the inode as deleted -- confirm this is intended even
	   for inodes still linked on disk. */
	inode->i_nlink = 0;
	iput(inode);
}
1801
1802
1803static int
1804__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1805{
1806 struct buffer_head *dibh;
1807 int error;
1808
1809 error = gfs2_meta_inode_buffer(ip, &dibh);
1810 if (!error) {
1811 error = inode_setattr(ip->i_vnode, attr);
1812 gfs2_assert_warn(ip->i_sbd, !error);
1813 gfs2_inode_attr_out(ip);
1814
1815 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1816 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1817 brelse(dibh);
1818 }
1819 return error;
1820}
1821
1822/**
1823 * gfs2_setattr_simple -
1824 * @ip:
1825 * @attr:
1826 *
1827 * Called with a reference on the vnode.
1828 *
1829 * Returns: errno
1830 */
1831
1832int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1833{
1834 int error;
1835
1836 if (current->journal_info)
1837 return __gfs2_setattr_simple(ip, attr);
1838
1839 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
1840 if (error)
1841 return error;
1842
1843 error = __gfs2_setattr_simple(ip, attr);
1844
1845 gfs2_trans_end(ip->i_sbd);
1846
1847 return error;
1848}
1849
/* Thin wrapper around the VFS permission() check. */
int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd)
{
	return permission(inode, mask, nd);
}
1854
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
new file mode 100644
index 000000000000..0dd2a26626ec
--- /dev/null
+++ b/fs/gfs2/inode.h
@@ -0,0 +1,72 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__
12
/* True if the inode's data is "stuffed" in the dinode block itself
   (metadata tree height of zero). */
static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
{
	return !ip->i_di.di_height;
}
17
/* True if the inode's data is journaled (GFS2_DIF_JDATA flag set). */
static inline int gfs2_is_jdata(struct gfs2_inode *ip)
{
	return ip->i_di.di_flags & GFS2_DIF_JDATA;
}
22
/* True if the inode is a directory (per the on-disk mode). */
static inline int gfs2_is_dir(struct gfs2_inode *ip)
{
	return S_ISDIR(ip->i_di.di_mode);
}
27
28void gfs2_inode_attr_in(struct gfs2_inode *ip);
29void gfs2_inode_attr_out(struct gfs2_inode *ip);
30struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
31struct inode *gfs2_ip2v(struct gfs2_inode *ip);
32struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum);
33
34void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type);
35int gfs2_inode_refresh(struct gfs2_inode *ip);
36
37int gfs2_inode_get(struct gfs2_glock *i_gl,
38 const struct gfs2_inum *inum, int create,
39 struct gfs2_inode **ipp);
40void gfs2_inode_hold(struct gfs2_inode *ip);
41void gfs2_inode_put(struct gfs2_inode *ip);
42void gfs2_inode_destroy(struct gfs2_inode *ip);
43
44int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
45
46int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
47struct inode *gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
48 struct nameidata *nd);
49struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
50 unsigned int mode);
51int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
52 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
53int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
54 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
55int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
56 struct gfs2_inode *ip);
57int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
58int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
59
60int gfs2_glock_nq_atime(struct gfs2_holder *gh);
61int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
62
63void gfs2_try_toss_vnode(struct gfs2_inode *ip);
64
65int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
66
67int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd);
68
69struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
70
71#endif /* __INODE_DOT_H__ */
72
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
new file mode 100644
index 000000000000..5b3c56d2df2f
--- /dev/null
+++ b/fs/gfs2/lm.c
@@ -0,0 +1,243 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "glock.h"
23#include "lm.h"
24#include "super.h"
25#include "util.h"
26#include "lvb.h"
27
28/**
29 * gfs2_lm_mount - mount a locking protocol
30 * @sdp: the filesystem
31 * @args: mount arguements
32 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
33 *
34 * Returns: errno
35 */
36
37int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
38{
39 char *proto = sdp->sd_proto_name;
40 char *table = sdp->sd_table_name;
41 int flags = 0;
42 int error;
43
44 if (sdp->sd_args.ar_spectator)
45 flags |= LM_MFLAG_SPECTATOR;
46
47 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
48
49 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
50 gfs2_glock_cb, sdp,
51 GFS2_MIN_LVB_SIZE, flags,
52 &sdp->sd_lockstruct, &sdp->sd_kobj);
53 if (error) {
54 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
55 proto, table, sdp->sd_args.ar_hostdata);
56 goto out;
57 }
58
59 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
60 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
61 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
62 GFS2_MIN_LVB_SIZE)) {
63 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
64 goto out;
65 }
66
67 if (sdp->sd_args.ar_spectator)
68 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
69 else
70 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
71 sdp->sd_lockstruct.ls_jid);
72
73 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
74
75 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
76 !sdp->sd_args.ar_ignore_local_fs) {
77 sdp->sd_args.ar_localflocks = 1;
78 sdp->sd_args.ar_localcaching = 1;
79 }
80
81 out:
82 return error;
83}
84
/* Tell the lock module that other nodes may now mount the filesystem.
   No-op once the filesystem has been shut down. */
void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
			sdp->sd_lockstruct.ls_lockspace);
}
91
/* Detach from the locking protocol, unless we have already withdrawn
   (withdrawal tears the lockproto down itself). */
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		gfs2_unmount_lockproto(&sdp->sd_lockstruct);
}
97
/**
 * gfs2_lm_withdraw - withdraw this node from the cluster
 * @sdp: the filesystem
 * @fmt: printf-style message explaining the withdrawal
 *
 * Logs @fmt, marks the filesystem shut down, and tells the lock module
 * to withdraw.  Only the first caller does the work; later calls (and
 * calls after shutdown) return 0 immediately.
 *
 * Returns: -1 after performing the withdrawal, 0 if already shut down
 */
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
{
	va_list args;

	/* Only the first withdrawal attempt proceeds. */
	if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return 0;

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	fs_err(sdp, "about to withdraw from the cluster\n");
	/* In debug mode a withdrawal is turned into a hard failure. */
	BUG_ON(sdp->sd_args.ar_debug);


	fs_err(sdp, "waiting for outstanding I/O\n");

	/* FIXME: suspend dm device so outstanding bio's complete
	   and all further io requests fail */

	fs_err(sdp, "telling LM to withdraw\n");
	gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
	fs_err(sdp, "withdrawn\n");
	dump_stack();

	return -1;
}
125
126int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
127 lm_lock_t **lockp)
128{
129 int error;
130 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
131 error = -EIO;
132 else
133 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
134 sdp->sd_lockstruct.ls_lockspace, name, lockp);
135 return error;
136}
137
/* Release a lock object obtained from gfs2_lm_get_lock().
   No-op after shutdown. */
void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
}
143
144unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
145 unsigned int cur_state, unsigned int req_state,
146 unsigned int flags)
147{
148 int ret;
149 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
150 ret = 0;
151 else
152 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
153 cur_state,
154 req_state, flags);
155 return ret;
156}
157
158unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
159 unsigned int cur_state)
160{
161 int ret;
162 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
163 ret = 0;
164 else
165 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
166 return ret;
167}
168
/* Cancel an in-progress lock request.  No-op after shutdown. */
void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
}
174
175int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp)
176{
177 int error;
178 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
179 error = -EIO;
180 else
181 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
182 return error;
183}
184
/* Detach a lock value block from @lock.  No-op after shutdown. */
void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
}
190
/* Push the lock value block contents to the lock module.
   No-op after shutdown. */
void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		sdp->sd_lockstruct.ls_ops->lm_sync_lvb(lock, lvb);
}
196
197int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
198 struct file *file, struct file_lock *fl)
199{
200 int error;
201 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
202 error = -EIO;
203 else
204 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
205 sdp->sd_lockstruct.ls_lockspace,
206 name, file, fl);
207 return error;
208}
209
210int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
211 struct file *file, int cmd, struct file_lock *fl)
212{
213 int error;
214 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
215 error = -EIO;
216 else
217 error = sdp->sd_lockstruct.ls_ops->lm_plock(
218 sdp->sd_lockstruct.ls_lockspace,
219 name, file, cmd, fl);
220 return error;
221}
222
223int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
224 struct file *file, struct file_lock *fl)
225{
226 int error;
227 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
228 error = -EIO;
229 else
230 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
231 sdp->sd_lockstruct.ls_lockspace,
232 name, file, fl);
233 return error;
234}
235
/* Report the outcome of journal recovery for journal @jid to the lock
   module.  No-op after shutdown. */
void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
			   unsigned int message)
{
	if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		sdp->sd_lockstruct.ls_ops->lm_recovery_done(
			sdp->sd_lockstruct.ls_lockspace, jid, message);
}
243
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
new file mode 100644
index 000000000000..ec812424fdec
--- /dev/null
+++ b/fs/gfs2/lm.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
14void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
15void gfs2_lm_unmount(struct gfs2_sbd *sdp);
16int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
17__attribute__ ((format(printf, 2, 3)));
18int gfs2_lm_get_lock(struct gfs2_sbd *sdp,
19 struct lm_lockname *name, lm_lock_t **lockp);
20void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock);
21unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
22 unsigned int cur_state, unsigned int req_state,
23 unsigned int flags);
24unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
25 unsigned int cur_state);
26void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock);
27int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp);
28void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
29void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
30int gfs2_lm_plock_get(struct gfs2_sbd *sdp,
31 struct lm_lockname *name,
32 struct file *file, struct file_lock *fl);
33int gfs2_lm_plock(struct gfs2_sbd *sdp,
34 struct lm_lockname *name,
35 struct file *file, int cmd, struct file_lock *fl);
36int gfs2_lm_punlock(struct gfs2_sbd *sdp,
37 struct lm_lockname *name,
38 struct file *file, struct file_lock *fl);
39void gfs2_lm_recovery_done(struct gfs2_sbd *sdp,
40 unsigned int jid, unsigned int message);
41
42#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/lm_interface.h b/fs/gfs2/lm_interface.h
new file mode 100644
index 000000000000..378432f17f27
--- /dev/null
+++ b/fs/gfs2/lm_interface.h
@@ -0,0 +1,295 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_INTERFACE_DOT_H__
11#define __LM_INTERFACE_DOT_H__
12
13/*
14 * Opaque handles represent the lock module's lockspace structure, the lock
15 * module's lock structures, and GFS's file system (superblock) structure.
16 */
17
18typedef void lm_lockspace_t;
19typedef void lm_lock_t;
20typedef void lm_fsdata_t;
21
22typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
23 void *data);
24
25/*
26 * lm_mount() flags
27 *
28 * LM_MFLAG_SPECTATOR
29 * GFS is asking to join the filesystem's lockspace, but it doesn't want to
30 * modify the filesystem. The lock module shouldn't assign a journal to the FS
31 * mount. It shouldn't send recovery callbacks to the FS mount. If the node
32 * dies or withdraws, all locks can be wiped immediately.
33 */
34
35#define LM_MFLAG_SPECTATOR 0x00000001
36
37/*
38 * lm_lockstruct flags
39 *
40 * LM_LSFLAG_LOCAL
41 * The lock_nolock module returns LM_LSFLAG_LOCAL to GFS, indicating that GFS
42 * can make single-node optimizations.
43 */
44
45#define LM_LSFLAG_LOCAL 0x00000001
46
47/*
48 * lm_lockname types
49 */
50
51#define LM_TYPE_RESERVED 0x00
52#define LM_TYPE_NONDISK 0x01
53#define LM_TYPE_INODE 0x02
54#define LM_TYPE_RGRP 0x03
55#define LM_TYPE_META 0x04
56#define LM_TYPE_IOPEN 0x05
57#define LM_TYPE_FLOCK 0x06
58#define LM_TYPE_PLOCK 0x07
59#define LM_TYPE_QUOTA 0x08
60#define LM_TYPE_JOURNAL 0x09
61
62/*
63 * lm_lock() states
64 *
65 * SHARED is compatible with SHARED, not with DEFERRED or EX.
66 * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
67 */
68
69#define LM_ST_UNLOCKED 0
70#define LM_ST_EXCLUSIVE 1
71#define LM_ST_DEFERRED 2
72#define LM_ST_SHARED 3
73
74/*
75 * lm_lock() flags
76 *
77 * LM_FLAG_TRY
78 * Don't wait to acquire the lock if it can't be granted immediately.
79 *
80 * LM_FLAG_TRY_1CB
81 * Send one blocking callback if TRY is set and the lock is not granted.
82 *
83 * LM_FLAG_NOEXP
84 * GFS sets this flag on lock requests it makes while doing journal recovery.
85 * These special requests should not be blocked due to the recovery like
86 * ordinary locks would be.
87 *
88 * LM_FLAG_ANY
89 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
90 * also be granted in SHARED. The preferred state is whichever is compatible
91 * with other granted locks, or the specified state if no other locks exist.
92 *
93 * LM_FLAG_PRIORITY
94 * Override fairness considerations. Suppose a lock is held in a shared state
95 * and there is a pending request for the deferred state. A shared lock
96 * request with the priority flag would be allowed to bypass the deferred
97 * request and directly join the other shared lock. A shared lock request
98 * without the priority flag might be forced to wait until the deferred
 * request had acquired and released the lock.
100 */
101
102#define LM_FLAG_TRY 0x00000001
103#define LM_FLAG_TRY_1CB 0x00000002
104#define LM_FLAG_NOEXP 0x00000004
105#define LM_FLAG_ANY 0x00000008
106#define LM_FLAG_PRIORITY 0x00000010
107
108/*
109 * lm_lock() and lm_async_cb return flags
110 *
111 * LM_OUT_ST_MASK
112 * Masks the lower two bits of lock state in the returned value.
113 *
114 * LM_OUT_CACHEABLE
115 * The lock hasn't been released so GFS can continue to cache data for it.
116 *
117 * LM_OUT_CANCELED
118 * The lock request was canceled.
119 *
120 * LM_OUT_ASYNC
121 * The result of the request will be returned in an LM_CB_ASYNC callback.
122 */
123
124#define LM_OUT_ST_MASK 0x00000003
125#define LM_OUT_CACHEABLE 0x00000004
126#define LM_OUT_CANCELED 0x00000008
127#define LM_OUT_ASYNC 0x00000080
128#define LM_OUT_ERROR 0x00000100
129
130/*
131 * lm_callback_t types
132 *
133 * LM_CB_NEED_E LM_CB_NEED_D LM_CB_NEED_S
134 * Blocking callback, a remote node is requesting the given lock in
135 * EXCLUSIVE, DEFERRED, or SHARED.
136 *
137 * LM_CB_NEED_RECOVERY
138 * The given journal needs to be recovered.
139 *
140 * LM_CB_DROPLOCKS
141 * Reduce the number of cached locks.
142 *
143 * LM_CB_ASYNC
144 * The given lock has been granted.
145 */
146
147#define LM_CB_NEED_E 257
148#define LM_CB_NEED_D 258
149#define LM_CB_NEED_S 259
150#define LM_CB_NEED_RECOVERY 260
151#define LM_CB_DROPLOCKS 261
152#define LM_CB_ASYNC 262
153
154/*
155 * lm_recovery_done() messages
156 */
157
158#define LM_RD_GAVEUP 308
159#define LM_RD_SUCCESS 309
160
161
/* Unique lock identifier: a 64-bit number qualified by an LM_TYPE_* tag. */
struct lm_lockname {
	uint64_t ln_number;	/* e.g. disk block number for inode locks */
	unsigned int ln_type;	/* LM_TYPE_* */
};
166
/* Two lock names are equal iff both the number and the type match.
   (The stray trailing line-continuation backslash, which silently pulled
   the following blank line into the macro definition, has been removed.) */
#define lm_name_equal(name1, name2) \
	(((name1)->ln_number == (name2)->ln_number) && \
	 ((name1)->ln_type == (name2)->ln_type))
170
/* Payload of an LM_CB_ASYNC callback: which lock completed and its status. */
struct lm_async_cb {
	struct lm_lockname lc_name;
	int lc_ret;		/* LM_OUT_* result of the async request */
};
175
struct lm_lockstruct;

/*
 * Operations vector a lock module registers with GFS (via
 * gfs_register_lockproto).  GFS selects a module by lm_proto_name at
 * mount time and calls through these pointers afterwards.
 */
struct lm_lockops {
	char lm_proto_name[256];	/* e.g. "lock_dlm"; matched at mount */

	/*
	 * Mount/Unmount
	 */

	int (*lm_mount) (char *table_name, char *host_data,
			 lm_callback_t cb, lm_fsdata_t *fsdata,
			 unsigned int min_lvb_size, int flags,
			 struct lm_lockstruct *lockstruct,
			 struct kobject *fskobj);

	void (*lm_others_may_mount) (lm_lockspace_t *lockspace);

	void (*lm_unmount) (lm_lockspace_t *lockspace);

	void (*lm_withdraw) (lm_lockspace_t *lockspace);

	/*
	 * Lock oriented operations
	 */

	int (*lm_get_lock) (lm_lockspace_t *lockspace,
			    struct lm_lockname *name, lm_lock_t **lockp);

	void (*lm_put_lock) (lm_lock_t *lock);

	unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
				 unsigned int req_state, unsigned int flags);

	unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);

	void (*lm_cancel) (lm_lock_t *lock);

	/* Lock value block management. */
	int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
	void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
	void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);

	/*
	 * Posix Lock oriented operations
	 */

	int (*lm_plock_get) (lm_lockspace_t *lockspace,
			     struct lm_lockname *name,
			     struct file *file, struct file_lock *fl);

	int (*lm_plock) (lm_lockspace_t *lockspace,
			 struct lm_lockname *name,
			 struct file *file, int cmd, struct file_lock *fl);

	int (*lm_punlock) (lm_lockspace_t *lockspace,
			   struct lm_lockname *name,
			   struct file *file, struct file_lock *fl);

	/*
	 * Client oriented operations
	 */

	void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
				  unsigned int message);

	/* Module reference taken/dropped around mount/unmount (see locking.c). */
	struct module *lm_owner;
};
242
243/*
244 * lm_mount() return values
245 *
246 * ls_jid - the journal ID this node should use
247 * ls_first - this node is the first to mount the file system
248 * ls_lvb_size - size in bytes of lock value blocks
249 * ls_lockspace - lock module's context for this file system
250 * ls_ops - lock module's functions
251 * ls_flags - lock module features
252 */
253
/* Filled in by the lock module's lm_mount(); see field notes above. */
struct lm_lockstruct {
	unsigned int ls_jid;		/* journal ID for this node */
	unsigned int ls_first;		/* nonzero: first node to mount */
	unsigned int ls_lvb_size;	/* lock value block size in bytes */
	lm_lockspace_t *ls_lockspace;	/* module's per-fs context */
	struct lm_lockops *ls_ops;
	int ls_flags;			/* LM_LSFLAG_* */
};
262
263void __init gfs2_init_lmh(void);
264
265/*
266 * Lock module bottom interface. A lock module makes itself available to GFS
267 * with these functions.
268 *
269 * For the time being, we copy the gfs1 lock module bottom interface so the
270 * same lock modules can be used with both gfs1 and gfs2 (it won't be possible
271 * to load both gfs1 and gfs2 at once.) Eventually the lock modules will fork
272 * for gfs1/gfs2 and this API can change to the gfs2_ prefix.
273 */
274
275int gfs_register_lockproto(struct lm_lockops *proto);
276
277void gfs_unregister_lockproto(struct lm_lockops *proto);
278
279/*
280 * Lock module top interface. GFS calls these functions when mounting or
281 * unmounting a file system.
282 */
283
284int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
285 lm_callback_t cb, lm_fsdata_t *fsdata,
286 unsigned int min_lvb_size, int flags,
287 struct lm_lockstruct *lockstruct,
288 struct kobject *fskobj);
289
290void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct);
291
292void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct);
293
294#endif /* __LM_INTERFACE_DOT_H__ */
295
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
new file mode 100644
index 000000000000..0f4c50ebcbad
--- /dev/null
+++ b/fs/gfs2/locking.c
@@ -0,0 +1,191 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19
20#include "lm_interface.h"
21
/* Links one registered lock protocol into lmh_list. */
struct lmh_wrapper {
	struct list_head lw_list;
	struct lm_lockops *lw_ops;
};

/* List of registered low-level locking protocols.  A file system selects one
   of them by name at mount time, e.g. lock_nolock, lock_dlm. */

static struct list_head lmh_list;
/* Serializes registration, unregistration, and mount/unmount of protocols. */
static struct semaphore lmh_lock;
32
33/**
34 * gfs_register_lockproto - Register a low-level locking protocol
35 * @proto: the protocol definition
36 *
37 * Returns: 0 on success, -EXXX on failure
38 */
39
40int gfs_register_lockproto(struct lm_lockops *proto)
41{
42 struct lmh_wrapper *lw;
43
44 down(&lmh_lock);
45
46 list_for_each_entry(lw, &lmh_list, lw_list) {
47 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
48 up(&lmh_lock);
49 printk(KERN_INFO "GFS2: protocol %s already exists\n",
50 proto->lm_proto_name);
51 return -EEXIST;
52 }
53 }
54
55 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
56 if (!lw) {
57 up(&lmh_lock);
58 return -ENOMEM;
59 }
60
61 lw->lw_ops = proto;
62 list_add(&lw->lw_list, &lmh_list);
63
64 up(&lmh_lock);
65
66 return 0;
67}
68
69/**
70 * gfs_unregister_lockproto - Unregister a low-level locking protocol
71 * @proto: the protocol definition
72 *
73 */
74
75void gfs_unregister_lockproto(struct lm_lockops *proto)
76{
77 struct lmh_wrapper *lw;
78
79 down(&lmh_lock);
80
81 list_for_each_entry(lw, &lmh_list, lw_list) {
82 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
83 list_del(&lw->lw_list);
84 up(&lmh_lock);
85 kfree(lw);
86 return;
87 }
88 }
89
90 up(&lmh_lock);
91
92 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
93 proto->lm_proto_name);
94}
95
/**
 * gfs2_mount_lockproto - Mount a lock protocol
 * @proto_name - the name of the protocol
 * @table_name - the name of the lock space
 * @host_data - data specific to this host
 * @cb - the callback to the code using the lock module
 * @fsdata - data to pass back with the callback
 * @min_lvb_size - the minimum LVB size that the caller can deal with
 * @flags - LM_MFLAG_*
 * @lockstruct - a structure returned describing the mount
 *
 * Returns: 0 on success, -EXXX on failure
 */

int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
			 lm_callback_t cb, lm_fsdata_t *fsdata,
			 unsigned int min_lvb_size, int flags,
			 struct lm_lockstruct *lockstruct,
			 struct kobject *fskobj)
{
	struct lmh_wrapper *lw = NULL;
	int try = 0;
	int error, found;

 retry:
	down(&lmh_lock);

	/* Look the protocol up by name among those already registered. */
	found = 0;
	list_for_each_entry(lw, &lmh_list, lw_list) {
		if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
			found = 1;
			break;
		}
	}

	if (!found) {
		/* Not registered yet: try once to load the module (its init
		   registers the protocol), then search again. */
		if (!try && capable(CAP_SYS_MODULE)) {
			try = 1;
			up(&lmh_lock);
			request_module(proto_name);
			goto retry;
		}
		printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
		error = -ENOENT;
		goto out;
	}

	/* The owning module is unloading; wait and retry (module exit will
	   unregister the protocol, after which we may load it again). */
	if (!try_module_get(lw->lw_ops->lm_owner)) {
		try = 0;
		up(&lmh_lock);
		msleep(1000);
		goto retry;
	}

	error = lw->lw_ops->lm_mount(table_name, host_data, cb, fsdata,
				     min_lvb_size, flags, lockstruct, fskobj);
	if (error)
		module_put(lw->lw_ops->lm_owner);
 out:
	up(&lmh_lock);
	return error;
}
158
159void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
160{
161 down(&lmh_lock);
162 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
163 if (lockstruct->ls_ops->lm_owner)
164 module_put(lockstruct->ls_ops->lm_owner);
165 up(&lmh_lock);
166}
167
168/**
169 * gfs2_withdraw_lockproto - abnormally unmount a lock module
170 * @lockstruct: the lockstruct passed into mount
171 *
172 */
173
174void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
175{
176 down(&lmh_lock);
177 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
178 if (lockstruct->ls_ops->lm_owner)
179 module_put(lockstruct->ls_ops->lm_owner);
180 up(&lmh_lock);
181}
182
183void __init gfs2_init_lmh(void)
184{
185 init_MUTEX(&lmh_lock);
186 INIT_LIST_HEAD(&lmh_list);
187}
188
189EXPORT_SYMBOL_GPL(gfs_register_lockproto);
190EXPORT_SYMBOL_GPL(gfs_unregister_lockproto);
191
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
new file mode 100644
index 000000000000..a9733ff80371
--- /dev/null
+++ b/fs/gfs2/locking/dlm/Makefile
@@ -0,0 +1,3 @@
# Build lock_dlm, the DLM-backed locking module for GFS2.
# NOTE(review): keyed off CONFIG_GFS2_FS rather than a dedicated config
# symbol for the DLM backend — confirm this is intended.
obj-$(CONFIG_GFS2_FS) += lock_dlm.o
lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
new file mode 100644
index 000000000000..1799d2237e7e
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -0,0 +1,538 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
/* Move @lp onto the lockspace's "complete" list and wake the lock_dlm
   thread.  Called from the dlm completion ast (gdlm_ast) and from paths
   that fake a completion (NOQUEUE -EAGAIN, canceled delayed requests). */
static void queue_complete(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	clear_bit(LFL_ACTIVE, &lp->flags);

	spin_lock(&ls->async_lock);
	list_add_tail(&lp->clist, &ls->complete);
	spin_unlock(&ls->async_lock);
	wake_up(&ls->thread_wait);
}
25
/* dlm completion ast: hand the finished request to the lock_dlm thread. */
static inline void gdlm_ast(void *astarg)
{
	struct gdlm_lock *lp = (struct gdlm_lock *) astarg;

	queue_complete(lp);
}
30
31static inline void gdlm_bast(void *astarg, int mode)
32{
33 struct gdlm_lock *lp = astarg;
34 struct gdlm_ls *ls = lp->ls;
35
36 if (!mode) {
37 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
38 lp->lockname.ln_type, lp->lockname.ln_number);
39 return;
40 }
41
42 spin_lock(&ls->async_lock);
43 if (!lp->bast_mode) {
44 list_add_tail(&lp->blist, &ls->blocking);
45 lp->bast_mode = mode;
46 } else if (lp->bast_mode < mode)
47 lp->bast_mode = mode;
48 spin_unlock(&ls->async_lock);
49 wake_up(&ls->thread_wait);
50}
51
52void gdlm_queue_delayed(struct gdlm_lock *lp)
53{
54 struct gdlm_ls *ls = lp->ls;
55
56 spin_lock(&ls->async_lock);
57 list_add_tail(&lp->delay_list, &ls->delayed);
58 spin_unlock(&ls->async_lock);
59}
60
61/* convert gfs lock-state to dlm lock-mode */
62
63static int16_t make_mode(int16_t lmstate)
64{
65 switch (lmstate) {
66 case LM_ST_UNLOCKED:
67 return DLM_LOCK_NL;
68 case LM_ST_EXCLUSIVE:
69 return DLM_LOCK_EX;
70 case LM_ST_DEFERRED:
71 return DLM_LOCK_CW;
72 case LM_ST_SHARED:
73 return DLM_LOCK_PR;
74 }
75 gdlm_assert(0, "unknown LM state %d", lmstate);
76 return -1;
77}
78
79/* convert dlm lock-mode to gfs lock-state */
80
81int16_t gdlm_make_lmstate(int16_t dlmmode)
82{
83 switch (dlmmode) {
84 case DLM_LOCK_IV:
85 case DLM_LOCK_NL:
86 return LM_ST_UNLOCKED;
87 case DLM_LOCK_EX:
88 return LM_ST_EXCLUSIVE;
89 case DLM_LOCK_CW:
90 return LM_ST_DEFERRED;
91 case DLM_LOCK_PR:
92 return LM_ST_SHARED;
93 }
94 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
95 return -1;
96}
97
/* Verify agreement with GFS on the current lock state.  NB: DLM_LOCK_NL and
   DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS, so no check is
   possible while the dlm lock doesn't exist yet (lp->cur == IV). */

static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
{
	int16_t cur = make_mode(cur_state);
	if (lp->cur != DLM_LOCK_IV)
		gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
}
107
/* Map GFS request flags (LM_FLAG_*) plus the cur->req transition onto the
   dlm request flags (DLM_LKF_*) for this lock. */
static inline unsigned int make_flags(struct gdlm_lock *lp,
				      unsigned int gfs_flags,
				      int16_t cur, int16_t req)
{
	unsigned int lkf = 0;

	/* "try" locks must not block in the dlm */
	if (gfs_flags & LM_FLAG_TRY)
		lkf |= DLM_LKF_NOQUEUE;

	/* like TRY, but holders get exactly one blocking callback */
	if (gfs_flags & LM_FLAG_TRY_1CB) {
		lkf |= DLM_LKF_NOQUEUE;
		lkf |= DLM_LKF_NOQUEUEBAST;
	}

	/* priority requests jump the grant/convert queues */
	if (gfs_flags & LM_FLAG_PRIORITY) {
		lkf |= DLM_LKF_NOORDER;
		lkf |= DLM_LKF_HEADQUE;
	}

	/* LM_FLAG_ANY: accept the alternate compatible mode (PR<->CW) */
	if (gfs_flags & LM_FLAG_ANY) {
		if (req == DLM_LOCK_PR)
			lkf |= DLM_LKF_ALTCW;
		else if (req == DLM_LOCK_CW)
			lkf |= DLM_LKF_ALTPR;
	}

	/* a nonzero lock id means a dlm lock exists: this is a conversion */
	if (lp->lksb.sb_lkid != 0) {
		lkf |= DLM_LKF_CONVERT;

		/* Conversion deadlock avoidance by DLM */

		if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
		    !(lkf & DLM_LKF_NOQUEUE) &&
		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
			lkf |= DLM_LKF_CONVDEADLK;
	}

	if (lp->lvb)
		lkf |= DLM_LKF_VALBLK;

	return lkf;
}
150
151/* make_strname - convert GFS lock numbers to a string */
152
153static inline void make_strname(struct lm_lockname *lockname,
154 struct gdlm_strname *str)
155{
156 sprintf(str->name, "%8x%16llx", lockname->ln_type,
157 lockname->ln_number);
158 str->namelen = GDLM_STRNAME_BYTES;
159}
160
/* Allocate and initialize a gdlm_lock for @name and link it onto the
   lockspace's all_locks list.  Returns 0 or -ENOMEM; on failure *lpp is
   left untouched. */
int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
		   struct gdlm_lock **lpp)
{
	struct gdlm_lock *lp;

	lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
	if (!lp)
		return -ENOMEM;

	lp->lockname = *name;
	lp->ls = ls;
	lp->cur = DLM_LOCK_IV;	/* no dlm lock exists yet */
	lp->lvb = NULL;
	lp->hold_null = NULL;
	init_completion(&lp->ast_wait);
	INIT_LIST_HEAD(&lp->clist);
	INIT_LIST_HEAD(&lp->blist);
	INIT_LIST_HEAD(&lp->delay_list);

	spin_lock(&ls->async_lock);
	list_add(&lp->all_list, &ls->all_locks);
	ls->all_locks_count++;
	spin_unlock(&ls->async_lock);

	*lpp = lp;
	return 0;
}
188
/* Unlink @lp from any queue it may still be on and free it.  The lock
   must still be on the lockspace's all_locks list. */
void gdlm_delete_lp(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	spin_lock(&ls->async_lock);
	/* drop pending queue memberships: complete/blocking/delayed */
	if (!list_empty(&lp->clist))
		list_del_init(&lp->clist);
	if (!list_empty(&lp->blist))
		list_del_init(&lp->blist);
	if (!list_empty(&lp->delay_list))
		list_del_init(&lp->delay_list);
	gdlm_assert(!list_empty(&lp->all_list),
		    "%x,%llx", lp->lockname.ln_type, lp->lockname.ln_number);
	list_del_init(&lp->all_list);
	ls->all_locks_count--;
	spin_unlock(&ls->async_lock);

	kfree(lp);
}
208
209int gdlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
210 lm_lock_t **lockp)
211{
212 struct gdlm_lock *lp;
213 int error;
214
215 error = gdlm_create_lp((struct gdlm_ls *) lockspace, name, &lp);
216
217 *lockp = (lm_lock_t *) lp;
218 return error;
219}
220
221void gdlm_put_lock(lm_lock_t *lock)
222{
223 gdlm_delete_lp((struct gdlm_lock *) lock);
224}
225
/* Submit the prepared request (lp->req/lp->lkf) to the dlm.  Returns
   LM_OUT_ASYNC when the result will arrive via the completion ast (or
   once recovery-delayed requests are resubmitted), or LM_OUT_ERROR. */
unsigned int gdlm_do_lock(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	struct gdlm_strname str;
	int error, bast = 1;

	/*
	 * When recovery is in progress, delay lock requests for submission
	 * once recovery is done.  Requests for recovery (NOEXP) and unlocks
	 * can pass.
	 */

	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
	    !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
		gdlm_queue_delayed(lp);
		return LM_OUT_ASYNC;
	}

	/*
	 * Submit the actual lock request.
	 */

	if (test_bit(LFL_NOBAST, &lp->flags))
		bast = 0;

	make_strname(&lp->lockname, &str);

	set_bit(LFL_ACTIVE, &lp->flags);

	log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
		  lp->lockname.ln_number, lp->lksb.sb_lkid,
		  lp->cur, lp->req, lp->lkf);

	error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
			 str.name, str.namelen, 0, gdlm_ast, (void *) lp,
			 bast ? gdlm_bast : NULL);

	/* A NOQUEUE ("try") request refused immediately is reported like a
	   normal async completion with -EAGAIN status, not as an error. */
	if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
		lp->lksb.sb_status = -EAGAIN;
		queue_complete(lp);
		error = 0;
	}

	if (error) {
		log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
			  "flags=%lx", ls->fsname, lp->lockname.ln_type,
			  lp->lockname.ln_number, error, lp->cur, lp->req,
			  lp->lkf, lp->flags);
		return LM_OUT_ERROR;
	}
	return LM_OUT_ASYNC;
}
278
/* Submit an unlock to the dlm.  The result arrives via the completion
   ast, so returns LM_OUT_ASYNC on success, LM_OUT_ERROR otherwise. */
unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	unsigned int lkf = 0;

	int error;

	set_bit(LFL_DLM_UNLOCK, &lp->flags);
	set_bit(LFL_ACTIVE, &lp->flags);

	/* write the lvb back as part of releasing the lock */
	if (lp->lvb)
		lkf = DLM_LKF_VALBLK;

	log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
		  lp->lockname.ln_number, lp->lksb.sb_lkid, lp->cur, lkf);

	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);

	if (error) {
		log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
			  "flags=%lx", ls->fsname, lp->lockname.ln_type,
			  lp->lockname.ln_number, error, lp->cur, lp->req,
			  lp->lkf, lp->flags);
		return LM_OUT_ERROR;
	}
	return LM_OUT_ASYNC;
}
305
/* lm_lock op: translate the GFS state/flags into a dlm request and
   submit it.  Returns LM_OUT_ASYNC or LM_OUT_ERROR (see gdlm_do_lock). */
unsigned int gdlm_lock(lm_lock_t *lock, unsigned int cur_state,
		       unsigned int req_state, unsigned int flags)
{
	struct gdlm_lock *lp = (struct gdlm_lock *) lock;

	clear_bit(LFL_DLM_CANCEL, &lp->flags);
	/* NOEXP (recovery) requests must not be delayed behind recovery */
	if (flags & LM_FLAG_NOEXP)
		set_bit(LFL_NOBLOCK, &lp->flags);

	check_cur_state(lp, cur_state);
	lp->req = make_mode(req_state);
	lp->lkf = make_flags(lp, flags, lp->cur, lp->req);

	return gdlm_do_lock(lp);
}
321
322unsigned int gdlm_unlock(lm_lock_t *lock, unsigned int cur_state)
323{
324 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
325
326 clear_bit(LFL_DLM_CANCEL, &lp->flags);
327 if (lp->cur == DLM_LOCK_IV)
328 return 0;
329 return gdlm_do_unlock(lp);
330}
331
/* lm_cancel op: try to cancel an in-flight request for this lock. */
void gdlm_cancel(lm_lock_t *lock)
{
	struct gdlm_lock *lp = (struct gdlm_lock *) lock;
	struct gdlm_ls *ls = lp->ls;
	int error, delay_list = 0;

	/* a dlm cancel for this lock is already outstanding */
	if (test_bit(LFL_DLM_CANCEL, &lp->flags))
		return;

	log_info("gdlm_cancel %x,%llx flags %lx",
		 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);

	spin_lock(&ls->async_lock);
	if (!list_empty(&lp->delay_list)) {
		list_del_init(&lp->delay_list);
		delay_list = 1;
	}
	spin_unlock(&ls->async_lock);

	/* The request was still parked on the delayed list (recovery), so
	   it never reached the dlm: fake a canceled completion locally. */
	if (delay_list) {
		set_bit(LFL_CANCEL, &lp->flags);
		set_bit(LFL_ACTIVE, &lp->flags);
		queue_complete(lp);
		return;
	}

	/* nothing in flight, or an unlock is in flight: nothing to cancel */
	if (!test_bit(LFL_ACTIVE, &lp->flags) ||
	    test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
		log_info("gdlm_cancel skip %x,%llx flags %lx",
			 lp->lockname.ln_type, lp->lockname.ln_number,
			 lp->flags);
		return;
	}

	/* the lock is blocked in the dlm */

	set_bit(LFL_DLM_CANCEL, &lp->flags);
	set_bit(LFL_ACTIVE, &lp->flags);

	error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
			   NULL, lp);

	log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
		 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);

	/* -EBUSY: dlm couldn't cancel now; the request proceeds normally */
	if (error == -EBUSY)
		clear_bit(LFL_DLM_CANCEL, &lp->flags);
}
380
381int gdlm_add_lvb(struct gdlm_lock *lp)
382{
383 char *lvb;
384
385 lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
386 if (!lvb)
387 return -ENOMEM;
388
389 lp->lksb.sb_lvbptr = lvb;
390 lp->lvb = lvb;
391 return 0;
392}
393
394void gdlm_del_lvb(struct gdlm_lock *lp)
395{
396 kfree(lp->lvb);
397 lp->lvb = NULL;
398 lp->lksb.sb_lvbptr = NULL;
399}
400
401/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
402 the completion) because gfs won't call hold_lvb() during a callback (from
403 the context of a lock_dlm thread). */
404
405static int hold_null_lock(struct gdlm_lock *lp)
406{
407 struct gdlm_lock *lpn = NULL;
408 int error;
409
410 if (lp->hold_null) {
411 printk(KERN_INFO "lock_dlm: lvb already held\n");
412 return 0;
413 }
414
415 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
416 if (error)
417 goto out;
418
419 lpn->lksb.sb_lvbptr = junk_lvb;
420 lpn->lvb = junk_lvb;
421
422 lpn->req = DLM_LOCK_NL;
423 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
424 set_bit(LFL_NOBAST, &lpn->flags);
425 set_bit(LFL_INLOCK, &lpn->flags);
426
427 init_completion(&lpn->ast_wait);
428 gdlm_do_lock(lpn);
429 wait_for_completion(&lpn->ast_wait);
430 error = lp->lksb.sb_status;
431 if (error) {
432 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
433 error);
434 gdlm_delete_lp(lpn);
435 lpn = NULL;
436 }
437 out:
438 lp->hold_null = lpn;
439 return error;
440}
441
/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs may call unhold_lvb() during a callback (from
   the context of a lock_dlm thread) which could cause a deadlock since the
   other lock_dlm thread could be engaged in recovery. */

static void unhold_null_lock(struct gdlm_lock *lp)
{
	struct gdlm_lock *lpn = lp->hold_null;

	gdlm_assert(lpn, "%x,%llx",
		    lp->lockname.ln_type, lp->lockname.ln_number);
	/* detach the shared junk_lvb before unlocking; LFL_UNLOCK_DELETE
	   makes the completion path free lpn once the unlock finishes */
	lpn->lksb.sb_lvbptr = NULL;
	lpn->lvb = NULL;
	set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
	gdlm_do_unlock(lpn);
	lp->hold_null = NULL;
}
459
460/* Acquire a NL lock because gfs requires the value block to remain
461 intact on the resource while the lvb is "held" even if it's holding no locks
462 on the resource. */
463
464int gdlm_hold_lvb(lm_lock_t *lock, char **lvbp)
465{
466 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
467 int error;
468
469 error = gdlm_add_lvb(lp);
470 if (error)
471 return error;
472
473 *lvbp = lp->lvb;
474
475 error = hold_null_lock(lp);
476 if (error)
477 gdlm_del_lvb(lp);
478
479 return error;
480}
481
/* lm_unhold_lvb op: drop the NL hold first, then free the lvb buffer. */
void gdlm_unhold_lvb(lm_lock_t *lock, char *lvb)
{
	struct gdlm_lock *lp = (struct gdlm_lock *) lock;

	unhold_null_lock(lp);
	gdlm_del_lvb(lp);
}
489
/* lm_sync_lvb op: write the in-memory lvb out by reconverting EX->EX
   with LFL_SYNC_LVB set.  Synchronous (waits for the completion ast);
   a no-op unless the lock is currently held EX — presumably only an EX
   holder's lvb is written back by the dlm (confirm against dlm docs). */
void gdlm_sync_lvb(lm_lock_t *lock, char *lvb)
{
	struct gdlm_lock *lp = (struct gdlm_lock *) lock;

	if (lp->cur != DLM_LOCK_EX)
		return;

	init_completion(&lp->ast_wait);
	set_bit(LFL_SYNC_LVB, &lp->flags);

	lp->req = DLM_LOCK_EX;
	lp->lkf = make_flags(lp, 0, lp->cur, lp->req);

	gdlm_do_lock(lp);
	wait_for_completion(&lp->ast_wait);
}
506
507void gdlm_submit_delayed(struct gdlm_ls *ls)
508{
509 struct gdlm_lock *lp, *safe;
510
511 spin_lock(&ls->async_lock);
512 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
513 list_del_init(&lp->delay_list);
514 list_add_tail(&lp->delay_list, &ls->submit);
515 }
516 spin_unlock(&ls->async_lock);
517 wake_up(&ls->thread_wait);
518}
519
520int gdlm_release_all_locks(struct gdlm_ls *ls)
521{
522 struct gdlm_lock *lp, *safe;
523 int count = 0;
524
525 spin_lock(&ls->async_lock);
526 list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
527 list_del_init(&lp->all_list);
528
529 if (lp->lvb && lp->lvb != junk_lvb)
530 kfree(lp->lvb);
531 kfree(lp);
532 count++;
533 }
534 spin_unlock(&ls->async_lock);
535
536 return count;
537}
538
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
new file mode 100644
index 000000000000..6d76146953ce
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -0,0 +1,191 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/string.h>
19#include <linux/list.h>
20#include <linux/socket.h>
21#include <linux/delay.h>
22#include <linux/kthread.h>
23#include <linux/kobject.h>
24#include <linux/fcntl.h>
25#include <linux/wait.h>
26#include <net/sock.h>
27
28#include <linux/dlm.h>
29#include "../../lm_interface.h"
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000
40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128
42
43/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
44 We sprintf these numbers into a 24 byte string of hex values to make them
45 human-readable (to make debugging simpler.) */
46
47struct gdlm_strname {
48 unsigned char name[GDLM_STRNAME_BYTES];
49 unsigned short namelen;
50};
51
52enum {
53 DFL_BLOCK_LOCKS = 0,
54 DFL_SPECTATOR = 1,
55 DFL_WITHDRAW = 2,
56};
57
58struct gdlm_ls {
59 uint32_t id;
60 int jid;
61 int first;
62 int first_done;
63 unsigned long flags;
64 struct kobject kobj;
65 char clustername[GDLM_NAME_LEN];
66 char fsname[GDLM_NAME_LEN];
67 int fsflags;
68 dlm_lockspace_t *dlm_lockspace;
69 lm_callback_t fscb;
70 lm_fsdata_t *fsdata;
71 int recover_jid;
72 int recover_jid_done;
73 spinlock_t async_lock;
74 struct list_head complete;
75 struct list_head blocking;
76 struct list_head delayed;
77 struct list_head submit;
78 struct list_head all_locks;
79 uint32_t all_locks_count;
80 wait_queue_head_t wait_control;
81 struct task_struct *thread1;
82 struct task_struct *thread2;
83 wait_queue_head_t thread_wait;
84 unsigned long drop_time;
85 int drop_locks_count;
86 int drop_locks_period;
87};
88
89enum {
90 LFL_NOBLOCK = 0,
91 LFL_NOCACHE = 1,
92 LFL_DLM_UNLOCK = 2,
93 LFL_DLM_CANCEL = 3,
94 LFL_SYNC_LVB = 4,
95 LFL_FORCE_PROMOTE = 5,
96 LFL_REREQUEST = 6,
97 LFL_ACTIVE = 7,
98 LFL_INLOCK = 8,
99 LFL_CANCEL = 9,
100 LFL_NOBAST = 10,
101 LFL_HEADQUE = 11,
102 LFL_UNLOCK_DELETE = 12,
103};
104
105struct gdlm_lock {
106 struct gdlm_ls *ls;
107 struct lm_lockname lockname;
108 char *lvb;
109 struct dlm_lksb lksb;
110
111 int16_t cur;
112 int16_t req;
113 int16_t prev_req;
114 uint32_t lkf; /* dlm flags DLM_LKF_ */
115 unsigned long flags; /* lock_dlm flags LFL_ */
116
117 int bast_mode; /* protected by async_lock */
118 struct completion ast_wait;
119
120 struct list_head clist; /* complete */
121 struct list_head blist; /* blocking */
122 struct list_head delay_list; /* delayed */
123 struct list_head all_list; /* all locks for the fs */
124 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
125};
126
127#define gdlm_assert(assertion, fmt, args...) \
128do { \
129 if (unlikely(!(assertion))) { \
130 printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
131 "lock_dlm: " fmt "\n", \
132 #assertion, ##args); \
133 BUG(); \
134 } \
135} while (0)
136
137#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
138#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
139#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
140#ifdef LOCK_DLM_LOG_DEBUG
141#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
142#else
143#define log_debug(fmt, arg...)
144#endif
145
146/* sysfs.c */
147
148int gdlm_sysfs_init(void);
149void gdlm_sysfs_exit(void);
150int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
151void gdlm_kobject_release(struct gdlm_ls *);
152
153/* thread.c */
154
155int gdlm_init_threads(struct gdlm_ls *);
156void gdlm_release_threads(struct gdlm_ls *);
157
158/* lock.c */
159
160int16_t gdlm_make_lmstate(int16_t);
161void gdlm_queue_delayed(struct gdlm_lock *);
162void gdlm_submit_delayed(struct gdlm_ls *);
163int gdlm_release_all_locks(struct gdlm_ls *);
164int gdlm_create_lp(struct gdlm_ls *, struct lm_lockname *, struct gdlm_lock **);
165void gdlm_delete_lp(struct gdlm_lock *);
166int gdlm_add_lvb(struct gdlm_lock *);
167void gdlm_del_lvb(struct gdlm_lock *);
168unsigned int gdlm_do_lock(struct gdlm_lock *);
169unsigned int gdlm_do_unlock(struct gdlm_lock *);
170
171int gdlm_get_lock(lm_lockspace_t *, struct lm_lockname *, lm_lock_t **);
172void gdlm_put_lock(lm_lock_t *);
173unsigned int gdlm_lock(lm_lock_t *, unsigned int, unsigned int, unsigned int);
174unsigned int gdlm_unlock(lm_lock_t *, unsigned int);
175void gdlm_cancel(lm_lock_t *);
176int gdlm_hold_lvb(lm_lock_t *, char **);
177void gdlm_unhold_lvb(lm_lock_t *, char *);
178void gdlm_sync_lvb(lm_lock_t *, char *);
179
180/* plock.c */
181
182int gdlm_plock_init(void);
183void gdlm_plock_exit(void);
184int gdlm_plock(lm_lockspace_t *, struct lm_lockname *, struct file *, int,
185 struct file_lock *);
186int gdlm_plock_get(lm_lockspace_t *, struct lm_lockname *, struct file *,
187 struct file_lock *);
188int gdlm_punlock(lm_lockspace_t *, struct lm_lockname *, struct file *,
189 struct file_lock *);
190#endif
191
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
new file mode 100644
index 000000000000..3c9adf18fd9c
--- /dev/null
+++ b/fs/gfs2/locking/dlm/main.c
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops;
18
19int __init init_lock_dlm(void)
20{
21 int error;
22
23 error = gfs_register_lockproto(&gdlm_ops);
24 if (error) {
25 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
26 error);
27 return error;
28 }
29
30 error = gdlm_sysfs_init();
31 if (error) {
32 gfs_unregister_lockproto(&gdlm_ops);
33 return error;
34 }
35
36 error = gdlm_plock_init();
37 if (error) {
38 gdlm_sysfs_exit();
39 gfs_unregister_lockproto(&gdlm_ops);
40 return error;
41 }
42
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0;
49}
50
/* Module exit: tear down in the reverse order of init_lock_dlm(). */
void __exit exit_lock_dlm(void)
{
	gdlm_plock_exit();
	gdlm_sysfs_exit();
	gfs_unregister_lockproto(&gdlm_ops);
}
57
/* Standard module registration boilerplate. */
module_init(init_lock_dlm);
module_exit(exit_lock_dlm);

MODULE_DESCRIPTION("GFS DLM Locking Module");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
64
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
new file mode 100644
index 000000000000..026f05ce168d
--- /dev/null
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -0,0 +1,255 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12int gdlm_drop_count;
13int gdlm_drop_period;
14struct lm_lockops gdlm_ops;
15
16
17static struct gdlm_ls *init_gdlm(lm_callback_t cb, lm_fsdata_t *fsdata,
18 int flags, char *table_name)
19{
20 struct gdlm_ls *ls;
21 char buf[256], *p;
22
23 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
24 if (!ls)
25 return NULL;
26
27 ls->drop_locks_count = gdlm_drop_count;
28 ls->drop_locks_period = gdlm_drop_period;
29 ls->fscb = cb;
30 ls->fsdata = fsdata;
31 ls->fsflags = flags;
32 spin_lock_init(&ls->async_lock);
33 INIT_LIST_HEAD(&ls->complete);
34 INIT_LIST_HEAD(&ls->blocking);
35 INIT_LIST_HEAD(&ls->delayed);
36 INIT_LIST_HEAD(&ls->submit);
37 INIT_LIST_HEAD(&ls->all_locks);
38 init_waitqueue_head(&ls->thread_wait);
39 init_waitqueue_head(&ls->wait_control);
40 ls->thread1 = NULL;
41 ls->thread2 = NULL;
42 ls->drop_time = jiffies;
43 ls->jid = -1;
44
45 strncpy(buf, table_name, 256);
46 buf[255] = '\0';
47
48 p = strstr(buf, ":");
49 if (!p) {
50 log_info("invalid table_name \"%s\"", table_name);
51 kfree(ls);
52 return NULL;
53 }
54 *p = '\0';
55 p++;
56
57 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
58 strncpy(ls->fsname, p, GDLM_NAME_LEN);
59
60 return ls;
61}
62
63static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
64{
65 char data[256];
66 char *options, *x, *y;
67 int error = 0;
68
69 memset(data, 0, 256);
70 strncpy(data, data_arg, 255);
71
72 for (options = data; (x = strsep(&options, ":")); ) {
73 if (!*x)
74 continue;
75
76 y = strchr(x, '=');
77 if (y)
78 *y++ = 0;
79
80 if (!strcmp(x, "jid")) {
81 if (!y) {
82 log_error("need argument to jid");
83 error = -EINVAL;
84 break;
85 }
86 sscanf(y, "%u", &ls->jid);
87
88 } else if (!strcmp(x, "first")) {
89 if (!y) {
90 log_error("need argument to first");
91 error = -EINVAL;
92 break;
93 }
94 sscanf(y, "%u", &ls->first);
95
96 } else if (!strcmp(x, "id")) {
97 if (!y) {
98 log_error("need argument to id");
99 error = -EINVAL;
100 break;
101 }
102 sscanf(y, "%u", &ls->id);
103
104 } else if (!strcmp(x, "nodir")) {
105 if (!y) {
106 log_error("need argument to nodir");
107 error = -EINVAL;
108 break;
109 }
110 sscanf(y, "%u", nodir);
111
112 } else {
113 log_error("unkonwn option: %s", x);
114 error = -EINVAL;
115 break;
116 }
117 }
118
119 return error;
120}
121
122static int gdlm_mount(char *table_name, char *host_data,
123 lm_callback_t cb, lm_fsdata_t *fsdata,
124 unsigned int min_lvb_size, int flags,
125 struct lm_lockstruct *lockstruct,
126 struct kobject *fskobj)
127{
128 struct gdlm_ls *ls;
129 int error = -ENOMEM, nodir = 0;
130
131 if (min_lvb_size > GDLM_LVB_SIZE)
132 goto out;
133
134 ls = init_gdlm(cb, fsdata, flags, table_name);
135 if (!ls)
136 goto out;
137
138 error = make_args(ls, host_data, &nodir);
139 if (error)
140 goto out;
141
142 error = gdlm_init_threads(ls);
143 if (error)
144 goto out_free;
145
146 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
147 &ls->dlm_lockspace,
148 nodir ? DLM_LSFL_NODIR : 0,
149 GDLM_LVB_SIZE);
150 if (error) {
151 log_error("dlm_new_lockspace error %d", error);
152 goto out_thread;
153 }
154
155 error = gdlm_kobject_setup(ls, fskobj);
156 if (error)
157 goto out_dlm;
158
159 lockstruct->ls_jid = ls->jid;
160 lockstruct->ls_first = ls->first;
161 lockstruct->ls_lockspace = ls;
162 lockstruct->ls_ops = &gdlm_ops;
163 lockstruct->ls_flags = 0;
164 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
165 return 0;
166
167 out_dlm:
168 dlm_release_lockspace(ls->dlm_lockspace, 2);
169 out_thread:
170 gdlm_release_threads(ls);
171 out_free:
172 kfree(ls);
173 out:
174 return error;
175}
176
/*
 * gdlm_unmount - lm_lockops unmount entry point
 *
 * Releases everything gdlm_mount set up.  If the fs already withdrew,
 * gdlm_withdraw() has torn down the lockspace/threads/locks/kobject,
 * so only the gdlm_ls itself remains to be freed.
 */
static void gdlm_unmount(lm_lockspace_t *lockspace)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
	int rv;

	log_debug("unmount flags %lx", ls->flags);

	/* FIXME: serialize unmount and withdraw in case they
	   happen at once.  Also, if unmount follows withdraw,
	   wait for withdraw to finish. */

	if (test_bit(DFL_WITHDRAW, &ls->flags))
		goto out;

	gdlm_kobject_release(ls);
	dlm_release_lockspace(ls->dlm_lockspace, 2);
	gdlm_release_threads(ls);
	rv = gdlm_release_all_locks(ls);
	if (rv)
		log_info("gdlm_unmount: %d stray locks freed", rv);
 out:
	kfree(ls);
}
200
/* gfs finished recovering journal @jid; record it and poke userspace
   via a sysfs uevent (userspace reads recover_done).  @message is
   currently unused. */
static void gdlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
                               unsigned int message)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
	ls->recover_jid_done = jid;
	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
}
208
/* The first mounter has finished; flag it (exposed via the first_done
   sysfs file) and notify userspace with a uevent. */
static void gdlm_others_may_mount(lm_lockspace_t *lockspace)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
	ls->first_done = 1;
	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
}
215
/* Userspace gets the offline uevent, blocks new gfs locks on
   other mounters, and lets us know (sets WITHDRAW flag).  Then,
   userspace leaves the mount group while we leave the lockspace. */

static void gdlm_withdraw(lm_lockspace_t *lockspace)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;

	/* tell userspace we are going offline */
	kobject_uevent(&ls->kobj, KOBJ_OFFLINE);

	/* wait for withdraw_store() to set DFL_WITHDRAW and wake us */
	wait_event_interruptible(ls->wait_control,
				 test_bit(DFL_WITHDRAW, &ls->flags));

	dlm_release_lockspace(ls->dlm_lockspace, 2);
	gdlm_release_threads(ls);
	gdlm_release_all_locks(ls);
	gdlm_kobject_release(ls);
}
234
/* The lock-module operations table handed to gfs via
   gfs_register_lockproto() in main.c. */
struct lm_lockops gdlm_ops = {
	.lm_proto_name = "lock_dlm",
	.lm_mount = gdlm_mount,
	.lm_others_may_mount = gdlm_others_may_mount,
	.lm_unmount = gdlm_unmount,
	.lm_withdraw = gdlm_withdraw,
	.lm_get_lock = gdlm_get_lock,
	.lm_put_lock = gdlm_put_lock,
	.lm_lock = gdlm_lock,
	.lm_unlock = gdlm_unlock,
	.lm_plock = gdlm_plock,
	.lm_punlock = gdlm_punlock,
	.lm_plock_get = gdlm_plock_get,
	.lm_cancel = gdlm_cancel,
	.lm_hold_lvb = gdlm_hold_lvb,
	.lm_unhold_lvb = gdlm_unhold_lvb,
	.lm_sync_lvb = gdlm_sync_lvb,
	.lm_recovery_done = gdlm_recovery_done,
	.lm_owner = THIS_MODULE,
};
255
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
new file mode 100644
index 000000000000..f7ac5821def9
--- /dev/null
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -0,0 +1,298 @@
1/*
2 * Copyright (C) 2005 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <linux/miscdevice.h>
10#include <linux/lock_dlm_plock.h>
11
12#include "lock_dlm.h"
13
14
/* Requests queue on send_list until userspace reads them through the
   misc device; they then move to recv_list until userspace writes the
   result back.  ops_lock protects both lists; all four objects are
   initialized in gdlm_plock_init(). */
static spinlock_t ops_lock;
static struct list_head send_list;
static struct list_head recv_list;
static wait_queue_head_t send_wq;
static wait_queue_head_t recv_wq;

/* One in-flight posix-lock request.  done is set by dev_write() when
   userspace returns the result in info.rv. */
struct plock_op {
	struct list_head list;
	int done;
	struct gdlm_plock_info info;
};
26
/* Stamp an outgoing request with the kernel-side plock ABI version. */
static inline void set_version(struct gdlm_plock_info *info)
{
	info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
	info->version[1] = GDLM_PLOCK_VERSION_MINOR;
	info->version[2] = GDLM_PLOCK_VERSION_PATCH;
}
33
/* Validate the ABI version userspace wrote back: major must match
   exactly, and the user minor must not exceed ours.  Returns 0 if
   compatible, -EINVAL otherwise. */
static int check_version(struct gdlm_plock_info *info)
{
	if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
	    (GDLM_PLOCK_VERSION_MINOR < info->version[1])) {
		log_error("plock device version mismatch: "
			  "kernel (%u.%u.%u), user (%u.%u.%u)",
			  GDLM_PLOCK_VERSION_MAJOR,
			  GDLM_PLOCK_VERSION_MINOR,
			  GDLM_PLOCK_VERSION_PATCH,
			  info->version[0],
			  info->version[1],
			  info->version[2]);
		return -EINVAL;
	}
	return 0;
}
50
/* Queue an op for userspace and wake any reader blocked in dev_read()
   or polling in dev_poll(). */
static void send_op(struct plock_op *op)
{
	set_version(&op->info);
	INIT_LIST_HEAD(&op->list);
	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);
	wake_up(&send_wq);
}
60
/*
 * gdlm_plock - acquire a cluster posix lock
 *
 * Builds a GDLM_PLOCK_OP_LOCK request, hands it to userspace via
 * send_op(), and blocks until dev_write() marks it done.  On success
 * the lock is also taken locally in the VFS so it shows up in local
 * lock bookkeeping.  Returns the rv userspace supplied, or -ENOMEM.
 */
int gdlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
	       struct file *file, int cmd, struct file_lock *fl)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	op->info.optype = GDLM_PLOCK_OP_LOCK;
	/* NOTE(review): fl_owner is a pointer truncated to 32 bits here;
	   presumably only used as an opaque owner id — confirm */
	op->info.pid = (uint32_t) fl->fl_owner;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() should have unlinked the op; complain if not */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "plock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (!rv) {
		/* mirror the granted lock into the local VFS state */
		if (posix_lock_file_wait(file, fl) < 0)
			log_error("gdlm_plock: vfs lock error %x,%llx",
				  name->ln_type, name->ln_number);
	}

	kfree(op);
	return rv;
}
102
/*
 * gdlm_punlock - release a cluster posix lock
 *
 * Drops the lock locally in the VFS first, then sends an UNLOCK op to
 * userspace and waits for completion.  Returns the rv userspace
 * supplied, or -ENOMEM.
 */
int gdlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
		 struct file *file, struct file_lock *fl)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	/* local VFS unlock happens regardless of the cluster result */
	if (posix_lock_file_wait(file, fl) < 0)
		log_error("gdlm_punlock: vfs unlock error %x,%llx",
			  name->ln_type, name->ln_number);

	op->info.optype = GDLM_PLOCK_OP_UNLOCK;
	op->info.pid = (uint32_t) fl->fl_owner;
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() should have unlinked the op; complain if not */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "punlock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	kfree(op);
	return rv;
}
140
/*
 * gdlm_plock_get - test for a conflicting cluster posix lock (F_GETLK)
 *
 * Sends a GET op to userspace.  rv == 0 means no conflict (fl_type set
 * to F_UNLCK); rv > 0 means a conflict, and the conflicting lock's
 * type/pid/range are copied back into @fl.  Negative rv is an error.
 */
int gdlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
		   struct file *file, struct file_lock *fl)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_KERNEL);
	if (!op)
		return -ENOMEM;

	op->info.optype = GDLM_PLOCK_OP_GET;
	op->info.pid = (uint32_t) fl->fl_owner;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() should have unlinked the op; complain if not */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "plock_get op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;

	if (rv == 0)
		fl->fl_type = F_UNLCK;
	else if (rv > 0) {
		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_pid = op->info.pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
	}

	kfree(op);
	return rv;
}
184
/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op = NULL;

	if (count < sizeof(info))
		return -EINVAL;

	/* pop the oldest request and park it on recv_list until
	   userspace writes the result back */
	spin_lock(&ops_lock);
	if (!list_empty(&send_list)) {
		op = list_entry(send_list.next, struct plock_op, list);
		list_move(&op->list, &recv_list);
		memcpy(&info, &op->info, sizeof(info));
	}
	spin_unlock(&ops_lock);

	/* nonblocking: caller is expected to poll() for readability */
	if (!op)
		return -EAGAIN;

	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}
210
/* a write copies in one plock result that should match a plock_op
   on the recv list */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op;
	int found = 0;

	if (count != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;

	if (check_version(&info))
		return -EINVAL;

	/* match by (fsid, number); NOTE(review): with multiple
	   outstanding ops on the same lock this matches the first
	   queued one — presumed sufficient, confirm with userspace */
	spin_lock(&ops_lock);
	list_for_each_entry(op, &recv_list, list) {
		if (op->info.fsid == info.fsid &&
		    op->info.number == info.number) {
			list_del_init(&op->list);
			found = 1;
			op->done = 1;
			/* publish the result before waking the waiter */
			memcpy(&op->info, &info, sizeof(info));
			break;
		}
	}
	spin_unlock(&ops_lock);

	if (found)
		wake_up(&recv_wq);
	else
		printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
			info.number);
	return count;
}
249
250static unsigned int dev_poll(struct file *file, poll_table *wait)
251{
252 poll_wait(file, &send_wq, wait);
253
254 spin_lock(&ops_lock);
255 if (!list_empty(&send_list)) {
256 spin_unlock(&ops_lock);
257 return POLLIN | POLLRDNORM;
258 }
259 spin_unlock(&ops_lock);
260 return 0;
261}
262
/* file_operations for the plock misc device used by userspace */
static struct file_operations dev_fops = {
	.read    = dev_read,
	.write   = dev_write,
	.poll    = dev_poll,
	.owner   = THIS_MODULE
};

/* dynamically-numbered misc device registered in gdlm_plock_init() */
static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = GDLM_PLOCK_MISC_NAME,
	.fops  = &dev_fops
};
275
276int gdlm_plock_init(void)
277{
278 int rv;
279
280 spin_lock_init(&ops_lock);
281 INIT_LIST_HEAD(&send_list);
282 INIT_LIST_HEAD(&recv_list);
283 init_waitqueue_head(&send_wq);
284 init_waitqueue_head(&recv_wq);
285
286 rv = misc_register(&plock_dev_misc);
287 if (rv)
288 printk(KERN_INFO "gdlm_plock_init: misc_register failed %d",
289 rv);
290 return rv;
291}
292
293void gdlm_plock_exit(void)
294{
295 if (misc_deregister(&plock_dev_misc) < 0)
296 printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed");
297}
298
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
new file mode 100644
index 000000000000..e1e5186c97c9
--- /dev/null
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -0,0 +1,218 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15extern struct lm_lockops gdlm_ops;
16
/* sysfs: report the protocol name ("lock_dlm") from gdlm_ops */
static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
}
21
22static ssize_t block_show(struct gdlm_ls *ls, char *buf)
23{
24 ssize_t ret;
25 int val = 0;
26
27 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
28 val = 1;
29 ret = sprintf(buf, "%d\n", val);
30 return ret;
31}
32
33static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
34{
35 ssize_t ret = len;
36 int val;
37
38 val = simple_strtol(buf, NULL, 0);
39
40 if (val == 1)
41 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 else if (val == 0) {
43 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
44 gdlm_submit_delayed(ls);
45 } else
46 ret = -EINVAL;
47 return ret;
48}
49
50static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
51{
52 ssize_t ret;
53 int val = 0;
54
55 if (test_bit(DFL_WITHDRAW, &ls->flags))
56 val = 1;
57 ret = sprintf(buf, "%d\n", val);
58 return ret;
59}
60
61static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
62{
63 ssize_t ret = len;
64 int val;
65
66 val = simple_strtol(buf, NULL, 0);
67
68 if (val == 1)
69 set_bit(DFL_WITHDRAW, &ls->flags);
70 else
71 ret = -EINVAL;
72 wake_up(&ls->wait_control);
73 return ret;
74}
75
/* sysfs: lockspace id supplied via the "id" mount option */
static ssize_t id_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%u\n", ls->id);
}

/* sysfs: journal id for this mount (-1 until set) */
static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%d\n", ls->jid);
}

/* sysfs: whether this node is the first mounter */
static ssize_t first_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%d\n", ls->first);
}

/* sysfs: set once gdlm_others_may_mount() has run */
static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%d\n", ls->first_done);
}

/* sysfs: last journal id handed to gfs for recovery */
static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%d\n", ls->recover_jid);
}

/* sysfs: userspace writes a jid here to trigger gfs journal recovery
   via the LM_CB_NEED_RECOVERY callback */
static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
	ls->recover_jid = simple_strtol(buf, NULL, 0);
	ls->fscb(ls->fsdata, LM_CB_NEED_RECOVERY, &ls->recover_jid);
	return len;
}

/* sysfs: last jid whose recovery gfs reported done */
static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
{
	return sprintf(buf, "%d\n", ls->recover_jid_done);
}
112
/* One sysfs attribute with show/store methods taking a gdlm_ls. */
struct gdlm_attr {
	struct attribute attr;
	ssize_t (*show)(struct gdlm_ls *, char *);
	ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
};

#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)

GDLM_ATTR(proto_name,   0444, proto_name_show,   NULL);
GDLM_ATTR(block,        0644, block_show,        block_store);
GDLM_ATTR(withdraw,     0644, withdraw_show,     withdraw_store);
GDLM_ATTR(id,           0444, id_show,           NULL);
GDLM_ATTR(jid,          0444, jid_show,          NULL);
GDLM_ATTR(first,        0444, first_show,        NULL);
GDLM_ATTR(first_done,   0444, first_done_show,   NULL);
GDLM_ATTR(recover,      0644, recover_show,      recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
131
/* NULL-terminated attribute list installed as gdlm_ktype defaults */
static struct attribute *gdlm_attrs[] = {
	&gdlm_attr_proto_name.attr,
	&gdlm_attr_block.attr,
	&gdlm_attr_withdraw.attr,
	&gdlm_attr_id.attr,
	&gdlm_attr_jid.attr,
	&gdlm_attr_first.attr,
	&gdlm_attr_first_done.attr,
	&gdlm_attr_recover.attr,
	&gdlm_attr_recover_done.attr,
	NULL,
};
144
/* Dispatch a sysfs read to the attribute's show method; attributes
   without one read as empty. */
static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
	return a->show ? a->show(ls, buf) : 0;
}
152
/* Dispatch a sysfs write to the attribute's store method; writes to
   read-only attributes are silently accepted (len returned). */
static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t len)
{
	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
	return a->store ? a->store(ls, buf, len) : len;
}
160
static struct sysfs_ops gdlm_attr_ops = {
	.show  = gdlm_attr_show,
	.store = gdlm_attr_store,
};

static struct kobj_type gdlm_ktype = {
	.default_attrs = gdlm_attrs,
	.sysfs_ops     = &gdlm_attr_ops,
};

/* kset "lock_dlm" parented under the kernel subsystem; uses the
   pre-2.6.25 static-kset registration API */
static struct kset gdlm_kset = {
	.subsys = &kernel_subsys,
	.kobj   = {.name = "lock_dlm",},
	.ktype  = &gdlm_ktype,
};
176
/* Register the per-lockspace "lock_module" kobject under the fs kobject
   supplied by gfs.  Returns 0 or a kobject error. */
int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
{
	int error;

	error = kobject_set_name(&ls->kobj, "%s", "lock_module");
	if (error) {
		log_error("can't set kobj name %d", error);
		return error;
	}

	ls->kobj.kset = &gdlm_kset;
	ls->kobj.ktype = &gdlm_ktype;
	ls->kobj.parent = fskobj;

	error = kobject_register(&ls->kobj);
	if (error)
		log_error("can't register kobj %d", error);

	return error;
}
197
/* Remove the per-lockspace kobject registered by gdlm_kobject_setup(). */
void gdlm_kobject_release(struct gdlm_ls *ls)
{
	kobject_unregister(&ls->kobj);
}
202
203int gdlm_sysfs_init(void)
204{
205 int error;
206
207 error = kset_register(&gdlm_kset);
208 if (error)
209 printk("lock_dlm: cannot register kset %d\n", error);
210
211 return error;
212}
213
/* Unregister the module-wide kset registered by gdlm_sysfs_init(). */
void gdlm_sysfs_exit(void)
{
	kset_unregister(&gdlm_kset);
}
218
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
new file mode 100644
index 000000000000..3e2edcc2dbf6
--- /dev/null
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -0,0 +1,352 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
   thread gets to it. */

static void queue_submit(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	spin_lock(&ls->async_lock);
	list_add_tail(&lp->delay_list, &ls->submit);
	spin_unlock(&ls->async_lock);
	/* wake gdlm_thread() sleeping in no_work() */
	wake_up(&ls->thread_wait);
}
24
25static void process_blocking(struct gdlm_lock *lp, int bast_mode)
26{
27 struct gdlm_ls *ls = lp->ls;
28 unsigned int cb = 0;
29
30 switch (gdlm_make_lmstate(bast_mode)) {
31 case LM_ST_EXCLUSIVE:
32 cb = LM_CB_NEED_E;
33 break;
34 case LM_ST_DEFERRED:
35 cb = LM_CB_NEED_D;
36 break;
37 case LM_ST_SHARED:
38 cb = LM_CB_NEED_S;
39 break;
40 default:
41 gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
42 }
43
44 ls->fscb(ls->fsdata, cb, &lp->lockname);
45}
46
/*
 * process_complete - handle a dlm completion ast for one lock
 *
 * Interprets the dlm status block (lp->lksb), updates the lock's
 * cur/req modes and flags, and in the normal case delivers an
 * LM_CB_ASYNC callback to gfs.  Several special cases return early or
 * re-queue the lock instead; each is commented below.
 */
static void process_complete(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;
	struct lm_async_cb acb;
	int16_t prev_mode = lp->cur;

	memset(&acb, 0, sizeof(acb));

	/* A cancel completed in dlm: report LM_OUT_CANCELED to gfs. */
	if (lp->lksb.sb_status == -DLM_ECANCEL) {
		log_info("complete dlm cancel %x,%llx flags %lx",
			 lp->lockname.ln_type, lp->lockname.ln_number,
			 lp->flags);

		lp->req = lp->cur;
		acb.lc_ret |= LM_OUT_CANCELED;
		if (lp->cur == DLM_LOCK_IV)
			lp->lksb.sb_lkid = 0;
		goto out;
	}

	/* An unlock we requested completed; the lock may also be slated
	   for deletion (LFL_UNLOCK_DELETE). */
	if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
		if (lp->lksb.sb_status != -DLM_EUNLOCK) {
			log_info("unlock sb_status %d %x,%llx flags %lx",
				 lp->lksb.sb_status, lp->lockname.ln_type,
				 lp->lockname.ln_number, lp->flags);
			return;
		}

		lp->cur = DLM_LOCK_IV;
		lp->req = DLM_LOCK_IV;
		lp->lksb.sb_lkid = 0;

		if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
			gdlm_delete_lp(lp);
			return;
		}
		goto out;
	}

	/* dlm says the lvb content is not valid: clear our copy */
	if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
		memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);

	/* dlm granted the alternate mode (PR<->CW); record what we got */
	if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
		if (lp->req == DLM_LOCK_PR)
			lp->req = DLM_LOCK_CW;
		else if (lp->req == DLM_LOCK_CW)
			lp->req = DLM_LOCK_PR;
	}

	/*
	 * A canceled lock request.  The lock was just taken off the delayed
	 * list and was never even submitted to dlm.
	 */

	if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
		log_info("complete internal cancel %x,%llx",
			 lp->lockname.ln_type, lp->lockname.ln_number);
		lp->req = lp->cur;
		acb.lc_ret |= LM_OUT_CANCELED;
		goto out;
	}

	/*
	 * An error occured.
	 */

	if (lp->lksb.sb_status) {
		/* a "normal" error */
		if ((lp->lksb.sb_status == -EAGAIN) &&
		    (lp->lkf & DLM_LKF_NOQUEUE)) {
			lp->req = lp->cur;
			if (lp->cur == DLM_LOCK_IV)
				lp->lksb.sb_lkid = 0;
			goto out;
		}

		/* this could only happen with cancels I think */
		log_info("ast sb_status %d %x,%llx flags %lx",
			 lp->lksb.sb_status, lp->lockname.ln_type,
			 lp->lockname.ln_number, lp->flags);
		return;
	}

	/*
	 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
	 */

	if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
		complete(&lp->ast_wait);
		return;
	}

	/*
	 * A lock has been demoted to NL because it initially completed during
	 * BLOCK_LOCKS.  Now it must be requested in the originally requested
	 * mode.
	 */

	if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
		gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
			    lp->lockname.ln_type, lp->lockname.ln_number);
		gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
			    lp->lockname.ln_type, lp->lockname.ln_number);

		lp->cur = DLM_LOCK_NL;
		lp->req = lp->prev_req;
		lp->prev_req = DLM_LOCK_IV;
		lp->lkf &= ~DLM_LKF_CONVDEADLK;

		set_bit(LFL_NOCACHE, &lp->flags);

		if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
		    !test_bit(LFL_NOBLOCK, &lp->flags))
			gdlm_queue_delayed(lp);
		else
			queue_submit(lp);
		return;
	}

	/*
	 * A request is granted during dlm recovery.  It may be granted
	 * because the locks of a failed node were cleared.  In that case,
	 * there may be inconsistent data beneath this lock and we must wait
	 * for recovery to complete to use it.  When gfs recovery is done this
	 * granted lock will be converted to NL and then reacquired in this
	 * granted state.
	 */

	if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
	    !test_bit(LFL_NOBLOCK, &lp->flags) &&
	    lp->req != DLM_LOCK_NL) {

		lp->cur = lp->req;
		lp->prev_req = lp->req;
		lp->req = DLM_LOCK_NL;
		lp->lkf |= DLM_LKF_CONVERT;
		lp->lkf &= ~DLM_LKF_CONVDEADLK;

		log_debug("rereq %x,%llx id %x %d,%d",
			  lp->lockname.ln_type, lp->lockname.ln_number,
			  lp->lksb.sb_lkid, lp->cur, lp->req);

		set_bit(LFL_REREQUEST, &lp->flags);
		queue_submit(lp);
		return;
	}

	/*
	 * DLM demoted the lock to NL before it was granted so GFS must be
	 * told it cannot cache data for this lock.
	 */

	if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
		set_bit(LFL_NOCACHE, &lp->flags);

 out:
	/*
	 * This is an internal lock_dlm lock
	 */

	if (test_bit(LFL_INLOCK, &lp->flags)) {
		clear_bit(LFL_NOBLOCK, &lp->flags);
		lp->cur = lp->req;
		complete(&lp->ast_wait);
		return;
	}

	/*
	 * Normal completion of a lock request.  Tell GFS it now has the lock.
	 */

	clear_bit(LFL_NOBLOCK, &lp->flags);
	lp->cur = lp->req;

	acb.lc_name = lp->lockname;
	acb.lc_ret |= gdlm_make_lmstate(lp->cur);

	/* caching allowed only if both old and new modes are above NL and
	   nothing requested NOCACHE along the way */
	if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
	    (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
		acb.lc_ret |= LM_OUT_CACHEABLE;

	ls->fscb(ls->fsdata, LM_CB_ASYNC, &acb);
}
230
231static inline int no_work(struct gdlm_ls *ls, int blocking)
232{
233 int ret;
234
235 spin_lock(&ls->async_lock);
236 ret = list_empty(&ls->complete) && list_empty(&ls->submit);
237 if (ret && blocking)
238 ret = list_empty(&ls->blocking);
239 spin_unlock(&ls->async_lock);
240
241 return ret;
242}
243
/* Rate-limited check for the lock-drop callback: at most once per
   drop_locks_period seconds, return 1 if the lock count has reached
   drop_locks_count (0 disables the mechanism entirely). */
static inline int check_drop(struct gdlm_ls *ls)
{
	if (!ls->drop_locks_count)
		return 0;

	if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
		ls->drop_time = jiffies;
		if (ls->all_locks_count >= ls->drop_locks_count)
			return 1;
	}
	return 0;
}
256
/*
 * gdlm_thread - worker loop run by both lock_dlm kernel threads
 *
 * Sleeps until work appears, then dequeues at most one item per pass
 * (blocking bast > completion ast > delayed submit, in that priority)
 * and processes it outside async_lock.  Also fires the periodic
 * LM_CB_DROPLOCKS callback when check_drop() says so.
 */
static int gdlm_thread(void *data)
{
	struct gdlm_ls *ls = (struct gdlm_ls *) data;
	struct gdlm_lock *lp = NULL;
	int blist = 0;
	uint8_t complete, blocking, submit, drop;
	DECLARE_WAITQUEUE(wait, current);

	/* Only thread1 is allowed to do blocking callbacks since gfs
	   may wait for a completion callback within a blocking cb. */

	if (current == ls->thread1)
		blist = 1;

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&ls->thread_wait, &wait);
		if (no_work(ls, blist))
			schedule();
		remove_wait_queue(&ls->thread_wait, &wait);
		set_current_state(TASK_RUNNING);

		complete = blocking = submit = drop = 0;

		spin_lock(&ls->async_lock);

		if (blist && !list_empty(&ls->blocking)) {
			lp = list_entry(ls->blocking.next, struct gdlm_lock,
					blist);
			list_del_init(&lp->blist);
			/* snapshot and clear the mode under the lock; the
			   saved value is passed to process_blocking() */
			blocking = lp->bast_mode;
			lp->bast_mode = 0;
		} else if (!list_empty(&ls->complete)) {
			lp = list_entry(ls->complete.next, struct gdlm_lock,
					clist);
			list_del_init(&lp->clist);
			complete = 1;
		} else if (!list_empty(&ls->submit)) {
			lp = list_entry(ls->submit.next, struct gdlm_lock,
					delay_list);
			list_del_init(&lp->delay_list);
			submit = 1;
		}

		drop = check_drop(ls);
		spin_unlock(&ls->async_lock);

		if (complete)
			process_complete(lp);

		else if (blocking)
			process_blocking(lp, blocking);

		else if (submit)
			gdlm_do_lock(lp);

		if (drop)
			ls->fscb(ls->fsdata, LM_CB_DROPLOCKS, NULL);

		schedule();
	}

	return 0;
}
321
322int gdlm_init_threads(struct gdlm_ls *ls)
323{
324 struct task_struct *p;
325 int error;
326
327 p = kthread_run(gdlm_thread, ls, "lock_dlm1");
328 error = IS_ERR(p);
329 if (error) {
330 log_error("can't start lock_dlm1 thread %d", error);
331 return error;
332 }
333 ls->thread1 = p;
334
335 p = kthread_run(gdlm_thread, ls, "lock_dlm2");
336 error = IS_ERR(p);
337 if (error) {
338 log_error("can't start lock_dlm2 thread %d", error);
339 kthread_stop(ls->thread1);
340 return error;
341 }
342 ls->thread2 = p;
343
344 return 0;
345}
346
/* Stop both worker threads started by gdlm_init_threads(). */
void gdlm_release_threads(struct gdlm_ls *ls)
{
	kthread_stop(ls->thread1);
	kthread_stop(ls->thread2);
}
352
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
new file mode 100644
index 000000000000..cdadf956c831
--- /dev/null
+++ b/fs/gfs2/locking/nolock/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS) += lock_nolock.o
2lock_nolock-y := main.o
3
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
new file mode 100644
index 000000000000..9398309f2810
--- /dev/null
+++ b/fs/gfs2/locking/nolock/main.c
@@ -0,0 +1,264 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/types.h>
15#include <linux/fs.h>
16#include <linux/smp_lock.h>
17
18#include "../../lm_interface.h"
19
/* Per-mount private state: only the LVB size needs to be remembered
   so nolock_hold_lvb() can size its allocations. */
struct nolock_lockspace {
	unsigned int nl_lvb_size;
};
23
24struct lm_lockops nolock_ops;
25
26static int nolock_mount(char *table_name, char *host_data,
27 lm_callback_t cb, lm_fsdata_t *fsdata,
28 unsigned int min_lvb_size, int flags,
29 struct lm_lockstruct *lockstruct,
30 struct kobject *fskobj)
31{
32 char *c;
33 unsigned int jid;
34 struct nolock_lockspace *nl;
35
36 c = strstr(host_data, "jid=");
37 if (!c)
38 jid = 0;
39 else {
40 c += 4;
41 sscanf(c, "%u", &jid);
42 }
43
44 nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
45 if (!nl)
46 return -ENOMEM;
47
48 nl->nl_lvb_size = min_lvb_size;
49
50 lockstruct->ls_jid = jid;
51 lockstruct->ls_first = 1;
52 lockstruct->ls_lvb_size = min_lvb_size;
53 lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
54 lockstruct->ls_ops = &nolock_ops;
55 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
56
57 return 0;
58}
59
/* No-op: with no cluster there are no other nodes to allow in. */
static void nolock_others_may_mount(lm_lockspace_t *lockspace)
{
}
63
64static void nolock_unmount(lm_lockspace_t *lockspace)
65{
66 struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
67 kfree(nl);
68}
69
/* No-op: no cluster membership to withdraw from. */
static void nolock_withdraw(lm_lockspace_t *lockspace)
{
}
73
/**
 * nolock_get_lock - get a lm_lock_t given a descripton of the lock
 * @lockspace: the lockspace the lock lives in
 * @name: the name of the lock
 * @lockp: return the lm_lock_t here
 *
 * No per-lock state is needed: every lock handle is just the lockspace
 * pointer itself.  nolock_hold_lvb() relies on this identity to recover
 * the struct nolock_lockspace from a lock handle.
 *
 * Returns: 0 on success, -EXXX on failure
 */

static int nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
			   lm_lock_t **lockp)
{
	*lockp = (lm_lock_t *)lockspace;
	return 0;
}
89
/**
 * nolock_put_lock - get rid of a lock structure
 * @lock: the lock to throw away
 *
 * Nothing to free: the "lock" is the lockspace pointer, which is owned
 * by nolock_unmount().
 */

static void nolock_put_lock(lm_lock_t *lock)
{
}
99
/**
 * nolock_lock - acquire a lock
 * @lock: the lock to manipulate
 * @cur_state: the current state
 * @req_state: the requested state
 * @flags: modifier flags
 *
 * With no other nodes, every request is granted immediately in the
 * requested state, and the result is always cacheable.
 *
 * Returns: A bitmap of LM_OUT_*
 */

static unsigned int nolock_lock(lm_lock_t *lock, unsigned int cur_state,
				unsigned int req_state, unsigned int flags)
{
	return req_state | LM_OUT_CACHEABLE;
}
115
/**
 * nolock_unlock - unlock a lock
 * @lock: the lock to manipulate
 * @cur_state: the current state
 *
 * Nothing is actually held, so unlocking always trivially succeeds.
 *
 * Returns: 0
 */

static unsigned int nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
{
	return 0;
}
128
/* No-op: requests are granted synchronously, so none can be pending. */
static void nolock_cancel(lm_lock_t *lock)
{
}
132
133/**
134 * nolock_hold_lvb - hold on to a lock value block
135 * @lock: the lock the LVB is associated with
136 * @lvbp: return the lm_lvb_t here
137 *
138 * Returns: 0 on success, -EXXX on failure
139 */
140
141static int nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
142{
143 struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
144 int error = 0;
145
146 *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL);
147 if (!*lvbp)
148 error = -ENOMEM;
149
150 return error;
151}
152
/**
 * nolock_unhold_lvb - release a LVB
 * @lock: the lock the LVB is associated with
 * @lvb: the lock value block, allocated by nolock_hold_lvb()
 *
 */

static void nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
{
	kfree(lvb);
}
164
/**
 * nolock_sync_lvb - sync out the value of a lvb
 * @lock: the lock the LVB is associated with
 * @lvb: the lock value block
 *
 * No-op: there are no other nodes to propagate the LVB contents to.
 */

static void nolock_sync_lvb(lm_lock_t *lock, char *lvb)
{
}
175
/**
 * nolock_plock_get - F_GETLK: test for a conflicting posix lock
 * @lockspace: unused
 * @name: unused
 * @file: the file to test against
 * @fl: in: the lock to test; out: the conflicting lock or F_UNLCK
 *
 * The BKL protects the inode's posix lock list walked by
 * posix_test_lock().  Always returns 0; the result is reported
 * through @fl.
 */
static int nolock_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
			    struct file *file, struct file_lock *fl)
{
	struct file_lock *tmp;

	lock_kernel();
	tmp = posix_test_lock(file, fl);
	/* Report "no conflict" unless a blocking lock was found. */
	fl->fl_type = F_UNLCK;
	if (tmp)
		memcpy(fl, tmp, sizeof(struct file_lock));
	unlock_kernel();

	return 0;
}
190
/**
 * nolock_plock - F_SETLK/F_SETLKW: apply a posix lock locally
 * @lockspace: unused
 * @name: unused
 * @file: the file to lock
 * @cmd: unused; waiting is handled by posix_lock_file_wait()
 * @fl: the lock request
 *
 * Returns: errno from posix_lock_file_wait()
 */
static int nolock_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
			struct file *file, int cmd, struct file_lock *fl)
{
	int error;
	lock_kernel();
	error = posix_lock_file_wait(file, fl);
	unlock_kernel();
	return error;
}
200
/**
 * nolock_punlock - remove a posix lock
 * @lockspace: unused
 * @name: unused
 * @file: the file to unlock
 * @fl: the unlock request
 *
 * Same call as nolock_plock(): presumably @fl arrives with F_UNLCK set,
 * so posix_lock_file_wait() removes the range — TODO confirm against
 * the caller in ops_file.c.
 *
 * Returns: errno from posix_lock_file_wait()
 */
static int nolock_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
			  struct file *file, struct file_lock *fl)
{
	int error;
	lock_kernel();
	error = posix_lock_file_wait(file, fl);
	unlock_kernel();
	return error;
}
210
/* No-op: a single node never recovers another node's journal. */
static void nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
				 unsigned int message)
{
}
215
/* Lock-module operations table registered with the GFS2 lock harness.
   Forward-declared above so nolock_mount() can reference it. */
struct lm_lockops nolock_ops = {
	.lm_proto_name = "lock_nolock",
	.lm_mount = nolock_mount,
	.lm_others_may_mount = nolock_others_may_mount,
	.lm_unmount = nolock_unmount,
	.lm_withdraw = nolock_withdraw,
	.lm_get_lock = nolock_get_lock,
	.lm_put_lock = nolock_put_lock,
	.lm_lock = nolock_lock,
	.lm_unlock = nolock_unlock,
	.lm_cancel = nolock_cancel,
	.lm_hold_lvb = nolock_hold_lvb,
	.lm_unhold_lvb = nolock_unhold_lvb,
	.lm_sync_lvb = nolock_sync_lvb,
	.lm_plock_get = nolock_plock_get,
	.lm_plock = nolock_plock,
	.lm_punlock = nolock_punlock,
	.lm_recovery_done = nolock_recovery_done,
	.lm_owner = THIS_MODULE,
};
236
/**
 * init_nolock - module init: register the nolock protocol
 *
 * Returns: 0 on success, the registration error otherwise
 */
int __init init_nolock(void)
{
	int error;

	error = gfs_register_lockproto(&nolock_ops);
	if (error) {
		printk(KERN_WARNING
		       "lock_nolock: can't register protocol: %d\n", error);
		return error;
	}

	printk(KERN_INFO
	       "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
	return 0;
}
252
/* Module exit: unregister the protocol registered by init_nolock(). */
void __exit exit_nolock(void)
{
	gfs_unregister_lockproto(&nolock_ops);
}
257
258module_init(init_nolock);
259module_exit(exit_nolock);
260
261MODULE_DESCRIPTION("GFS Nolock Locking Module");
262MODULE_AUTHOR("Red Hat, Inc.");
263MODULE_LICENSE("GPL");
264
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
new file mode 100644
index 000000000000..ea69376c00d8
--- /dev/null
+++ b/fs/gfs2/log.c
@@ -0,0 +1,592 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "glock.h"
24#include "log.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "util.h"
28#include "dir.h"
29
30#define PULL 1
31
32/**
33 * gfs2_struct2blk - compute stuff
34 * @sdp: the filesystem
35 * @nstruct: the number of structures
36 * @ssize: the size of the structures
37 *
38 * Compute the number of log descriptor blocks needed to hold a certain number
39 * of structures of a certain size.
40 *
41 * Returns: the number of blocks needed (minimum is always 1)
42 */
43
44unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
45 unsigned int ssize)
46{
47 unsigned int blks;
48 unsigned int first, second;
49
50 blks = 1;
51 first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) /
52 ssize;
53
54 if (nstruct > first) {
55 second = (sdp->sd_sb.sb_bsize -
56 sizeof(struct gfs2_meta_header)) / ssize;
57 blks += DIV_ROUND_UP(nstruct - first, second);
58 }
59
60 return blks;
61}
62
/**
 * gfs2_ail1_start - start I/O on entries of the first Active Items List
 * @sdp: the filesystem
 * @flags: DIO_ALL to push every AIL1 entry; otherwise stop once the
 *         oldest entry (the current tail) has been started and emptied
 *
 * Entries already stamped with this pass's sync generation are skipped,
 * so each pass starts I/O on each entry at most once.  Called with the
 * log lock dropped; taken and held here around the list walks.
 */
void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
{
	struct list_head *head = &sdp->sd_ail1_list;
	uint64_t sync_gen;
	struct list_head *first, *tmp;
	struct gfs2_ail *first_ai, *ai;

	gfs2_log_lock(sdp);
	if (list_empty(head)) {
		gfs2_log_unlock(sdp);
		return;
	}
	/* New generation number distinguishes this pass from earlier ones. */
	sync_gen = sdp->sd_ail_sync_gen++;

	/* Oldest entry lives at the list tail; start it first. */
	first = head->prev;
	first_ai = list_entry(first, struct gfs2_ail, ai_list);
	first_ai->ai_sync_gen = sync_gen;
	gfs2_ail1_start_one(sdp, first_ai);

	/* DIO_ALL: forget the "stop at first" anchor and push everything. */
	if (flags & DIO_ALL)
		first = NULL;

	for (;;) {
		/* Done when the anchor entry has emptied or moved off the tail. */
		if (first && (head->prev != first ||
			      gfs2_ail1_empty_one(sdp, first_ai, 0)))
			break;

		/* Find the oldest entry not yet touched by this pass. */
		for (tmp = head->prev; tmp != head; tmp = tmp->prev) {
			ai = list_entry(tmp, struct gfs2_ail, ai_list);
			if (ai->ai_sync_gen >= sync_gen)
				continue;
			ai->ai_sync_gen = sync_gen;
			gfs2_ail1_start_one(sdp, ai);
			break;
		}

		/* Whole list already stamped: nothing left to start. */
		if (tmp == head)
			break;
	}

	gfs2_log_unlock(sdp);
}
105
/**
 * gfs2_ail1_empty - move finished AIL1 entries to the AIL2 list
 * @sdp: the filesystem
 * @flags: DIO_ALL to examine every entry instead of stopping at the
 *         first one that is still busy
 *
 * Walks oldest-first; an entry whose buffers have all been written is
 * moved to sd_ail2_list for later reclamation by ail2_empty().
 *
 * Returns: 1 if the AIL1 list is now empty, 0 otherwise
 */
int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
{
	struct gfs2_ail *ai, *s;
	int ret;

	gfs2_log_lock(sdp);

	list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
		if (gfs2_ail1_empty_one(sdp, ai, flags))
			list_move(&ai->ai_list, &sdp->sd_ail2_list);
		else if (!(flags & DIO_ALL))
			break;
	}

	ret = list_empty(&sdp->sd_ail1_list);

	gfs2_log_unlock(sdp);

	return ret;
}
126
/**
 * ail2_empty - free AIL2 entries whose journal space has been reclaimed
 * @sdp: the filesystem
 * @new_tail: the journal block the tail is about to advance to
 *
 * An entry can be freed once its first journal block falls in the range
 * [old_tail, new_tail) of reclaimed log space; the range test must
 * account for the circular journal wrapping around block 0.
 */
static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	struct gfs2_ail *ai, *safe;
	unsigned int old_tail = sdp->sd_log_tail;
	int wrap = (new_tail < old_tail);
	int a, b, rm;

	gfs2_log_lock(sdp);

	list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
		a = (old_tail <= ai->ai_first);
		b = (ai->ai_first < new_tail);
		/* Wrapped range is the union of the two halves, otherwise
		   the intersection. */
		rm = (wrap) ? (a || b) : (a && b);
		if (!rm)
			continue;

		gfs2_ail2_empty_one(sdp, ai);
		list_del(&ai->ai_list);
		gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
		gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
		kfree(ai);
	}

	gfs2_log_unlock(sdp);
}
152
/**
 * gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
 * If not enough blocks are free, the log is flushed and AIL entries
 * pushed out until the reservation fits.  On success the caller also
 * holds sd_log_flush_lock for read, released by gfs2_log_release() or
 * gfs2_log_commit().
 *
 * Returns: errno
 */

int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
	unsigned int try = 0;

	if (gfs2_assert_warn(sdp, blks) ||
	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
		return -EINVAL;

	/* Serialize reservers so one large request can't be starved. */
	mutex_lock(&sdp->sd_log_reserve_mutex);
	gfs2_log_lock(sdp);
	while(sdp->sd_log_blks_free <= blks) {
		/* Drop the spinlock: flushing sleeps. */
		gfs2_log_unlock(sdp);
		gfs2_ail1_empty(sdp, 0);
		gfs2_log_flush(sdp);

		/* Still short after one flush: start AIL writeback too. */
		if (try++)
			gfs2_ail1_start(sdp, 0);
		gfs2_log_lock(sdp);
	}
	sdp->sd_log_blks_free -= blks;
	gfs2_log_unlock(sdp);
	mutex_unlock(&sdp->sd_log_reserve_mutex);

	down_read(&sdp->sd_log_flush_lock);

	return 0;
}
188
/**
 * gfs2_log_release - Release a given number of log blocks
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks
 *
 * Undoes gfs2_log_reserve(): drops the flush lock held for read and
 * returns the blocks to the free count.
 */

void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
	up_read(&sdp->sd_log_flush_lock);

	gfs2_log_lock(sdp);
	sdp->sd_log_blks_free += blks;
	gfs2_assert_withdraw(sdp,
			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
	gfs2_log_unlock(sdp);
}
206
/**
 * log_bmap - map a logical journal block to its disk block number
 * @sdp: the filesystem
 * @lbn: logical block number within the journal inode
 *
 * Returns: the disk block number (a failed or zero mapping withdraws
 * the filesystem via the assert)
 */
static uint64_t log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
{
	int new = 0;
	uint64_t dbn;
	int error;

	/* new == 0: look up only, never allocate journal blocks here. */
	error = gfs2_block_map(sdp->sd_jdesc->jd_inode->u.generic_ip,
			       lbn, &new, &dbn, NULL);
	gfs2_assert_withdraw(sdp, !error && dbn);

	return dbn;
}
219
220/**
221 * log_distance - Compute distance between two journal blocks
222 * @sdp: The GFS2 superblock
223 * @newer: The most recent journal block of the pair
224 * @older: The older journal block of the pair
225 *
226 * Compute the distance (in the journal direction) between two
227 * blocks in the journal
228 *
229 * Returns: the distance in blocks
230 */
231
232static inline unsigned int log_distance(struct gfs2_sbd *sdp,
233 unsigned int newer,
234 unsigned int older)
235{
236 int dist;
237
238 dist = newer - older;
239 if (dist < 0)
240 dist += sdp->sd_jdesc->jd_blocks;
241
242 return dist;
243}
244
/**
 * current_tail - compute the current on-disk journal tail
 * @sdp: the filesystem
 *
 * The tail is the first journal block of the oldest AIL1 entry; if no
 * AIL1 entries remain, the whole log is clean and the tail equals the
 * head.
 *
 * Returns: the journal block number of the tail
 */
static unsigned int current_tail(struct gfs2_sbd *sdp)
{
	struct gfs2_ail *ai;
	unsigned int tail;

	gfs2_log_lock(sdp);

	if (list_empty(&sdp->sd_ail1_list))
		tail = sdp->sd_log_head;
	else {
		/* Oldest entry sits at the list tail. */
		ai = list_entry(sdp->sd_ail1_list.prev,
				struct gfs2_ail, ai_list);
		tail = ai->ai_first;
	}

	gfs2_log_unlock(sdp);

	return tail;
}
264
/**
 * log_incr_head - advance the flush head by one journal block
 * @sdp: the filesystem
 *
 * Wraps to block 0 at the end of the journal, recording the wrap so
 * gfs2_log_flush_i() can bump sd_log_wraps.  Overrunning the tail is
 * only legal when the log is entirely empty (head == tail).
 */
static inline void log_incr_head(struct gfs2_sbd *sdp)
{
	if (sdp->sd_log_flush_head == sdp->sd_log_tail)
		gfs2_assert_withdraw(sdp,
				sdp->sd_log_flush_head == sdp->sd_log_head);

	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
		sdp->sd_log_flush_head = 0;
		sdp->sd_log_flush_wrapped = 1;
	}
}
276
/**
 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
 * @sdp: The GFS2 superblock
 *
 * Maps the current flush head to a disk block, zeroes the buffer, and
 * tracks it on sd_log_flush_list so log_flush_commit() can wait on it.
 * __GFP_NOFAIL: the allocation loops rather than fails.
 *
 * Returns: the buffer_head
 */

struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
{
	uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
	struct gfs2_log_buf *lb;
	struct buffer_head *bh;

	lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
	list_add(&lb->lb_list, &sdp->sd_log_flush_list);

	bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
	unlock_buffer(bh);

	log_incr_head(sdp);

	return bh;
}
304
/**
 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
 * @sdp: the filesystem
 * @real: the in-place metadata buffer whose page the fake bh aliases
 *
 * The fake bh shares @real's page but is mapped at the journal block,
 * so the same data can be written to the log without copying.
 * log_flush_commit() frees it once its I/O completes.
 *
 * Returns: the log buffer descriptor
 */

struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
				      struct buffer_head *real)
{
	uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
	struct gfs2_log_buf *lb;
	struct buffer_head *bh;

	lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
	list_add(&lb->lb_list, &sdp->sd_log_flush_list);
	lb->lb_real = real;

	bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
	atomic_set(&bh->b_count, 1);
	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
	set_bh_page(bh, real->b_page, bh_offset(real));
	bh->b_blocknr = blkno;
	bh->b_size = sdp->sd_sb.sb_bsize;
	bh->b_bdev = sdp->sd_vfs->s_bdev;

	log_incr_head(sdp);

	return bh;
}
336
/**
 * log_pull_tail - advance the journal tail, reclaiming freed log space
 * @sdp: the filesystem
 * @new_tail: the journal block the tail moves to
 * @pull: non-zero when one reclaimed block was consumed by the header
 *        that pulled the tail (see log_write_header())
 */
static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
{
	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);

	/* Free AIL2 entries covered by the newly reclaimed range first. */
	ail2_empty(sdp, new_tail);

	gfs2_log_lock(sdp);
	sdp->sd_log_blks_free += dist - ((pull) ? 1 : 0);
	gfs2_assert_withdraw(sdp,
			sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
	gfs2_log_unlock(sdp);

	sdp->sd_log_tail = new_tail;
}
351
352/**
353 * log_write_header - Get and initialize a journal header buffer
354 * @sdp: The GFS2 superblock
355 *
356 * Returns: the initialized log buffer descriptor
357 */
358
359static void log_write_header(struct gfs2_sbd *sdp, uint32_t flags, int pull)
360{
361 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
362 struct buffer_head *bh;
363 struct gfs2_log_header *lh;
364 unsigned int tail;
365 uint32_t hash;
366
367 bh = sb_getblk(sdp->sd_vfs, blkno);
368 lock_buffer(bh);
369 memset(bh->b_data, 0, bh->b_size);
370 set_buffer_uptodate(bh);
371 clear_buffer_dirty(bh);
372 unlock_buffer(bh);
373
374 gfs2_ail1_empty(sdp, 0);
375 tail = current_tail(sdp);
376
377 lh = (struct gfs2_log_header *)bh->b_data;
378 memset(lh, 0, sizeof(struct gfs2_log_header));
379 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
380 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
381 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
382 lh->lh_sequence = be64_to_cpu(sdp->sd_log_sequence++);
383 lh->lh_flags = be32_to_cpu(flags);
384 lh->lh_tail = be32_to_cpu(tail);
385 lh->lh_blkno = be32_to_cpu(sdp->sd_log_flush_head);
386 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
387 lh->lh_hash = cpu_to_be32(hash);
388
389 set_buffer_dirty(bh);
390 if (sync_dirty_buffer(bh))
391 gfs2_io_error_bh(sdp, bh);
392 brelse(bh);
393
394 if (sdp->sd_log_tail != tail)
395 log_pull_tail(sdp, tail, pull);
396 else
397 gfs2_assert_withdraw(sdp, !pull);
398
399 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
400 log_incr_head(sdp);
401}
402
/**
 * log_flush_commit - wait for all in-flight log writes, then commit
 * @sdp: the filesystem
 *
 * Waits on every buffer queued on sd_log_flush_list, then writes the
 * commit record (a log header) that makes the flushed data valid.
 */
static void log_flush_commit(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_log_flush_list;
	struct gfs2_log_buf *lb;
	struct buffer_head *bh;
	unsigned int d;

	d = log_distance(sdp, sdp->sd_log_flush_head, sdp->sd_log_head);

	/* Everything written plus the header must match the reservation. */
	gfs2_assert_withdraw(sdp, d + 1 == sdp->sd_log_blks_reserved);

	while (!list_empty(head)) {
		lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
		list_del(&lb->lb_list);
		bh = lb->lb_bh;

		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);
		if (lb->lb_real) {
			/* Fake bh (gfs2_log_fake_buf): spin until the block
			   layer drops its extra reference, then free it. */
			while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
				schedule();
			free_buffer_head(bh);
		} else
			brelse(bh);
		kfree(lb);
	}

	/* Commit record: the header that makes this flush valid on disk. */
	log_write_header(sdp, 0, 0);
}
433
/**
 * gfs2_log_flush_i - flush incore transaction(s)
 * @sdp: the filesystem
 * @gl: The glock structure to flush. If NULL, flush the whole incore log
 *
 * Takes sd_log_flush_lock for write, excluding committers.  A fresh
 * gfs2_ail is allocated up front (NOFAIL) to track the buffers this
 * flush puts in flight; it joins sd_ail1_list only if the flush
 * produced any, otherwise it is freed at the end.
 */

void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
{
	struct gfs2_ail *ai;

	ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ai->ai_ail1_list);
	INIT_LIST_HEAD(&ai->ai_ail2_list);

	down_write(&sdp->sd_log_flush_lock);

	/* Flushing for a specific glock: nothing to do if it has no
	   log elements queued. */
	if (gl) {
		gfs2_log_lock(sdp);
		if (list_empty(&gl->gl_le.le_list)) {
			gfs2_log_unlock(sdp);
			up_write(&sdp->sd_log_flush_lock);
			kfree(ai);
			return;
		}
		gfs2_log_unlock(sdp);
	}


	gfs2_assert_withdraw(sdp,
			sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
	gfs2_assert_withdraw(sdp,
			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);

	sdp->sd_log_flush_head = sdp->sd_log_head;
	sdp->sd_log_flush_wrapped = 0;
	ai->ai_first = sdp->sd_log_flush_head;

	lops_before_commit(sdp);
	if (!list_empty(&sdp->sd_log_flush_list))
		log_flush_commit(sdp);
	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
		/* Nothing to write, but the tail can still be pulled. */
		log_write_header(sdp, 0, PULL);
	lops_after_commit(sdp, ai);
	sdp->sd_log_head = sdp->sd_log_flush_head;
	if (sdp->sd_log_flush_wrapped)
		sdp->sd_log_wraps++;

	sdp->sd_log_blks_reserved =
		sdp->sd_log_commited_buf =
		sdp->sd_log_commited_revoke = 0;

	/* Hand the AIL entry over only if buffers are actually in flight. */
	gfs2_log_lock(sdp);
	if (!list_empty(&ai->ai_ail1_list)) {
		list_add(&ai->ai_list, &sdp->sd_ail1_list);
		ai = NULL;
	}
	gfs2_log_unlock(sdp);

	sdp->sd_vfs->s_dirt = 0;
	up_write(&sdp->sd_log_flush_lock);

	/* kfree(NULL) is a no-op when ai was handed to the AIL. */
	kfree(ai);
}
498
/**
 * log_refund - return a transaction's unused log reservation
 * @sdp: the filesystem
 * @tr: the transaction being committed
 *
 * Recomputes the blocks the log actually needs for everything
 * committed so far and gives back the difference between what @tr
 * reserved and what it will really consume.
 */
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	/* One block is always needed for the commit header. */
	unsigned int reserved = 1;
	unsigned int old;

	gfs2_log_lock(sdp);

	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);

	/* 503 = block numbers per 4k descriptor block
	   (see buf_lo_before_commit in lops.c). */
	if (sdp->sd_log_commited_buf)
		reserved += 1 + sdp->sd_log_commited_buf +
			sdp->sd_log_commited_buf/503;
	if (sdp->sd_log_commited_revoke)
		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
					    sizeof(uint64_t));

	old = sdp->sd_log_blks_free;
	sdp->sd_log_blks_free += tr->tr_reserved -
				 (reserved - sdp->sd_log_blks_reserved);

	/* A refund may only grow the free count, never shrink it. */
	gfs2_assert_withdraw(sdp,
			     sdp->sd_log_blks_free >= old);
	gfs2_assert_withdraw(sdp,
			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);

	sdp->sd_log_blks_reserved = reserved;

	gfs2_log_unlock(sdp);
}
531
/**
 * gfs2_log_commit - Commit a transaction to the log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * Refunds unused reservation, moves the transaction's elements into
 * the incore log, and drops the read-side flush lock taken by
 * gfs2_log_reserve().  A flush is forced once the incore log grows
 * past the tunable threshold.
 */

void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	log_refund(sdp, tr);
	lops_incore_commit(sdp, tr);

	sdp->sd_vfs->s_dirt = 1;
	up_read(&sdp->sd_log_flush_lock);

	gfs2_log_lock(sdp);
	if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
		gfs2_log_unlock(sdp);
		gfs2_log_flush(sdp);
	} else
		gfs2_log_unlock(sdp);
}
555
/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 *
 * Asserts that the log is completely quiescent (no reservations, no
 * pending elements, empty AIL), then writes an UNMOUNT header so the
 * journal is marked clean for the next mount.
 */

void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
	down_write(&sdp->sd_log_flush_lock);

	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

	sdp->sd_log_flush_head = sdp->sd_log_head;
	sdp->sd_log_flush_wrapped = 0;

	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);

	/* After the shutdown header the entire journal must be free. */
	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free ==
			     sdp->sd_jdesc->jd_blocks);
	gfs2_assert_withdraw(sdp, sdp->sd_log_head == sdp->sd_log_tail);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail2_list));

	sdp->sd_log_head = sdp->sd_log_flush_head;
	if (sdp->sd_log_flush_wrapped)
		sdp->sd_log_wraps++;
	sdp->sd_log_tail = sdp->sd_log_head;

	up_write(&sdp->sd_log_flush_lock);
}
592
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
new file mode 100644
index 000000000000..e7a6a65c530f
--- /dev/null
+++ b/fs/gfs2/log.h
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOG_DOT_H__
11#define __LOG_DOT_H__
12
/**
 * gfs2_log_lock - acquire the right to mess with the log manager
 * @sdp: the filesystem
 *
 * A plain spinlock: callers must not sleep while holding it.
 */

static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
{
	spin_lock(&sdp->sd_log_lock);
}
23
/**
 * gfs2_log_unlock - release the right to mess with the log manager
 * @sdp: the filesystem
 *
 */

static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
{
	spin_unlock(&sdp->sd_log_lock);
}
34
/**
 * gfs2_log_pointers_init - set head and tail to the block after @value
 * @sdp: the filesystem
 * @value: the last journal block used (e.g. from a recovered header)
 *
 * Advances one block past @value, wrapping to 0 (and counting a wrap)
 * at the journal's end, then points both head and tail there.
 */
static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
					  unsigned int value)
{
	if (++value == sdp->sd_jdesc->jd_blocks) {
		value = 0;
		sdp->sd_log_wraps++;
	}
	sdp->sd_log_head = sdp->sd_log_tail = value;
}
44
45unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
46 unsigned int ssize);
47
48void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
49int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
50
51int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
52void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
53
54struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
55struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
56 struct buffer_head *real);
57
58#define gfs2_log_flush(sdp) gfs2_log_flush_i((sdp), NULL)
59#define gfs2_log_flush_glock(gl) gfs2_log_flush_i((gl)->gl_sbd, (gl))
60void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62
63void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64
65#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
new file mode 100644
index 000000000000..689c9101c0fb
--- /dev/null
+++ b/fs/gfs2/lops.c
@@ -0,0 +1,803 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "glock.h"
22#include "log.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "recovery.h"
26#include "rgrp.h"
27#include "trans.h"
28#include "util.h"
29
/**
 * glock_lo_add - add a glock's log element to the current transaction
 * @sdp: the filesystem
 * @le: the glock's embedded log element
 *
 * Idempotent: a glock already on the incore log list is left alone.
 * Takes a glock reference that glock_lo_after_commit() drops.
 */
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_glock *gl;
	struct gfs2_trans *tr = current->journal_info;

	tr->tr_touched = 1;

	if (!list_empty(&le->le_list))
		return;

	gl = container_of(le, struct gfs2_glock, gl_le);
	/* Only a glock held exclusively may carry journaled metadata. */
	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
		return;
	gfs2_glock_hold(gl);
	set_bit(GLF_DIRTY, &gl->gl_flags);

	gfs2_log_lock(sdp);
	sdp->sd_log_num_gl++;
	list_add(&le->le_list, &sdp->sd_log_le_gl);
	gfs2_log_unlock(sdp);
}
51
/**
 * glock_lo_after_commit - drop glock references after a log flush
 * @sdp: the filesystem
 * @ai: the AIL entry for this flush (unused here)
 *
 * Empties sd_log_le_gl, releasing the reference taken by glock_lo_add().
 */
static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_gl;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
		list_del_init(&gl->gl_le.le_list);
		sdp->sd_log_num_gl--;

		gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
		gfs2_glock_put(gl);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
}
67
/**
 * buf_lo_add - add a metadata buffer to the current transaction
 * @sdp: the filesystem
 * @le: the bufdata's embedded log element
 *
 * Links the buffer into the transaction list and, on first sight,
 * into the incore log: its glock is added too, the buffer is pinned
 * in memory, and the new-buffer count (used for log reservation
 * accounting in log_refund()) is bumped.
 */
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr;

	/* Already in this transaction: nothing more to do. */
	if (!list_empty(&bd->bd_list_tr))
		return;

	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);

	/* Already on the incore log from an earlier transaction. */
	if (!list_empty(&le->le_list))
		return;

	gfs2_trans_add_gl(bd->bd_gl);

	gfs2_meta_check(sdp, bd->bd_bh);
	/* Pin: keep the buffer from being written in place before the
	   log copy is on disk. */
	gfs2_pin(sdp, bd->bd_bh);

	gfs2_log_lock(sdp);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	gfs2_log_unlock(sdp);

	tr->tr_num_buf_new++;
}
96
/**
 * buf_lo_incore_commit - detach a transaction's buffers at commit
 * @sdp: the filesystem
 * @tr: the committing transaction
 *
 * Empties tr_list_buf; the buffers stay on the incore log list
 * (sd_log_le_buf) until the next flush writes them.
 */
static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head = &tr->tr_list_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
		list_del_init(&bd->bd_list_tr);
		tr->tr_num_buf--;
	}
	gfs2_assert_warn(sdp, !tr->tr_num_buf);
}
109
/**
 * buf_lo_before_commit - write the incore metadata buffers into the log
 * @sdp: the filesystem
 *
 * Emits runs of [descriptor block, up to `limit` data blocks].  Two
 * parallel walks over sd_log_le_buf are needed per run: bd1 fills the
 * descriptor with block numbers, then bd2 queues a fake buffer_head
 * (aliasing each real buffer's page) for every block the descriptor
 * just named.  Writes are started here and waited on later by
 * log_flush_commit().
 */
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	unsigned int total = sdp->sd_log_num_buf;
	unsigned int offset = sizeof(struct gfs2_log_descriptor);
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	/* Round the block-number array start up to __be64 alignment. */
	offset += (sizeof(__be64) - 1);
	offset &= ~(sizeof(__be64) - 1);
	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
	/* for 4k blocks, limit = 503 */

	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
		/* Descriptor block naming the next `num` journal blocks. */
		bh = gfs2_log_get_buf(sdp);
		ld = (struct gfs2_log_descriptor *)bh->b_data;
		ptr = (__be64 *)(bh->b_data + offset);
		ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
		ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
		ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
		ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);
		ld->ld_data2 = cpu_to_be32(0);
		memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));

		/* Pass 1: record the home block number of each buffer. */
		n = 0;
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

		set_buffer_dirty(bh);
		ll_rw_block(WRITE, 1, &bh);

		/* Pass 2: write the buffer contents via fake bh aliases. */
		n = 0;
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			if (++n >= num)
				break;
		}

		total -= num;
	}
}
168
/**
 * buf_lo_after_commit - unpin buffers once their log copies are on disk
 * @sdp: the filesystem
 * @ai: the AIL entry tracking this flush (unpinned buffers join it)
 *
 * Empties sd_log_le_buf; each buffer becomes eligible for in-place
 * writeback via the AIL.
 */
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}
183
/**
 * buf_lo_before_scan - reset replay counters before journal recovery
 * @jd: the journal being recovered
 * @head: the recovered log header (unused here)
 * @pass: recovery is multi-pass; counters reset only on pass 0
 */
static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header *head, int pass)
{
	struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;

	if (pass != 0)
		return;

	sdp->sd_found_blocks = 0;
	sdp->sd_replayed_blocks = 0;
}
196
/**
 * buf_lo_scan_elements - replay one metadata descriptor during recovery
 * @jd: the journal being recovered
 * @start: journal block of the descriptor
 * @ld: the log descriptor
 * @ptr: the descriptor's array of home block numbers
 * @pass: only acts on pass 1 (revokes were gathered on pass 0)
 *
 * Copies each logged block back to its home location unless a revoke
 * supersedes it.
 *
 * Returns: errno
 */
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	uint64_t blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	/* Skip past the descriptor block itself. */
	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		/* A later revoke makes this logged copy stale. */
		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}
245
/**
 * buf_lo_after_scan - sync replayed metadata after the scan pass
 * @jd: the journal that was recovered
 * @error: non-zero if the scan failed part-way
 * @pass: summary is printed after pass 1 only
 *
 * Even on error the dirtied buffers are synced so the filesystem is
 * not left with unwritten replayed blocks.
 */
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;

	if (error) {
		gfs2_meta_sync(ip->i_gl,
			       DIO_START | DIO_WAIT);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl, DIO_START | DIO_WAIT);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
	        jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}
264
/**
 * revoke_lo_add - add a revoke record to the current transaction
 * @sdp: the filesystem
 * @le: the revoke's embedded log element
 *
 * A revoke tells replay that earlier logged copies of a block are
 * stale and must not be rewritten.
 */
static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_trans *tr;

	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_revoke++;

	gfs2_log_lock(sdp);
	sdp->sd_log_num_revoke++;
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
	gfs2_log_unlock(sdp);
}
278
/**
 * revoke_lo_before_commit - write all queued revokes into the log
 * @sdp: the filesystem
 *
 * Packs the revoked block numbers into a descriptor block, spilling
 * into continuation blocks (GFS2_METATYPE_LB) as each fills up.  The
 * struct gfs2_revoke entries are freed as they are consumed.
 */
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_revoke *rv;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_log_get_buf(sdp);
	ld = (struct gfs2_log_descriptor *)bh->b_data;
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
						    sizeof(uint64_t)));
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	ld->ld_data2 = cpu_to_be32(0);
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	offset = sizeof(struct gfs2_log_descriptor);

	while (!list_empty(head)) {
		rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
		list_del_init(&rv->rv_le.le_list);
		sdp->sd_log_num_revoke--;

		/* Current block full: start a continuation block. */
		if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) {
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
		kfree(rv);

		offset += sizeof(uint64_t);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	set_buffer_dirty(bh);
	ll_rw_block(WRITE, 1, &bh);
}
331
332static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
333 struct gfs2_log_header *head, int pass)
334{
335 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
336 struct gfs2_sbd *sdp = ip->i_sbd;
337
338 if (pass != 0)
339 return;
340
341 sdp->sd_found_revokes = 0;
342 sdp->sd_replay_tail = head->lh_tail;
343}
344
345static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
346 struct gfs2_log_descriptor *ld, __be64 *ptr,
347 int pass)
348{
349 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
350 struct gfs2_sbd *sdp = ip->i_sbd;
351 unsigned int blks = be32_to_cpu(ld->ld_length);
352 unsigned int revokes = be32_to_cpu(ld->ld_data1);
353 struct buffer_head *bh;
354 unsigned int offset;
355 uint64_t blkno;
356 int first = 1;
357 int error;
358
359 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
360 return 0;
361
362 offset = sizeof(struct gfs2_log_descriptor);
363
364 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
365 error = gfs2_replay_read_block(jd, start, &bh);
366 if (error)
367 return error;
368
369 if (!first)
370 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
371
372 while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
373 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
374
375 error = gfs2_revoke_add(sdp, blkno, start);
376 if (error < 0)
377 return error;
378 else if (error)
379 sdp->sd_found_revokes++;
380
381 if (!--revokes)
382 break;
383 offset += sizeof(uint64_t);
384 }
385
386 brelse(bh);
387 offset = sizeof(struct gfs2_meta_header);
388 first = 0;
389 }
390
391 return 0;
392}
393
394static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
395{
396 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
397 struct gfs2_sbd *sdp = ip->i_sbd;
398
399 if (error) {
400 gfs2_revoke_clean(sdp);
401 return;
402 }
403 if (pass != 1)
404 return;
405
406 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
407 jd->jd_jid, sdp->sd_found_revokes);
408
409 gfs2_revoke_clean(sdp);
410}
411
412static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
413{
414 struct gfs2_rgrpd *rgd;
415 struct gfs2_trans *tr = current->journal_info;
416
417 tr->tr_touched = 1;
418
419 if (!list_empty(&le->le_list))
420 return;
421
422 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
423 gfs2_rgrp_bh_hold(rgd);
424
425 gfs2_log_lock(sdp);
426 sdp->sd_log_num_rg++;
427 list_add(&le->le_list, &sdp->sd_log_le_rg);
428 gfs2_log_unlock(sdp);
429}
430
431static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
432{
433 struct list_head *head = &sdp->sd_log_le_rg;
434 struct gfs2_rgrpd *rgd;
435
436 while (!list_empty(head)) {
437 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
438 list_del_init(&rgd->rd_le.le_list);
439 sdp->sd_log_num_rg--;
440
441 gfs2_rgrp_repolish_clones(rgd);
442 gfs2_rgrp_bh_put(rgd);
443 }
444 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
445}
446
447/**
448 * databuf_lo_add - Add a databuf to the transaction.
449 *
450 * This is used in two distinct cases:
451 * i) In ordered write mode
452 * We put the data buffer on a list so that we can ensure that its
453 * synced to disk at the right time
454 * ii) In journaled data mode
455 * We need to journal the data block in the same way as metadata in
456 * the functions above. The difference is that here we have a tag
457 * which is two __be64's being the block number (as per meta data)
458 * and a flag which says whether the data block needs escaping or
459 * not. This means we need a new log entry for each 251 or so data
460 * blocks, which isn't an enormous overhead but twice as much as
461 * for normal metadata blocks.
462 */
463static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
464{
465 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
466 struct gfs2_trans *tr = current->journal_info;
467 struct address_space *mapping = bd->bd_bh->b_page->mapping;
468 struct gfs2_inode *ip = mapping->host->u.generic_ip;
469
470 tr->tr_touched = 1;
471 if (!list_empty(&bd->bd_list_tr) &&
472 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
473 tr->tr_num_buf++;
474 gfs2_trans_add_gl(bd->bd_gl);
475 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
476 gfs2_pin(sdp, bd->bd_bh);
477 tr->tr_num_buf_new++;
478 }
479 gfs2_log_lock(sdp);
480 if (!list_empty(&le->le_list)) {
481 if (ip->i_di.di_flags & GFS2_DIF_JDATA)
482 sdp->sd_log_num_jdata++;
483 sdp->sd_log_num_databuf++;
484 list_add(&le->le_list, &sdp->sd_log_le_databuf);
485 }
486 gfs2_log_unlock(sdp);
487}
488
489static int gfs2_check_magic(struct buffer_head *bh)
490{
491 struct page *page = bh->b_page;
492 void *kaddr;
493 __be32 *ptr;
494 int rv = 0;
495
496 kaddr = kmap_atomic(page, KM_USER0);
497 ptr = kaddr + bh_offset(bh);
498 if (*ptr == cpu_to_be32(GFS2_MAGIC))
499 rv = 1;
500 kunmap_atomic(page, KM_USER0);
501
502 return rv;
503}
504
/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 *
 * Here we scan through the lists of buffers and make the assumption
 * that any buffer thats been pinned is being journaled, and that
 * any unpinned buffer is an ordered write data buffer and therefore
 * will be written back rather than journaled.
 */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	LIST_HEAD(started);	/* ordered-write buffers to wait on at the end */
	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
	struct buffer_head *bh = NULL;
	unsigned int offset = sizeof(struct gfs2_log_descriptor);
	struct gfs2_log_descriptor *ld;
	unsigned int limit;
	unsigned int total_dbuf = sdp->sd_log_num_databuf;
	unsigned int total_jdata = sdp->sd_log_num_jdata;
	unsigned int num, n;
	__be64 *ptr = NULL;

	/* Round the tag area up to a 2*__be64 boundary; each jdata tag is
	   a (block number, escape flag) pair, so 'limit' halves to roughly
	   251 tags per descriptor block on a 4k filesystem */
	offset += (2*sizeof(__be64) - 1);
	offset &= ~(2*sizeof(__be64) - 1);
	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);

	/*
	 * Start writing ordered buffers, write journaled buffers
	 * into the log along with a header
	 */
	gfs2_log_lock(sdp);
	/* bd1 walks ahead writing tags; bd2 trails to emit the data blocks */
	bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
				       bd_le.le_list);
	while(total_dbuf) {
		num = total_jdata;
		if (num > limit)
			num = limit;
		n = 0;
		list_for_each_entry_safe_continue(bd1, bdt,
						  &sdp->sd_log_le_databuf,
						  bd_le.le_list) {
			/* An ordered write buffer */
			if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
				list_move(&bd1->bd_le.le_list, &started);
				/* Don't leave bd2 pointing at an entry that
				   just moved off the databuf list */
				if (bd1 == bd2) {
					bd2 = NULL;
					bd2 = list_prepare_entry(bd2,
							&sdp->sd_log_le_databuf,
							bd_le.le_list);
				}
				total_dbuf--;
				if (bd1->bd_bh) {
					get_bh(bd1->bd_bh);
					if (buffer_dirty(bd1->bd_bh)) {
						gfs2_log_unlock(sdp);
						/* NOTE(review): waiting
						   before resubmitting the
						   write looks deliberate but
						   is worth confirming */
						wait_on_buffer(bd1->bd_bh);
						ll_rw_block(WRITE, 1,
							    &bd1->bd_bh);
						gfs2_log_lock(sdp);
					}
					brelse(bd1->bd_bh);
					continue;
				}
				continue;
			} else if (bd1->bd_bh) { /* A journaled buffer */
				int magic;
				gfs2_log_unlock(sdp);
				/* First jdata buffer of this batch: start a
				   new JDATA log descriptor */
				if (!bh) {
					bh = gfs2_log_get_buf(sdp);
					ld = (struct gfs2_log_descriptor *)
					     bh->b_data;
					ptr = (__be64 *)(bh->b_data + offset);
					ld->ld_header.mh_magic =
						cpu_to_be32(GFS2_MAGIC);
					ld->ld_header.mh_type =
						cpu_to_be32(GFS2_METATYPE_LD);
					ld->ld_header.mh_format =
						cpu_to_be32(GFS2_FORMAT_LD);
					ld->ld_type =
						cpu_to_be32(GFS2_LOG_DESC_JDATA);
					ld->ld_length = cpu_to_be32(num + 1);
					ld->ld_data1 = cpu_to_be32(num);
					ld->ld_data2 = cpu_to_be32(0);
					memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
				}
				/* Tag: block number + does-it-need-escaping */
				magic = gfs2_check_magic(bd1->bd_bh);
				*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
				*ptr++ = cpu_to_be64((__u64)magic);
				clear_buffer_escaped(bd1->bd_bh);
				if (unlikely(magic != 0))
					set_buffer_escaped(bd1->bd_bh);
				gfs2_log_lock(sdp);
				/* NOTE(review): post-increment test appears
				   to allow one tag beyond 'num' — confirm
				   against ld_data1 above */
				if (n++ > num)
					break;
			}
		}
		gfs2_log_unlock(sdp);
		/* Flush the descriptor block for this batch */
		if (bh) {
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			bh = NULL;
		}
		n = 0;
		gfs2_log_lock(sdp);
		/* Second walk: write the actual data blocks for the batch */
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
					     bd_le.le_list) {
			if (!bd2->bd_bh)
				continue;
			/* copy buffer if it needs escaping */
			gfs2_log_unlock(sdp);
			if (unlikely(buffer_escaped(bd2->bd_bh))) {
				void *kaddr;
				struct page *page = bd2->bd_bh->b_page;
				bh = gfs2_log_get_buf(sdp);
				kaddr = kmap_atomic(page, KM_USER0);
				memcpy(bh->b_data,
				       kaddr + bh_offset(bd2->bd_bh),
				       sdp->sd_sb.sb_bsize);
				/* NOTE(review): kunmap_atomic() normally
				   takes the mapped address (kaddr), not the
				   page — confirm */
				kunmap_atomic(page, KM_USER0);
				/* Zero the magic word; replay restores it */
				*(__be32 *)bh->b_data = 0;
			} else {
				bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			}
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}
		bh = NULL;
		total_dbuf -= num;
		total_jdata -= num;
	}
	gfs2_log_unlock(sdp);

	/* Wait on all ordered buffers */
	while (!list_empty(&started)) {
		gfs2_log_lock(sdp);
		bd1 = list_entry(started.next, struct gfs2_bufdata,
				 bd_le.le_list);
		list_del(&bd1->bd_le.le_list);
		sdp->sd_log_num_databuf--;

		bh = bd1->bd_bh;
		if (bh) {
			bh->b_private = NULL;
			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			brelse(bh);
		} else
			gfs2_log_unlock(sdp);

		kfree(bd1);
	}

	/* We've removed all the ordered write bufs here, so only jdata left */
	gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
}
662
/**
 * databuf_lo_scan_elements - Replay journaled data blocks (pass 1)
 * @jd: the journal being replayed
 * @start: log block of the descriptor
 * @ld: the JDATA log descriptor
 * @ptr: tag array: pairs of (block number, escape flag)
 * @pass: the current recovery pass
 *
 * Copies each logged data block back to its home location, undoing the
 * magic-number escaping applied by databuf_lo_before_commit().
 *
 * Returns: 0 on success, a negative error code otherwise
 */
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	uint64_t blkno;
	uint64_t esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	/* Skip past the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		/* Skip blocks revoked after this copy was logged */
		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);
		/* error is always 0 here; kept for symmetry with
		   buf_lo_scan_elements which can set -EIO */
		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}
713
714/* FIXME: sort out accounting for log blocks etc. */
715
716static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
717{
718 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
719 struct gfs2_sbd *sdp = ip->i_sbd;
720
721 if (error) {
722 gfs2_meta_sync(ip->i_gl,
723 DIO_START | DIO_WAIT);
724 return;
725 }
726 if (pass != 1)
727 return;
728
729 /* data sync? */
730 gfs2_meta_sync(ip->i_gl, DIO_START | DIO_WAIT);
731
732 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
733 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
734}
735
736static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
737{
738 struct list_head *head = &sdp->sd_log_le_databuf;
739 struct gfs2_bufdata *bd;
740
741 while (!list_empty(head)) {
742 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
743 list_del(&bd->bd_le.le_list);
744 sdp->sd_log_num_databuf--;
745 sdp->sd_log_num_jdata--;
746 gfs2_unpin(sdp, bd->bd_bh, ai);
747 }
748 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
749 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
750}
751
752
/* Glocks: only participate in transaction add and post-commit release */
struct gfs2_log_operations gfs2_glock_lops = {
	.lo_add = glock_lo_add,
	.lo_after_commit = glock_lo_after_commit,
	.lo_name = "glock"
};

/* Journaled metadata buffers: full commit and recovery participation */
struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf"
};

/* Revoke tags: written at commit, collected during recovery pass 0 */
struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke"
};

/* Resource groups: held across the transaction, released after commit */
struct gfs2_log_operations gfs2_rg_lops = {
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
	.lo_name = "rg"
};

/* Data buffers (ordered and journaled); note: has lo_after_scan but no
   lo_before_scan */
struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf"
};

/* NULL-terminated table iterated by the lops_*() helpers in lops.h */
struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_glock_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	&gfs2_rg_lops,
	&gfs2_databuf_lops,
	NULL
};
803
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
new file mode 100644
index 000000000000..417f5aade4b1
--- /dev/null
+++ b/fs/gfs2/lops.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOPS_DOT_H__
11#define __LOPS_DOT_H__
12
13extern struct gfs2_log_operations gfs2_glock_lops;
14extern struct gfs2_log_operations gfs2_buf_lops;
15extern struct gfs2_log_operations gfs2_revoke_lops;
16extern struct gfs2_log_operations gfs2_rg_lops;
17extern struct gfs2_log_operations gfs2_databuf_lops;
18
19extern struct gfs2_log_operations *gfs2_log_ops[];
20
21static inline void lops_init_le(struct gfs2_log_element *le,
22 struct gfs2_log_operations *lops)
23{
24 INIT_LIST_HEAD(&le->le_list);
25 le->le_ops = lops;
26}
27
28static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
29{
30 if (le->le_ops->lo_add)
31 le->le_ops->lo_add(sdp, le);
32}
33
34static inline void lops_incore_commit(struct gfs2_sbd *sdp,
35 struct gfs2_trans *tr)
36{
37 int x;
38 for (x = 0; gfs2_log_ops[x]; x++)
39 if (gfs2_log_ops[x]->lo_incore_commit)
40 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
41}
42
43static inline void lops_before_commit(struct gfs2_sbd *sdp)
44{
45 int x;
46 for (x = 0; gfs2_log_ops[x]; x++)
47 if (gfs2_log_ops[x]->lo_before_commit)
48 gfs2_log_ops[x]->lo_before_commit(sdp);
49}
50
51static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 int x;
54 for (x = 0; gfs2_log_ops[x]; x++)
55 if (gfs2_log_ops[x]->lo_after_commit)
56 gfs2_log_ops[x]->lo_after_commit(sdp, ai);
57}
58
59static inline void lops_before_scan(struct gfs2_jdesc *jd,
60 struct gfs2_log_header *head,
61 unsigned int pass)
62{
63 int x;
64 for (x = 0; gfs2_log_ops[x]; x++)
65 if (gfs2_log_ops[x]->lo_before_scan)
66 gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
67}
68
69static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
70 struct gfs2_log_descriptor *ld,
71 __be64 *ptr,
72 unsigned int pass)
73{
74 int x, error;
75 for (x = 0; gfs2_log_ops[x]; x++)
76 if (gfs2_log_ops[x]->lo_scan_elements) {
77 error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
78 ld, ptr, pass);
79 if (error)
80 return error;
81 }
82
83 return 0;
84}
85
86static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
87 unsigned int pass)
88{
89 int x;
90 for (x = 0; gfs2_log_ops[x]; x++)
91 if (gfs2_log_ops[x]->lo_before_scan)
92 gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
93}
94
95#endif /* __LOPS_DOT_H__ */
96
diff --git a/fs/gfs2/lvb.c b/fs/gfs2/lvb.c
new file mode 100644
index 000000000000..63b815dad8e7
--- /dev/null
+++ b/fs/gfs2/lvb.c
@@ -0,0 +1,53 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "lvb.h"
22
23#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \
24 struct->member);
25
26void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb)
27{
28 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
29
30 qb->qb_magic = be32_to_cpu(str->qb_magic);
31 qb->qb_limit = be64_to_cpu(str->qb_limit);
32 qb->qb_warn = be64_to_cpu(str->qb_warn);
33 qb->qb_value = be64_to_cpu(str->qb_value);
34}
35
36void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb)
37{
38 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
39
40 str->qb_magic = cpu_to_be32(qb->qb_magic);
41 str->qb_limit = cpu_to_be64(qb->qb_limit);
42 str->qb_warn = cpu_to_be64(qb->qb_warn);
43 str->qb_value = cpu_to_be64(qb->qb_value);
44}
45
/**
 * gfs2_quota_lvb_print - Dump a quota LVB's fields to the kernel log
 * @qb: the in-core quota LVB
 *
 * Uses the pv() macro above; each field is printed on its own line.
 */
void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb)
{
	pv(qb, qb_magic, "%u");
	pv(qb, qb_limit, "%llu");
	pv(qb, qb_warn, "%llu");
	pv(qb, qb_value, "%lld");
}
53
diff --git a/fs/gfs2/lvb.h b/fs/gfs2/lvb.h
new file mode 100644
index 000000000000..1b9eb69b9534
--- /dev/null
+++ b/fs/gfs2/lvb.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LVB_DOT_H__
11#define __LVB_DOT_H__
12
13#define GFS2_MIN_LVB_SIZE 32
14
15void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb);
16void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb);
17void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb);
18
19#endif /* __LVB_DOT_H__ */
20
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
new file mode 100644
index 000000000000..c8d17b7ba60b
--- /dev/null
+++ b/fs/gfs2/main.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/gfs2_ondisk.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "ops_fstype.h"
24#include "sys.h"
25#include "util.h"
26
27/**
28 * init_gfs2_fs - Register GFS2 as a filesystem
29 *
30 * Returns: 0 on success, error code on failure
31 */
32
33static int __init init_gfs2_fs(void)
34{
35 int error;
36
37 gfs2_init_lmh();
38
39 error = gfs2_sys_init();
40 if (error)
41 return error;
42
43 error = -ENOMEM;
44
45 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
46 sizeof(struct gfs2_glock),
47 0, 0, NULL, NULL);
48 if (!gfs2_glock_cachep)
49 goto fail;
50
51 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
52 sizeof(struct gfs2_inode),
53 0, 0, NULL, NULL);
54 if (!gfs2_inode_cachep)
55 goto fail;
56
57 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
58 sizeof(struct gfs2_bufdata),
59 0, 0, NULL, NULL);
60 if (!gfs2_bufdata_cachep)
61 goto fail;
62
63 error = register_filesystem(&gfs2_fs_type);
64 if (error)
65 goto fail;
66
67 error = register_filesystem(&gfs2meta_fs_type);
68 if (error)
69 goto fail_unregister;
70
71 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
72
73 return 0;
74
75fail_unregister:
76 unregister_filesystem(&gfs2_fs_type);
77fail:
78 if (gfs2_bufdata_cachep)
79 kmem_cache_destroy(gfs2_bufdata_cachep);
80
81 if (gfs2_inode_cachep)
82 kmem_cache_destroy(gfs2_inode_cachep);
83
84 if (gfs2_glock_cachep)
85 kmem_cache_destroy(gfs2_glock_cachep);
86
87 gfs2_sys_uninit();
88 return error;
89}
90
/**
 * exit_gfs2_fs - Unregister the file system
 *
 * Mirror of init_gfs2_fs(): drops both filesystem registrations, frees
 * the slab caches, and tears down the sysfs entries.
 */

static void __exit exit_gfs2_fs(void)
{
	unregister_filesystem(&gfs2_fs_type);
	unregister_filesystem(&gfs2meta_fs_type);

	kmem_cache_destroy(gfs2_bufdata_cachep);
	kmem_cache_destroy(gfs2_inode_cachep);
	kmem_cache_destroy(gfs2_glock_cachep);

	gfs2_sys_uninit();
}
107
108MODULE_DESCRIPTION("Global File System");
109MODULE_AUTHOR("Red Hat, Inc.");
110MODULE_LICENSE("GPL");
111
112module_init(init_gfs2_fs);
113module_exit(exit_gfs2_fs);
114
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
new file mode 100644
index 000000000000..b85fa2464666
--- /dev/null
+++ b/fs/gfs2/meta_io.c
@@ -0,0 +1,887 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/writeback.h>
18#include <linux/swap.h>
19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h>
21#include <asm/semaphore.h>
22
23#include "gfs2.h"
24#include "lm_interface.h"
25#include "incore.h"
26#include "glock.h"
27#include "glops.h"
28#include "inode.h"
29#include "log.h"
30#include "lops.h"
31#include "meta_io.h"
32#include "rgrp.h"
33#include "trans.h"
34#include "util.h"
35
36#define buffer_busy(bh) \
37((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
38#define buffer_in_io(bh) \
39((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
40
/*
 * aspace_get_block - get_block stub for the metadata address space.
 * Metadata pages are populated directly via getbuf(), so the generic
 * block-mapping path should never run: warn and refuse.
 */
static int aspace_get_block(struct inode *inode, sector_t lblock,
			    struct buffer_head *bh_result, int create)
{
	gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
	return -EOPNOTSUPP;
}
47
/*
 * gfs2_aspace_writepage - write back a dirty metadata aspace page.
 * Delegates to block_write_full_page(); aspace_get_block exists only
 * to satisfy its interface (it warns if actually invoked).
 */
static int gfs2_aspace_writepage(struct page *page,
				 struct writeback_control *wbc)
{
	return block_write_full_page(page, aspace_get_block, wbc);
}
53
/**
 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
 * @bh: the buffer we're stuck on
 *
 * Diagnostic only: dumps the buffer, its bufdata, the owning glock and
 * (for inode glocks) the inode state to the kernel log.
 */

static void stuck_releasepage(struct buffer_head *bh)
{
	struct gfs2_sbd *sdp = bh->b_page->mapping->host->i_sb->s_fs_info;
	struct gfs2_bufdata *bd = bh->b_private;
	struct gfs2_glock *gl;

	fs_warn(sdp, "stuck in gfs2_releasepage()\n");
	fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
		(uint64_t)bh->b_blocknr, atomic_read(&bh->b_count));
	fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
	fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");

	/* No bufdata: nothing more to report */
	if (!bd)
		return;

	gl = bd->bd_gl;

	fs_warn(sdp, "gl = (%u, %llu)\n",
		gl->gl_name.ln_type, gl->gl_name.ln_number);

	fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
		(list_empty(&bd->bd_list_tr)) ? "no" : "yes",
		(list_empty(&bd->bd_le.le_list)) ? "no" : "yes");

	/* For inode glocks, also dump the inode's cached state */
	if (gl->gl_ops == &gfs2_inode_glops) {
		struct gfs2_inode *ip = gl->gl_object;
		unsigned int x;

		if (!ip)
			return;

		fs_warn(sdp, "ip = %llu %llu\n",
			ip->i_num.no_formal_ino, ip->i_num.no_addr);
		fs_warn(sdp, "ip->i_count = %d, ip->i_vnode = %s\n",
			atomic_read(&ip->i_count),
			(ip->i_vnode) ? "!NULL" : "NULL");

		for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
			fs_warn(sdp, "ip->i_cache[%u] = %s\n",
				x, (ip->i_cache[x]) ? "!NULL" : "NULL");
	}
}
102
/**
 * gfs2_aspace_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 0
 */

static int gfs2_aspace_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct inode *aspace = page->mapping->host;
	struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;
	unsigned long t;

	if (!page_has_buffers(page))
		goto out;

	head = bh = page_buffers(page);
	do {
		t = jiffies;

		/*
		 * While an invalidation is in progress (i_writecount is
		 * raised by gfs2_meta_inval()), spin-wait for the buffer's
		 * refcount to drop, dumping diagnostics every
		 * gt_stall_secs.  Otherwise fail the release immediately.
		 */
		while (atomic_read(&bh->b_count)) {
			if (atomic_read(&aspace->i_writecount)) {
				if (time_after_eq(jiffies, t +
				    gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
					stuck_releasepage(bh);
					t = jiffies;
				}

				yield();
				continue;
			}

			return 0;
		}

		gfs2_assert_warn(sdp, !buffer_pinned(bh));

		/* Detach and free the bufdata; all its lists must be empty */
		bd = bh->b_private;
		if (bd) {
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
			gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list));
			gfs2_assert_warn(sdp, !bd->bd_ail);
			kmem_cache_free(gfs2_bufdata_cachep, bd);
			bh->b_private = NULL;
		}

		bh = bh->b_this_page;
	}
	while (bh != head);

 out:
	return try_to_free_buffers(page);
}
163
/* Metadata address spaces only write back and release pages */
static struct address_space_operations aspace_aops = {
	.writepage = gfs2_aspace_writepage,
	.releasepage = gfs2_aspace_releasepage,
};
168
169/**
170 * gfs2_aspace_get - Create and initialize a struct inode structure
171 * @sdp: the filesystem the aspace is in
172 *
173 * Right now a struct inode is just a struct inode. Maybe Linux
174 * will supply a more lightweight address space construct (that works)
175 * in the future.
176 *
177 * Make sure pages/buffers in this aspace aren't in high memory.
178 *
179 * Returns: the aspace
180 */
181
182struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
183{
184 struct inode *aspace;
185
186 aspace = new_inode(sdp->sd_vfs);
187 if (aspace) {
188 mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
189 aspace->i_mapping->a_ops = &aspace_aops;
190 aspace->i_size = ~0ULL;
191 aspace->u.generic_ip = NULL;
192 insert_inode_hash(aspace);
193 }
194
195 return aspace;
196}
197
/*
 * gfs2_aspace_put - release an aspace inode from gfs2_aspace_get():
 * unhash it and drop the reference.
 */
void gfs2_aspace_put(struct inode *aspace)
{
	remove_inode_hash(aspace);
	iput(aspace);
}
203
/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @ai: the part of the AIL
 *
 * Walks the entry's ail1 list (oldest first), moving completed buffers
 * to the ail2 list and starting writeback of dirty ones.  The log lock
 * is dropped around the actual I/O, so the scan restarts from the top
 * after each submission in case the list changed.
 */

void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int retry;

	do {
		retry = 0;

		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
						 bd_ail_st_list) {
			bh = bd->bd_bh;

			gfs2_assert(sdp, bd->bd_ail == ai);

			/* I/O complete and clean: promote to ail2 */
			if (!buffer_busy(bh)) {
				if (!buffer_uptodate(bh))
					gfs2_io_error_bh(sdp, bh);
				list_move(&bd->bd_ail_st_list,
					  &ai->ai_ail2_list);
				continue;
			}

			if (!buffer_dirty(bh))
				continue;

			/* Move to the head of ail1 so the restart below
			   doesn't revisit it immediately */
			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);

			gfs2_log_unlock(sdp);
			/* NOTE(review): waiting for in-flight I/O before
			   resubmitting looks intentional — confirm */
			wait_on_buffer(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);

			retry = 1;
			break;
		}
	} while (retry);
}
249
/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @ai: the AIL entry
 * @flags: DIO_ALL to keep scanning past busy buffers
 *
 * Moves every completed (non-busy) buffer from the ail1 list to ail2.
 * Without DIO_ALL the scan stops at the first busy buffer.
 *
 * Returns: true if the ail1 list is now empty
 */

int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;

	list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;

		gfs2_assert(sdp, bd->bd_ail == ai);

		if (buffer_busy(bh)) {
			if (flags & DIO_ALL)
				continue;
			else
				break;
		}

		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);

		list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
	}

	return list_empty(&ai->ai_ail1_list);
}
283
284/**
285 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
286 * @sdp: the filesystem
287 * @ai: the AIL entry
288 *
289 */
290
291void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
292{
293 struct list_head *head = &ai->ai_ail2_list;
294 struct gfs2_bufdata *bd;
295
296 while (!list_empty(head)) {
297 bd = list_entry(head->prev, struct gfs2_bufdata,
298 bd_ail_st_list);
299 gfs2_assert(sdp, bd->bd_ail == ai);
300 bd->bd_ail = NULL;
301 list_del(&bd->bd_ail_st_list);
302 list_del(&bd->bd_ail_gl_list);
303 atomic_dec(&bd->bd_gl->gl_ail_count);
304 brelse(bd->bd_bh);
305 }
306}
307
/**
 * ail_empty_gl - remove all buffers for a given lock from the AIL
 * @gl: the glock
 *
 * None of the buffers should be dirty, locked, or pinned.
 * A revoke is journaled for every block removed, inside a transaction
 * sized by the glock's current AIL count.  The log lock is dropped
 * around gfs2_trans_add_revoke() and re-taken afterwards.
 */

void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	unsigned int blocks;
	struct list_head *head = &gl->gl_ail_list;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;
	uint64_t blkno;
	int error;

	blocks = atomic_read(&gl->gl_ail_count);
	if (!blocks)
		return;

	error = gfs2_trans_begin(sdp, 0, blocks);
	if (gfs2_assert_withdraw(sdp, !error))
		return;

	gfs2_log_lock(sdp);
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata,
				bd_ail_gl_list);
		bh = bd->bd_bh;
		/* Save the block number: bh may go away after brelse() */
		blkno = bh->b_blocknr;
		gfs2_assert_withdraw(sdp, !buffer_busy(bh));

		bd->bd_ail = NULL;
		list_del(&bd->bd_ail_st_list);
		list_del(&bd->bd_ail_gl_list);
		atomic_dec(&gl->gl_ail_count);
		brelse(bh);
		gfs2_log_unlock(sdp);

		gfs2_trans_add_revoke(sdp, blkno);

		gfs2_log_lock(sdp);
	}
	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
	gfs2_log_unlock(sdp);

	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp);
}
358
359/**
360 * gfs2_meta_inval - Invalidate all buffers associated with a glock
361 * @gl: the glock
362 *
363 */
364
365void gfs2_meta_inval(struct gfs2_glock *gl)
366{
367 struct gfs2_sbd *sdp = gl->gl_sbd;
368 struct inode *aspace = gl->gl_aspace;
369 struct address_space *mapping = gl->gl_aspace->i_mapping;
370
371 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
372
373 atomic_inc(&aspace->i_writecount);
374 truncate_inode_pages(mapping, 0);
375 atomic_dec(&aspace->i_writecount);
376
377 gfs2_assert_withdraw(sdp, !mapping->nrpages);
378}
379
380/**
381 * gfs2_meta_sync - Sync all buffers associated with a glock
382 * @gl: The glock
383 * @flags: DIO_START | DIO_WAIT
384 *
385 */
386
387void gfs2_meta_sync(struct gfs2_glock *gl, int flags)
388{
389 struct address_space *mapping = gl->gl_aspace->i_mapping;
390 int error = 0;
391
392 if (flags & DIO_START)
393 filemap_fdatawrite(mapping);
394 if (!error && (flags & DIO_WAIT))
395 error = filemap_fdatawait(mapping);
396
397 if (error)
398 gfs2_io_error(gl->gl_sbd);
399}
400
401/**
402 * getbuf - Get a buffer with a given address space
403 * @sdp: the filesystem
404 * @aspace: the address space
405 * @blkno: the block number (filesystem scope)
406 * @create: 1 if the buffer should be created
407 *
408 * Returns: the buffer
409 */
410
411static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
412 uint64_t blkno, int create)
413{
414 struct page *page;
415 struct buffer_head *bh;
416 unsigned int shift;
417 unsigned long index;
418 unsigned int bufnum;
419
420 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
421 index = blkno >> shift; /* convert block to page */
422 bufnum = blkno - (index << shift); /* block buf index within page */
423
424 if (create) {
425 for (;;) {
426 page = grab_cache_page(aspace->i_mapping, index);
427 if (page)
428 break;
429 yield();
430 }
431 } else {
432 page = find_lock_page(aspace->i_mapping, index);
433 if (!page)
434 return NULL;
435 }
436
437 if (!page_has_buffers(page))
438 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
439
440 /* Locate header for our buffer within our page */
441 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
442 /* Do nothing */;
443 get_bh(bh);
444
445 if (!buffer_mapped(bh))
446 map_bh(bh, sdp->sd_vfs, blkno);
447
448 unlock_page(page);
449 mark_page_accessed(page);
450 page_cache_release(page);
451
452 return bh;
453}
454
455static void meta_prep_new(struct buffer_head *bh)
456{
457 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
458
459 lock_buffer(bh);
460 clear_buffer_dirty(bh);
461 set_buffer_uptodate(bh);
462 unlock_buffer(bh);
463
464 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
465}
466
467/**
468 * gfs2_meta_new - Get a block
469 * @gl: The glock associated with this block
470 * @blkno: The block number
471 *
472 * Returns: The buffer
473 */
474
475struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno)
476{
477 struct buffer_head *bh;
478 bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
479 meta_prep_new(bh);
480 return bh;
481}
482
483/**
484 * gfs2_meta_read - Read a block from disk
485 * @gl: The glock covering the block
486 * @blkno: The block number
487 * @flags: flags to gfs2_dreread()
488 * @bhp: the place where the buffer is returned (NULL on failure)
489 *
490 * Returns: errno
491 */
492
493int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags,
494 struct buffer_head **bhp)
495{
496 int error;
497
498 *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
499 error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags);
500 if (error)
501 brelse(*bhp);
502
503 return error;
504}
505
506/**
507 * gfs2_meta_reread - Reread a block from disk
508 * @sdp: the filesystem
509 * @bh: The block to read
510 * @flags: Flags that control the read
511 *
512 * Returns: errno
513 */
514
515int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags)
516{
517 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
518 return -EIO;
519
520 if (flags & DIO_FORCE)
521 clear_buffer_uptodate(bh);
522
523 if ((flags & DIO_START) && !buffer_uptodate(bh))
524 ll_rw_block(READ, 1, &bh);
525
526 if (flags & DIO_WAIT) {
527 wait_on_buffer(bh);
528
529 if (!buffer_uptodate(bh)) {
530 struct gfs2_trans *tr = current->journal_info;
531 if (tr && tr->tr_touched)
532 gfs2_io_error_bh(sdp, bh);
533 return -EIO;
534 }
535 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
536 return -EIO;
537 }
538
539 return 0;
540}
541
542/**
543 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
544 * @gl: the glock the buffer belongs to
545 * @bh: The buffer to be attached to
546 * @meta: Flag to indicate whether its metadata or not
547 */
548
549void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
550 int meta)
551{
552 struct gfs2_bufdata *bd;
553
554 if (meta)
555 lock_page(bh->b_page);
556
557 if (bh->b_private) {
558 if (meta)
559 unlock_page(bh->b_page);
560 return;
561 }
562
563 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
564 memset(bd, 0, sizeof(struct gfs2_bufdata));
565
566 bd->bd_bh = bh;
567 bd->bd_gl = gl;
568
569 INIT_LIST_HEAD(&bd->bd_list_tr);
570 if (meta) {
571 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
572 } else {
573 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
574 get_bh(bh);
575 }
576 bh->b_private = bd;
577
578 if (meta)
579 unlock_page(bh->b_page);
580}
581
582/**
583 * gfs2_pin - Pin a buffer in memory
584 * @sdp: the filesystem the buffer belongs to
585 * @bh: The buffer to be pinned
586 *
587 */
588
589void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
590{
591 struct gfs2_bufdata *bd = bh->b_private;
592
593 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
594
595 if (test_set_buffer_pinned(bh))
596 gfs2_assert_withdraw(sdp, 0);
597
598 wait_on_buffer(bh);
599
600 /* If this buffer is in the AIL and it has already been written
601 to in-place disk block, remove it from the AIL. */
602
603 gfs2_log_lock(sdp);
604 if (bd->bd_ail && !buffer_in_io(bh))
605 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
606 gfs2_log_unlock(sdp);
607
608 clear_buffer_dirty(bh);
609 wait_on_buffer(bh);
610
611 if (!buffer_uptodate(bh))
612 gfs2_io_error_bh(sdp, bh);
613
614 get_bh(bh);
615}
616
617/**
618 * gfs2_unpin - Unpin a buffer
619 * @sdp: the filesystem the buffer belongs to
620 * @bh: The buffer to unpin
621 * @ai:
622 *
623 */
624
625void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
626 struct gfs2_ail *ai)
627{
628 struct gfs2_bufdata *bd = bh->b_private;
629
630 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
631
632 if (!buffer_pinned(bh))
633 gfs2_assert_withdraw(sdp, 0);
634
635 mark_buffer_dirty(bh);
636 clear_buffer_pinned(bh);
637
638 gfs2_log_lock(sdp);
639 if (bd->bd_ail) {
640 list_del(&bd->bd_ail_st_list);
641 brelse(bh);
642 } else {
643 struct gfs2_glock *gl = bd->bd_gl;
644 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
645 atomic_inc(&gl->gl_ail_count);
646 }
647 bd->bd_ail = ai;
648 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
649 gfs2_log_unlock(sdp);
650}
651
652/**
653 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
654 * @ip: the inode who owns the buffers
655 * @bstart: the first buffer in the run
656 * @blen: the number of buffers in the run
657 *
658 */
659
660void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
661{
662 struct gfs2_sbd *sdp = ip->i_sbd;
663 struct inode *aspace = ip->i_gl->gl_aspace;
664 struct buffer_head *bh;
665
666 while (blen) {
667 bh = getbuf(sdp, aspace, bstart, NO_CREATE);
668 if (bh) {
669 struct gfs2_bufdata *bd = bh->b_private;
670
671 if (test_clear_buffer_pinned(bh)) {
672 struct gfs2_trans *tr = current->journal_info;
673 gfs2_log_lock(sdp);
674 list_del_init(&bd->bd_le.le_list);
675 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
676 sdp->sd_log_num_buf--;
677 gfs2_log_unlock(sdp);
678 tr->tr_num_buf_rm++;
679 brelse(bh);
680 }
681 if (bd) {
682 gfs2_log_lock(sdp);
683 if (bd->bd_ail) {
684 uint64_t blkno = bh->b_blocknr;
685 bd->bd_ail = NULL;
686 list_del(&bd->bd_ail_st_list);
687 list_del(&bd->bd_ail_gl_list);
688 atomic_dec(&bd->bd_gl->gl_ail_count);
689 brelse(bh);
690 gfs2_log_unlock(sdp);
691 gfs2_trans_add_revoke(sdp, blkno);
692 } else
693 gfs2_log_unlock(sdp);
694 }
695
696 lock_buffer(bh);
697 clear_buffer_dirty(bh);
698 clear_buffer_uptodate(bh);
699 unlock_buffer(bh);
700
701 brelse(bh);
702 }
703
704 bstart++;
705 blen--;
706 }
707}
708
709/**
710 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
711 * @ip: The GFS2 inode
712 *
713 * This releases buffers that are in the most-recently-used array of
714 * blocks used for indirect block addressing for this inode.
715 */
716
717void gfs2_meta_cache_flush(struct gfs2_inode *ip)
718{
719 struct buffer_head **bh_slot;
720 unsigned int x;
721
722 spin_lock(&ip->i_spin);
723
724 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
725 bh_slot = &ip->i_cache[x];
726 if (!*bh_slot)
727 break;
728 brelse(*bh_slot);
729 *bh_slot = NULL;
730 }
731
732 spin_unlock(&ip->i_spin);
733}
734
735/**
736 * gfs2_meta_indirect_buffer - Get a metadata buffer
737 * @ip: The GFS2 inode
738 * @height: The level of this buf in the metadata (indir addr) tree (if any)
739 * @num: The block number (device relative) of the buffer
740 * @new: Non-zero if we may create a new buffer
741 * @bhp: the buffer is returned here
742 *
743 * Try to use the gfs2_inode's MRU metadata tree cache.
744 *
745 * Returns: errno
746 */
747
748int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
749 int new, struct buffer_head **bhp)
750{
751 struct buffer_head *bh, **bh_slot = ip->i_cache + height;
752 int error;
753
754 spin_lock(&ip->i_spin);
755 bh = *bh_slot;
756 if (bh) {
757 if (bh->b_blocknr == num)
758 get_bh(bh);
759 else
760 bh = NULL;
761 }
762 spin_unlock(&ip->i_spin);
763
764 if (bh) {
765 if (new)
766 meta_prep_new(bh);
767 else {
768 error = gfs2_meta_reread(ip->i_sbd, bh,
769 DIO_START | DIO_WAIT);
770 if (error) {
771 brelse(bh);
772 return error;
773 }
774 }
775 } else {
776 if (new)
777 bh = gfs2_meta_new(ip->i_gl, num);
778 else {
779 error = gfs2_meta_read(ip->i_gl, num,
780 DIO_START | DIO_WAIT, &bh);
781 if (error)
782 return error;
783 }
784
785 spin_lock(&ip->i_spin);
786 if (*bh_slot != bh) {
787 brelse(*bh_slot);
788 *bh_slot = bh;
789 get_bh(bh);
790 }
791 spin_unlock(&ip->i_spin);
792 }
793
794 if (new) {
795 if (gfs2_assert_warn(ip->i_sbd, height)) {
796 brelse(bh);
797 return -EIO;
798 }
799 gfs2_trans_add_bh(ip->i_gl, bh, 1);
800 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
801 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
802
803 } else if (gfs2_metatype_check(ip->i_sbd, bh,
804 (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) {
805 brelse(bh);
806 return -EIO;
807 }
808
809 *bhp = bh;
810
811 return 0;
812}
813
814/**
815 * gfs2_meta_ra - start readahead on an extent of a file
816 * @gl: the glock the blocks belong to
817 * @dblock: the starting disk block
818 * @extlen: the number of blocks in the extent
819 *
820 */
821
822void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen)
823{
824 struct gfs2_sbd *sdp = gl->gl_sbd;
825 struct inode *aspace = gl->gl_aspace;
826 struct buffer_head *first_bh, *bh;
827 uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
828 sdp->sd_sb.sb_bsize_shift;
829 int error;
830
831 if (!extlen || !max_ra)
832 return;
833 if (extlen > max_ra)
834 extlen = max_ra;
835
836 first_bh = getbuf(sdp, aspace, dblock, CREATE);
837
838 if (buffer_uptodate(first_bh))
839 goto out;
840 if (!buffer_locked(first_bh)) {
841 error = gfs2_meta_reread(sdp, first_bh, DIO_START);
842 if (error)
843 goto out;
844 }
845
846 dblock++;
847 extlen--;
848
849 while (extlen) {
850 bh = getbuf(sdp, aspace, dblock, CREATE);
851
852 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
853 error = gfs2_meta_reread(sdp, bh, DIO_START);
854 brelse(bh);
855 if (error)
856 goto out;
857 } else
858 brelse(bh);
859
860 dblock++;
861 extlen--;
862
863 if (buffer_uptodate(first_bh))
864 break;
865 }
866
867 out:
868 brelse(first_bh);
869}
870
871/**
872 * gfs2_meta_syncfs - sync all the buffers in a filesystem
873 * @sdp: the filesystem
874 *
875 */
876
877void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
878{
879 gfs2_log_flush(sdp);
880 for (;;) {
881 gfs2_ail1_start(sdp, DIO_ALL);
882 if (gfs2_ail1_empty(sdp, DIO_ALL))
883 break;
884 msleep(100);
885 }
886}
887
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
new file mode 100644
index 000000000000..d72144d5d727
--- /dev/null
+++ b/fs/gfs2/meta_io.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIO_DOT_H__
11#define __DIO_DOT_H__
12
/* Zero the entire contents of a buffer. */
static inline void gfs2_buffer_clear(struct buffer_head *bh)
{
	memset(bh->b_data, 0, bh->b_size);
}
/* Zero everything in the buffer after the first @head bytes
   (typically used to clear data past an on-disk header). */
static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
{
	memset(bh->b_data + head, 0, bh->b_size - head);
}
23static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset,
24 int amount, int journaled)
25{
26 int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0;
27 int z_len1 = offset - z_off1;
28 int z_off2 = offset + amount;
29 int z_len2 = (bh)->b_size - z_off2;
30
31 if (z_len1)
32 memset(bh->b_data + z_off1, 0, z_len1);
33
34 if (z_len2)
35 memset(bh->b_data + z_off2, 0, z_len2);
36}
37
/* Copy the tail of @from_bh (everything past @from_head) into @to_bh
   starting at @to_head, then zero the slack left at the end of @to_bh.
   NOTE(review): assumes to_head <= from_head and that the source tail
   fits in the destination — confirm against callers. */
static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
					 int to_head,
					 struct buffer_head *from_bh,
					 int from_head)
{
	memcpy(to_bh->b_data + to_head,
	       from_bh->b_data + from_head,
	       from_bh->b_size - from_head);
	/* The destination tail is shorter by (from_head - to_head) bytes;
	   zero that remainder. */
	memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
	       0,
	       from_head - to_head);
}
51struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
52void gfs2_aspace_put(struct inode *aspace);
53
54void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
55int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags);
56void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
57void gfs2_ail_empty_gl(struct gfs2_glock *gl);
58
59void gfs2_meta_inval(struct gfs2_glock *gl);
60void gfs2_meta_sync(struct gfs2_glock *gl, int flags);
61
62struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno);
63int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno,
64 int flags, struct buffer_head **bhp);
65int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags);
66
67void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
68 int meta);
69void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
70void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
71 struct gfs2_ail *ai);
72
73void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
74
75void gfs2_meta_cache_flush(struct gfs2_inode *ip);
76int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
77 int new, struct buffer_head **bhp);
78
/* Read the inode's own dinode block (height 0, never creating it).
   Returns: errno from gfs2_meta_indirect_buffer. */
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
					 struct buffer_head **bhp)
{
	return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
}
85void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen);
86void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
87
88#endif /* __DIO_DOT_H__ */
89
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
new file mode 100644
index 000000000000..e90ea7d32f9e
--- /dev/null
+++ b/fs/gfs2/mount.c
@@ -0,0 +1,215 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "mount.h"
22#include "sys.h"
23#include "util.h"
24
25/**
26 * gfs2_mount_args - Parse mount options
27 * @sdp:
28 * @data:
29 *
30 * Return: errno
31 */
32
33int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
34{
35 struct gfs2_args *args = &sdp->sd_args;
36 char *data = data_arg;
37 char *options, *o, *v;
38 int error = 0;
39
40 if (!remount) {
41 /* If someone preloaded options, use those instead */
42 spin_lock(&gfs2_sys_margs_lock);
43 if (gfs2_sys_margs) {
44 data = gfs2_sys_margs;
45 gfs2_sys_margs = NULL;
46 }
47 spin_unlock(&gfs2_sys_margs_lock);
48
49 /* Set some defaults */
50 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
51 args->ar_quota = GFS2_QUOTA_DEFAULT;
52 args->ar_data = GFS2_DATA_DEFAULT;
53 }
54
55 /* Split the options into tokens with the "," character and
56 process them */
57
58 for (options = data; (o = strsep(&options, ",")); ) {
59 if (!*o)
60 continue;
61
62 v = strchr(o, '=');
63 if (v)
64 *v++ = 0;
65
66 if (!strcmp(o, "lockproto")) {
67 if (!v)
68 goto need_value;
69 if (remount && strcmp(v, args->ar_lockproto))
70 goto cant_remount;
71 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
72 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
73 }
74
75 else if (!strcmp(o, "locktable")) {
76 if (!v)
77 goto need_value;
78 if (remount && strcmp(v, args->ar_locktable))
79 goto cant_remount;
80 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
81 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
82 }
83
84 else if (!strcmp(o, "hostdata")) {
85 if (!v)
86 goto need_value;
87 if (remount && strcmp(v, args->ar_hostdata))
88 goto cant_remount;
89 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
90 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
91 }
92
93 else if (!strcmp(o, "spectator")) {
94 if (remount && !args->ar_spectator)
95 goto cant_remount;
96 args->ar_spectator = 1;
97 sdp->sd_vfs->s_flags |= MS_RDONLY;
98 }
99
100 else if (!strcmp(o, "ignore_local_fs")) {
101 if (remount && !args->ar_ignore_local_fs)
102 goto cant_remount;
103 args->ar_ignore_local_fs = 1;
104 }
105
106 else if (!strcmp(o, "localflocks")) {
107 if (remount && !args->ar_localflocks)
108 goto cant_remount;
109 args->ar_localflocks = 1;
110 }
111
112 else if (!strcmp(o, "localcaching")) {
113 if (remount && !args->ar_localcaching)
114 goto cant_remount;
115 args->ar_localcaching = 1;
116 }
117
118 else if (!strcmp(o, "debug"))
119 args->ar_debug = 1;
120
121 else if (!strcmp(o, "nodebug"))
122 args->ar_debug = 0;
123
124 else if (!strcmp(o, "upgrade")) {
125 if (remount && !args->ar_upgrade)
126 goto cant_remount;
127 args->ar_upgrade = 1;
128 }
129
130 else if (!strcmp(o, "num_glockd")) {
131 unsigned int x;
132 if (!v)
133 goto need_value;
134 sscanf(v, "%u", &x);
135 if (remount && x != args->ar_num_glockd)
136 goto cant_remount;
137 if (!x || x > GFS2_GLOCKD_MAX) {
138 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
139 GFS2_GLOCKD_MAX, x);
140 error = -EINVAL;
141 break;
142 }
143 args->ar_num_glockd = x;
144 }
145
146 else if (!strcmp(o, "acl")) {
147 args->ar_posix_acl = 1;
148 sdp->sd_vfs->s_flags |= MS_POSIXACL;
149 }
150
151 else if (!strcmp(o, "noacl")) {
152 args->ar_posix_acl = 0;
153 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
154 }
155
156 else if (!strcmp(o, "quota")) {
157 if (!v)
158 goto need_value;
159 if (!strcmp(v, "off"))
160 args->ar_quota = GFS2_QUOTA_OFF;
161 else if (!strcmp(v, "account"))
162 args->ar_quota = GFS2_QUOTA_ACCOUNT;
163 else if (!strcmp(v, "on"))
164 args->ar_quota = GFS2_QUOTA_ON;
165 else {
166 fs_info(sdp, "invalid value for quota\n");
167 error = -EINVAL;
168 break;
169 }
170 }
171
172 else if (!strcmp(o, "suiddir"))
173 args->ar_suiddir = 1;
174
175 else if (!strcmp(o, "nosuiddir"))
176 args->ar_suiddir = 0;
177
178 else if (!strcmp(o, "data")) {
179 if (!v)
180 goto need_value;
181 if (!strcmp(v, "writeback"))
182 args->ar_data = GFS2_DATA_WRITEBACK;
183 else if (!strcmp(v, "ordered"))
184 args->ar_data = GFS2_DATA_ORDERED;
185 else {
186 fs_info(sdp, "invalid value for data\n");
187 error = -EINVAL;
188 break;
189 }
190 }
191
192 else {
193 fs_info(sdp, "unknown option: %s\n", o);
194 error = -EINVAL;
195 break;
196 }
197 }
198
199 if (error)
200 fs_info(sdp, "invalid mount option(s)\n");
201
202 if (data != data_arg)
203 kfree(data);
204
205 return error;
206
207 need_value:
208 fs_info(sdp, "need value for option %s\n", o);
209 return -EINVAL;
210
211 cant_remount:
212 fs_info(sdp, "can't remount with option %s\n", o);
213 return -EINVAL;
214}
215
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
new file mode 100644
index 000000000000..bc8331cd7b2c
--- /dev/null
+++ b/fs/gfs2/mount.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
14
15#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
new file mode 100644
index 000000000000..acfc944ce13e
--- /dev/null
+++ b/fs/gfs2/ondisk.c
@@ -0,0 +1,517 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include <linux/gfs2_ondisk.h>
19
/* pv: print one struct member with the given printf format.
   pa: hex-dump an array member via print_array().
   Note: both take the struct *pointer* as their first argument. */
#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
				       struct->member);
#define pa(struct, member, count) print_array(#member, struct->member, count);
23
24/**
25 * print_array - Print out an array of bytes
26 * @title: what to print before the array
27 * @buf: the array
28 * @count: the number of bytes
29 *
30 */
31
32static void print_array(char *title, char *buf, int count)
33{
34 int x;
35
36 printk(KERN_INFO " %s =\n" KERN_INFO, title);
37 for (x = 0; x < count; x++) {
38 printk("%.2X ", (unsigned char)buf[x]);
39 if (x % 16 == 15)
40 printk("\n" KERN_INFO);
41 }
42 if (x % 16)
43 printk("\n");
44}
45
46/*
47 * gfs2_xxx_in - read in an xxx struct
48 * first arg: the cpu-order structure
49 * buf: the disk-order buffer
50 *
51 * gfs2_xxx_out - write out an xxx struct
52 * first arg: the cpu-order structure
53 * buf: the disk-order buffer
54 *
55 * gfs2_xxx_print - print out an xxx struct
56 * first arg: the cpu-order structure
57 */
58
59void gfs2_inum_in(struct gfs2_inum *no, char *buf)
60{
61 struct gfs2_inum *str = (struct gfs2_inum *)buf;
62
63 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
64 no->no_addr = be64_to_cpu(str->no_addr);
65}
66
67void gfs2_inum_out(const struct gfs2_inum *no, char *buf)
68{
69 struct gfs2_inum *str = (struct gfs2_inum *)buf;
70
71 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
72 str->no_addr = cpu_to_be64(no->no_addr);
73}
74
/* Dump a gfs2_inum's fields to the kernel log. */
void gfs2_inum_print(struct gfs2_inum *no)
{
	pv(no, no_formal_ino, "%llu");
	pv(no, no_addr, "%llu");
}
80
81static void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf)
82{
83 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
84
85 mh->mh_magic = be32_to_cpu(str->mh_magic);
86 mh->mh_type = be32_to_cpu(str->mh_type);
87 mh->mh_format = be32_to_cpu(str->mh_format);
88}
89
90static void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf)
91{
92 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
93
94 str->mh_magic = cpu_to_be32(mh->mh_magic);
95 str->mh_type = cpu_to_be32(mh->mh_type);
96 str->mh_format = cpu_to_be32(mh->mh_format);
97}
98
/* Dump a metadata header's fields to the kernel log. */
void gfs2_meta_header_print(struct gfs2_meta_header *mh)
{
	pv(mh, mh_magic, "0x%.8X");
	pv(mh, mh_type, "%u");
	pv(mh, mh_format, "%u");
}
105
106void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
107{
108 struct gfs2_sb *str = (struct gfs2_sb *)buf;
109
110 gfs2_meta_header_in(&sb->sb_header, buf);
111
112 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
113 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
114 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
115 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
116
117 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
118 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
119
120 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
121 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
122}
123
/* Dump a superblock's fields to the kernel log.
   Note: sb_root_dir is not printed here — only sb_master_dir. */
void gfs2_sb_print(struct gfs2_sb *sb)
{
	gfs2_meta_header_print(&sb->sb_header);

	pv(sb, sb_fs_format, "%u");
	pv(sb, sb_multihost_format, "%u");

	pv(sb, sb_bsize, "%u");
	pv(sb, sb_bsize_shift, "%u");

	gfs2_inum_print(&sb->sb_master_dir);

	pv(sb, sb_lockproto, "%s");
	pv(sb, sb_locktable, "%s");
}
139
140void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf)
141{
142 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
143
144 ri->ri_addr = be64_to_cpu(str->ri_addr);
145 ri->ri_length = be32_to_cpu(str->ri_length);
146 ri->ri_data0 = be64_to_cpu(str->ri_data0);
147 ri->ri_data = be32_to_cpu(str->ri_data);
148 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
149
150}
151
152void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf)
153{
154 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
155
156 str->ri_addr = cpu_to_be64(ri->ri_addr);
157 str->ri_length = cpu_to_be32(ri->ri_length);
158 str->__pad = 0;
159
160 str->ri_data0 = cpu_to_be64(ri->ri_data0);
161 str->ri_data = cpu_to_be32(ri->ri_data);
162 str->ri_bitbytes = cpu_to_be32(ri->ri_bitbytes);
163 memset(str->ri_reserved, 0, sizeof(str->ri_reserved));
164}
165
/* Dump a resource-index entry's fields to the kernel log. */
void gfs2_rindex_print(struct gfs2_rindex *ri)
{
	pv(ri, ri_addr, "%llu");
	pv(ri, ri_length, "%u");

	pv(ri, ri_data0, "%llu");
	pv(ri, ri_data, "%u");

	pv(ri, ri_bitbytes, "%u");
}
176
177void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
178{
179 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
180
181 gfs2_meta_header_in(&rg->rg_header, buf);
182 rg->rg_flags = be32_to_cpu(str->rg_flags);
183 rg->rg_free = be32_to_cpu(str->rg_free);
184 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
185}
186
187void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf)
188{
189 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
190
191 gfs2_meta_header_out(&rg->rg_header, buf);
192 str->rg_flags = cpu_to_be32(rg->rg_flags);
193 str->rg_free = cpu_to_be32(rg->rg_free);
194 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
195
196 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
197}
198
/* Dump a resource-group header's fields to the kernel log. */
void gfs2_rgrp_print(struct gfs2_rgrp *rg)
{
	gfs2_meta_header_print(&rg->rg_header);
	pv(rg, rg_flags, "%u");
	pv(rg, rg_free, "%u");
	pv(rg, rg_dinodes, "%u");

	pa(rg, rg_reserved, 36);
}
208
209void gfs2_quota_in(struct gfs2_quota *qu, char *buf)
210{
211 struct gfs2_quota *str = (struct gfs2_quota *)buf;
212
213 qu->qu_limit = be64_to_cpu(str->qu_limit);
214 qu->qu_warn = be64_to_cpu(str->qu_warn);
215 qu->qu_value = be64_to_cpu(str->qu_value);
216}
217
218void gfs2_quota_out(struct gfs2_quota *qu, char *buf)
219{
220 struct gfs2_quota *str = (struct gfs2_quota *)buf;
221
222 str->qu_limit = cpu_to_be64(qu->qu_limit);
223 str->qu_warn = cpu_to_be64(qu->qu_warn);
224 str->qu_value = cpu_to_be64(qu->qu_value);
225}
226
/* Dump a quota record's fields to the kernel log. */
void gfs2_quota_print(struct gfs2_quota *qu)
{
	pv(qu, qu_limit, "%llu");
	pv(qu, qu_warn, "%llu");
	pv(qu, qu_value, "%lld");
}
233
/* Convert an on-disk dinode into CPU byte order, field by field. */
void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
{
	struct gfs2_dinode *str = (struct gfs2_dinode *)buf;

	gfs2_meta_header_in(&di->di_header, buf);
	gfs2_inum_in(&di->di_num, (char *)&str->di_num);

	/* Basic stat-like attributes. */
	di->di_mode = be32_to_cpu(str->di_mode);
	di->di_uid = be32_to_cpu(str->di_uid);
	di->di_gid = be32_to_cpu(str->di_gid);
	di->di_nlink = be32_to_cpu(str->di_nlink);
	di->di_size = be64_to_cpu(str->di_size);
	di->di_blocks = be64_to_cpu(str->di_blocks);
	di->di_atime = be64_to_cpu(str->di_atime);
	di->di_mtime = be64_to_cpu(str->di_mtime);
	di->di_ctime = be64_to_cpu(str->di_ctime);
	di->di_major = be32_to_cpu(str->di_major);
	di->di_minor = be32_to_cpu(str->di_minor);

	/* Block-allocation goals. */
	di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
	di->di_goal_data = be64_to_cpu(str->di_goal_data);

	di->di_flags = be32_to_cpu(str->di_flags);
	di->di_payload_format = be32_to_cpu(str->di_payload_format);
	di->di_height = be32_to_cpu(str->di_height);

	/* Directory-specific fields. */
	di->di_depth = be16_to_cpu(str->di_depth);
	di->di_entries = be32_to_cpu(str->di_entries);

	di->di_eattr = be64_to_cpu(str->di_eattr);

}
266
/* Convert a CPU-order dinode into on-disk (big-endian) form. */
void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
{
	struct gfs2_dinode *str = (struct gfs2_dinode *)buf;

	gfs2_meta_header_out(&di->di_header, buf);
	gfs2_inum_out(&di->di_num, (char *)&str->di_num);

	/* Basic stat-like attributes. */
	str->di_mode = cpu_to_be32(di->di_mode);
	str->di_uid = cpu_to_be32(di->di_uid);
	str->di_gid = cpu_to_be32(di->di_gid);
	str->di_nlink = cpu_to_be32(di->di_nlink);
	str->di_size = cpu_to_be64(di->di_size);
	str->di_blocks = cpu_to_be64(di->di_blocks);
	str->di_atime = cpu_to_be64(di->di_atime);
	str->di_mtime = cpu_to_be64(di->di_mtime);
	str->di_ctime = cpu_to_be64(di->di_ctime);
	str->di_major = cpu_to_be32(di->di_major);
	str->di_minor = cpu_to_be32(di->di_minor);

	/* Block-allocation goals. */
	str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
	str->di_goal_data = cpu_to_be64(di->di_goal_data);

	str->di_flags = cpu_to_be32(di->di_flags);
	str->di_payload_format = cpu_to_be32(di->di_payload_format);
	str->di_height = cpu_to_be16(di->di_height);

	/* Directory-specific fields. */
	str->di_depth = cpu_to_be16(di->di_depth);
	str->di_entries = cpu_to_be32(di->di_entries);

	str->di_eattr = cpu_to_be64(di->di_eattr);

}
299
/* Dump a dinode's fields to the kernel log. */
void gfs2_dinode_print(struct gfs2_dinode *di)
{
	gfs2_meta_header_print(&di->di_header);
	gfs2_inum_print(&di->di_num);

	pv(di, di_mode, "0%o");
	pv(di, di_uid, "%u");
	pv(di, di_gid, "%u");
	pv(di, di_nlink, "%u");
	pv(di, di_size, "%llu");
	pv(di, di_blocks, "%llu");
	pv(di, di_atime, "%lld");
	pv(di, di_mtime, "%lld");
	pv(di, di_ctime, "%lld");
	pv(di, di_major, "%u");
	pv(di, di_minor, "%u");

	pv(di, di_goal_meta, "%llu");
	pv(di, di_goal_data, "%llu");

	pv(di, di_flags, "0x%.8X");
	pv(di, di_payload_format, "%u");
	pv(di, di_height, "%u");

	pv(di, di_depth, "%u");
	pv(di, di_entries, "%u");

	pv(di, di_eattr, "%llu");
}
329
/* Dump a directory entry plus its (not NUL-terminated) name to the log. */
void gfs2_dirent_print(struct gfs2_dirent *de, char *name)
{
	char buf[GFS2_FNAMESIZE + 1];

	gfs2_inum_print(&de->de_inum);
	pv(de, de_hash, "0x%.8X");
	pv(de, de_rec_len, "%u");
	pv(de, de_name_len, "%u");
	pv(de, de_type, "%u");

	/* Copy the name into a zeroed buffer so it is NUL-terminated. */
	memset(buf, 0, GFS2_FNAMESIZE + 1);
	memcpy(buf, name, de->de_name_len);
	printk(KERN_INFO "  name = %s\n", buf);
}
344
/* Dump a directory leaf block's header fields to the kernel log. */
void gfs2_leaf_print(struct gfs2_leaf *lf)
{
	gfs2_meta_header_print(&lf->lf_header);
	pv(lf, lf_depth, "%u");
	pv(lf, lf_entries, "%u");
	pv(lf, lf_dirent_format, "%u");
	pv(lf, lf_next, "%llu");

	pa(lf, lf_reserved, 32);
}
355
356void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf)
357{
358 struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
359
360 ea->ea_rec_len = be32_to_cpu(str->ea_rec_len);
361 ea->ea_data_len = be32_to_cpu(str->ea_data_len);
362 ea->ea_name_len = str->ea_name_len;
363 ea->ea_type = str->ea_type;
364 ea->ea_flags = str->ea_flags;
365 ea->ea_num_ptrs = str->ea_num_ptrs;
366}
367
368void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf)
369{
370 struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
371
372 str->ea_rec_len = cpu_to_be32(ea->ea_rec_len);
373 str->ea_data_len = cpu_to_be32(ea->ea_data_len);
374 str->ea_name_len = ea->ea_name_len;
375 str->ea_type = ea->ea_type;
376 str->ea_flags = ea->ea_flags;
377 str->ea_num_ptrs = ea->ea_num_ptrs;
378 str->__pad = 0;
379}
380
381void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name)
382{
383 char buf[GFS2_EA_MAX_NAME_LEN + 1];
384
385 pv(ea, ea_rec_len, "%u");
386 pv(ea, ea_data_len, "%u");
387 pv(ea, ea_name_len, "%u");
388 pv(ea, ea_type, "%u");
389 pv(ea, ea_flags, "%u");
390 pv(ea, ea_num_ptrs, "%u");
391
392 memset(buf, 0, GFS2_EA_MAX_NAME_LEN + 1);
393 memcpy(buf, name, ea->ea_name_len);
394 printk(KERN_INFO " name = %s\n", buf);
395}
396
397void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
398{
399 struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
400
401 gfs2_meta_header_in(&lh->lh_header, buf);
402 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
403 lh->lh_flags = be32_to_cpu(str->lh_flags);
404 lh->lh_tail = be32_to_cpu(str->lh_tail);
405 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
406 lh->lh_hash = be32_to_cpu(str->lh_hash);
407}
408
409void gfs2_log_header_print(struct gfs2_log_header *lh)
410{
411 gfs2_meta_header_print(&lh->lh_header);
412 pv(lh, lh_sequence, "%llu");
413 pv(lh, lh_flags, "0x%.8X");
414 pv(lh, lh_tail, "%u");
415 pv(lh, lh_blkno, "%u");
416 pv(lh, lh_hash, "0x%.8X");
417}
418
419void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld)
420{
421 gfs2_meta_header_print(&ld->ld_header);
422 pv(ld, ld_type, "%u");
423 pv(ld, ld_length, "%u");
424 pv(ld, ld_data1, "%u");
425 pv(ld, ld_data2, "%u");
426
427 pa(ld, ld_reserved, 32);
428}
429
430void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf)
431{
432 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
433
434 ir->ir_start = be64_to_cpu(str->ir_start);
435 ir->ir_length = be64_to_cpu(str->ir_length);
436}
437
438void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf)
439{
440 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
441
442 str->ir_start = cpu_to_be64(ir->ir_start);
443 str->ir_length = cpu_to_be64(ir->ir_length);
444}
445
446void gfs2_inum_range_print(struct gfs2_inum_range *ir)
447{
448 pv(ir, ir_start, "%llu");
449 pv(ir, ir_length, "%llu");
450}
451
452void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf)
453{
454 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
455
456 sc->sc_total = be64_to_cpu(str->sc_total);
457 sc->sc_free = be64_to_cpu(str->sc_free);
458 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
459}
460
461void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf)
462{
463 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
464
465 str->sc_total = cpu_to_be64(sc->sc_total);
466 str->sc_free = cpu_to_be64(sc->sc_free);
467 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
468}
469
470void gfs2_statfs_change_print(struct gfs2_statfs_change *sc)
471{
472 pv(sc, sc_total, "%lld");
473 pv(sc, sc_free, "%lld");
474 pv(sc, sc_dinodes, "%lld");
475}
476
477void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf)
478{
479 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
480
481 gfs2_inum_in(&ut->ut_inum, buf);
482 ut->ut_flags = be32_to_cpu(str->ut_flags);
483}
484
485void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf)
486{
487 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
488
489 gfs2_inum_out(&ut->ut_inum, buf);
490 str->ut_flags = cpu_to_be32(ut->ut_flags);
491 str->__pad = 0;
492}
493
494void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut)
495{
496 gfs2_inum_print(&ut->ut_inum);
497 pv(ut, ut_flags, "%u");
498}
499
500void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf)
501{
502 struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
503
504 qc->qc_change = be64_to_cpu(str->qc_change);
505 qc->qc_flags = be32_to_cpu(str->qc_flags);
506 qc->qc_id = be32_to_cpu(str->qc_id);
507}
508
509void gfs2_quota_change_print(struct gfs2_quota_change *qc)
510{
511 pv(qc, qc_change, "%lld");
512 pv(qc, qc_flags, "0x%.8X");
513 pv(qc, qc_id, "%u");
514}
515
516
517
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
new file mode 100644
index 000000000000..01aa4a9b48c3
--- /dev/null
+++ b/fs/gfs2/ops_address.c
@@ -0,0 +1,636 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/mpage.h>
17#include <linux/fs.h>
18#include <linux/gfs2_ondisk.h>
19#include <asm/semaphore.h>
20
21#include "gfs2.h"
22#include "lm_interface.h"
23#include "incore.h"
24#include "bmap.h"
25#include "glock.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "ops_address.h"
30#include "page.h"
31#include "quota.h"
32#include "trans.h"
33#include "rgrp.h"
34#include "ops_file.h"
35#include "util.h"
36
37/**
38 * gfs2_get_block - Fills in a buffer head with details about a block
39 * @inode: The inode
40 * @lblock: The block number to look up
41 * @bh_result: The buffer head to return the result in
42 * @create: Non-zero if we may add block to the file
43 *
44 * Returns: errno
45 */
46
47int gfs2_get_block(struct inode *inode, sector_t lblock,
48 struct buffer_head *bh_result, int create)
49{
50 struct gfs2_inode *ip = inode->u.generic_ip;
51 int new = create;
52 uint64_t dblock;
53 int error;
54
55 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
56 if (error)
57 return error;
58
59 if (!dblock)
60 return 0;
61
62 map_bh(bh_result, inode->i_sb, dblock);
63 if (new)
64 set_buffer_new(bh_result);
65
66 return 0;
67}
68
69/**
70 * get_block_noalloc - Fills in a buffer head with details about a block
71 * @inode: The inode
72 * @lblock: The block number to look up
73 * @bh_result: The buffer head to return the result in
74 * @create: Non-zero if we may add block to the file
75 *
76 * Returns: errno
77 */
78
79static int get_block_noalloc(struct inode *inode, sector_t lblock,
80 struct buffer_head *bh_result, int create)
81{
82 struct gfs2_inode *ip = inode->u.generic_ip;
83 int new = 0;
84 uint64_t dblock;
85 int error;
86
87 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
88 if (error)
89 return error;
90
91 if (dblock)
92 map_bh(bh_result, inode->i_sb, dblock);
93 else if (gfs2_assert_withdraw(ip->i_sbd, !create))
94 error = -EIO;
95
96 return error;
97}
98
99static int get_blocks(struct inode *inode, sector_t lblock,
100 unsigned long max_blocks, struct buffer_head *bh_result,
101 int create)
102{
103 struct gfs2_inode *ip = inode->u.generic_ip;
104 int new = create;
105 uint64_t dblock;
106 uint32_t extlen;
107 int error;
108
109 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
110 if (error)
111 return error;
112
113 if (!dblock)
114 return 0;
115
116 map_bh(bh_result, inode->i_sb, dblock);
117 if (new)
118 set_buffer_new(bh_result);
119
120 if (extlen > max_blocks)
121 extlen = max_blocks;
122 bh_result->b_size = extlen << inode->i_blkbits;
123
124 return 0;
125}
126
127static int get_blocks_noalloc(struct inode *inode, sector_t lblock,
128 unsigned long max_blocks,
129 struct buffer_head *bh_result, int create)
130{
131 struct gfs2_inode *ip = inode->u.generic_ip;
132 int new = 0;
133 uint64_t dblock;
134 uint32_t extlen;
135 int error;
136
137 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
138 if (error)
139 return error;
140
141 if (dblock) {
142 map_bh(bh_result, inode->i_sb, dblock);
143 if (extlen > max_blocks)
144 extlen = max_blocks;
145 bh_result->b_size = extlen << inode->i_blkbits;
146 } else if (gfs2_assert_withdraw(ip->i_sbd, !create))
147 error = -EIO;
148
149 return error;
150}
151
152/**
153 * gfs2_writepage - Write complete page
154 * @page: Page to write
155 *
156 * Returns: errno
157 *
158 * Some of this is copied from block_write_full_page() although we still
159 * call it to do most of the work.
160 */
161
162static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
163{
164 struct inode *inode = page->mapping->host;
165 struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
166 struct gfs2_sbd *sdp = ip->i_sbd;
167 loff_t i_size = i_size_read(inode);
168 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
169 unsigned offset;
170 int error;
171 int done_trans = 0;
172
173 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
174 unlock_page(page);
175 return -EIO;
176 }
177 if (current->journal_info)
178 goto out_ignore;
179
180 /* Is the page fully outside i_size? (truncate in progress) */
181 offset = i_size & (PAGE_CACHE_SIZE-1);
182 if (page->index >= end_index+1 || !offset) {
183 page->mapping->a_ops->invalidatepage(page, 0);
184 unlock_page(page);
185 return 0; /* don't care */
186 }
187
188 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
189 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
190 if (error)
191 goto out_ignore;
192 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
193 done_trans = 1;
194 }
195 error = block_write_full_page(page, get_block_noalloc, wbc);
196 if (done_trans)
197 gfs2_trans_end(sdp);
198 gfs2_meta_cache_flush(ip);
199 return error;
200
201out_ignore:
202 redirty_page_for_writepage(wbc, page);
203 unlock_page(page);
204 return 0;
205}
206
207/**
208 * stuffed_readpage - Fill in a Linux page with stuffed file data
209 * @ip: the inode
210 * @page: the page
211 *
212 * Returns: errno
213 */
214
215static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
216{
217 struct buffer_head *dibh;
218 void *kaddr;
219 int error;
220
221 error = gfs2_meta_inode_buffer(ip, &dibh);
222 if (error)
223 return error;
224
225 kaddr = kmap_atomic(page, KM_USER0);
226 memcpy((char *)kaddr,
227 dibh->b_data + sizeof(struct gfs2_dinode),
228 ip->i_di.di_size);
229 memset((char *)kaddr + ip->i_di.di_size,
230 0,
231 PAGE_CACHE_SIZE - ip->i_di.di_size);
232 kunmap_atomic(page, KM_USER0);
233
234 brelse(dibh);
235
236 SetPageUptodate(page);
237
238 return 0;
239}
240
241static int zero_readpage(struct page *page)
242{
243 void *kaddr;
244
245 kaddr = kmap_atomic(page, KM_USER0);
246 memset(kaddr, 0, PAGE_CACHE_SIZE);
247 kunmap_atomic(page, KM_USER0);
248
249 SetPageUptodate(page);
250 unlock_page(page);
251
252 return 0;
253}
254
255/**
256 * gfs2_readpage - readpage with locking
257 * @file: The file to read a page for. N.B. This may be NULL if we are
258 * reading an internal file.
259 * @page: The page to read
260 *
261 * Returns: errno
262 */
263
264static int gfs2_readpage(struct file *file, struct page *page)
265{
266 struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
267 struct gfs2_sbd *sdp = ip->i_sbd;
268 struct gfs2_holder gh;
269 int error;
270
271 if (file != &gfs2_internal_file_sentinal) {
272 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
273 error = gfs2_glock_nq_m_atime(1, &gh);
274 if (error)
275 goto out_unlock;
276 }
277
278 if (gfs2_is_stuffed(ip)) {
279 if (!page->index) {
280 error = stuffed_readpage(ip, page);
281 unlock_page(page);
282 } else
283 error = zero_readpage(page);
284 } else
285 error = mpage_readpage(page, gfs2_get_block);
286
287 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
288 error = -EIO;
289
290 if (file != &gfs2_internal_file_sentinal) {
291 gfs2_glock_dq_m(1, &gh);
292 gfs2_holder_uninit(&gh);
293 }
294out:
295 return error;
296out_unlock:
297 unlock_page(page);
298 goto out;
299}
300
301/**
302 * gfs2_prepare_write - Prepare to write a page to a file
303 * @file: The file to write to
304 * @page: The page which is to be prepared for writing
305 * @from: From (byte range within page)
306 * @to: To (byte range within page)
307 *
308 * Returns: errno
309 */
310
311static int gfs2_prepare_write(struct file *file, struct page *page,
312 unsigned from, unsigned to)
313{
314 struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
315 struct gfs2_sbd *sdp = ip->i_sbd;
316 unsigned int data_blocks, ind_blocks, rblocks;
317 int alloc_required;
318 int error = 0;
319 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
320 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
321 struct gfs2_alloc *al;
322
323 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
324 error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
325 if (error)
326 goto out_uninit;
327
328 gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
329
330 error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
331 if (error)
332 goto out_unlock;
333
334
335 if (alloc_required) {
336 al = gfs2_alloc_get(ip);
337
338 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
339 if (error)
340 goto out_alloc_put;
341
342 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
343 if (error)
344 goto out_qunlock;
345
346 al->al_requested = data_blocks + ind_blocks;
347 error = gfs2_inplace_reserve(ip);
348 if (error)
349 goto out_qunlock;
350 }
351
352 rblocks = RES_DINODE + ind_blocks;
353 if (gfs2_is_jdata(ip))
354 rblocks += data_blocks ? data_blocks : 1;
355 if (ind_blocks || data_blocks)
356 rblocks += RES_STATFS + RES_QUOTA;
357
358 error = gfs2_trans_begin(sdp, rblocks, 0);
359 if (error)
360 goto out;
361
362 if (gfs2_is_stuffed(ip)) {
363 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
364 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
365 page);
366 if (error == 0)
367 goto prepare_write;
368 } else if (!PageUptodate(page))
369 error = stuffed_readpage(ip, page);
370 goto out;
371 }
372
373prepare_write:
374 error = block_prepare_write(page, from, to, gfs2_get_block);
375
376out:
377 if (error) {
378 gfs2_trans_end(sdp);
379 if (alloc_required) {
380 gfs2_inplace_release(ip);
381out_qunlock:
382 gfs2_quota_unlock(ip);
383out_alloc_put:
384 gfs2_alloc_put(ip);
385 }
386out_unlock:
387 gfs2_glock_dq_m(1, &ip->i_gh);
388out_uninit:
389 gfs2_holder_uninit(&ip->i_gh);
390 }
391
392 return error;
393}
394
395/**
396 * gfs2_commit_write - Commit write to a file
397 * @file: The file to write to
398 * @page: The page containing the data
399 * @from: From (byte range within page)
400 * @to: To (byte range within page)
401 *
402 * Returns: errno
403 */
404
405static int gfs2_commit_write(struct file *file, struct page *page,
406 unsigned from, unsigned to)
407{
408 struct inode *inode = page->mapping->host;
409 struct gfs2_inode *ip = inode->u.generic_ip;
410 struct gfs2_sbd *sdp = ip->i_sbd;
411 int error = -EOPNOTSUPP;
412 struct buffer_head *dibh;
413 struct gfs2_alloc *al = &ip->i_alloc;;
414
415 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
416 goto fail_nounlock;
417
418 error = gfs2_meta_inode_buffer(ip, &dibh);
419 if (error)
420 goto fail_endtrans;
421
422 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
423
424 if (gfs2_is_stuffed(ip)) {
425 uint64_t file_size;
426 void *kaddr;
427
428 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
429
430 kaddr = kmap_atomic(page, KM_USER0);
431 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
432 (char *)kaddr + from, to - from);
433 kunmap_atomic(page, KM_USER0);
434
435 SetPageUptodate(page);
436
437 if (inode->i_size < file_size)
438 i_size_write(inode, file_size);
439 } else {
440 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
441 gfs2_is_jdata(ip))
442 gfs2_page_add_databufs(ip, page, from, to);
443 error = generic_commit_write(file, page, from, to);
444 if (error)
445 goto fail;
446 }
447
448 if (ip->i_di.di_size < inode->i_size)
449 ip->i_di.di_size = inode->i_size;
450
451 gfs2_dinode_out(&ip->i_di, dibh->b_data);
452 brelse(dibh);
453 gfs2_trans_end(sdp);
454 if (al->al_requested) {
455 gfs2_inplace_release(ip);
456 gfs2_quota_unlock(ip);
457 gfs2_alloc_put(ip);
458 }
459 gfs2_glock_dq_m(1, &ip->i_gh);
460 gfs2_holder_uninit(&ip->i_gh);
461 return 0;
462
463fail:
464 brelse(dibh);
465fail_endtrans:
466 gfs2_trans_end(sdp);
467 if (al->al_requested) {
468 gfs2_inplace_release(ip);
469 gfs2_quota_unlock(ip);
470 gfs2_alloc_put(ip);
471 }
472 gfs2_glock_dq_m(1, &ip->i_gh);
473 gfs2_holder_uninit(&ip->i_gh);
474fail_nounlock:
475 ClearPageUptodate(page);
476 return error;
477}
478
479/**
480 * gfs2_bmap - Block map function
481 * @mapping: Address space info
482 * @lblock: The block to map
483 *
484 * Returns: The disk address for the block or 0 on hole or error
485 */
486
487static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
488{
489 struct gfs2_inode *ip = mapping->host->u.generic_ip;
490 struct gfs2_holder i_gh;
491 sector_t dblock = 0;
492 int error;
493
494 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
495 if (error)
496 return 0;
497
498 if (!gfs2_is_stuffed(ip))
499 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);
500
501 gfs2_glock_dq_uninit(&i_gh);
502
503 return dblock;
504}
505
506static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
507{
508 struct gfs2_bufdata *bd;
509
510 gfs2_log_lock(sdp);
511 bd = bh->b_private;
512 if (bd) {
513 bd->bd_bh = NULL;
514 bh->b_private = NULL;
515 gfs2_log_unlock(sdp);
516 brelse(bh);
517 } else
518 gfs2_log_unlock(sdp);
519
520 lock_buffer(bh);
521 clear_buffer_dirty(bh);
522 bh->b_bdev = NULL;
523 clear_buffer_mapped(bh);
524 clear_buffer_req(bh);
525 clear_buffer_new(bh);
526 clear_buffer_delay(bh);
527 unlock_buffer(bh);
528}
529
530static int gfs2_invalidatepage(struct page *page, unsigned long offset)
531{
532 struct gfs2_sbd *sdp = page->mapping->host->i_sb->s_fs_info;
533 struct buffer_head *head, *bh, *next;
534 unsigned int curr_off = 0;
535 int ret = 1;
536
537 BUG_ON(!PageLocked(page));
538 if (!page_has_buffers(page))
539 return 1;
540
541 bh = head = page_buffers(page);
542 do {
543 unsigned int next_off = curr_off + bh->b_size;
544 next = bh->b_this_page;
545
546 if (offset <= curr_off)
547 discard_buffer(sdp, bh);
548
549 curr_off = next_off;
550 bh = next;
551 } while (bh != head);
552
553 if (!offset)
554 ret = try_to_release_page(page, 0);
555
556 return ret;
557}
558
559static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
560 loff_t offset, unsigned long nr_segs)
561{
562 struct file *file = iocb->ki_filp;
563 struct inode *inode = file->f_mapping->host;
564 struct gfs2_inode *ip = inode->u.generic_ip;
565 struct gfs2_holder gh;
566 int rv;
567
568 /*
569 * Shared lock, even though its write, since we do no allocation
570 * on this path. All we need change is atime.
571 */
572 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
573 rv = gfs2_glock_nq_m_atime(1, &gh);
574 if (rv)
575 goto out;
576
577 /*
578 * Should we return an error here? I can't see that O_DIRECT for
579 * a journaled file makes any sense. For now we'll silently fall
580 * back to buffered I/O, likewise we do the same for stuffed
581 * files since they are (a) small and (b) unaligned.
582 */
583 if (gfs2_is_jdata(ip))
584 goto out;
585
586 if (gfs2_is_stuffed(ip))
587 goto out;
588
589 rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
590 iov, offset, nr_segs, get_blocks_noalloc,
591 NULL, DIO_OWN_LOCKING);
592out:
593 gfs2_glock_dq_m(1, &gh);
594 gfs2_holder_uninit(&gh);
595
596 return rv;
597}
598
599/**
600 * gfs2_direct_IO
601 *
602 * This is called with a shared lock already held for the read path.
603 * Currently, no locks are held when the write path is called.
604 */
605static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
606 const struct iovec *iov, loff_t offset,
607 unsigned long nr_segs)
608{
609 struct file *file = iocb->ki_filp;
610 struct inode *inode = file->f_mapping->host;
611 struct gfs2_inode *ip = inode->u.generic_ip;
612 struct gfs2_sbd *sdp = ip->i_sbd;
613
614 if (rw == WRITE)
615 return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
616
617 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
618 gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
619 return -EINVAL;
620
621 return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
622 offset, nr_segs, get_blocks, NULL,
623 DIO_OWN_LOCKING);
624}
625
626struct address_space_operations gfs2_file_aops = {
627 .writepage = gfs2_writepage,
628 .readpage = gfs2_readpage,
629 .sync_page = block_sync_page,
630 .prepare_write = gfs2_prepare_write,
631 .commit_write = gfs2_commit_write,
632 .bmap = gfs2_bmap,
633 .invalidatepage = gfs2_invalidatepage,
634 .direct_IO = gfs2_direct_IO,
635};
636
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
new file mode 100644
index 000000000000..f201a059fd91
--- /dev/null
+++ b/fs/gfs2/ops_address.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_ADDRESS_DOT_H__
11#define __OPS_ADDRESS_DOT_H__
12
13extern struct address_space_operations gfs2_file_aops;
14extern int gfs2_get_block(struct inode *inode, sector_t lblock,
15 struct buffer_head *bh_result, int create);
16
17#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
new file mode 100644
index 000000000000..958371076093
--- /dev/null
+++ b/fs/gfs2/ops_dentry.c
@@ -0,0 +1,124 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/smp_lock.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/crc32.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "dir.h"
24#include "glock.h"
25#include "ops_dentry.h"
26#include "util.h"
27
28/**
29 * gfs2_drevalidate - Check directory lookup consistency
30 * @dentry: the mapping to check
31 * @nd:
32 *
33 * Check to make sure the lookup necessary to arrive at this inode from its
34 * parent is still good.
35 *
36 * Returns: 1 if the dentry is ok, 0 if it isn't
37 */
38
39static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
40{
41 struct dentry *parent = dget_parent(dentry);
42 struct gfs2_sbd *sdp = parent->d_inode->i_sb->s_fs_info;
43 struct gfs2_inode *dip = parent->d_inode->u.generic_ip;
44 struct inode *inode = dentry->d_inode;
45 struct gfs2_holder d_gh;
46 struct gfs2_inode *ip;
47 struct gfs2_inum inum;
48 unsigned int type;
49 int error;
50
51 if (inode && is_bad_inode(inode))
52 goto invalid;
53
54 if (sdp->sd_args.ar_localcaching)
55 goto valid;
56
57 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
58 if (error)
59 goto fail;
60
61 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
62 switch (error) {
63 case 0:
64 if (!inode)
65 goto invalid_gunlock;
66 break;
67 case -ENOENT:
68 if (!inode)
69 goto valid_gunlock;
70 goto invalid_gunlock;
71 default:
72 goto fail_gunlock;
73 }
74
75 ip = inode->u.generic_ip;
76
77 if (!gfs2_inum_equal(&ip->i_num, &inum))
78 goto invalid_gunlock;
79
80 if (IF2DT(ip->i_di.di_mode) != type) {
81 gfs2_consist_inode(dip);
82 goto fail_gunlock;
83 }
84
85 valid_gunlock:
86 gfs2_glock_dq_uninit(&d_gh);
87
88 valid:
89 dput(parent);
90 return 1;
91
92 invalid_gunlock:
93 gfs2_glock_dq_uninit(&d_gh);
94
95 invalid:
96 if (inode && S_ISDIR(inode->i_mode)) {
97 if (have_submounts(dentry))
98 goto valid;
99 shrink_dcache_parent(dentry);
100 }
101 d_drop(dentry);
102
103 dput(parent);
104 return 0;
105
106 fail_gunlock:
107 gfs2_glock_dq_uninit(&d_gh);
108
109 fail:
110 dput(parent);
111 return 0;
112}
113
114static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
115{
116 str->hash = gfs2_disk_hash(str->name, str->len);
117 return 0;
118}
119
120struct dentry_operations gfs2_dops = {
121 .d_revalidate = gfs2_drevalidate,
122 .d_hash = gfs2_dhash,
123};
124
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
new file mode 100644
index 000000000000..94e3ee170165
--- /dev/null
+++ b/fs/gfs2/ops_dentry.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_DENTRY_DOT_H__
11#define __OPS_DENTRY_DOT_H__
12
13extern struct dentry_operations gfs2_dops;
14
15#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
new file mode 100644
index 000000000000..be16c68263d1
--- /dev/null
+++ b/fs/gfs2/ops_export.c
@@ -0,0 +1,298 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "dir.h"
23#include "glock.h"
24#include "glops.h"
25#include "inode.h"
26#include "ops_export.h"
27#include "rgrp.h"
28#include "util.h"
29
30static struct dentry *gfs2_decode_fh(struct super_block *sb,
31 __u32 *fh,
32 int fh_len,
33 int fh_type,
34 int (*acceptable)(void *context,
35 struct dentry *dentry),
36 void *context)
37{
38 struct gfs2_inum this, parent;
39
40 if (fh_type != fh_len)
41 return NULL;
42
43 memset(&parent, 0, sizeof(struct gfs2_inum));
44
45 switch (fh_type) {
46 case 8:
47 parent.no_formal_ino = ((uint64_t)be32_to_cpu(fh[4])) << 32;
48 parent.no_formal_ino |= be32_to_cpu(fh[5]);
49 parent.no_addr = ((uint64_t)be32_to_cpu(fh[6])) << 32;
50 parent.no_addr |= be32_to_cpu(fh[7]);
51 case 4:
52 this.no_formal_ino = ((uint64_t)be32_to_cpu(fh[0])) << 32;
53 this.no_formal_ino |= be32_to_cpu(fh[1]);
54 this.no_addr = ((uint64_t)be32_to_cpu(fh[2])) << 32;
55 this.no_addr |= be32_to_cpu(fh[3]);
56 break;
57 default:
58 return NULL;
59 }
60
61 return gfs2_export_ops.find_exported_dentry(sb, &this, &parent,
62 acceptable, context);
63}
64
65static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
66 int connectable)
67{
68 struct inode *inode = dentry->d_inode;
69 struct super_block *sb = inode->i_sb;
70 struct gfs2_inode *ip = inode->u.generic_ip;
71
72 if (*len < 4 || (connectable && *len < 8))
73 return 255;
74
75 fh[0] = ip->i_num.no_formal_ino >> 32;
76 fh[0] = cpu_to_be32(fh[0]);
77 fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
78 fh[1] = cpu_to_be32(fh[1]);
79 fh[2] = ip->i_num.no_addr >> 32;
80 fh[2] = cpu_to_be32(fh[2]);
81 fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
82 fh[3] = cpu_to_be32(fh[3]);
83 *len = 4;
84
85 if (!connectable || inode == sb->s_root->d_inode)
86 return *len;
87
88 spin_lock(&dentry->d_lock);
89 inode = dentry->d_parent->d_inode;
90 ip = inode->u.generic_ip;
91 gfs2_inode_hold(ip);
92 spin_unlock(&dentry->d_lock);
93
94 fh[4] = ip->i_num.no_formal_ino >> 32;
95 fh[4] = cpu_to_be32(fh[4]);
96 fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
97 fh[5] = cpu_to_be32(fh[5]);
98 fh[6] = ip->i_num.no_addr >> 32;
99 fh[6] = cpu_to_be32(fh[6]);
100 fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
101 fh[7] = cpu_to_be32(fh[7]);
102 *len = 8;
103
104 gfs2_inode_put(ip);
105
106 return *len;
107}
108
109struct get_name_filldir {
110 struct gfs2_inum inum;
111 char *name;
112};
113
114static int get_name_filldir(void *opaque, const char *name, unsigned int length,
115 uint64_t offset, struct gfs2_inum *inum,
116 unsigned int type)
117{
118 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
119
120 if (!gfs2_inum_equal(inum, &gnfd->inum))
121 return 0;
122
123 memcpy(gnfd->name, name, length);
124 gnfd->name[length] = 0;
125
126 return 1;
127}
128
129static int gfs2_get_name(struct dentry *parent, char *name,
130 struct dentry *child)
131{
132 struct inode *dir = parent->d_inode;
133 struct inode *inode = child->d_inode;
134 struct gfs2_inode *dip, *ip;
135 struct get_name_filldir gnfd;
136 struct gfs2_holder gh;
137 uint64_t offset = 0;
138 int error;
139
140 if (!dir)
141 return -EINVAL;
142
143 if (!S_ISDIR(dir->i_mode) || !inode)
144 return -EINVAL;
145
146 dip = dir->u.generic_ip;
147 ip = inode->u.generic_ip;
148
149 *name = 0;
150 gnfd.inum = ip->i_num;
151 gnfd.name = name;
152
153 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
154 if (error)
155 return error;
156
157 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
158
159 gfs2_glock_dq_uninit(&gh);
160
161 if (!error && !*name)
162 error = -ENOENT;
163
164 return error;
165}
166
167static struct dentry *gfs2_get_parent(struct dentry *child)
168{
169 struct qstr dotdot;
170 struct inode *inode;
171 struct dentry *dentry;
172
173 gfs2_str2qstr(&dotdot, "..");
174 inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
175
176 if (!inode)
177 return ERR_PTR(-ENOENT);
178 if (IS_ERR(inode))
179 return ERR_PTR(PTR_ERR(inode));
180
181 dentry = d_alloc_anon(inode);
182 if (!dentry) {
183 iput(inode);
184 return ERR_PTR(-ENOMEM);
185 }
186
187 return dentry;
188}
189
190static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p)
191{
192 struct gfs2_sbd *sdp = sb->s_fs_info;
193 struct gfs2_inum *inum = (struct gfs2_inum *)inum_p;
194 struct gfs2_holder i_gh, ri_gh, rgd_gh;
195 struct gfs2_rgrpd *rgd;
196 struct gfs2_inode *ip;
197 struct inode *inode;
198 struct dentry *dentry;
199 int error;
200
201 /* System files? */
202
203 inode = gfs2_iget(sb, inum);
204 if (inode) {
205 ip = inode->u.generic_ip;
206 if (ip->i_num.no_formal_ino != inum->no_formal_ino) {
207 iput(inode);
208 return ERR_PTR(-ESTALE);
209 }
210 goto out_inode;
211 }
212
213 error = gfs2_glock_nq_num(sdp,
214 inum->no_addr, &gfs2_inode_glops,
215 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
216 &i_gh);
217 if (error)
218 return ERR_PTR(error);
219
220 error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip);
221 if (error)
222 goto fail;
223 if (ip)
224 goto out_ip;
225
226 error = gfs2_rindex_hold(sdp, &ri_gh);
227 if (error)
228 goto fail;
229
230 error = -EINVAL;
231 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
232 if (!rgd)
233 goto fail_rindex;
234
235 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
236 if (error)
237 goto fail_rindex;
238
239 error = -ESTALE;
240 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
241 goto fail_rgd;
242
243 gfs2_glock_dq_uninit(&rgd_gh);
244 gfs2_glock_dq_uninit(&ri_gh);
245
246 error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip);
247 if (error)
248 goto fail;
249
250 error = gfs2_inode_refresh(ip);
251 if (error) {
252 gfs2_inode_put(ip);
253 goto fail;
254 }
255
256 out_ip:
257 error = -EIO;
258 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) {
259 gfs2_inode_put(ip);
260 goto fail;
261 }
262
263 gfs2_glock_dq_uninit(&i_gh);
264
265 inode = gfs2_ip2v(ip);
266 gfs2_inode_put(ip);
267
268 if (!inode)
269 return ERR_PTR(-ENOMEM);
270
271 out_inode:
272 dentry = d_alloc_anon(inode);
273 if (!dentry) {
274 iput(inode);
275 return ERR_PTR(-ENOMEM);
276 }
277
278 return dentry;
279
280 fail_rgd:
281 gfs2_glock_dq_uninit(&rgd_gh);
282
283 fail_rindex:
284 gfs2_glock_dq_uninit(&ri_gh);
285
286 fail:
287 gfs2_glock_dq_uninit(&i_gh);
288 return ERR_PTR(error);
289}
290
291struct export_operations gfs2_export_ops = {
292 .decode_fh = gfs2_decode_fh,
293 .encode_fh = gfs2_encode_fh,
294 .get_name = gfs2_get_name,
295 .get_parent = gfs2_get_parent,
296 .get_dentry = gfs2_get_dentry,
297};
298
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
new file mode 100644
index 000000000000..2f342f3d8755
--- /dev/null
+++ b/fs/gfs2/ops_export.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13extern struct export_operations gfs2_export_ops;
14
15#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
new file mode 100644
index 000000000000..db4484a3efcc
--- /dev/null
+++ b/fs/gfs2/ops_file.c
@@ -0,0 +1,997 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/uio.h>
17#include <linux/blkdev.h>
18#include <linux/mm.h>
19#include <linux/smp_lock.h>
20#include <linux/fs.h>
21#include <linux/gfs2_ondisk.h>
22#include <linux/ext2_fs.h>
23#include <linux/crc32.h>
24#include <linux/iflags.h>
25#include <asm/semaphore.h>
26#include <asm/uaccess.h>
27
28#include "gfs2.h"
29#include "lm_interface.h"
30#include "incore.h"
31#include "bmap.h"
32#include "dir.h"
33#include "glock.h"
34#include "glops.h"
35#include "inode.h"
36#include "lm.h"
37#include "log.h"
38#include "meta_io.h"
39#include "ops_file.h"
40#include "ops_vm.h"
41#include "quota.h"
42#include "rgrp.h"
43#include "trans.h"
44#include "util.h"
45#include "eaops.h"
46
47/* "bad" is for NFS support */
48struct filldir_bad_entry {
49 char *fbe_name;
50 unsigned int fbe_length;
51 uint64_t fbe_offset;
52 struct gfs2_inum fbe_inum;
53 unsigned int fbe_type;
54};
55
56struct filldir_bad {
57 struct gfs2_sbd *fdb_sbd;
58
59 struct filldir_bad_entry *fdb_entry;
60 unsigned int fdb_entry_num;
61 unsigned int fdb_entry_off;
62
63 char *fdb_name;
64 unsigned int fdb_name_size;
65 unsigned int fdb_name_off;
66};
67
68/* For regular, non-NFS */
69struct filldir_reg {
70 struct gfs2_sbd *fdr_sbd;
71 int fdr_prefetch;
72
73 filldir_t fdr_filldir;
74 void *fdr_opaque;
75};
76
77/*
78 * Most fields left uninitialised to catch anybody who tries to
79 * use them. f_flags set to prevent file_accessed() from touching
80 * any other part of this. Its use is purely as a flag so that we
81 * know (in readpage()) whether or not do to locking.
82 */
83struct file gfs2_internal_file_sentinal = {
84 .f_flags = O_NOATIME|O_RDONLY,
85};
86
87static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
88 unsigned long offset, unsigned long size)
89{
90 char *kaddr;
91 unsigned long count = desc->count;
92
93 if (size > count)
94 size = count;
95
96 kaddr = kmap(page);
97 memcpy(desc->arg.buf, kaddr + offset, size);
98 kunmap(page);
99
100 desc->count = count - size;
101 desc->written += size;
102 desc->arg.buf += size;
103 return size;
104}
105
106int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
107 char *buf, loff_t *pos, unsigned size)
108{
109 struct inode *inode = ip->i_vnode;
110 read_descriptor_t desc;
111 desc.written = 0;
112 desc.arg.buf = buf;
113 desc.count = size;
114 desc.error = 0;
115 do_generic_mapping_read(inode->i_mapping, ra_state,
116 &gfs2_internal_file_sentinal, pos, &desc,
117 gfs2_read_actor);
118 return desc.written ? desc.written : desc.error;
119}
120
121/**
122 * gfs2_llseek - seek to a location in a file
123 * @file: the file
124 * @offset: the offset
125 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
126 *
127 * SEEK_END requires the glock for the file because it references the
128 * file's size.
129 *
130 * Returns: The new offset, or errno
131 */
132
133static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
134{
135 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
136 struct gfs2_holder i_gh;
137 loff_t error;
138
139 if (origin == 2) {
140 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
141 &i_gh);
142 if (!error) {
143 error = remote_llseek(file, offset, origin);
144 gfs2_glock_dq_uninit(&i_gh);
145 }
146 } else
147 error = remote_llseek(file, offset, origin);
148
149 return error;
150}
151
152
153static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
154 loff_t offset, unsigned long nr_segs)
155{
156 struct file *file = iocb->ki_filp;
157 struct address_space *mapping = file->f_mapping;
158 ssize_t retval;
159
160 retval = filemap_write_and_wait(mapping);
161 if (retval == 0) {
162 retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
163 nr_segs);
164 }
165 return retval;
166}
167
168/**
169 * __gfs2_file_aio_read - The main GFS2 read function
170 *
171 * N.B. This is almost, but not quite the same as __generic_file_aio_read()
172 * the important subtle different being that inode->i_size isn't valid
173 * unless we are holding a lock, and we do this _only_ on the O_DIRECT
174 * path since otherwise locking is done entirely at the page cache
175 * layer.
176 */
177static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
178 const struct iovec *iov,
179 unsigned long nr_segs, loff_t *ppos)
180{
181 struct file *filp = iocb->ki_filp;
182 struct gfs2_inode *ip = filp->f_mapping->host->u.generic_ip;
183 struct gfs2_holder gh;
184 ssize_t retval;
185 unsigned long seg;
186 size_t count;
187
188 count = 0;
189 for (seg = 0; seg < nr_segs; seg++) {
190 const struct iovec *iv = &iov[seg];
191
192 /*
193 * If any segment has a negative length, or the cumulative
194 * length ever wraps negative then return -EINVAL.
195 */
196 count += iv->iov_len;
197 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
198 return -EINVAL;
199 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
200 continue;
201 if (seg == 0)
202 return -EFAULT;
203 nr_segs = seg;
204 count -= iv->iov_len; /* This segment is no good */
205 break;
206 }
207
208 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
209 if (filp->f_flags & O_DIRECT) {
210 loff_t pos = *ppos, size;
211 struct address_space *mapping;
212 struct inode *inode;
213
214 mapping = filp->f_mapping;
215 inode = mapping->host;
216 retval = 0;
217 if (!count)
218 goto out; /* skip atime */
219
220 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
221 retval = gfs2_glock_nq_m_atime(1, &gh);
222 if (retval)
223 goto out;
224 if (gfs2_is_stuffed(ip)) {
225 gfs2_glock_dq_m(1, &gh);
226 gfs2_holder_uninit(&gh);
227 goto fallback_to_normal;
228 }
229 size = i_size_read(inode);
230 if (pos < size) {
231 retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
232 if (retval > 0 && !is_sync_kiocb(iocb))
233 retval = -EIOCBQUEUED;
234 if (retval > 0)
235 *ppos = pos + retval;
236 }
237 file_accessed(filp);
238 gfs2_glock_dq_m(1, &gh);
239 gfs2_holder_uninit(&gh);
240 goto out;
241 }
242
243fallback_to_normal:
244 retval = 0;
245 if (count) {
246 for (seg = 0; seg < nr_segs; seg++) {
247 read_descriptor_t desc;
248
249 desc.written = 0;
250 desc.arg.buf = iov[seg].iov_base;
251 desc.count = iov[seg].iov_len;
252 if (desc.count == 0)
253 continue;
254 desc.error = 0;
255 do_generic_file_read(filp,ppos,&desc,file_read_actor);
256 retval += desc.written;
257 if (desc.error) {
258 retval = retval ?: desc.error;
259 break;
260 }
261 }
262 }
263out:
264 return retval;
265}
266
267/**
268 * gfs2_read - Read bytes from a file
269 * @file: The file to read from
270 * @buf: The buffer to copy into
271 * @size: The amount of data requested
272 * @offset: The current file offset
273 *
274 * Outputs: Offset - updated according to number of bytes read
275 *
276 * Returns: The number of bytes read, errno on failure
277 */
278
279static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
280 loff_t *offset)
281{
282 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
283 struct kiocb kiocb;
284 ssize_t ret;
285
286 init_sync_kiocb(&kiocb, filp);
287 ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
288 if (-EIOCBQUEUED == ret)
289 ret = wait_on_sync_kiocb(&kiocb);
290 return ret;
291}
292
293static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
294 unsigned long nr_segs, loff_t *ppos)
295{
296 struct kiocb kiocb;
297 ssize_t ret;
298
299 init_sync_kiocb(&kiocb, filp);
300 ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
301 if (-EIOCBQUEUED == ret)
302 ret = wait_on_sync_kiocb(&kiocb);
303 return ret;
304}
305
306static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
307 size_t count, loff_t pos)
308{
309 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
310
311 BUG_ON(iocb->ki_pos != pos);
312 return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
313}
314
315
316/**
317 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
318 * @opaque: opaque data used by the function
319 * @name: the name of the directory entry
320 * @length: the length of the name
321 * @offset: the entry's offset in the directory
322 * @inum: the inode number the entry points to
323 * @type: the type of inode the entry points to
324 *
325 * Returns: 0 on success, 1 if buffer full
326 */
327
328static int filldir_reg_func(void *opaque, const char *name, unsigned int length,
329 uint64_t offset, struct gfs2_inum *inum,
330 unsigned int type)
331{
332 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
333 struct gfs2_sbd *sdp = fdr->fdr_sbd;
334 int error;
335
336 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
337 inum->no_formal_ino, type);
338 if (error)
339 return 1;
340
341 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
342 gfs2_glock_prefetch_num(sdp,
343 inum->no_addr, &gfs2_inode_glops,
344 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
345 gfs2_glock_prefetch_num(sdp,
346 inum->no_addr, &gfs2_iopen_glops,
347 LM_ST_SHARED, LM_FLAG_TRY);
348 }
349
350 return 0;
351}
352
353/**
354 * readdir_reg - Read directory entries from a directory
355 * @file: The directory to read from
356 * @dirent: Buffer for dirents
357 * @filldir: Function used to do the copying
358 *
359 * Returns: errno
360 */
361
362static int readdir_reg(struct file *file, void *dirent, filldir_t filldir)
363{
364 struct inode *dir = file->f_mapping->host;
365 struct gfs2_inode *dip = dir->u.generic_ip;
366 struct filldir_reg fdr;
367 struct gfs2_holder d_gh;
368 uint64_t offset = file->f_pos;
369 int error;
370
371 fdr.fdr_sbd = dip->i_sbd;
372 fdr.fdr_prefetch = 1;
373 fdr.fdr_filldir = filldir;
374 fdr.fdr_opaque = dirent;
375
376 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
377 error = gfs2_glock_nq_atime(&d_gh);
378 if (error) {
379 gfs2_holder_uninit(&d_gh);
380 return error;
381 }
382
383 error = gfs2_dir_read(dir, &offset, &fdr, filldir_reg_func);
384
385 gfs2_glock_dq_uninit(&d_gh);
386
387 file->f_pos = offset;
388
389 return error;
390}
391
392/**
393 * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read()
394 * @opaque: opaque data used by the function
395 * @name: the name of the directory entry
396 * @length: the length of the name
397 * @offset: the entry's offset in the directory
398 * @inum: the inode number the entry points to
399 * @type: the type of inode the entry points to
400 *
401 * For supporting NFS.
402 *
403 * Returns: 0 on success, 1 if buffer full
404 */
405
406static int filldir_bad_func(void *opaque, const char *name, unsigned int length,
407 uint64_t offset, struct gfs2_inum *inum,
408 unsigned int type)
409{
410 struct filldir_bad *fdb = (struct filldir_bad *)opaque;
411 struct gfs2_sbd *sdp = fdb->fdb_sbd;
412 struct filldir_bad_entry *fbe;
413
414 if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
415 fdb->fdb_name_off + length > fdb->fdb_name_size)
416 return 1;
417
418 fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
419 fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
420 memcpy(fbe->fbe_name, name, length);
421 fbe->fbe_length = length;
422 fbe->fbe_offset = offset;
423 fbe->fbe_inum = *inum;
424 fbe->fbe_type = type;
425
426 fdb->fdb_entry_off++;
427 fdb->fdb_name_off += length;
428
429 if (!(length == 1 && *name == '.')) {
430 gfs2_glock_prefetch_num(sdp,
431 inum->no_addr, &gfs2_inode_glops,
432 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
433 gfs2_glock_prefetch_num(sdp,
434 inum->no_addr, &gfs2_iopen_glops,
435 LM_ST_SHARED, LM_FLAG_TRY);
436 }
437
438 return 0;
439}
440
441/**
442 * readdir_bad - Read directory entries from a directory
443 * @file: The directory to read from
444 * @dirent: Buffer for dirents
445 * @filldir: Function used to do the copying
446 *
447 * For supporting NFS.
448 *
449 * Returns: errno
450 */
451
452static int readdir_bad(struct file *file, void *dirent, filldir_t filldir)
453{
454 struct inode *dir = file->f_mapping->host;
455 struct gfs2_inode *dip = dir->u.generic_ip;
456 struct gfs2_sbd *sdp = dip->i_sbd;
457 struct filldir_reg fdr;
458 unsigned int entries, size;
459 struct filldir_bad *fdb;
460 struct gfs2_holder d_gh;
461 uint64_t offset = file->f_pos;
462 unsigned int x;
463 struct filldir_bad_entry *fbe;
464 int error;
465
466 entries = gfs2_tune_get(sdp, gt_entries_per_readdir);
467 size = sizeof(struct filldir_bad) +
468 entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE);
469
470 fdb = kzalloc(size, GFP_KERNEL);
471 if (!fdb)
472 return -ENOMEM;
473
474 fdb->fdb_sbd = sdp;
475 fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
476 fdb->fdb_entry_num = entries;
477 fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
478 entries * sizeof(struct filldir_bad_entry);
479 fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE;
480
481 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
482 error = gfs2_glock_nq_atime(&d_gh);
483 if (error) {
484 gfs2_holder_uninit(&d_gh);
485 goto out;
486 }
487
488 error = gfs2_dir_read(dir, &offset, fdb, filldir_bad_func);
489
490 gfs2_glock_dq_uninit(&d_gh);
491
492 fdr.fdr_sbd = sdp;
493 fdr.fdr_prefetch = 0;
494 fdr.fdr_filldir = filldir;
495 fdr.fdr_opaque = dirent;
496
497 for (x = 0; x < fdb->fdb_entry_off; x++) {
498 fbe = &fdb->fdb_entry[x];
499
500 error = filldir_reg_func(&fdr,
501 fbe->fbe_name, fbe->fbe_length,
502 fbe->fbe_offset,
503 &fbe->fbe_inum, fbe->fbe_type);
504 if (error) {
505 file->f_pos = fbe->fbe_offset;
506 error = 0;
507 goto out;
508 }
509 }
510
511 file->f_pos = offset;
512
513 out:
514 kfree(fdb);
515
516 return error;
517}
518
519/**
520 * gfs2_readdir - Read directory entries from a directory
521 * @file: The directory to read from
522 * @dirent: Buffer for dirents
523 * @filldir: Function used to do the copying
524 *
525 * Returns: errno
526 */
527
528static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
529{
530 int error;
531
532 if (strcmp(current->comm, "nfsd") != 0)
533 error = readdir_reg(file, dirent, filldir);
534 else
535 error = readdir_bad(file, dirent, filldir);
536
537 return error;
538}
539
/* Translation table, indexed by generic iflag bit position, giving the
   corresponding GFS2 on-disk di_flags bit. Used via iflags_cvt() when
   flags arrive from user space (gfs2_set_flags()). */
static const u32 iflags_to_gfs2[32] = {
	[iflag_Sync] = GFS2_DIF_SYNC,
	[iflag_Immutable] = GFS2_DIF_IMMUTABLE,
	[iflag_Append] = GFS2_DIF_APPENDONLY,
	[iflag_NoAtime] = GFS2_DIF_NOATIME,
	[iflag_Index] = GFS2_DIF_EXHASH,
	[iflag_JournalData] = GFS2_DIF_JDATA,
	[iflag_DirectIO] = GFS2_DIF_DIRECTIO,
	[iflag_InheritDirectIO] = GFS2_DIF_INHERIT_DIRECTIO,
	[iflag_InheritJdata] = GFS2_DIF_INHERIT_JDATA,
};

/* Reverse table, indexed by GFS2 di_flags bit position, giving the
   generic IFLAG bit. Used when reporting flags to user space
   (gfs2_get_flags()). */
static const u32 gfs2_to_iflags[32] = {
	[gfs2fl_Sync] = IFLAG_SYNC,
	[gfs2fl_Immutable] = IFLAG_IMMUTABLE,
	[gfs2fl_AppendOnly] = IFLAG_APPEND,
	[gfs2fl_NoAtime] = IFLAG_NOATIME,
	[gfs2fl_ExHash] = IFLAG_INDEX,
	[gfs2fl_Jdata] = IFLAG_JOURNAL_DATA,
	[gfs2fl_Directio] = IFLAG_DIRECTIO,
	[gfs2fl_InheritDirectio] = IFLAG_INHERITDIRECTIO,
	[gfs2fl_InheritJdata] = IFLAG_INHERITJDATA,
};
563
564static int gfs2_get_flags(struct inode *inode, u32 __user *ptr)
565{
566 struct gfs2_inode *ip = inode->u.generic_ip;
567 struct gfs2_holder gh;
568 int error;
569 u32 iflags;
570
571 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
572 error = gfs2_glock_nq_m_atime(1, &gh);
573 if (error)
574 return error;
575
576 iflags = iflags_cvt(gfs2_to_iflags, ip->i_di.di_flags);
577 if (put_user(iflags, ptr))
578 error = -EFAULT;
579
580 gfs2_glock_dq_m(1, &gh);
581 gfs2_holder_uninit(&gh);
582 return error;
583}
584
585/* Flags that can be set by user space */
586#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
587 GFS2_DIF_DIRECTIO| \
588 GFS2_DIF_IMMUTABLE| \
589 GFS2_DIF_APPENDONLY| \
590 GFS2_DIF_NOATIME| \
591 GFS2_DIF_SYNC| \
592 GFS2_DIF_SYSTEM| \
593 GFS2_DIF_INHERIT_DIRECTIO| \
594 GFS2_DIF_INHERIT_JDATA)
595
596/**
597 * gfs2_set_flags - set flags on an inode
598 * @inode: The inode
599 * @flags: The flags to set
600 * @mask: Indicates which flags are valid
601 *
602 */
603static int do_gfs2_set_flags(struct inode *inode, u32 flags, u32 mask)
604{
605 struct gfs2_inode *ip = inode->u.generic_ip;
606 struct buffer_head *bh;
607 struct gfs2_holder gh;
608 int error;
609 u32 new_flags;
610
611 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
612 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
613 if (error)
614 return error;
615
616 new_flags = (ip->i_di.di_flags & ~mask) | (flags & mask);
617 if ((new_flags ^ flags) == 0)
618 goto out;
619
620 error = -EINVAL;
621 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
622 goto out;
623
624 if (S_ISDIR(inode->i_mode)) {
625 if ((new_flags ^ flags) & (GFS2_DIF_JDATA | GFS2_DIF_DIRECTIO))
626 goto out;
627 } else if (S_ISREG(inode->i_mode)) {
628 if ((new_flags ^ flags) & (GFS2_DIF_INHERIT_DIRECTIO|
629 GFS2_DIF_INHERIT_JDATA))
630 goto out;
631 } else
632 goto out;
633
634 error = -EPERM;
635 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
636 goto out;
637 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
638 goto out;
639 error = gfs2_repermission(inode, MAY_WRITE, NULL);
640 if (error)
641 goto out;
642
643 error = gfs2_meta_inode_buffer(ip, &bh);
644 if (error)
645 goto out;
646 gfs2_trans_add_bh(ip->i_gl, bh, 1);
647 ip->i_di.di_flags = new_flags;
648 gfs2_dinode_out(&ip->i_di, bh->b_data);
649 brelse(bh);
650out:
651 gfs2_glock_dq_uninit(&gh);
652 return error;
653}
654
655static int gfs2_set_flags(struct inode *inode, u32 __user *ptr)
656{
657 u32 iflags, gfsflags;
658 if (get_user(iflags, ptr))
659 return -EFAULT;
660 gfsflags = iflags_cvt(iflags_to_gfs2, iflags);
661 return do_gfs2_set_flags(inode, gfsflags, ~0);
662}
663
664int gfs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
665 unsigned long arg)
666{
667 switch(cmd) {
668 case IFLAGS_GET_IOC:
669 return gfs2_get_flags(inode, (u32 __user *)arg);
670 case IFLAGS_SET_IOC:
671 return gfs2_set_flags(inode, (u32 __user *)arg);
672 }
673 return -ENOTTY;
674}
675
676
677/**
678 * gfs2_mmap -
679 * @file: The file to map
680 * @vma: The VMA which described the mapping
681 *
682 * Returns: 0 or error code
683 */
684
685static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
686{
687 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
688 struct gfs2_holder i_gh;
689 int error;
690
691 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
692 error = gfs2_glock_nq_atime(&i_gh);
693 if (error) {
694 gfs2_holder_uninit(&i_gh);
695 return error;
696 }
697
698 /* This is VM_MAYWRITE instead of VM_WRITE because a call
699 to mprotect() can turn on VM_WRITE later. */
700
701 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
702 (VM_MAYSHARE | VM_MAYWRITE))
703 vma->vm_ops = &gfs2_vm_ops_sharewrite;
704 else
705 vma->vm_ops = &gfs2_vm_ops_private;
706
707 gfs2_glock_dq_uninit(&i_gh);
708
709 return error;
710}
711
712/**
713 * gfs2_open - open a file
714 * @inode: the inode to open
715 * @file: the struct file for this opening
716 *
717 * Returns: errno
718 */
719
720static int gfs2_open(struct inode *inode, struct file *file)
721{
722 struct gfs2_inode *ip = inode->u.generic_ip;
723 struct gfs2_holder i_gh;
724 struct gfs2_file *fp;
725 int error;
726
727 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
728 if (!fp)
729 return -ENOMEM;
730
731 mutex_init(&fp->f_fl_mutex);
732
733 fp->f_inode = ip;
734 fp->f_vfile = file;
735
736 gfs2_assert_warn(ip->i_sbd, !file->private_data);
737 file->private_data = fp;
738
739 if (S_ISREG(ip->i_di.di_mode)) {
740 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
741 &i_gh);
742 if (error)
743 goto fail;
744
745 if (!(file->f_flags & O_LARGEFILE) &&
746 ip->i_di.di_size > MAX_NON_LFS) {
747 error = -EFBIG;
748 goto fail_gunlock;
749 }
750
751 /* Listen to the Direct I/O flag */
752
753 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
754 file->f_flags |= O_DIRECT;
755
756 gfs2_glock_dq_uninit(&i_gh);
757 }
758
759 return 0;
760
761 fail_gunlock:
762 gfs2_glock_dq_uninit(&i_gh);
763
764 fail:
765 file->private_data = NULL;
766 kfree(fp);
767
768 return error;
769}
770
771/**
772 * gfs2_close - called to close a struct file
773 * @inode: the inode the struct file belongs to
774 * @file: the struct file being closed
775 *
776 * Returns: errno
777 */
778
779static int gfs2_close(struct inode *inode, struct file *file)
780{
781 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
782 struct gfs2_file *fp;
783
784 fp = file->private_data;
785 file->private_data = NULL;
786
787 if (gfs2_assert_warn(sdp, fp))
788 return -EIO;
789
790 kfree(fp);
791
792 return 0;
793}
794
795/**
796 * gfs2_fsync - sync the dirty data for a file (across the cluster)
797 * @file: the file that points to the dentry (we ignore this)
798 * @dentry: the dentry that points to the inode to sync
799 *
800 * Returns: errno
801 */
802
803static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
804{
805 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
806
807 gfs2_log_flush_glock(ip->i_gl);
808
809 return 0;
810}
811
812/**
813 * gfs2_lock - acquire/release a posix lock on a file
814 * @file: the file pointer
815 * @cmd: either modify or retrieve lock state, possibly wait
816 * @fl: type and range of lock
817 *
818 * Returns: errno
819 */
820
821static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
822{
823 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
824 struct gfs2_sbd *sdp = ip->i_sbd;
825 struct lm_lockname name =
826 { .ln_number = ip->i_num.no_addr,
827 .ln_type = LM_TYPE_PLOCK };
828
829 if (!(fl->fl_flags & FL_POSIX))
830 return -ENOLCK;
831 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
832 return -ENOLCK;
833
834 if (sdp->sd_args.ar_localflocks) {
835 if (IS_GETLK(cmd)) {
836 struct file_lock *tmp;
837 lock_kernel();
838 tmp = posix_test_lock(file, fl);
839 fl->fl_type = F_UNLCK;
840 if (tmp)
841 memcpy(fl, tmp, sizeof(struct file_lock));
842 unlock_kernel();
843 return 0;
844 } else {
845 int error;
846 lock_kernel();
847 error = posix_lock_file_wait(file, fl);
848 unlock_kernel();
849 return error;
850 }
851 }
852
853 if (IS_GETLK(cmd))
854 return gfs2_lm_plock_get(sdp, &name, file, fl);
855 else if (fl->fl_type == F_UNLCK)
856 return gfs2_lm_punlock(sdp, &name, file, fl);
857 else
858 return gfs2_lm_plock(sdp, &name, file, cmd, fl);
859}
860
861/**
862 * gfs2_sendfile - Send bytes to a file or socket
863 * @in_file: The file to read from
864 * @out_file: The file to write to
865 * @count: The amount of data
866 * @offset: The beginning file offset
867 *
868 * Outputs: offset - updated according to number of bytes read
869 *
870 * Returns: The number of bytes sent, errno on failure
871 */
872
873static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
874 read_actor_t actor, void *target)
875{
876 return generic_file_sendfile(in_file, offset, count, actor, target);
877}
878
/*
 * do_flock - acquire (or upgrade/downgrade) a flock on a file
 *
 * An flock is backed by a per-file flock glock: F_WRLCK maps to an
 * exclusive glock, F_RDLCK to a shared one. Non-blocking requests use
 * LM_FLAG_TRY so contention returns -EAGAIN instead of sleeping.
 */
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_file *fp = file->private_data;
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
	struct gfs2_inode *ip = fp->f_inode;
	struct gfs2_glock *gl;
	unsigned int state;
	int flags;
	int error = 0;

	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
	flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;

	mutex_lock(&fp->f_fl_mutex);

	gl = fl_gh->gh_gl;
	if (gl) {
		/* Already holding a flock glock on this file */
		if (fl_gh->gh_state == state)
			goto out;	/* already in the requested mode */
		/* Mode change: drop the VFS-level lock and the old holder.
		   Take an extra glock ref so gl survives dq_uninit. */
		gfs2_glock_hold(gl);
		flock_lock_file_wait(file,
				     &(struct file_lock){.fl_type = F_UNLCK});
		gfs2_glock_dq_uninit(fl_gh);
	} else {
		error = gfs2_glock_get(ip->i_sbd,
				       ip->i_num.no_addr, &gfs2_flock_glops,
				       CREATE, &gl);
		if (error)
			goto out;
	}

	/* NOTE(review): the put here balances the reference obtained
	   above (hold or get); gfs2_holder_init presumably keeps its
	   own reference on gl — confirm against glock.c. */
	gfs2_holder_init(gl, state, flags, fl_gh);
	gfs2_glock_put(gl);

	error = gfs2_glock_nq(fl_gh);
	if (error) {
		gfs2_holder_uninit(fl_gh);
		if (error == GLR_TRYFAILED)
			error = -EAGAIN;
	} else {
		/* Mirror the lock at the VFS level */
		error = flock_lock_file_wait(file, fl);
		gfs2_assert_warn(ip->i_sbd, !error);
	}

 out:
	mutex_unlock(&fp->f_fl_mutex);

	return error;
}
928
929static void do_unflock(struct file *file, struct file_lock *fl)
930{
931 struct gfs2_file *fp = file->private_data;
932 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
933
934 mutex_lock(&fp->f_fl_mutex);
935 flock_lock_file_wait(file, fl);
936 if (fl_gh->gh_gl)
937 gfs2_glock_dq_uninit(fl_gh);
938 mutex_unlock(&fp->f_fl_mutex);
939}
940
941/**
942 * gfs2_flock - acquire/release a flock lock on a file
943 * @file: the file pointer
944 * @cmd: either modify or retrieve lock state, possibly wait
945 * @fl: type and range of lock
946 *
947 * Returns: errno
948 */
949
950static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
951{
952 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
953 struct gfs2_sbd *sdp = ip->i_sbd;
954
955 if (!(fl->fl_flags & FL_FLOCK))
956 return -ENOLCK;
957 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
958 return -ENOLCK;
959
960 if (sdp->sd_args.ar_localflocks)
961 return flock_lock_file_wait(file, fl);
962
963 if (fl->fl_type == F_UNLCK) {
964 do_unflock(file, fl);
965 return 0;
966 } else
967 return do_flock(file, cmd, fl);
968}
969
/* File operations for regular GFS2 files */
struct file_operations gfs2_file_fops = {
	.llseek = gfs2_llseek,
	.read = gfs2_read,
	.readv = gfs2_file_readv,
	.aio_read = gfs2_file_aio_read,
	.write = generic_file_write,
	.writev = generic_file_writev,
	.aio_write = generic_file_aio_write,
	.ioctl = gfs2_ioctl,
	.mmap = gfs2_mmap,
	.open = gfs2_open,
	.release = gfs2_close,
	.fsync = gfs2_fsync,
	.lock = gfs2_lock,
	.sendfile = gfs2_sendfile,
	.flock = gfs2_flock,
};
987
/* File operations for GFS2 directories */
struct file_operations gfs2_dir_fops = {
	.readdir = gfs2_readdir,
	.ioctl = gfs2_ioctl,
	.open = gfs2_open,
	.release = gfs2_close,
	.fsync = gfs2_fsync,
	.lock = gfs2_lock,
	.flock = gfs2_flock,
};
997
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
new file mode 100644
index 000000000000..192577b411f0
--- /dev/null
+++ b/fs/gfs2/ops_file.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12extern struct file gfs2_internal_file_sentinal;
13extern int gfs2_internal_read(struct gfs2_inode *ip,
14 struct file_ra_state *ra_state,
15 char *buf, loff_t *pos, unsigned size);
16
17extern struct file_operations gfs2_file_fops;
18extern struct file_operations gfs2_dir_fops;
19
20#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
new file mode 100644
index 000000000000..5166455b9fdd
--- /dev/null
+++ b/fs/gfs2/ops_fstype.c
@@ -0,0 +1,905 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/vmalloc.h>
16#include <linux/blkdev.h>
17#include <linux/kthread.h>
18#include <linux/gfs2_ondisk.h>
19#include <asm/semaphore.h>
20
21#include "gfs2.h"
22#include "lm_interface.h"
23#include "incore.h"
24#include "daemon.h"
25#include "glock.h"
26#include "glops.h"
27#include "inode.h"
28#include "lm.h"
29#include "mount.h"
30#include "ops_export.h"
31#include "ops_fstype.h"
32#include "ops_super.h"
33#include "recovery.h"
34#include "rgrp.h"
35#include "super.h"
36#include "unlinked.h"
37#include "sys.h"
38#include "util.h"
39
40#define DO 0
41#define UNDO 1
42
43static struct gfs2_sbd *init_sbd(struct super_block *sb)
44{
45 struct gfs2_sbd *sdp;
46 unsigned int x;
47
48 sdp = vmalloc(sizeof(struct gfs2_sbd));
49 if (!sdp)
50 return NULL;
51
52 memset(sdp, 0, sizeof(struct gfs2_sbd));
53
54 sb->s_fs_info = sdp;
55 sdp->sd_vfs = sb;
56
57 gfs2_tune_init(&sdp->sd_tune);
58
59 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
60 sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
61 INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
62 }
63 INIT_LIST_HEAD(&sdp->sd_reclaim_list);
64 spin_lock_init(&sdp->sd_reclaim_lock);
65 init_waitqueue_head(&sdp->sd_reclaim_wq);
66 mutex_init(&sdp->sd_invalidate_inodes_mutex);
67
68 mutex_init(&sdp->sd_inum_mutex);
69 spin_lock_init(&sdp->sd_statfs_spin);
70 mutex_init(&sdp->sd_statfs_mutex);
71
72 spin_lock_init(&sdp->sd_rindex_spin);
73 mutex_init(&sdp->sd_rindex_mutex);
74 INIT_LIST_HEAD(&sdp->sd_rindex_list);
75 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
76 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
77
78 INIT_LIST_HEAD(&sdp->sd_jindex_list);
79 spin_lock_init(&sdp->sd_jindex_spin);
80 mutex_init(&sdp->sd_jindex_mutex);
81
82 INIT_LIST_HEAD(&sdp->sd_unlinked_list);
83 spin_lock_init(&sdp->sd_unlinked_spin);
84 mutex_init(&sdp->sd_unlinked_mutex);
85
86 INIT_LIST_HEAD(&sdp->sd_quota_list);
87 spin_lock_init(&sdp->sd_quota_spin);
88 mutex_init(&sdp->sd_quota_mutex);
89
90 spin_lock_init(&sdp->sd_log_lock);
91
92 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
93 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
94 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
95 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
96 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
97
98 mutex_init(&sdp->sd_log_reserve_mutex);
99 INIT_LIST_HEAD(&sdp->sd_ail1_list);
100 INIT_LIST_HEAD(&sdp->sd_ail2_list);
101
102 init_rwsem(&sdp->sd_log_flush_lock);
103 INIT_LIST_HEAD(&sdp->sd_log_flush_list);
104
105 INIT_LIST_HEAD(&sdp->sd_revoke_list);
106
107 mutex_init(&sdp->sd_freeze_lock);
108
109 return sdp;
110}
111
112static void init_vfs(struct super_block *sb, unsigned noatime)
113{
114 struct gfs2_sbd *sdp = sb->s_fs_info;
115
116 sb->s_magic = GFS2_MAGIC;
117 sb->s_op = &gfs2_super_ops;
118 sb->s_export_op = &gfs2_export_ops;
119 sb->s_maxbytes = MAX_LFS_FILESIZE;
120
121 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
122 set_bit(noatime, &sdp->sd_flags);
123
124 /* Don't let the VFS update atimes. GFS2 handles this itself. */
125 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
126}
127
128static int init_names(struct gfs2_sbd *sdp, int silent)
129{
130 struct gfs2_sb *sb = NULL;
131 char *proto, *table;
132 int error = 0;
133
134 proto = sdp->sd_args.ar_lockproto;
135 table = sdp->sd_args.ar_locktable;
136
137 /* Try to autodetect */
138
139 if (!proto[0] || !table[0]) {
140 struct buffer_head *bh;
141 bh = sb_getblk(sdp->sd_vfs,
142 GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
143 lock_buffer(bh);
144 clear_buffer_uptodate(bh);
145 clear_buffer_dirty(bh);
146 unlock_buffer(bh);
147 ll_rw_block(READ, 1, &bh);
148 wait_on_buffer(bh);
149
150 if (!buffer_uptodate(bh)) {
151 brelse(bh);
152 return -EIO;
153 }
154
155 sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
156 if (!sb) {
157 brelse(bh);
158 return -ENOMEM;
159 }
160 gfs2_sb_in(sb, bh->b_data);
161 brelse(bh);
162
163 error = gfs2_check_sb(sdp, sb, silent);
164 if (error)
165 goto out;
166
167 if (!proto[0])
168 proto = sb->sb_lockproto;
169 if (!table[0])
170 table = sb->sb_locktable;
171 }
172
173 if (!table[0])
174 table = sdp->sd_vfs->s_id;
175
176 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
177 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
178
179 out:
180 kfree(sb);
181
182 return error;
183}
184
185static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
186 int undo)
187{
188 struct task_struct *p;
189 int error = 0;
190
191 if (undo)
192 goto fail_trans;
193
194 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
195 error = IS_ERR(p);
196 if (error) {
197 fs_err(sdp, "can't start scand thread: %d\n", error);
198 return error;
199 }
200 sdp->sd_scand_process = p;
201
202 for (sdp->sd_glockd_num = 0;
203 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
204 sdp->sd_glockd_num++) {
205 p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
206 error = IS_ERR(p);
207 if (error) {
208 fs_err(sdp, "can't start glockd thread: %d\n", error);
209 goto fail;
210 }
211 sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
212 }
213
214 error = gfs2_glock_nq_num(sdp,
215 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
216 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
217 mount_gh);
218 if (error) {
219 fs_err(sdp, "can't acquire mount glock: %d\n", error);
220 goto fail;
221 }
222
223 error = gfs2_glock_nq_num(sdp,
224 GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
225 LM_ST_SHARED,
226 LM_FLAG_NOEXP | GL_EXACT | GL_NEVER_RECURSE,
227 &sdp->sd_live_gh);
228 if (error) {
229 fs_err(sdp, "can't acquire live glock: %d\n", error);
230 goto fail_mount;
231 }
232
233 error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
234 CREATE, &sdp->sd_rename_gl);
235 if (error) {
236 fs_err(sdp, "can't create rename glock: %d\n", error);
237 goto fail_live;
238 }
239
240 error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
241 CREATE, &sdp->sd_trans_gl);
242 if (error) {
243 fs_err(sdp, "can't create transaction glock: %d\n", error);
244 goto fail_rename;
245 }
246 set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
247
248 return 0;
249
250 fail_trans:
251 gfs2_glock_put(sdp->sd_trans_gl);
252
253 fail_rename:
254 gfs2_glock_put(sdp->sd_rename_gl);
255
256 fail_live:
257 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
258
259 fail_mount:
260 gfs2_glock_dq_uninit(mount_gh);
261
262 fail:
263 while (sdp->sd_glockd_num--)
264 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
265
266 kthread_stop(sdp->sd_scand_process);
267
268 return error;
269}
270
271static struct inode *gfs2_lookup_root(struct gfs2_sbd *sdp,
272 const struct gfs2_inum *inum)
273{
274 int error;
275 struct gfs2_glock *gl;
276 struct gfs2_inode *ip;
277 struct inode *inode;
278
279 error = gfs2_glock_get(sdp, inum->no_addr,
280 &gfs2_inode_glops, CREATE, &gl);
281 if (!error) {
282 error = gfs2_inode_get(gl, inum, CREATE, &ip);
283 if (!error) {
284 gfs2_inode_min_init(ip, DT_DIR);
285 inode = gfs2_ip2v(ip);
286 gfs2_inode_put(ip);
287 gfs2_glock_put(gl);
288 return inode;
289 }
290 gfs2_glock_put(gl);
291 }
292 return ERR_PTR(error);
293}
294
/**
 * init_sb - read the real on-disk superblock and set up the root dentry
 * @sdp: the filesystem
 * @silent: don't complain if it isn't a GFS2 filesystem
 * @undo: if true, do nothing (there is currently nothing to undo here)
 *
 * Validates the on-disk block size against the device sector size and
 * the machine page size, switches the buffer cache to the real block
 * size, and reads in the root (or, for gfs2meta, the master) directory.
 *
 * Returns: errno
 */
static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
{
	struct super_block *sb = sdp->sd_vfs;
	struct gfs2_holder sb_gh;
	struct gfs2_inum *inum;
	struct inode *inode;
	int error = 0;

	if (undo) {
		return 0;
	}

	error = gfs2_glock_nq_num(sdp,
				  GFS2_SB_LOCK, &gfs2_meta_glops,
				  LM_ST_SHARED, 0, &sb_gh);
	if (error) {
		fs_err(sdp, "can't acquire superblock glock: %d\n", error);
		return error;
	}

	error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
	if (error) {
		fs_err(sdp, "can't read superblock: %d\n", error);
		goto out;
	}

	/* Set up the buffer cache and SB for real */
	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
		error = -EINVAL;
		fs_err(sdp, "FS block size (%u) is too small for device "
		       "block size (%u)\n",
		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
		goto out;
	}
	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
		error = -EINVAL;
		fs_err(sdp, "FS block size (%u) is too big for machine "
		       "page size (%u)\n",
		       sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
		goto out;
	}

	/* Get rid of buffers from the original (probe-time) block size */
	sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
	sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;

	sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);

	/* Get the root inode; the gfs2meta filesystem type mounts the
	   hidden master directory as its root instead */
	inum = &sdp->sd_sb.sb_root_dir;
	if (sb->s_type == &gfs2meta_fs_type)
		inum = &sdp->sd_sb.sb_master_dir;
	inode = gfs2_lookup_root(sdp, inum);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		fs_err(sdp, "can't read in root inode: %d\n", error);
		goto out;
	}

	sb->s_root = d_alloc_root(inode);
	if (!sb->s_root) {
		fs_err(sdp, "can't get root dentry\n");
		error = -ENOMEM;
		iput(inode);
	}
out:
	gfs2_glock_dq_uninit(&sb_gh);
	return error;
}
364
365static int init_journal(struct gfs2_sbd *sdp, int undo)
366{
367 struct gfs2_holder ji_gh;
368 struct task_struct *p;
369 struct gfs2_inode *ip;
370 int jindex = 1;
371 int error = 0;
372
373 if (undo) {
374 jindex = 0;
375 goto fail_recoverd;
376 }
377
378 sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
379 if (IS_ERR(sdp->sd_jindex)) {
380 fs_err(sdp, "can't lookup journal index: %d\n", error);
381 return PTR_ERR(sdp->sd_jindex);
382 }
383 ip = sdp->sd_jindex->u.generic_ip;
384 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
385
386 /* Load in the journal index special file */
387
388 error = gfs2_jindex_hold(sdp, &ji_gh);
389 if (error) {
390 fs_err(sdp, "can't read journal index: %d\n", error);
391 goto fail;
392 }
393
394 error = -EINVAL;
395 if (!gfs2_jindex_size(sdp)) {
396 fs_err(sdp, "no journals!\n");
397 goto fail_jindex;
398 }
399
400 if (sdp->sd_args.ar_spectator) {
401 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
402 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
403 } else {
404 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
405 fs_err(sdp, "can't mount journal #%u\n",
406 sdp->sd_lockstruct.ls_jid);
407 fs_err(sdp, "there are only %u journals (0 - %u)\n",
408 gfs2_jindex_size(sdp),
409 gfs2_jindex_size(sdp) - 1);
410 goto fail_jindex;
411 }
412 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
413
414 error = gfs2_glock_nq_num(sdp,
415 sdp->sd_lockstruct.ls_jid,
416 &gfs2_journal_glops,
417 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
418 &sdp->sd_journal_gh);
419 if (error) {
420 fs_err(sdp, "can't acquire journal glock: %d\n", error);
421 goto fail_jindex;
422 }
423
424 ip = sdp->sd_jdesc->jd_inode->u.generic_ip;
425 error = gfs2_glock_nq_init(ip->i_gl,
426 LM_ST_SHARED,
427 LM_FLAG_NOEXP | GL_EXACT,
428 &sdp->sd_jinode_gh);
429 if (error) {
430 fs_err(sdp, "can't acquire journal inode glock: %d\n",
431 error);
432 goto fail_journal_gh;
433 }
434
435 error = gfs2_jdesc_check(sdp->sd_jdesc);
436 if (error) {
437 fs_err(sdp, "my journal (%u) is bad: %d\n",
438 sdp->sd_jdesc->jd_jid, error);
439 goto fail_jinode_gh;
440 }
441 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
442 }
443
444 if (sdp->sd_lockstruct.ls_first) {
445 unsigned int x;
446 for (x = 0; x < sdp->sd_journals; x++) {
447 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
448 WAIT);
449 if (error) {
450 fs_err(sdp, "error recovering journal %u: %d\n",
451 x, error);
452 goto fail_jinode_gh;
453 }
454 }
455
456 gfs2_lm_others_may_mount(sdp);
457 } else if (!sdp->sd_args.ar_spectator) {
458 error = gfs2_recover_journal(sdp->sd_jdesc, WAIT);
459 if (error) {
460 fs_err(sdp, "error recovering my journal: %d\n", error);
461 goto fail_jinode_gh;
462 }
463 }
464
465 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
466 gfs2_glock_dq_uninit(&ji_gh);
467 jindex = 0;
468
469 /* Disown my Journal glock */
470
471 sdp->sd_journal_gh.gh_owner = NULL;
472 sdp->sd_jinode_gh.gh_owner = NULL;
473
474 p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
475 error = IS_ERR(p);
476 if (error) {
477 fs_err(sdp, "can't start recoverd thread: %d\n", error);
478 goto fail_jinode_gh;
479 }
480 sdp->sd_recoverd_process = p;
481
482 return 0;
483
484 fail_recoverd:
485 kthread_stop(sdp->sd_recoverd_process);
486
487 fail_jinode_gh:
488 if (!sdp->sd_args.ar_spectator)
489 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
490
491 fail_journal_gh:
492 if (!sdp->sd_args.ar_spectator)
493 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
494
495 fail_jindex:
496 gfs2_jindex_free(sdp);
497 if (jindex)
498 gfs2_glock_dq_uninit(&ji_gh);
499
500 fail:
501 iput(sdp->sd_jindex);
502
503 return error;
504}
505
506
/**
 * init_inodes - read in the system inodes from the master directory
 * @sdp: the filesystem
 * @undo: if true, release everything a prior successful call acquired
 *
 * Looks up the master directory, then the journal machinery, then the
 * inum, statfs, rindex and quota system inodes.  The goto chain below
 * the success return doubles as the undo path (entered at fail_qinode).
 *
 * Returns: errno
 */
static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
	int error = 0;
	struct gfs2_inode *ip;
	struct inode *inode;

	if (undo)
		goto fail_qinode;

	inode = gfs2_lookup_root(sdp, &sdp->sd_sb.sb_master_dir);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		fs_err(sdp, "can't read in master directory: %d\n", error);
		goto fail;
	}
	sdp->sd_master_dir = inode;

	error = init_journal(sdp, undo);
	if (error)
		goto fail_master;

	/* Read in the master inode number inode */
	sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
	if (IS_ERR(sdp->sd_inum_inode)) {
		error = PTR_ERR(sdp->sd_inum_inode);
		fs_err(sdp, "can't read in inum inode: %d\n", error);
		goto fail_journal;
	}


	/* Read in the master statfs inode */
	sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
	if (IS_ERR(sdp->sd_statfs_inode)) {
		error = PTR_ERR(sdp->sd_statfs_inode);
		fs_err(sdp, "can't read in statfs inode: %d\n", error);
		goto fail_inum;
	}

	/* Read in the resource index inode */
	sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
	if (IS_ERR(sdp->sd_rindex)) {
		error = PTR_ERR(sdp->sd_rindex);
		fs_err(sdp, "can't get resource index inode: %d\n", error);
		goto fail_statfs;
	}
	ip = sdp->sd_rindex->u.generic_ip;
	set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
	/* Force the first rindex read to see the glock as out of date */
	sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1;

	/* Read in the quota inode */
	sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
	if (IS_ERR(sdp->sd_quota_inode)) {
		error = PTR_ERR(sdp->sd_quota_inode);
		fs_err(sdp, "can't get quota file inode: %d\n", error);
		goto fail_rindex;
	}
	return 0;

/* Unwind path; also the entry point for undo */
fail_qinode:
	iput(sdp->sd_quota_inode);

fail_rindex:
	gfs2_clear_rgrpd(sdp);
	iput(sdp->sd_rindex);

fail_statfs:
	iput(sdp->sd_statfs_inode);

fail_inum:
	iput(sdp->sd_inum_inode);
fail_journal:
	init_journal(sdp, UNDO);
fail_master:
	iput(sdp->sd_master_dir);
fail:
	return error;
}
584
/**
 * init_per_node - look up and lock this node's per-journal system files
 * @sdp: the filesystem
 * @undo: if true, release everything a prior successful call acquired
 *
 * Each journal has its own inum_range, statfs_change, unlinked_tag and
 * quota_change file under the "per_node" directory; this node's files
 * are selected by jd_jid and locked exclusively for the mount lifetime.
 * Spectator mounts have no journal and skip all of this.
 *
 * Returns: errno
 */
static int init_per_node(struct gfs2_sbd *sdp, int undo)
{
	struct inode *pn = NULL;
	char buf[30];	/* big enough for "quota_change" + a u32 jid */
	int error = 0;
	struct gfs2_inode *ip;

	if (sdp->sd_args.ar_spectator)
		return 0;

	if (undo)
		goto fail_qc_gh;

	pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
	if (IS_ERR(pn)) {
		error = PTR_ERR(pn);
		fs_err(sdp, "can't find per_node directory: %d\n", error);
		return error;
	}

	sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_ir_inode)) {
		error = PTR_ERR(sdp->sd_ir_inode);
		fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
		goto fail;
	}

	sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_sc_inode)) {
		error = PTR_ERR(sdp->sd_sc_inode);
		fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
		goto fail_ir_i;
	}

	sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_ut_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_ut_inode)) {
		error = PTR_ERR(sdp->sd_ut_inode);
		fs_err(sdp, "can't find local \"ut\" file: %d\n", error);
		goto fail_sc_i;
	}

	sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
	sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
	if (IS_ERR(sdp->sd_qc_inode)) {
		error = PTR_ERR(sdp->sd_qc_inode);
		fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
		goto fail_ut_i;
	}

	/* Done with the per_node directory itself */
	iput(pn);
	pn = NULL;

	/* Lock each file exclusively; held until unmount */
	ip = sdp->sd_ir_inode->u.generic_ip;
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
				   &sdp->sd_ir_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
		goto fail_qc_i;
	}

	ip = sdp->sd_sc_inode->u.generic_ip;
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
				   &sdp->sd_sc_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
		goto fail_ir_gh;
	}

	ip = sdp->sd_ut_inode->u.generic_ip;
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
				   &sdp->sd_ut_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"ut\" file: %d\n", error);
		goto fail_sc_gh;
	}

	ip = sdp->sd_qc_inode->u.generic_ip;
	error = gfs2_glock_nq_init(ip->i_gl,
				   LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
				   &sdp->sd_qc_gh);
	if (error) {
		fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
		goto fail_ut_gh;
	}

	return 0;

	/* Unwind path; also the entry point for undo */
 fail_qc_gh:
	gfs2_glock_dq_uninit(&sdp->sd_qc_gh);

 fail_ut_gh:
	gfs2_glock_dq_uninit(&sdp->sd_ut_gh);

 fail_sc_gh:
	gfs2_glock_dq_uninit(&sdp->sd_sc_gh);

 fail_ir_gh:
	gfs2_glock_dq_uninit(&sdp->sd_ir_gh);

 fail_qc_i:
	iput(sdp->sd_qc_inode);

 fail_ut_i:
	iput(sdp->sd_ut_inode);

 fail_sc_i:
	iput(sdp->sd_sc_inode);

 fail_ir_i:
	iput(sdp->sd_ir_inode);

 fail:
	if (pn)
		iput(pn);
	return error;
}
707
708static int init_threads(struct gfs2_sbd *sdp, int undo)
709{
710 struct task_struct *p;
711 int error = 0;
712
713 if (undo)
714 goto fail_inoded;
715
716 sdp->sd_log_flush_time = jiffies;
717 sdp->sd_jindex_refresh_time = jiffies;
718
719 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
720 error = IS_ERR(p);
721 if (error) {
722 fs_err(sdp, "can't start logd thread: %d\n", error);
723 return error;
724 }
725 sdp->sd_logd_process = p;
726
727 sdp->sd_statfs_sync_time = jiffies;
728 sdp->sd_quota_sync_time = jiffies;
729
730 p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
731 error = IS_ERR(p);
732 if (error) {
733 fs_err(sdp, "can't start quotad thread: %d\n", error);
734 goto fail;
735 }
736 sdp->sd_quotad_process = p;
737
738 p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded");
739 error = IS_ERR(p);
740 if (error) {
741 fs_err(sdp, "can't start inoded thread: %d\n", error);
742 goto fail_quotad;
743 }
744 sdp->sd_inoded_process = p;
745
746 return 0;
747
748 fail_inoded:
749 kthread_stop(sdp->sd_inoded_process);
750
751 fail_quotad:
752 kthread_stop(sdp->sd_quotad_process);
753
754 fail:
755 kthread_stop(sdp->sd_logd_process);
756
757 return error;
758}
759
760/**
761 * fill_super - Read in superblock
762 * @sb: The VFS superblock
763 * @data: Mount options
764 * @silent: Don't complain if it's not a GFS2 filesystem
765 *
766 * Returns: errno
767 */
768
static int fill_super(struct super_block *sb, void *data, int silent)
{
	struct gfs2_sbd *sdp;
	struct gfs2_holder mount_gh;
	int error;

	sdp = init_sbd(sb);
	if (!sdp) {
		printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
		return -ENOMEM;
	}

	error = gfs2_mount_args(sdp, (char *)data, 0);
	if (error) {
		printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
		goto fail;
	}

	init_vfs(sb, SDF_NOATIME);

	/* Set up the buffer cache and fill in some fake block size values
	   to allow us to read-in the on-disk superblock. */
	sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;

	error = init_names(sdp, silent);
	if (error)
		goto fail;

	error = gfs2_sys_fs_add(sdp);
	if (error)
		goto fail;

	error = gfs2_lm_mount(sdp, silent);
	if (error)
		goto fail_sys;

	/* Each init_*(…, DO) below has a matching (…, UNDO) call in the
	   reverse-order unwind chain at the bottom of this function */
	error = init_locking(sdp, &mount_gh, DO);
	if (error)
		goto fail_lm;

	error = init_sb(sdp, silent, DO);
	if (error)
		goto fail_locking;

	error = init_inodes(sdp, DO);
	if (error)
		goto fail_sb;

	error = init_per_node(sdp, DO);
	if (error)
		goto fail_inodes;

	error = gfs2_statfs_init(sdp);
	if (error) {
		fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
		goto fail_per_node;
	}

	error = init_threads(sdp, DO);
	if (error)
		goto fail_per_node;

	if (!(sb->s_flags & MS_RDONLY)) {
		error = gfs2_make_fs_rw(sdp);
		if (error) {
			fs_err(sdp, "can't make FS RW: %d\n", error);
			goto fail_threads;
		}
	}

	/* Mount is complete; drop the exclusive mount glock */
	gfs2_glock_dq_uninit(&mount_gh);

	return 0;

 fail_threads:
	init_threads(sdp, UNDO);

 fail_per_node:
	init_per_node(sdp, UNDO);

 fail_inodes:
	init_inodes(sdp, UNDO);

 fail_sb:
	init_sb(sdp, 0, UNDO);

 fail_locking:
	init_locking(sdp, &mount_gh, UNDO);

 fail_lm:
	gfs2_gl_hash_clear(sdp, WAIT);
	gfs2_lm_unmount(sdp);
	/* Retry until every inode created during the partial mount is gone */
	while (invalidate_inodes(sb))
		yield();

 fail_sys:
	gfs2_sys_fs_del(sdp);

 fail:
	vfree(sdp);
	sb->s_fs_info = NULL;

	return error;
}
877
/* get_sb callback: mount a block device using fill_super above */
static struct super_block *gfs2_get_sb(struct file_system_type *fs_type,
				       int flags, const char *dev_name,
				       void *data)
{
	return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
}
884
/* kill_sb callback: plain block-device teardown, no extra GFS2 work */
static void gfs2_kill_sb(struct super_block *sb)
{
	kill_block_super(sb);
}
889
/* The normal "gfs2" filesystem type (mounts the root directory) */
struct file_system_type gfs2_fs_type = {
	.name = "gfs2",
	.fs_flags = FS_REQUIRES_DEV,
	.get_sb = gfs2_get_sb,
	.kill_sb = gfs2_kill_sb,
	.owner = THIS_MODULE,
};
897
/* The "gfs2meta" type: same code paths, but init_sb() mounts the hidden
   master (metadata) directory as the root instead of the root directory */
struct file_system_type gfs2meta_fs_type = {
	.name = "gfs2meta",
	.fs_flags = FS_REQUIRES_DEV,
	.get_sb = gfs2_get_sb,
	.kill_sb = gfs2_kill_sb,
	.owner = THIS_MODULE,
};
905
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
new file mode 100644
index 000000000000..c6452874483d
--- /dev/null
+++ b/fs/gfs2/ops_fstype.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __OPS_FSTYPE_DOT_H__
#define __OPS_FSTYPE_DOT_H__

/* Filesystem types registered by the GFS2 module (see ops_fstype.c) */
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;

#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
new file mode 100644
index 000000000000..1e2b709711ae
--- /dev/null
+++ b/fs/gfs2/ops_inode.c
@@ -0,0 +1,1197 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/namei.h>
16#include <linux/utsname.h>
17#include <linux/mm.h>
18#include <linux/xattr.h>
19#include <linux/posix_acl.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <asm/semaphore.h>
23#include <asm/uaccess.h>
24
25#include "gfs2.h"
26#include "lm_interface.h"
27#include "incore.h"
28#include "acl.h"
29#include "bmap.h"
30#include "dir.h"
31#include "eaops.h"
32#include "eattr.h"
33#include "glock.h"
34#include "inode.h"
35#include "meta_io.h"
36#include "ops_dentry.h"
37#include "ops_inode.h"
38#include "page.h"
39#include "quota.h"
40#include "rgrp.h"
41#include "trans.h"
42#include "unlinked.h"
43#include "util.h"
44
45/**
46 * gfs2_create - Create a file
47 * @dir: The directory in which to create the file
48 * @dentry: The dentry of the new file
49 * @mode: The mode of the new file
50 *
51 * Returns: errno
52 */
53
static int gfs2_create(struct inode *dir, struct dentry *dentry,
		       int mode, struct nameidata *nd)
{
	struct gfs2_inode *dip = dir->u.generic_ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_holder ghs[2];
	struct inode *inode;
	int new = 1;	/* cleared if we race and pick up an existing inode */

	gfs2_holder_init(dip->i_gl, 0, 0, ghs);

	/* Loop to handle the create/lookup race: another node may create
	   the name between our failed create and our lookup */
	for (;;) {
		inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
		if (!IS_ERR(inode)) {
			/* On success gfs2_createi leaves a transaction,
			   quota lock and allocation held, and both glocks
			   acquired in ghs — release them all here */
			gfs2_trans_end(sdp);
			if (dip->i_alloc.al_rgd)
				gfs2_inplace_release(dip);
			gfs2_quota_unlock(dip);
			gfs2_alloc_put(dip);
			gfs2_glock_dq_uninit_m(2, ghs);
			break;
		} else if (PTR_ERR(inode) != -EEXIST ||
			   (nd->intent.open.flags & O_EXCL)) {
			/* Hard failure, or O_EXCL means EEXIST is final */
			gfs2_holder_uninit(ghs);
			return PTR_ERR(inode);
		}

		/* EEXIST without O_EXCL: try to pick up the winner's inode;
		   a NULL result means it vanished again, so retry create */
		inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
		if (inode) {
			if (!IS_ERR(inode)) {
				new = 0;
				gfs2_holder_uninit(ghs);
				break;
			} else {
				gfs2_holder_uninit(ghs);
				return PTR_ERR(inode);
			}
		}
	}

	d_instantiate(dentry, inode);
	if (new)
		mark_inode_dirty(inode);

	return 0;
}
100
101/**
102 * gfs2_lookup - Look up a filename in a directory and return its inode
103 * @dir: The directory inode
104 * @dentry: The dentry of the new inode
105 * @nd: passed from Linux VFS, ignored by us
106 *
107 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
108 *
109 * Returns: errno
110 */
111
112static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
113 struct nameidata *nd)
114{
115 struct inode *inode = NULL;
116
117 dentry->d_op = &gfs2_dops;
118
119 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
120 if (inode && IS_ERR(inode))
121 return ERR_PTR(PTR_ERR(inode));
122
123 if (inode)
124 return d_splice_alias(inode, dentry);
125 d_add(dentry, inode);
126
127 return NULL;
128}
129
130/**
131 * gfs2_link - Link to a file
132 * @old_dentry: The inode to link
133 * @dir: Add link to this directory
134 * @dentry: The name of the link
135 *
136 * Link the inode in "old_dentry" into the directory "dir" with the
137 * name in "dentry".
138 *
139 * Returns: errno
140 */
141
static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
		     struct dentry *dentry)
{
	struct gfs2_inode *dip = dir->u.generic_ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct inode *inode = old_dentry->d_inode;
	struct gfs2_inode *ip = inode->u.generic_ip;
	struct gfs2_holder ghs[2];
	int alloc_required;
	int error;

	/* Hard links to directories are not allowed */
	if (S_ISDIR(ip->i_di.di_mode))
		return -EPERM;

	/* Lock both the directory and the target inode exclusively;
	   gfs2_glock_nq_m orders the locks to avoid deadlock */
	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out;

	error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL);
	if (error)
		goto out_gunlock;

	/* The new name must not already exist */
	error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
	switch (error) {
	case -ENOENT:
		break;
	case 0:
		error = -EEXIST;
		/* fall through */
	default:
		goto out_gunlock;
	}

	/* Sanity limits: live directory, entry count and link count
	   must not be saturated, and the inode must be writable */
	error = -EINVAL;
	if (!dip->i_di.di_nlink)
		goto out_gunlock;
	error = -EFBIG;
	if (dip->i_di.di_entries == (uint32_t)-1)
		goto out_gunlock;
	error = -EPERM;
	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		goto out_gunlock;
	error = -EINVAL;
	if (!ip->i_di.di_nlink)
		goto out_gunlock;
	error = -EMLINK;
	if (ip->i_di.di_nlink == (uint32_t)-1)
		goto out_gunlock;

	/* Will adding the entry require new directory blocks? */
	alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
	if (error < 0)
		goto out_gunlock;
	error = 0;

	if (alloc_required) {
		/* Reserve quota, blocks and a larger transaction */
		struct gfs2_alloc *al = gfs2_alloc_get(dip);

		error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
		if (error)
			goto out_alloc;

		error = gfs2_quota_check(dip, dip->i_di.di_uid,
					 dip->i_di.di_gid);
		if (error)
			goto out_gunlock_q;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve(dip);
		if (error)
			goto out_gunlock_q;

		error = gfs2_trans_begin(sdp,
					 sdp->sd_max_dirres +
					 al->al_rgd->rd_ri.ri_length +
					 2 * RES_DINODE + RES_STATFS +
					 RES_QUOTA, 0);
		if (error)
			goto out_ipres;
	} else {
		error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
		if (error)
			goto out_ipres;
	}

	error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
			     IF2DT(ip->i_di.di_mode));
	if (error)
		goto out_end_trans;

	error = gfs2_change_nlink(ip, +1);

 out_end_trans:
	gfs2_trans_end(sdp);

 out_ipres:
	if (alloc_required)
		gfs2_inplace_release(dip);

 out_gunlock_q:
	if (alloc_required)
		gfs2_quota_unlock(dip);

 out_alloc:
	if (alloc_required)
		gfs2_alloc_put(dip);

 out_gunlock:
	gfs2_glock_dq_m(2, ghs);

 out:
	gfs2_holder_uninit(ghs);
	gfs2_holder_uninit(ghs + 1);

	/* Only instantiate the dentry once everything has succeeded */
	if (!error) {
		atomic_inc(&inode->i_count);
		d_instantiate(dentry, inode);
		mark_inode_dirty(inode);
	}

	return error;
}
266
267/**
268 * gfs2_unlink - Unlink a file
269 * @dir: The inode of the directory containing the file to unlink
270 * @dentry: The file itself
271 *
272 * Unlink a file. Call gfs2_unlinki()
273 *
274 * Returns: errno
275 */
276
static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
{
	struct gfs2_inode *dip = dir->u.generic_ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
	struct gfs2_unlinked *ul;
	struct gfs2_holder ghs[2];
	int error;

	/* Reserve an unlinked-list slot before taking any glocks */
	error = gfs2_unlinked_get(sdp, &ul);
	if (error)
		return error;

	/* Lock the directory and the victim inode exclusively */
	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out;

	/* Permission, sticky-bit and sanity checks */
	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
	if (error)
		goto out_gunlock;

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF +
				 RES_UNLINKED, 0);
	if (error)
		goto out_gunlock;

	/* Remove the directory entry and queue the inode for dealloc */
	error = gfs2_unlinki(dip, &dentry->d_name, ip,ul);

	gfs2_trans_end(sdp);

 out_gunlock:
	gfs2_glock_dq_m(2, ghs);

 out:
	gfs2_holder_uninit(ghs);
	gfs2_holder_uninit(ghs + 1);

	gfs2_unlinked_put(sdp, ul);

	return error;
}
321
322/**
323 * gfs2_symlink - Create a symlink
324 * @dir: The directory to create the symlink in
325 * @dentry: The dentry to put the symlink in
326 * @symname: The thing which the link points to
327 *
328 * Returns: errno
329 */
330
static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
			const char *symname)
{
	struct gfs2_inode *dip = dir->u.generic_ip, *ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_holder ghs[2];
	struct inode *inode;
	struct buffer_head *dibh;
	int size;
	int error;

	/* Must be stuffed with a null terminator for gfs2_follow_link();
	   the target must fit inside the dinode block itself */
	size = strlen(symname);
	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
		return -ENAMETOOLONG;

	gfs2_holder_init(dip->i_gl, 0, 0, ghs);

	inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
	if (IS_ERR(inode)) {
		gfs2_holder_uninit(ghs);
		return PTR_ERR(inode);
	}

	/* gfs2_createi filled ghs[1] with the new inode's glock */
	ip = ghs[1].gh_gl->gl_object;

	ip->i_di.di_size = size;

	/* Copy the target string into the dinode block; a read failure
	   here withdraws the filesystem rather than returning an error */
	error = gfs2_meta_inode_buffer(ip, &dibh);

	if (!gfs2_assert_withdraw(sdp, !error)) {
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
		       size);
		brelse(dibh);
	}

	/* Release the transaction/alloc/quota state gfs2_createi left held */
	gfs2_trans_end(sdp);
	if (dip->i_alloc.al_rgd)
		gfs2_inplace_release(dip);
	gfs2_quota_unlock(dip);
	gfs2_alloc_put(dip);

	gfs2_glock_dq_uninit_m(2, ghs);

	d_instantiate(dentry, inode);
	mark_inode_dirty(inode);

	return 0;
}
381
/**
 * gfs2_mkdir - Make a directory
 * @dir: The parent directory of the new one
 * @dentry: The dentry of the new directory
 * @mode: The mode of the new directory
 *
 * Returns: errno
 */

static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct gfs2_inode *dip = dir->u.generic_ip, *ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_holder ghs[2];
	struct inode *inode;
	struct buffer_head *dibh;
	int error;

	gfs2_holder_init(dip->i_gl, 0, 0, ghs);

	/* On success gfs2_createi() leaves an open transaction, a quota
	   lock, and an allocation on @dip; all are torn down below */
	inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
	if (IS_ERR(inode)) {
		gfs2_holder_uninit(ghs);
		return PTR_ERR(inode);
	}

	ip = ghs[1].gh_gl->gl_object;

	/* New directory: nlink 2 for "." and the parent's entry; entries
	   are journaled data stuffed into the dinode block itself */
	ip->i_di.di_nlink = 2;
	ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
	ip->i_di.di_flags |= GFS2_DIF_JDATA;
	ip->i_di.di_payload_format = GFS2_FORMAT_DE;
	ip->i_di.di_entries = 2;

	error = gfs2_meta_inode_buffer(ip, &dibh);

	if (!gfs2_assert_withdraw(sdp, !error)) {
		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
		struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
		struct qstr str;

		/* Build the "." entry directly after the dinode */
		gfs2_str2qstr(&str, ".");
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent);
		dent->de_inum = di->di_num; /* already GFS2 endian */
		dent->de_type = DT_DIR;
		di->di_entries = cpu_to_be32(1);

		/* The ".." entry consumes the rest of the block */
		gfs2_str2qstr(&str, "..");
		dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
		gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);

		gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum);
		dent->de_type = DT_DIR;

		/* Re-write the dinode last so di_entries etc. are final */
		gfs2_dinode_out(&ip->i_di, (char *)di);

		brelse(dibh);
	}

	/* The parent gains a link from the child's ".." */
	error = gfs2_change_nlink(dip, +1);
	gfs2_assert_withdraw(sdp, !error); /* dip already pinned */

	gfs2_trans_end(sdp);
	if (dip->i_alloc.al_rgd)
		gfs2_inplace_release(dip);
	gfs2_quota_unlock(dip);
	gfs2_alloc_put(dip);

	gfs2_glock_dq_uninit_m(2, ghs);

	d_instantiate(dentry, inode);
	mark_inode_dirty(inode);

	return 0;
}
458
/**
 * gfs2_rmdir - Remove a directory
 * @dir: The parent directory of the directory to be removed
 * @dentry: The dentry of the directory to remove
 *
 * Remove a directory. Call gfs2_rmdiri()
 *
 * Returns: errno
 */

static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct gfs2_inode *dip = dir->u.generic_ip;
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
	struct gfs2_unlinked *ul;
	struct gfs2_holder ghs[2];
	int error;

	/* Reserve an unlinked-inode tag before taking any glocks */
	error = gfs2_unlinked_get(sdp, &ul);
	if (error)
		return error;

	/* Lock the parent and the victim directory exclusively, as a set */
	gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);

	error = gfs2_glock_nq_m(2, ghs);
	if (error)
		goto out;

	error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
	if (error)
		goto out_gunlock;

	/* A valid directory always holds at least "." and ".." */
	if (ip->i_di.di_entries < 2) {
		if (gfs2_consist_inode(ip))
			gfs2_dinode_print(&ip->i_di);
		error = -EIO;
		goto out_gunlock;
	}
	/* Anything beyond "." and ".." means the directory isn't empty */
	if (ip->i_di.di_entries > 2) {
		error = -ENOTEMPTY;
		goto out_gunlock;
	}

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF +
				 RES_UNLINKED, 0);
	if (error)
		goto out_gunlock;

	error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul);

	gfs2_trans_end(sdp);

 out_gunlock:
	gfs2_glock_dq_m(2, ghs);

 out:
	/* Holders were initialized above, so uninit is safe on all paths */
	gfs2_holder_uninit(ghs);
	gfs2_holder_uninit(ghs + 1);

	gfs2_unlinked_put(sdp, ul);

	return error;
}
524
525/**
526 * gfs2_mknod - Make a special file
527 * @dir: The directory in which the special file will reside
528 * @dentry: The dentry of the special file
529 * @mode: The mode of the special file
530 * @rdev: The device specification of the special file
531 *
532 */
533
534static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
535 dev_t dev)
536{
537 struct gfs2_inode *dip = dir->u.generic_ip, *ip;
538 struct gfs2_sbd *sdp = dip->i_sbd;
539 struct gfs2_holder ghs[2];
540 struct inode *inode;
541 struct buffer_head *dibh;
542 uint32_t major = 0, minor = 0;
543 int error;
544
545 switch (mode & S_IFMT) {
546 case S_IFBLK:
547 case S_IFCHR:
548 major = MAJOR(dev);
549 minor = MINOR(dev);
550 break;
551 case S_IFIFO:
552 case S_IFSOCK:
553 break;
554 default:
555 return -EOPNOTSUPP;
556 };
557
558 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
559
560 inode = gfs2_createi(ghs, &dentry->d_name, mode);
561 if (IS_ERR(inode)) {
562 gfs2_holder_uninit(ghs);
563 return PTR_ERR(inode);
564 }
565
566 ip = ghs[1].gh_gl->gl_object;
567
568 ip->i_di.di_major = major;
569 ip->i_di.di_minor = minor;
570
571 error = gfs2_meta_inode_buffer(ip, &dibh);
572
573 if (!gfs2_assert_withdraw(sdp, !error)) {
574 gfs2_dinode_out(&ip->i_di, dibh->b_data);
575 brelse(dibh);
576 }
577
578 gfs2_trans_end(sdp);
579 if (dip->i_alloc.al_rgd)
580 gfs2_inplace_release(dip);
581 gfs2_quota_unlock(dip);
582 gfs2_alloc_put(dip);
583
584 gfs2_glock_dq_uninit_m(2, ghs);
585
586 d_instantiate(dentry, inode);
587 mark_inode_dirty(inode);
588
589 return 0;
590}
591
/**
 * gfs2_rename - Rename a file
 * @odir: Parent directory of old file name
 * @odentry: The old dentry of the file
 * @ndir: Parent directory of new file name
 * @ndentry: The new dentry of the file
 *
 * Returns: errno
 */

static int gfs2_rename(struct inode *odir, struct dentry *odentry,
		       struct inode *ndir, struct dentry *ndentry)
{
	struct gfs2_inode *odip = odir->u.generic_ip;
	struct gfs2_inode *ndip = ndir->u.generic_ip;
	struct gfs2_inode *ip = odentry->d_inode->u.generic_ip;
	struct gfs2_inode *nip = NULL;
	struct gfs2_sbd *sdp = odip->i_sbd;
	struct gfs2_unlinked *ul;
	struct gfs2_holder ghs[4], r_gh;
	unsigned int num_gh;
	int dir_rename = 0;
	int alloc_required;
	unsigned int x;
	int error;

	if (ndentry->d_inode) {
		nip = ndentry->d_inode->u.generic_ip;
		/* Renaming an inode onto itself is a no-op */
		if (ip == nip)
			return 0;
	}

	/* Reserve an unlinked-inode tag up front in case a target gets
	   unlinked below */
	error = gfs2_unlinked_get(sdp, &ul);
	if (error)
		return error;

	/* Make sure we aren't trying to move a dirctory into it's subdir */

	if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
		dir_rename = 1;

		/* The filesystem-wide rename glock serializes
		   cross-directory directory renames cluster-wide */
		error = gfs2_glock_nq_init(sdp->sd_rename_gl,
					   LM_ST_EXCLUSIVE, 0,
					   &r_gh);
		if (error)
			goto out;

		error = gfs2_ok_to_move(ip, ndip);
		if (error)
			goto out_gunlock_r;
	}

	/* Lock the three (or four, if a target exists) inodes as a set */
	gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
	gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
	num_gh = 3;

	if (nip)
		gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);

	error = gfs2_glock_nq_m(num_gh, ghs);
	if (error)
		goto out_uninit;

	/* Check out the old directory */

	error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
	if (error)
		goto out_gunlock;

	/* Check out the new directory */

	if (nip) {
		error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
		if (error)
			goto out_gunlock;

		/* An existing target directory must be empty (only "."
		   and "..") to be replaced */
		if (S_ISDIR(nip->i_di.di_mode)) {
			if (nip->i_di.di_entries < 2) {
				if (gfs2_consist_inode(nip))
					gfs2_dinode_print(&nip->i_di);
				error = -EIO;
				goto out_gunlock;
			}
			if (nip->i_di.di_entries > 2) {
				error = -ENOTEMPTY;
				goto out_gunlock;
			}
		}
	} else {
		error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL);
		if (error)
			goto out_gunlock;

		/* The new name must not already exist */
		error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
		switch (error) {
		case -ENOENT:
			error = 0;
			break;
		case 0:
			error = -EEXIST;
			/* fall through */
		default:
			goto out_gunlock;
		};

		if (odip != ndip) {
			if (!ndip->i_di.di_nlink) {
				/* New parent is being removed */
				error = -EINVAL;
				goto out_gunlock;
			}
			if (ndip->i_di.di_entries == (uint32_t)-1) {
				error = -EFBIG;
				goto out_gunlock;
			}
			if (S_ISDIR(ip->i_di.di_mode) &&
			    ndip->i_di.di_nlink == (uint32_t)-1) {
				error = -EMLINK;
				goto out_gunlock;
			}
		}
	}

	/* Check out the dir to be renamed */

	if (dir_rename) {
		error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL);
		if (error)
			goto out_gunlock;
	}

	/* >= 0: whether adding the new entry needs block allocation;
	   < 0: errno */
	alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
	if (error < 0)
		goto out_gunlock;
	error = 0;

	if (alloc_required) {
		struct gfs2_alloc *al = gfs2_alloc_get(ndip);

		error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
		if (error)
			goto out_alloc;

		error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
					 ndip->i_di.di_gid);
		if (error)
			goto out_gunlock_q;

		al->al_requested = sdp->sd_max_dirres;

		error = gfs2_inplace_reserve(ndip);
		if (error)
			goto out_gunlock_q;

		error = gfs2_trans_begin(sdp,
					 sdp->sd_max_dirres +
					 al->al_rgd->rd_ri.ri_length +
					 4 * RES_DINODE + 4 * RES_LEAF +
					 RES_UNLINKED + RES_STATFS +
					 RES_QUOTA, 0);
		if (error)
			goto out_ipreserv;
	} else {
		error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
					 5 * RES_LEAF +
					 RES_UNLINKED, 0);
		if (error)
			goto out_gunlock;
	}

	/* Remove the target file, if it exists */

	if (nip) {
		if (S_ISDIR(nip->i_di.di_mode))
			error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul);
		else
			error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul);
		if (error)
			goto out_end_trans;
	}

	if (dir_rename) {
		struct qstr name;
		gfs2_str2qstr(&name, "..");

		/* The new parent gains the child's ".." reference, the
		   old parent loses it */
		error = gfs2_change_nlink(ndip, +1);
		if (error)
			goto out_end_trans;
		error = gfs2_change_nlink(odip, -1);
		if (error)
			goto out_end_trans;

		error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
		if (error)
			goto out_end_trans;
	} else {
		struct buffer_head *dibh;
		error = gfs2_meta_inode_buffer(ip, &dibh);
		if (error)
			goto out_end_trans;
		ip->i_di.di_ctime = get_seconds();
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(&ip->i_di, dibh->b_data);
		brelse(dibh);
	}

	error = gfs2_dir_del(odip, &odentry->d_name);
	if (error)
		goto out_end_trans;

	error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
			     IF2DT(ip->i_di.di_mode));
	if (error)
		goto out_end_trans;

 out_end_trans:
	gfs2_trans_end(sdp);

 out_ipreserv:
	if (alloc_required)
		gfs2_inplace_release(ndip);

 out_gunlock_q:
	if (alloc_required)
		gfs2_quota_unlock(ndip);

 out_alloc:
	if (alloc_required)
		gfs2_alloc_put(ndip);

 out_gunlock:
	gfs2_glock_dq_m(num_gh, ghs);

 out_uninit:
	for (x = 0; x < num_gh; x++)
		gfs2_holder_uninit(ghs + x);

 out_gunlock_r:
	if (dir_rename)
		gfs2_glock_dq_uninit(&r_gh);

 out:
	gfs2_unlinked_put(sdp, ul);

	return error;
}
837
838/**
839 * gfs2_readlink - Read the value of a symlink
840 * @dentry: the symlink
841 * @buf: the buffer to read the symlink data into
842 * @size: the size of the buffer
843 *
844 * Returns: errno
845 */
846
847static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
848 int user_size)
849{
850 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
851 char array[GFS2_FAST_NAME_SIZE], *buf = array;
852 unsigned int len = GFS2_FAST_NAME_SIZE;
853 int error;
854
855 error = gfs2_readlinki(ip, &buf, &len);
856 if (error)
857 return error;
858
859 if (user_size > len - 1)
860 user_size = len - 1;
861
862 if (copy_to_user(user_buf, buf, user_size))
863 error = -EFAULT;
864 else
865 error = user_size;
866
867 if (buf != array)
868 kfree(buf);
869
870 return error;
871}
872
873/**
874 * gfs2_follow_link - Follow a symbolic link
875 * @dentry: The dentry of the link
876 * @nd: Data that we pass to vfs_follow_link()
877 *
878 * This can handle symlinks of any size. It is optimised for symlinks
879 * under GFS2_FAST_NAME_SIZE.
880 *
881 * Returns: 0 on success or error code
882 */
883
884static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
885{
886 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
887 char array[GFS2_FAST_NAME_SIZE], *buf = array;
888 unsigned int len = GFS2_FAST_NAME_SIZE;
889 int error;
890
891 error = gfs2_readlinki(ip, &buf, &len);
892 if (!error) {
893 error = vfs_follow_link(nd, buf);
894 if (buf != array)
895 kfree(buf);
896 }
897
898 return ERR_PTR(error);
899}
900
901/**
902 * gfs2_permission -
903 * @inode:
904 * @mask:
905 * @nd: passed from Linux VFS, ignored by us
906 *
907 * Returns: errno
908 */
909
910static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
911{
912 struct gfs2_inode *ip = inode->u.generic_ip;
913 struct gfs2_holder i_gh;
914 int error;
915
916 if (ip->i_vn == ip->i_gl->gl_vn)
917 return generic_permission(inode, mask, gfs2_check_acl);
918
919 error = gfs2_glock_nq_init(ip->i_gl,
920 LM_ST_SHARED, LM_FLAG_ANY,
921 &i_gh);
922 if (!error) {
923 error = generic_permission(inode, mask, gfs2_check_acl_locked);
924 gfs2_glock_dq_uninit(&i_gh);
925 }
926
927 return error;
928}
929
930static int setattr_size(struct inode *inode, struct iattr *attr)
931{
932 struct gfs2_inode *ip = inode->u.generic_ip;
933 int error;
934
935 if (attr->ia_size != ip->i_di.di_size) {
936 error = vmtruncate(inode, attr->ia_size);
937 if (error)
938 return error;
939 }
940
941 error = gfs2_truncatei(ip, attr->ia_size);
942 if (error)
943 return error;
944
945 return error;
946}
947
/* Handle an ATTR_UID/ATTR_GID setattr: change the owner and/or group of
   an inode, moving its block count between the old and new quota
   accounts inside one transaction. */
static int setattr_chown(struct inode *inode, struct iattr *attr)
{
	struct gfs2_inode *ip = inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct buffer_head *dibh;
	uint32_t ouid, ogid, nuid, ngid;
	int error;

	ouid = ip->i_di.di_uid;
	ogid = ip->i_di.di_gid;
	nuid = attr->ia_uid;
	ngid = attr->ia_gid;

	/* Collapse no-op changes so the quota transfer below is skipped */
	if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
		ouid = nuid = NO_QUOTA_CHANGE;
	if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
		ogid = ngid = NO_QUOTA_CHANGE;

	gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, nuid, ngid);
	if (error)
		goto out_alloc;

	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
		error = gfs2_quota_check(ip, nuid, ngid);
		if (error)
			goto out_gunlock_q;
	}

	error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
	if (error)
		goto out_gunlock_q;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out_end_trans;

	/* Apply the change to the VFS inode, then mirror it into i_di */
	error = inode_setattr(inode, attr);
	gfs2_assert_warn(sdp, !error);
	gfs2_inode_attr_out(ip);

	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_dinode_out(&ip->i_di, dibh->b_data);
	brelse(dibh);

	/* Move the inode's blocks from the old account to the new one */
	if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
		gfs2_quota_change(ip, -ip->i_di.di_blocks,
				  ouid, ogid);
		gfs2_quota_change(ip, ip->i_di.di_blocks,
				  nuid, ngid);
	}

 out_end_trans:
	gfs2_trans_end(sdp);

 out_gunlock_q:
	gfs2_quota_unlock(ip);

 out_alloc:
	gfs2_alloc_put(ip);

	return error;
}
1012
/**
 * gfs2_setattr - Change attributes on an inode
 * @dentry: The dentry which is changing
 * @attr: The structure describing the change
 *
 * The VFS layer wants to change one or more of an inodes attributes.  Write
 * that change out to disk.
 *
 * Returns: errno
 */

static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	struct gfs2_inode *ip = inode->u.generic_ip;
	struct gfs2_holder i_gh;
	int error;

	/* The exclusive glock is held across the whole attribute change */
	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return error;

	error = -EPERM;
	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		goto out;

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	/* Dispatch to the specialized handler for this kind of change */
	if (attr->ia_valid & ATTR_SIZE)
		error = setattr_size(inode, attr);
	else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
		error = setattr_chown(inode, attr);
	else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
		error = gfs2_acl_chmod(ip, attr);
	else
		error = gfs2_setattr_simple(ip, attr);

 out:
	gfs2_glock_dq_uninit(&i_gh);

	if (!error)
		mark_inode_dirty(inode);

	return error;
}
1060
1061/**
1062 * gfs2_getattr - Read out an inode's attributes
1063 * @mnt: ?
1064 * @dentry: The dentry to stat
1065 * @stat: The inode's stats
1066 *
1067 * Returns: errno
1068 */
1069
1070static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1071 struct kstat *stat)
1072{
1073 struct inode *inode = dentry->d_inode;
1074 struct gfs2_inode *ip = inode->u.generic_ip;
1075 struct gfs2_holder gh;
1076 int error;
1077
1078 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1079 if (!error) {
1080 generic_fillattr(inode, stat);
1081 gfs2_glock_dq_uninit(&gh);
1082 }
1083
1084 return error;
1085}
1086
1087static int gfs2_setxattr(struct dentry *dentry, const char *name,
1088 const void *data, size_t size, int flags)
1089{
1090 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
1091 struct gfs2_ea_request er;
1092
1093 memset(&er, 0, sizeof(struct gfs2_ea_request));
1094 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1095 if (er.er_type == GFS2_EATYPE_UNUSED)
1096 return -EOPNOTSUPP;
1097 er.er_data = (char *)data;
1098 er.er_name_len = strlen(er.er_name);
1099 er.er_data_len = size;
1100 er.er_flags = flags;
1101
1102 gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE));
1103
1104 return gfs2_ea_set(ip, &er);
1105}
1106
1107static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1108 void *data, size_t size)
1109{
1110 struct gfs2_ea_request er;
1111
1112 memset(&er, 0, sizeof(struct gfs2_ea_request));
1113 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1114 if (er.er_type == GFS2_EATYPE_UNUSED)
1115 return -EOPNOTSUPP;
1116 er.er_data = data;
1117 er.er_name_len = strlen(er.er_name);
1118 er.er_data_len = size;
1119
1120 return gfs2_ea_get(dentry->d_inode->u.generic_ip, &er);
1121}
1122
1123static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1124{
1125 struct gfs2_ea_request er;
1126
1127 memset(&er, 0, sizeof(struct gfs2_ea_request));
1128 er.er_data = (size) ? buffer : NULL;
1129 er.er_data_len = size;
1130
1131 return gfs2_ea_list(dentry->d_inode->u.generic_ip, &er);
1132}
1133
1134static int gfs2_removexattr(struct dentry *dentry, const char *name)
1135{
1136 struct gfs2_ea_request er;
1137
1138 memset(&er, 0, sizeof(struct gfs2_ea_request));
1139 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1140 if (er.er_type == GFS2_EATYPE_UNUSED)
1141 return -EOPNOTSUPP;
1142 er.er_name_len = strlen(er.er_name);
1143
1144 return gfs2_ea_remove(dentry->d_inode->u.generic_ip, &er);
1145}
1146
/* Inode operations for regular files */
struct inode_operations gfs2_file_iops = {
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
1156
/* Inode operations for device nodes, FIFOs, and sockets */
struct inode_operations gfs2_dev_iops = {
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
1166
/* Inode operations for directories */
struct inode_operations gfs2_dir_iops = {
	.create = gfs2_create,
	.lookup = gfs2_lookup,
	.link = gfs2_link,
	.unlink = gfs2_unlink,
	.symlink = gfs2_symlink,
	.mkdir = gfs2_mkdir,
	.rmdir = gfs2_rmdir,
	.mknod = gfs2_mknod,
	.rename = gfs2_rename,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
1185
/* Inode operations for symbolic links */
struct inode_operations gfs2_symlink_iops = {
	.readlink = gfs2_readlink,
	.follow_link = gfs2_follow_link,
	.permission = gfs2_permission,
	.setattr = gfs2_setattr,
	.getattr = gfs2_getattr,
	.setxattr = gfs2_setxattr,
	.getxattr = gfs2_getxattr,
	.listxattr = gfs2_listxattr,
	.removexattr = gfs2_removexattr,
};
1197
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
new file mode 100644
index 000000000000..5fafd87c8d7b
--- /dev/null
+++ b/fs/gfs2/ops_inode.h
@@ -0,0 +1,18 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __OPS_INODE_DOT_H__
#define __OPS_INODE_DOT_H__

/* inode_operations tables defined in ops_inode.c */
extern struct inode_operations gfs2_file_iops;
extern struct inode_operations gfs2_dir_iops;
extern struct inode_operations gfs2_symlink_iops;
extern struct inode_operations gfs2_dev_iops;

#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
new file mode 100644
index 000000000000..f7349c0989a9
--- /dev/null
+++ b/fs/gfs2/ops_super.c
@@ -0,0 +1,387 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/statfs.h>
16#include <linux/vmalloc.h>
17#include <linux/seq_file.h>
18#include <linux/mount.h>
19#include <linux/kthread.h>
20#include <linux/delay.h>
21#include <linux/gfs2_ondisk.h>
22#include <asm/semaphore.h>
23
24#include "gfs2.h"
25#include "lm_interface.h"
26#include "incore.h"
27#include "glock.h"
28#include "inode.h"
29#include "lm.h"
30#include "log.h"
31#include "mount.h"
32#include "ops_super.h"
33#include "page.h"
34#include "quota.h"
35#include "recovery.h"
36#include "rgrp.h"
37#include "super.h"
38#include "sys.h"
39#include "util.h"
40
41/**
42 * gfs2_write_inode - Make sure the inode is stable on the disk
43 * @inode: The inode
44 * @sync: synchronous write flag
45 *
46 * Returns: errno
47 */
48
49static int gfs2_write_inode(struct inode *inode, int sync)
50{
51 struct gfs2_inode *ip = inode->u.generic_ip;
52
53 if (current->flags & PF_MEMALLOC)
54 return 0;
55 if (ip && sync)
56 gfs2_log_flush_glock(ip->i_gl);
57
58 return 0;
59}
60
/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 */

static void gfs2_put_super(struct super_block *sb)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	if (!sdp)
		return;

	/* Unfreeze the filesystem, if we need to */

	mutex_lock(&sdp->sd_freeze_lock);
	if (sdp->sd_freeze_count)
		gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
	mutex_unlock(&sdp->sd_freeze_lock);

	/* Stop all daemon threads before dismantling their state */
	kthread_stop(sdp->sd_inoded_process);
	kthread_stop(sdp->sd_quotad_process);
	kthread_stop(sdp->sd_logd_process);
	kthread_stop(sdp->sd_recoverd_process);
	while (sdp->sd_glockd_num--)
		kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
	kthread_stop(sdp->sd_scand_process);

	if (!(sb->s_flags & MS_RDONLY)) {
		error = gfs2_make_fs_ro(sdp);
		if (error)
			gfs2_io_error(sdp);
	}

	/* At this point, we're through modifying the disk */

	/* Release stuff */

	iput(sdp->sd_master_dir);
	iput(sdp->sd_jindex);
	iput(sdp->sd_inum_inode);
	iput(sdp->sd_statfs_inode);
	iput(sdp->sd_rindex);
	iput(sdp->sd_quota_inode);

	gfs2_glock_put(sdp->sd_rename_gl);
	gfs2_glock_put(sdp->sd_trans_gl);

	/* Spectator mounts never acquired the per-journal resources */
	if (!sdp->sd_args.ar_spectator) {
		gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
		gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
		gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
		gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
		gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
		gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
		iput(sdp->sd_ir_inode);
		iput(sdp->sd_sc_inode);
		iput(sdp->sd_ut_inode);
		iput(sdp->sd_qc_inode);
	}

	gfs2_glock_dq_uninit(&sdp->sd_live_gh);

	gfs2_clear_rgrpd(sdp);
	gfs2_jindex_free(sdp);

	/* Take apart glock structures and buffer lists */
	gfs2_gl_hash_clear(sdp, WAIT);

	/* Unmount the locking protocol */
	gfs2_lm_unmount(sdp);

	/* At this point, we're through participating in the lockspace */

	gfs2_sys_fs_del(sdp);

	/* Get rid of any extra inodes */
	while (invalidate_inodes(sb))
		yield();

	vfree(sdp);

	sb->s_fs_info = NULL;
}
146
147/**
148 * gfs2_write_super - disk commit all incore transactions
149 * @sb: the filesystem
150 *
151 * This function is called every time sync(2) is called.
152 * After this exits, all dirty buffers and synced.
153 */
154
155static void gfs2_write_super(struct super_block *sb)
156{
157 struct gfs2_sbd *sdp = sb->s_fs_info;
158 gfs2_log_flush(sdp);
159}
160
161/**
162 * gfs2_write_super_lockfs - prevent further writes to the filesystem
163 * @sb: the VFS structure for the filesystem
164 *
165 */
166
167static void gfs2_write_super_lockfs(struct super_block *sb)
168{
169 struct gfs2_sbd *sdp = sb->s_fs_info;
170 int error;
171
172 for (;;) {
173 error = gfs2_freeze_fs(sdp);
174 if (!error)
175 break;
176
177 switch (error) {
178 case -EBUSY:
179 fs_err(sdp, "waiting for recovery before freeze\n");
180 break;
181
182 default:
183 fs_err(sdp, "error freezing FS: %d\n", error);
184 break;
185 }
186
187 fs_err(sdp, "retrying...\n");
188 msleep(1000);
189 }
190}
191
192/**
193 * gfs2_unlockfs - reallow writes to the filesystem
194 * @sb: the VFS structure for the filesystem
195 *
196 */
197
198static void gfs2_unlockfs(struct super_block *sb)
199{
200 struct gfs2_sbd *sdp = sb->s_fs_info;
201 gfs2_unfreeze_fs(sdp);
202}
203
204/**
205 * gfs2_statfs - Gather and return stats about the filesystem
206 * @sb: The superblock
207 * @statfsbuf: The buffer
208 *
209 * Returns: 0 on success or error code
210 */
211
212static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf)
213{
214 struct gfs2_sbd *sdp = sb->s_fs_info;
215 struct gfs2_statfs_change sc;
216 int error;
217
218 if (gfs2_tune_get(sdp, gt_statfs_slow))
219 error = gfs2_statfs_slow(sdp, &sc);
220 else
221 error = gfs2_statfs_i(sdp, &sc);
222
223 if (error)
224 return error;
225
226 memset(buf, 0, sizeof(struct kstatfs));
227
228 buf->f_type = GFS2_MAGIC;
229 buf->f_bsize = sdp->sd_sb.sb_bsize;
230 buf->f_blocks = sc.sc_total;
231 buf->f_bfree = sc.sc_free;
232 buf->f_bavail = sc.sc_free;
233 buf->f_files = sc.sc_dinodes + sc.sc_free;
234 buf->f_ffree = sc.sc_free;
235 buf->f_namelen = GFS2_FNAMESIZE;
236
237 return 0;
238}
239
/**
 * gfs2_remount_fs - called when the FS is remounted
 * @sb: the filesystem
 * @flags: the remount flags
 * @data: extra data passed in (not used right now)
 *
 * Returns: errno
 */

static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	/* Re-parse the mount options (final argument: remount == 1) */
	error = gfs2_mount_args(sdp, data, 1);
	if (error)
		return error;

	/* A spectator mount can never be made read-write */
	if (sdp->sd_args.ar_spectator)
		*flags |= MS_RDONLY;
	else {
		if (*flags & MS_RDONLY) {
			if (!(sb->s_flags & MS_RDONLY))
				error = gfs2_make_fs_ro(sdp);
		} else if (!(*flags & MS_RDONLY) &&
			   (sb->s_flags & MS_RDONLY)) {
			error = gfs2_make_fs_rw(sdp);
		}
	}

	if (*flags & (MS_NOATIME | MS_NODIRATIME))
		set_bit(SDF_NOATIME, &sdp->sd_flags);
	else
		clear_bit(SDF_NOATIME, &sdp->sd_flags);

	/* Don't let the VFS update atimes.  GFS2 handles this itself. */
	*flags |= MS_NOATIME | MS_NODIRATIME;

	return error;
}
280
281/**
282 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
283 * @inode: The VFS inode
284 *
285 */
286
287static void gfs2_clear_inode(struct inode *inode)
288{
289 struct gfs2_inode *ip = inode->u.generic_ip;
290
291 if (ip) {
292 spin_lock(&ip->i_spin);
293 ip->i_vnode = NULL;
294 inode->u.generic_ip = NULL;
295 spin_unlock(&ip->i_spin);
296
297 gfs2_glock_schedule_for_reclaim(ip->i_gl);
298 gfs2_inode_put(ip);
299 }
300}
301
/**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
 * @mnt: vfsmount
 *
 * Returns: 0 on success or error code
 */

static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
{
	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
	struct gfs2_args *args = &sdp->sd_args;

	/* String options are only shown when set */
	if (args->ar_lockproto[0])
		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
	if (args->ar_locktable[0])
		seq_printf(s, ",locktable=%s", args->ar_locktable);
	if (args->ar_hostdata[0])
		seq_printf(s, ",hostdata=%s", args->ar_hostdata);
	if (args->ar_spectator)
		seq_printf(s, ",spectator");
	if (args->ar_ignore_local_fs)
		seq_printf(s, ",ignore_local_fs");
	if (args->ar_localflocks)
		seq_printf(s, ",localflocks");
	if (args->ar_localcaching)
		seq_printf(s, ",localcaching");
	if (args->ar_debug)
		seq_printf(s, ",debug");
	if (args->ar_upgrade)
		seq_printf(s, ",upgrade");
	if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
		seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
	if (args->ar_posix_acl)
		seq_printf(s, ",acl");
	/* Modal options are only shown when they differ from the default */
	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
		char *state;
		switch (args->ar_quota) {
		case GFS2_QUOTA_OFF:
			state = "off";
			break;
		case GFS2_QUOTA_ACCOUNT:
			state = "account";
			break;
		case GFS2_QUOTA_ON:
			state = "on";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",quota=%s", state);
	}
	if (args->ar_suiddir)
		seq_printf(s, ",suiddir");
	if (args->ar_data != GFS2_DATA_DEFAULT) {
		char *state;
		switch (args->ar_data) {
		case GFS2_DATA_WRITEBACK:
			state = "writeback";
			break;
		case GFS2_DATA_ORDERED:
			state = "ordered";
			break;
		default:
			state = "unknown";
			break;
		}
		seq_printf(s, ",data=%s", state);
	}

	return 0;
}
375
/* Superblock operations registered at mount time */
struct super_operations gfs2_super_ops = {
	.write_inode = gfs2_write_inode,
	.put_super = gfs2_put_super,
	.write_super = gfs2_write_super,
	.write_super_lockfs = gfs2_write_super_lockfs,
	.unlockfs = gfs2_unlockfs,
	.statfs = gfs2_statfs,
	.remount_fs = gfs2_remount_fs,
	.clear_inode = gfs2_clear_inode,
	.show_options = gfs2_show_options,
};
387
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
new file mode 100644
index 000000000000..a41d208dc558
--- /dev/null
+++ b/fs/gfs2/ops_super.h
@@ -0,0 +1,15 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __OPS_SUPER_DOT_H__
#define __OPS_SUPER_DOT_H__

/* Defined in ops_super.c; wired into the superblock at mount time. */
extern struct super_operations gfs2_super_ops;

#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
new file mode 100644
index 000000000000..dbc57071e7bb
--- /dev/null
+++ b/fs/gfs2/ops_vm.c
@@ -0,0 +1,198 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/gfs2_ondisk.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "bmap.h"
24#include "glock.h"
25#include "inode.h"
26#include "ops_vm.h"
27#include "page.h"
28#include "quota.h"
29#include "rgrp.h"
30#include "trans.h"
31#include "util.h"
32
/*
 * pfault_be_greedy - ask the glock layer to hold the inode's glock longer
 * @ip: the inode that just took a page fault
 *
 * Records the fault time and hands an extra inode reference to the glock
 * "greedy" machinery.  NOTE(review): the reference is dropped here only
 * when gfs2_glock_be_greedy() returns non-zero — assumes a non-zero
 * return means the greedy callback will never run and consume the
 * reference itself; confirm against glock.c.
 */
static void pfault_be_greedy(struct gfs2_inode *ip)
{
	unsigned int time;

	spin_lock(&ip->i_spin);
	time = ip->i_greedy;	/* duration hint, read under i_spin */
	ip->i_last_pfault = jiffies;
	spin_unlock(&ip->i_spin);

	gfs2_inode_hold(ip);
	if (gfs2_glock_be_greedy(ip->i_gl, time))
		gfs2_inode_put(ip);
}
46
/*
 * gfs2_private_nopage - fault handler for MAP_PRIVATE mappings
 * @area: the faulting VMA
 * @address: the faulting address
 * @type: fault type out-parameter, passed through to filemap_nopage()
 *
 * Takes a shared hold on the inode glock around the generic page-cache
 * fault, marks the inode as having been mapped (GIF_PAGED), and extends
 * the glock hold on success via pfault_be_greedy().
 *
 * Returns: the faulted-in page, or NULL if the glock cannot be acquired
 */
static struct page *gfs2_private_nopage(struct vm_area_struct *area,
					unsigned long address, int *type)
{
	struct gfs2_inode *ip = area->vm_file->f_mapping->host->u.generic_ip;
	struct gfs2_holder i_gh;
	struct page *result;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
	if (error)
		return NULL;

	set_bit(GIF_PAGED, &ip->i_flags);

	result = filemap_nopage(area, address, type);

	if (result && result != NOPAGE_OOM)
		pfault_be_greedy(ip);

	gfs2_glock_dq_uninit(&i_gh);

	return result;
}
70
/*
 * alloc_page_backing - allocate on-disk blocks behind a faulted page
 * @ip: the inode
 * @page: the page needing backing store (used for its index)
 *
 * Called from the shared-writable fault path when the page's range of
 * the file has no allocation yet.  Takes quota locks, reserves blocks,
 * opens a transaction, unstuffs the dinode if needed, then walks the
 * page's logical blocks through gfs2_block_map() with new=1 so each
 * one gets allocated.  All resources are released via the goto-cleanup
 * chain regardless of success.
 *
 * Returns: errno
 */
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	unsigned long index = page->index;
	/* First logical file block covered by this page. */
	uint64_t lblock = index << (PAGE_CACHE_SHIFT -
				   sdp->sd_sb.sb_bsize_shift);
	unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
	struct gfs2_alloc *al;
	unsigned int data_blocks, ind_blocks;
	unsigned int x;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
	if (error)
		goto out_gunlock_q;

	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE,
			       &data_blocks, &ind_blocks);

	al->al_requested = data_blocks + ind_blocks;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(sdp,
				 al->al_rgd->rd_ri.ri_length +
				 ind_blocks + RES_DINODE +
				 RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL);
		if (error)
			goto out_trans;
	}

	/* Map (and thereby allocate) every block under the page; extents
	   let us advance by more than one block per call. */
	for (x = 0; x < blocks; ) {
		uint64_t dblock;
		unsigned int extlen;
		int new = 1;

		error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
		if (error)
			goto out_trans;

		lblock += extlen;
		x += extlen;
	}

	gfs2_assert_warn(sdp, al->al_alloced);

 out_trans:
	gfs2_trans_end(sdp);

 out_ipres:
	gfs2_inplace_release(ip);

 out_gunlock_q:
	gfs2_quota_unlock(ip);

 out:
	gfs2_alloc_put(ip);

	return error;
}
144
/*
 * gfs2_sharewrite_nopage - fault handler for shared-writable mappings
 * @area: the faulting VMA
 * @address: the faulting address
 * @type: fault type out-parameter, passed through to filemap_nopage()
 *
 * Like gfs2_private_nopage(), but takes the inode glock exclusively and,
 * when the faulted range has no on-disk allocation yet, allocates backing
 * blocks via alloc_page_backing() and dirties the page.
 *
 * Returns: the faulted-in page, or NULL on any failure
 */
static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
					   unsigned long address, int *type)
{
	struct gfs2_inode *ip = area->vm_file->f_mapping->host->u.generic_ip;
	struct gfs2_holder i_gh;
	struct page *result = NULL;
	/* File page index corresponding to the faulting address. */
	unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
			      area->vm_pgoff;
	int alloc_required;
	int error;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		return NULL;

	set_bit(GIF_PAGED, &ip->i_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	error = gfs2_write_alloc_required(ip,
					  (uint64_t)index << PAGE_CACHE_SHIFT,
					  PAGE_CACHE_SIZE, &alloc_required);
	if (error)
		goto out;

	result = filemap_nopage(area, address, type);
	if (!result || result == NOPAGE_OOM)
		goto out;

	if (alloc_required) {
		error = alloc_page_backing(ip, result);
		if (error) {
			/* Drop the reference filemap_nopage() gave us. */
			page_cache_release(result);
			result = NULL;
			goto out;
		}
		set_page_dirty(result);
	}

	pfault_be_greedy(ip);

 out:
	gfs2_glock_dq_uninit(&i_gh);

	return result;
}
190
/* VMA operations for MAP_PRIVATE mappings: plain fault-in only. */
struct vm_operations_struct gfs2_vm_ops_private = {
	.nopage = gfs2_private_nopage,
};

/* VMA operations for shared-writable mappings: faults may allocate. */
struct vm_operations_struct gfs2_vm_ops_sharewrite = {
	.nopage = gfs2_sharewrite_nopage,
};
198
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
new file mode 100644
index 000000000000..54e3a8769cbb
--- /dev/null
+++ b/fs/gfs2/ops_vm.h
@@ -0,0 +1,16 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __OPS_VM_DOT_H__
#define __OPS_VM_DOT_H__

/* Fault handlers defined in ops_vm.c; selected at mmap() time. */
extern struct vm_operations_struct gfs2_vm_ops_private;
extern struct vm_operations_struct gfs2_vm_ops_sharewrite;

#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c
new file mode 100644
index 000000000000..a2c9e93c7c39
--- /dev/null
+++ b/fs/gfs2/page.c
@@ -0,0 +1,283 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/mm.h>
17#include <linux/gfs2_ondisk.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "bmap.h"
24#include "inode.h"
25#include "page.h"
26#include "trans.h"
27#include "ops_address.h"
28#include "util.h"
29
/**
 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
 * @gl: the glock
 *
 * No-op unless the glock backs a regular file that has been mmapped
 * (GIF_PAGED).  Unmaps shared mappings so future writes fault again;
 * if the file had shared-writable mappings (GIF_SW_PAGED), marks the
 * glock dirty so its data gets flushed.
 */

void gfs2_pte_inval(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;
	struct inode *inode;

	ip = gl->gl_object;
	if (!ip || !S_ISREG(ip->i_di.di_mode))
		return;

	if (!test_bit(GIF_PAGED, &ip->i_flags))
		return;

	/* Only act if a VFS inode is currently attached. */
	inode = gfs2_ip2v_lookup(ip);
	if (inode) {
		unmap_shared_mapping_range(inode->i_mapping, 0, 0);
		iput(inode);

		if (test_bit(GIF_SW_PAGED, &ip->i_flags))
			set_bit(GLF_DIRTY, &gl->gl_flags);
	}

	clear_bit(GIF_SW_PAGED, &ip->i_flags);
}
59
/**
 * gfs2_page_inval - Invalidate all pages associated with a glock
 * @gl: the glock
 *
 * No-op unless the glock backs a regular file.  Truncates the entire
 * page cache for the inode (if a VFS inode is attached) and clears
 * GIF_PAGED.
 */

void gfs2_page_inval(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;
	struct inode *inode;

	ip = gl->gl_object;
	if (!ip || !S_ISREG(ip->i_di.di_mode))
		return;

	inode = gfs2_ip2v_lookup(ip);
	if (inode) {
		struct address_space *mapping = inode->i_mapping;

		truncate_inode_pages(mapping, 0);
		/* Everything must be gone; a stale page here would be
		   served after another node modifies the file. */
		gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages);

		iput(inode);
	}

	clear_bit(GIF_PAGED, &ip->i_flags);
}
87
88/**
89 * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock
90 * @gl: the glock
91 * @flags: DIO_START | DIO_WAIT
92 *
93 * Syncs data (not metadata) for a regular file.
94 * No-op for all other types.
95 */
96
97void gfs2_page_sync(struct gfs2_glock *gl, int flags)
98{
99 struct gfs2_inode *ip;
100 struct inode *inode;
101
102 ip = gl->gl_object;
103 if (!ip || !S_ISREG(ip->i_di.di_mode))
104 return;
105
106 inode = gfs2_ip2v_lookup(ip);
107 if (inode) {
108 struct address_space *mapping = inode->i_mapping;
109 int error = 0;
110
111 if (flags & DIO_START)
112 filemap_fdatawrite(mapping);
113 if (!error && (flags & DIO_WAIT))
114 error = filemap_fdatawait(mapping);
115
116 /* Put back any errors cleared by filemap_fdatawait()
117 so they can be caught by someone who can pass them
118 up to user space. */
119
120 if (error == -ENOSPC)
121 set_bit(AS_ENOSPC, &mapping->flags);
122 else if (error)
123 set_bit(AS_EIO, &mapping->flags);
124
125 iput(inode);
126 }
127}
128
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @private: any locked page held by the caller process
 *
 * Copies the inline data out of the dinode into page 0 of the page
 * cache and maps that page's buffer to @block.  If the caller did not
 * supply page 0, one is grabbed (and released) here.
 * NOTE(review): the memcpy/memset assume di_size <= PAGE_CACHE_SIZE,
 * which holds for stuffed inodes — confirm callers guarantee it.
 *
 * Returns: errno
 */

int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			uint64_t block, void *private)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct inode *inode = ip->i_vnode;
	struct page *page = (struct page *)private;
	struct buffer_head *bh;
	int release = 0;

	/* Use the caller's page only if it is page 0; otherwise grab
	   (and remember to release) our own. */
	if (!page || page->index) {
		page = grab_cache_page(inode->i_mapping, 0);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);

		memcpy(kaddr,
		       dibh->b_data + sizeof(struct gfs2_dinode),
		       ip->i_di.di_size);
		memset(kaddr + ip->i_di.di_size,
		       0,
		       PAGE_CACHE_SIZE - ip->i_di.di_size);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits,
				     (1 << BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
	/* Journal the data buffer under ordered/jdata modes. */
	if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);
	mark_buffer_dirty(bh);

	if (release) {
		unlock_page(page);
		page_cache_release(page);
	}

	return 0;
}
190
/**
 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
 * @mapping: the inode's address space
 *
 * Zeroes the tail of the block containing the new EOF (inode->i_size)
 * so stale data past EOF is not leaked.  This is partly borrowed from
 * ext3.  NOTE(review): returns 0 (not -ENOMEM) when the page cannot be
 * grabbed — confirm that is intentional.
 *
 * Returns: errno
 */
int gfs2_block_truncate_page(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;
	loff_t from = inode->i_size;
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct buffer_head *bh;
	struct page *page;
	void *kaddr;
	int err;

	page = grab_cache_page(mapping, index);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	/* Bytes to zero: from `offset` to the end of its block. */
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
		gfs2_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
	}

	/* Journal the zeroed buffer under ordered/jdata modes. */
	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + offset, 0, length);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}
265
266void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
267 unsigned int from, unsigned int to)
268{
269 struct buffer_head *head = page_buffers(page);
270 unsigned int bsize = head->b_size;
271 struct buffer_head *bh;
272 unsigned int start, end;
273
274 for (bh = head, start = 0;
275 bh != head || !start;
276 bh = bh->b_this_page, start = end) {
277 end = start + bsize;
278 if (end <= from || start >= to)
279 continue;
280 gfs2_trans_add_bh(ip->i_gl, bh, 0);
281 }
282}
283
diff --git a/fs/gfs2/page.h b/fs/gfs2/page.h
new file mode 100644
index 000000000000..346e296420c6
--- /dev/null
+++ b/fs/gfs2/page.h
@@ -0,0 +1,23 @@
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#ifndef __PAGE_DOT_H__
#define __PAGE_DOT_H__

/* Glock-level page/PTE invalidation and sync (see page.c). */
void gfs2_pte_inval(struct gfs2_glock *gl);
void gfs2_page_inval(struct gfs2_glock *gl);
void gfs2_page_sync(struct gfs2_glock *gl, int flags);

/* Page-cache helpers used by unstuff/truncate/write paths. */
int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
			uint64_t block, void *private);
int gfs2_block_truncate_page(struct address_space *mapping);
void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
			    unsigned int from, unsigned int to);

#endif /* __PAGE_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
new file mode 100644
index 000000000000..c57b5cf1d583
--- /dev/null
+++ b/fs/gfs2/quota.c
@@ -0,0 +1,1303 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controlable.
18 * Since quota tags are part of transactions, there is no need to a quota check
19 * program to be run on node crashes or anything like that.
20 *
21 * There are couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite node with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent.
34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read.
37 */
38
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/spinlock.h>
42#include <linux/completion.h>
43#include <linux/buffer_head.h>
44#include <linux/tty.h>
45#include <linux/sort.h>
46#include <linux/fs.h>
47#include <linux/gfs2_ondisk.h>
48#include <asm/semaphore.h>
49
50#include "gfs2.h"
51#include "lm_interface.h"
52#include "incore.h"
53#include "bmap.h"
54#include "glock.h"
55#include "glops.h"
56#include "log.h"
57#include "lvb.h"
58#include "meta_io.h"
59#include "quota.h"
60#include "rgrp.h"
61#include "super.h"
62#include "trans.h"
63#include "inode.h"
64#include "ops_file.h"
65#include "ops_address.h"
66#include "util.h"
67
68#define QUOTA_USER 1
69#define QUOTA_GROUP 0
70
71static uint64_t qd2offset(struct gfs2_quota_data *qd)
72{
73 uint64_t offset;
74
75 offset = 2 * (uint64_t)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
76 offset *= sizeof(struct gfs2_quota);
77
78 return offset;
79}
80
/*
 * qd_alloc - allocate and initialize a gfs2_quota_data
 * @sdp: the filesystem
 * @user: non-zero for a user quota, zero for a group quota
 * @id: the uid or gid
 * @qdp: filled in with the new structure on success
 *
 * Acquires the per-ID quota glock (keyed 2*id + !user, matching
 * qd2offset's layout) and a hold on its LVB.  The initial qd_count
 * of 1 belongs to the caller.
 *
 * Returns: errno
 */
static int qd_alloc(struct gfs2_sbd *sdp, int user, uint32_t id,
		    struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd;
	int error;

	qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
	if (!qd)
		return -ENOMEM;

	qd->qd_count = 1;
	qd->qd_id = id;
	if (user)
		set_bit(QDF_USER, &qd->qd_flags);
	qd->qd_slot = -1;	/* no bitmap slot assigned yet */

	error = gfs2_glock_get(sdp, 2 * (uint64_t)id + !user,
			       &gfs2_quota_glops, CREATE, &qd->qd_gl);
	if (error)
		goto fail;

	/* Keep the LVB; drop the extra glock reference from _get. */
	error = gfs2_lvb_hold(qd->qd_gl);
	gfs2_glock_put(qd->qd_gl);
	if (error)
		goto fail;

	*qdp = qd;

	return 0;

 fail:
	kfree(qd);
	return error;
}
115
/*
 * qd_get - find (or optionally create) the quota_data for an ID
 * @sdp: the filesystem
 * @user: non-zero for a user quota, zero for a group quota
 * @id: the uid or gid
 * @create: if set, allocate a new structure when none exists
 * @qdp: filled in with the (referenced) structure, or NULL
 *
 * Classic optimistic-retry pattern: search sd_quota_list under the
 * spinlock; on a miss with @create set, allocate outside the lock and
 * retry, inserting the new structure only if nobody raced us in (a
 * raced-out allocation is freed on the next pass).
 *
 * Returns: errno
 */
static int qd_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
		  struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
	int error, found;

	*qdp = NULL;

	for (;;) {
		found = 0;
		spin_lock(&sdp->sd_quota_spin);
		list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
			if (qd->qd_id == id &&
			    !test_bit(QDF_USER, &qd->qd_flags) == !user) {
				qd->qd_count++;
				found = 1;
				break;
			}
		}

		if (!found)
			qd = NULL;

		if (!qd && new_qd) {
			/* Our freshly allocated structure wins the race. */
			qd = new_qd;
			list_add(&qd->qd_list, &sdp->sd_quota_list);
			atomic_inc(&sdp->sd_quota_count);
			new_qd = NULL;
		}

		spin_unlock(&sdp->sd_quota_spin);

		if (qd || !create) {
			if (new_qd) {
				/* Lost the race; discard the spare. */
				gfs2_lvb_unhold(new_qd->qd_gl);
				kfree(new_qd);
			}
			*qdp = qd;
			return 0;
		}

		error = qd_alloc(sdp, user, id, &new_qd);
		if (error)
			return error;
	}
}
162
/* Take an additional reference on @qd; caller must already hold one. */
static void qd_hold(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	spin_lock(&sdp->sd_quota_spin);
	gfs2_assert(sdp, qd->qd_count);
	qd->qd_count++;
	spin_unlock(&sdp->sd_quota_spin);
}
172
/* Drop a reference on @qd; records the time the last ref went away so
   an idle structure can later be reclaimed. */
static void qd_put(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	spin_lock(&sdp->sd_quota_spin);
	gfs2_assert(sdp, qd->qd_count);
	if (!--qd->qd_count)
		qd->qd_last_touched = jiffies;
	spin_unlock(&sdp->sd_quota_spin);
}
182
/*
 * slot_get - assign @qd a slot in the per-node quota-change file
 * @qd: the quota data
 *
 * Counted: only the first holder scans sd_quota_bitmap for a free bit;
 * later holders just bump qd_slot_count.
 *
 * Returns: 0, or -ENOSPC when every slot is taken
 */
static int slot_get(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	unsigned int c, o = 0, b;
	unsigned char byte = 0;

	spin_lock(&sdp->sd_quota_spin);

	if (qd->qd_slot_count++) {
		spin_unlock(&sdp->sd_quota_spin);
		return 0;
	}

	/* Scan chunk by chunk for a byte with a clear bit. */
	for (c = 0; c < sdp->sd_quota_chunks; c++)
		for (o = 0; o < PAGE_SIZE; o++) {
			byte = sdp->sd_quota_bitmap[c][o];
			if (byte != 0xFF)
				goto found;
		}

	goto fail;

 found:
	for (b = 0; b < 8; b++)
		if (!(byte & (1 << b)))
			break;
	qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;

	/* The bitmap may extend past the usable slot count. */
	if (qd->qd_slot >= sdp->sd_quota_slots)
		goto fail;

	sdp->sd_quota_bitmap[c][o] |= 1 << b;

	spin_unlock(&sdp->sd_quota_spin);

	return 0;

 fail:
	qd->qd_slot_count--;
	spin_unlock(&sdp->sd_quota_spin);
	return -ENOSPC;
}
225
/* Take an additional hold on @qd's slot; one must already exist. */
static void slot_hold(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	spin_lock(&sdp->sd_quota_spin);
	gfs2_assert(sdp, qd->qd_slot_count);
	qd->qd_slot_count++;
	spin_unlock(&sdp->sd_quota_spin);
}
235
/* Drop a hold on @qd's slot; the last put clears the bitmap bit and
   releases the slot for reuse. */
static void slot_put(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	spin_lock(&sdp->sd_quota_spin);
	gfs2_assert(sdp, qd->qd_slot_count);
	if (!--qd->qd_slot_count) {
		gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
		qd->qd_slot = -1;
	}
	spin_unlock(&sdp->sd_quota_spin);
}
248
249static int bh_get(struct gfs2_quota_data *qd)
250{
251 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
252 struct gfs2_inode *ip = sdp->sd_qc_inode->u.generic_ip;
253 unsigned int block, offset;
254 uint64_t dblock;
255 int new = 0;
256 struct buffer_head *bh;
257 int error;
258
259 mutex_lock(&sdp->sd_quota_mutex);
260
261 if (qd->qd_bh_count++) {
262 mutex_unlock(&sdp->sd_quota_mutex);
263 return 0;
264 }
265
266 block = qd->qd_slot / sdp->sd_qc_per_block;
267 offset = qd->qd_slot % sdp->sd_qc_per_block;;
268
269 error = gfs2_block_map(ip, block, &new, &dblock, NULL);
270 if (error)
271 goto fail;
272 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
273 if (error)
274 goto fail;
275 error = -EIO;
276 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
277 goto fail_brelse;
278
279 qd->qd_bh = bh;
280 qd->qd_bh_qc = (struct gfs2_quota_change *)
281 (bh->b_data + sizeof(struct gfs2_meta_header) +
282 offset * sizeof(struct gfs2_quota_change));
283
284 mutex_lock(&sdp->sd_quota_mutex);
285
286 return 0;
287
288 fail_brelse:
289 brelse(bh);
290
291 fail:
292 qd->qd_bh_count--;
293 mutex_unlock(&sdp->sd_quota_mutex);
294 return error;
295}
296
/* Drop a hold on @qd's quota-change buffer; the last put releases the
   buffer and clears the cached pointers. */
static void bh_put(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_assert(sdp, qd->qd_bh_count);
	if (!--qd->qd_bh_count) {
		brelse(qd->qd_bh);
		qd->qd_bh = NULL;
		qd->qd_bh_qc = NULL;
	}
	mutex_unlock(&sdp->sd_quota_mutex);
}
310
/*
 * qd_fish - find one quota_data that needs syncing to the quota file
 * @sdp: the filesystem
 * @qdp: filled in with a locked, referenced candidate, or NULL
 *
 * Picks the first list entry that has pending changes (QDF_CHANGE),
 * is not already being synced (QDF_LOCKED), and has not been synced
 * in the current generation.  The winner is moved to the list tail,
 * marked QDF_LOCKED, and pinned (ref, slot, buffer).
 *
 * Returns: errno
 */
static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
{
	struct gfs2_quota_data *qd = NULL;
	int error;
	int found = 0;

	*qdp = NULL;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return 0;

	spin_lock(&sdp->sd_quota_spin);

	list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
		if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
		    !test_bit(QDF_CHANGE, &qd->qd_flags) ||
		    qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
			continue;

		list_move_tail(&qd->qd_list, &sdp->sd_quota_list);

		set_bit(QDF_LOCKED, &qd->qd_flags);
		gfs2_assert_warn(sdp, qd->qd_count);
		qd->qd_count++;
		/* Snapshot the delta to be written by this sync pass. */
		qd->qd_change_sync = qd->qd_change;
		gfs2_assert_warn(sdp, qd->qd_slot_count);
		qd->qd_slot_count++;
		found = 1;

		break;
	}

	if (!found)
		qd = NULL;

	spin_unlock(&sdp->sd_quota_spin);

	if (qd) {
		gfs2_assert_warn(sdp, qd->qd_change_sync);
		error = bh_get(qd);
		if (error) {
			/* Undo the pins taken under the spinlock. */
			clear_bit(QDF_LOCKED, &qd->qd_flags);
			slot_put(qd);
			qd_put(qd);
			return error;
		}
	}

	*qdp = qd;

	return 0;
}
363
/*
 * qd_trylock - try to claim a specific quota_data for syncing
 * @qd: the candidate
 *
 * Same pinning protocol as qd_fish(), but for a caller-chosen entry.
 *
 * Returns: 1 if @qd was claimed (QDF_LOCKED set, ref/slot/bh pinned),
 *          0 otherwise
 */
static int qd_trylock(struct gfs2_quota_data *qd)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return 0;

	spin_lock(&sdp->sd_quota_spin);

	if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
	    !test_bit(QDF_CHANGE, &qd->qd_flags)) {
		spin_unlock(&sdp->sd_quota_spin);
		return 0;
	}

	list_move_tail(&qd->qd_list, &sdp->sd_quota_list);

	set_bit(QDF_LOCKED, &qd->qd_flags);
	gfs2_assert_warn(sdp, qd->qd_count);
	qd->qd_count++;
	/* Snapshot the delta to be written by this sync. */
	qd->qd_change_sync = qd->qd_change;
	gfs2_assert_warn(sdp, qd->qd_slot_count);
	qd->qd_slot_count++;

	spin_unlock(&sdp->sd_quota_spin);

	gfs2_assert_warn(sdp, qd->qd_change_sync);
	if (bh_get(qd)) {
		/* Couldn't pin the buffer; undo everything. */
		clear_bit(QDF_LOCKED, &qd->qd_flags);
		slot_put(qd);
		qd_put(qd);
		return 0;
	}

	return 1;
}
400
/* Release a quota_data claimed by qd_fish()/qd_trylock(): clears
   QDF_LOCKED and drops the buffer, slot, and reference pins. */
static void qd_unlock(struct gfs2_quota_data *qd)
{
	gfs2_assert_warn(qd->qd_gl->gl_sbd,
			 test_bit(QDF_LOCKED, &qd->qd_flags));
	clear_bit(QDF_LOCKED, &qd->qd_flags);
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}
410
/*
 * qdsb_get - get a quota_data with its slot and buffer pinned
 * @sdp: the filesystem
 * @user: non-zero for a user quota
 * @id: the uid or gid
 * @create: passed to qd_get()
 * @qdp: filled in on success
 *
 * Convenience wrapper: qd_get() + slot_get() + bh_get(), with
 * goto-cleanup undoing partial acquisition on failure.
 *
 * Returns: errno
 */
static int qdsb_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
		    struct gfs2_quota_data **qdp)
{
	int error;

	error = qd_get(sdp, user, id, create, qdp);
	if (error)
		return error;

	error = slot_get(*qdp);
	if (error)
		goto fail;

	error = bh_get(*qdp);
	if (error)
		goto fail_slot;

	return 0;

 fail_slot:
	slot_put(*qdp);

 fail:
	qd_put(*qdp);
	return error;
}
437
/* Undo qdsb_get(): release buffer, slot, and reference, in that order. */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);
	slot_put(qd);
	qd_put(qd);
}
444
/*
 * gfs2_quota_hold - acquire the quota_data structures an operation needs
 * @ip: the inode
 * @uid: a uid the operation will change ownership to, or NO_QUOTA_CHANGE
 * @gid: likewise for the gid
 *
 * Fills ip->i_alloc.al_qd[] with up to four pinned quota_data entries:
 * the inode's current uid and gid, plus @uid/@gid when they differ
 * (e.g. chown).  On any failure everything acquired so far is dropped
 * via gfs2_quota_unhold().
 *
 * Returns: errno
 */
int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_quota_data **qd = al->al_qd;
	int error;

	if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
	    gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
		return -EIO;

	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
		return 0;

	error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
	if (error)
		goto out;
	al->al_qd_num++;
	qd++;

	error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
	if (error)
		goto out;
	al->al_qd_num++;
	qd++;

	if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
		error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
		if (error)
			goto out;
		al->al_qd_num++;
		qd++;
	}

	if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
		error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
		if (error)
			goto out;
		al->al_qd_num++;
		qd++;
	}

 out:
	if (error)
		gfs2_quota_unhold(ip);

	return error;
}
493
494void gfs2_quota_unhold(struct gfs2_inode *ip)
495{
496 struct gfs2_sbd *sdp = ip->i_sbd;
497 struct gfs2_alloc *al = &ip->i_alloc;
498 unsigned int x;
499
500 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
501
502 for (x = 0; x < al->al_qd_num; x++) {
503 qdsb_put(al->al_qd[x]);
504 al->al_qd[x] = NULL;
505 }
506 al->al_qd_num = 0;
507}
508
509static int sort_qd(const void *a, const void *b)
510{
511 struct gfs2_quota_data *qd_a = *(struct gfs2_quota_data **)a;
512 struct gfs2_quota_data *qd_b = *(struct gfs2_quota_data **)b;
513 int ret = 0;
514
515 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
516 !test_bit(QDF_USER, &qd_b->qd_flags)) {
517 if (test_bit(QDF_USER, &qd_a->qd_flags))
518 ret = -1;
519 else
520 ret = 1;
521 } else {
522 if (qd_a->qd_id < qd_b->qd_id)
523 ret = -1;
524 else if (qd_a->qd_id > qd_b->qd_id)
525 ret = 1;
526 }
527
528 return ret;
529}
530
/*
 * do_qc - apply a delta to @qd's record in the quota-change file
 * @qd: the quota data
 * @change: the signed number of blocks to add
 *
 * Under sd_quota_mutex, journals the change buffer, initializes the
 * on-disk record on first use, and updates both the on-disk and the
 * in-core running totals.  When the total returns to zero the record
 * is retired (slot and ref dropped); when it first becomes non-zero
 * an extra ref and slot hold keep the record alive until synced.
 */
static void do_qc(struct gfs2_quota_data *qd, int64_t change)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = sdp->sd_qc_inode->u.generic_ip;
	struct gfs2_quota_change *qc = qd->qd_bh_qc;
	int64_t x;

	mutex_lock(&sdp->sd_quota_mutex);
	gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);

	if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
		/* First change for this record: stamp its identity. */
		qc->qc_change = 0;
		qc->qc_flags = 0;
		if (test_bit(QDF_USER, &qd->qd_flags))
			qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
		qc->qc_id = cpu_to_be32(qd->qd_id);
	}

	x = qc->qc_change;
	x = be64_to_cpu(x) + change;
	qc->qc_change = cpu_to_be64(x);

	spin_lock(&sdp->sd_quota_spin);
	qd->qd_change = x;	/* keep in-core total in step */
	spin_unlock(&sdp->sd_quota_spin);

	if (!x) {
		gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
		clear_bit(QDF_CHANGE, &qd->qd_flags);
		qc->qc_flags = 0;
		qc->qc_id = 0;
		slot_put(qd);
		qd_put(qd);
	} else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
		qd_hold(qd);
		slot_hold(qd);
	}

	mutex_unlock(&sdp->sd_quota_mutex);
}
571
572/**
573 * gfs2_adjust_quota
574 *
575 * This function was mostly borrowed from gfs2_block_truncate_page which was
576 * in turn mostly borrowed from ext3
577 */
578static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
579 int64_t change, struct gfs2_quota_data *qd)
580{
581 struct inode *inode = ip->i_vnode;
582 struct address_space *mapping = inode->i_mapping;
583 unsigned long index = loc >> PAGE_CACHE_SHIFT;
584 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
585 unsigned blocksize, iblock, pos;
586 struct buffer_head *bh;
587 struct page *page;
588 void *kaddr;
589 __be64 *ptr;
590 u64 value;
591 int err = -EIO;
592
593 page = grab_cache_page(mapping, index);
594 if (!page)
595 return -ENOMEM;
596
597 blocksize = inode->i_sb->s_blocksize;
598 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
599
600 if (!page_has_buffers(page))
601 create_empty_buffers(page, blocksize, 0);
602
603 bh = page_buffers(page);
604 pos = blocksize;
605 while (offset >= pos) {
606 bh = bh->b_this_page;
607 iblock++;
608 pos += blocksize;
609 }
610
611 if (!buffer_mapped(bh)) {
612 gfs2_get_block(inode, iblock, bh, 1);
613 if (!buffer_mapped(bh))
614 goto unlock;
615 }
616
617 if (PageUptodate(page))
618 set_buffer_uptodate(bh);
619
620 if (!buffer_uptodate(bh)) {
621 ll_rw_block(READ, 1, &bh);
622 wait_on_buffer(bh);
623 if (!buffer_uptodate(bh))
624 goto unlock;
625 }
626
627 gfs2_trans_add_bh(ip->i_gl, bh, 0);
628
629 kaddr = kmap_atomic(page, KM_USER0);
630 ptr = (__be64 *)(kaddr + offset);
631 value = *ptr = cpu_to_be64(be64_to_cpu(*ptr) + change);
632 flush_dcache_page(page);
633 kunmap_atomic(kaddr, KM_USER0);
634 err = 0;
635 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
636#if 0
637 qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
638 qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
639#endif
640 qd->qd_qb.qb_value = cpu_to_be64(value);
641unlock:
642 unlock_page(page);
643 page_cache_release(page);
644 return err;
645}
646
/*
 * do_sync - write a batch of pending quota changes to the quota file
 * @num_qd: number of entries in @qda
 * @qda: the (claimed) quota_data entries to sync
 *
 * Takes every per-ID quota glock exclusively (in sorted order to avoid
 * deadlock with other nodes doing the same), then the quota-file inode
 * glock, reserves space if any record needs allocation, and applies
 * each snapshotted delta with gfs2_adjust_quota() while cancelling the
 * same delta in the local change file via do_qc().
 *
 * Returns: errno
 */
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
	struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
	struct gfs2_inode *ip = sdp->sd_quota_inode->u.generic_ip;
	unsigned int data_blocks, ind_blocks;
	struct file_ra_state ra_state;
	struct gfs2_holder *ghs, i_gh;
	unsigned int qx, x;
	struct gfs2_quota_data *qd;
	loff_t offset;
	unsigned int nalloc = 0;
	struct gfs2_alloc *al = NULL;
	int error;

	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
			       &data_blocks, &ind_blocks);

	ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
	if (!ghs)
		return -ENOMEM;

	/* Sorted lock order prevents ABBA deadlocks between nodes. */
	sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
	for (qx = 0; qx < num_qd; qx++) {
		error = gfs2_glock_nq_init(qda[qx]->qd_gl,
					   LM_ST_EXCLUSIVE,
					   GL_NOCACHE, &ghs[qx]);
		if (error)
			goto out;
	}

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
	if (error)
		goto out;

	/* Count how many records will need new allocation. */
	for (x = 0; x < num_qd; x++) {
		int alloc_required;

		offset = qd2offset(qda[x]);
		error = gfs2_write_alloc_required(ip, offset,
						  sizeof(struct gfs2_quota),
						  &alloc_required);
		if (error)
			goto out_gunlock;
		if (alloc_required)
			nalloc++;
	}

	if (nalloc) {
		al = gfs2_alloc_get(ip);

		al->al_requested = nalloc * (data_blocks + ind_blocks);

		error = gfs2_inplace_reserve(ip);
		if (error)
			goto out_alloc;

		error = gfs2_trans_begin(sdp,
					 al->al_rgd->rd_ri.ri_length +
					 num_qd * data_blocks +
					 nalloc * ind_blocks +
					 RES_DINODE + num_qd +
					 RES_STATFS, 0);
		if (error)
			goto out_ipres;
	} else {
		error = gfs2_trans_begin(sdp,
					 num_qd * data_blocks +
					 RES_DINODE + num_qd, 0);
		if (error)
			goto out_gunlock;
	}

	file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
	for (x = 0; x < num_qd; x++) {
		qd = qda[x];
		offset = qd2offset(qd);
		/* NOTE(review): the glock LVB is cast to a quota_data and
		   handed to gfs2_adjust_quota(), which writes qd->qd_qb
		   through it — verify the LVB layout really matches. */
		error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
					  (struct gfs2_quota_data *)
					  qd->qd_gl->gl_lvb);
		if (error)
			goto out_end_trans;

		/* Cancel the synced delta in the local change file. */
		do_qc(qd, -qd->qd_change_sync);
	}

	error = 0;

 out_end_trans:
	gfs2_trans_end(sdp);

 out_ipres:
	if (nalloc)
		gfs2_inplace_release(ip);

 out_alloc:
	if (nalloc)
		gfs2_alloc_put(ip);

 out_gunlock:
	gfs2_glock_dq_uninit(&i_gh);

 out:
	while (qx--)
		gfs2_glock_dq_uninit(&ghs[qx]);
	kfree(ghs);
	gfs2_log_flush_glock(ip->i_gl);

	return error;
}
756
/**
 * do_glock - acquire a quota glock, refreshing the cached values if needed
 * @qd: the quota data entry to lock
 * @force_refresh: non-zero to re-read the quota record from the quota file
 *	even when the cached lock value block (LVB) looks valid
 * @q_gh: filled in with the acquired holder on success
 *
 * Normally takes the quota glock shared and trusts the LVB.  If the LVB
 * is uninitialized (bad magic) or a refresh is forced, re-acquires the
 * glock exclusively, reads the record from the quota file under a shared
 * hold on the quota inode's glock, and rebuilds the LVB.  After a refresh
 * the holder remains exclusive — unless another node is waiting for the
 * glock, in which case it is dropped and the whole sequence restarts.
 *
 * Returns: errno
 */
static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
		    struct gfs2_holder *q_gh)
{
	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
	struct gfs2_inode *ip = sdp->sd_quota_inode->u.generic_ip;
	struct gfs2_holder i_gh;
	struct gfs2_quota q;
	char buf[sizeof(struct gfs2_quota)];
	struct file_ra_state ra_state;
	int error;

	file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
 restart:
	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
	if (error)
		return error;

	/* Pull the cached quota values out of the lock value block */
	gfs2_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);

	if (force_refresh || qd->qd_qb.qb_magic != GFS2_MAGIC) {
		loff_t pos;
		gfs2_glock_dq_uninit(q_gh);
		error = gfs2_glock_nq_init(qd->qd_gl,
					   LM_ST_EXCLUSIVE, GL_NOCACHE,
					   q_gh);
		if (error)
			return error;

		error = gfs2_glock_nq_init(ip->i_gl,
					   LM_ST_SHARED, 0,
					   &i_gh);
		if (error)
			goto fail;

		/* Zero-fill first so a short read past EOF leaves zeros */
		memset(buf, 0, sizeof(struct gfs2_quota));
		pos = qd2offset(qd);
		error = gfs2_internal_read(ip,
					   &ra_state, buf,
					   &pos,
					   sizeof(struct gfs2_quota));
		if (error < 0)
			goto fail_gunlock;

		gfs2_glock_dq_uninit(&i_gh);

		gfs2_quota_in(&q, buf);

		/* Rebuild the LVB from the on-disk record */
		memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
		qd->qd_qb.qb_magic = GFS2_MAGIC;
		qd->qd_qb.qb_limit = q.qu_limit;
		qd->qd_qb.qb_warn = q.qu_warn;
		qd->qd_qb.qb_value = q.qu_value;

		gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);

		/* Don't hold the glock exclusively against waiters; drop
		   it and retry from the top in shared mode instead. */
		if (gfs2_glock_is_blocking(qd->qd_gl)) {
			gfs2_glock_dq_uninit(q_gh);
			force_refresh = 0;
			goto restart;
		}
	}

	return 0;

 fail_gunlock:
	gfs2_glock_dq_uninit(&i_gh);

 fail:
	gfs2_glock_dq_uninit(q_gh);

	return error;
}
829
/**
 * gfs2_quota_lock - hold and lock the quota data for an inode operation
 * @ip: the inode
 * @uid: the user ID to charge (or NO_QUOTA_CHANGE)
 * @gid: the group ID to charge (or NO_QUOTA_CHANGE)
 *
 * Holds the relevant quota-data entries and, when quotas are enforced
 * and the caller isn't exempt, acquires their glocks and marks the inode
 * GIF_QD_LOCKED.  On any glock failure, all holders acquired so far are
 * released again.
 *
 * Returns: errno
 */
int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;
	unsigned int x;
	int error = 0;

	gfs2_quota_hold(ip, uid, gid);

	/* Root is exempt; locking only matters when quotas are enforced */
	if (capable(CAP_SYS_RESOURCE) ||
	    sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	/* Sort so every locker acquires quota glocks in the same order */
	sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
	     sort_qd, NULL);

	for (x = 0; x < al->al_qd_num; x++) {
		error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
		if (error)
			break;
	}

	if (!error)
		set_bit(GIF_QD_LOCKED, &ip->i_flags);
	else {
		/* Unwind the holders acquired before the failure */
		while (x--)
			gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
		gfs2_quota_unhold(ip);
	}

	return error;
}
862
863static int need_sync(struct gfs2_quota_data *qd)
864{
865 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
866 struct gfs2_tune *gt = &sdp->sd_tune;
867 int64_t value;
868 unsigned int num, den;
869 int do_sync = 1;
870
871 if (!qd->qd_qb.qb_limit)
872 return 0;
873
874 spin_lock(&sdp->sd_quota_spin);
875 value = qd->qd_change;
876 spin_unlock(&sdp->sd_quota_spin);
877
878 spin_lock(&gt->gt_spin);
879 num = gt->gt_quota_scale_num;
880 den = gt->gt_quota_scale_den;
881 spin_unlock(&gt->gt_spin);
882
883 if (value < 0)
884 do_sync = 0;
885 else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
886 do_sync = 0;
887 else {
888 value *= gfs2_jindex_size(sdp) * num;
889 do_div(value, den);
890 value += qd->qd_qb.qb_value;
891 if (value < (int64_t)qd->qd_qb.qb_limit)
892 do_sync = 0;
893 }
894
895 return do_sync;
896}
897
/**
 * gfs2_quota_unlock - release the quota glocks taken by gfs2_quota_lock()
 * @ip: the inode
 *
 * While dropping each holder, checks whether the entry's accumulated
 * local change warrants a sync; any such entry that can be claimed with
 * qd_trylock() is synced right here.  At most four entries exist per
 * inode allocation (the qda[4] bound), so this is cheap.
 */
void gfs2_quota_unlock(struct gfs2_inode *ip)
{
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_quota_data *qda[4];
	unsigned int count = 0;
	unsigned int x;

	/* Nothing to do unless gfs2_quota_lock() actually locked */
	if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
		goto out;

	for (x = 0; x < al->al_qd_num; x++) {
		struct gfs2_quota_data *qd;
		int sync;

		qd = al->al_qd[x];
		sync = need_sync(qd);

		gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);

		/* Only sync entries we can claim without blocking */
		if (sync && qd_trylock(qd))
			qda[count++] = qd;
	}

	if (count) {
		do_sync(count, qda);
		for (x = 0; x < count; x++)
			qd_unlock(qda[x]);
	}

 out:
	gfs2_quota_unhold(ip);
}
930
931#define MAX_LINE 256
932
933static int print_message(struct gfs2_quota_data *qd, char *type)
934{
935 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
936 char *line;
937 int len;
938
939 line = kmalloc(MAX_LINE, GFP_KERNEL);
940 if (!line)
941 return -ENOMEM;
942
943 len = snprintf(line, MAX_LINE-1,
944 "GFS2: fsid=%s: quota %s for %s %u\r\n",
945 sdp->sd_fsname, type,
946 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
947 qd->qd_id);
948 line[MAX_LINE-1] = 0;
949
950 if (current->signal) { /* Is this test still required? */
951 tty_write_message(current->signal->tty, line);
952 }
953
954 kfree(line);
955
956 return 0;
957}
958
/**
 * gfs2_quota_check - check if an allocation would put a quota over limit
 * @ip: the inode whose quota entries were locked by gfs2_quota_lock()
 * @uid: the user ID to check
 * @gid: the group ID to check
 *
 * For each locked quota entry matching @uid or @gid, adds the pending
 * local change to the cached value.  Crossing the hard limit yields
 * -EDQUOT; crossing the warn level prints a message, rate-limited by the
 * gt_quota_warn_period tunable.
 *
 * Returns: 0 on success (or quota not enforced), -EDQUOT if exceeded
 */
int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_quota_data *qd;
	int64_t value;
	unsigned int x;
	int error = 0;

	if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
		return 0;

	if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
		return 0;

	for (x = 0; x < al->al_qd_num; x++) {
		qd = al->al_qd[x];

		/* Only check the entries that belong to this uid/gid */
		if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
		      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
			continue;

		/* Cached global value plus this node's pending change */
		value = qd->qd_qb.qb_value;
		spin_lock(&sdp->sd_quota_spin);
		value += qd->qd_change;
		spin_unlock(&sdp->sd_quota_spin);

		if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
			print_message(qd, "exceeded");
			error = -EDQUOT;
			break;
		} else if (qd->qd_qb.qb_warn &&
			   (int64_t)qd->qd_qb.qb_warn < value &&
			   time_after_eq(jiffies, qd->qd_last_warn +
					 gfs2_tune_get(sdp,
						gt_quota_warn_period) * HZ)) {
			/* Warn level crossed; rate-limit repeat warnings */
			error = print_message(qd, "warning");
			qd->qd_last_warn = jiffies;
		}
	}

	return error;
}
1002
1003void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
1004 uint32_t uid, uint32_t gid)
1005{
1006 struct gfs2_alloc *al = &ip->i_alloc;
1007 struct gfs2_quota_data *qd;
1008 unsigned int x;
1009 unsigned int found = 0;
1010
1011 if (gfs2_assert_warn(ip->i_sbd, change))
1012 return;
1013 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
1014 return;
1015
1016 for (x = 0; x < al->al_qd_num; x++) {
1017 qd = al->al_qd[x];
1018
1019 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1020 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
1021 do_qc(qd, change);
1022 found++;
1023 }
1024 }
1025}
1026
/**
 * gfs2_quota_sync - sync all dirty quota entries to the quota file
 * @sdp: the filesystem
 *
 * Repeatedly gathers up to gt_quota_simul_sync dirty entries via
 * qd_fish() (which stores NULL when no more are found), writes each
 * batch with do_sync(), and stamps successfully-synced entries with the
 * new sync generation.  Loops until a batch comes up short or an error
 * occurs.
 *
 * Returns: errno
 */
int gfs2_quota_sync(struct gfs2_sbd *sdp)
{
	struct gfs2_quota_data **qda;
	unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
	unsigned int num_qd;
	unsigned int x;
	int error = 0;

	sdp->sd_quota_sync_gen++;

	qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
	if (!qda)
		return -ENOMEM;

	do {
		num_qd = 0;

		/* Collect a batch of entries that need syncing */
		for (;;) {
			error = qd_fish(sdp, qda + num_qd);
			if (error || !qda[num_qd])
				break;
			if (++num_qd == max_qd)
				break;
		}

		if (num_qd) {
			if (!error)
				error = do_sync(num_qd, qda);
			if (!error)
				for (x = 0; x < num_qd; x++)
					qda[x]->qd_sync_gen =
						sdp->sd_quota_sync_gen;

			for (x = 0; x < num_qd; x++)
				qd_unlock(qda[x]);
		}
	} while (!error && num_qd == max_qd); /* full batch => maybe more */

	kfree(qda);

	return error;
}
1069
1070int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id)
1071{
1072 struct gfs2_quota_data *qd;
1073 struct gfs2_holder q_gh;
1074 int error;
1075
1076 error = qd_get(sdp, user, id, CREATE, &qd);
1077 if (error)
1078 return error;
1079
1080 error = do_glock(qd, FORCE, &q_gh);
1081 if (!error)
1082 gfs2_glock_dq_uninit(&q_gh);
1083
1084 qd_put(qd);
1085
1086 return error;
1087}
1088
/**
 * gfs2_quota_read - read a quota record for userspace queries
 * @sdp: the filesystem
 * @user: non-zero for a user quota, zero for a group quota
 * @id: the uid or gid
 * @q: filled in with limit, warn level and current value
 *
 * The reported value includes this node's pending local change.  A caller
 * may only read its own uid, a group it belongs to, or anything with
 * CAP_SYS_ADMIN.
 *
 * Returns: errno (-EACCES if the caller may not read this ID)
 */
int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
		    struct gfs2_quota *q)
{
	struct gfs2_quota_data *qd;
	struct gfs2_holder q_gh;
	int error;

	/* Permission: own uid / member group, unless CAP_SYS_ADMIN */
	if (((user) ? (id != current->fsuid) : (!in_group_p(id))) &&
	    !capable(CAP_SYS_ADMIN))
		return -EACCES;

	error = qd_get(sdp, user, id, CREATE, &qd);
	if (error)
		return error;

	error = do_glock(qd, NO_FORCE, &q_gh);
	if (error)
		goto out;

	memset(q, 0, sizeof(struct gfs2_quota));
	q->qu_limit = qd->qd_qb.qb_limit;
	q->qu_warn = qd->qd_qb.qb_warn;
	q->qu_value = qd->qd_qb.qb_value;

	/* Include this node's not-yet-synced change */
	spin_lock(&sdp->sd_quota_spin);
	q->qu_value += qd->qd_change;
	spin_unlock(&sdp->sd_quota_spin);

	gfs2_glock_dq_uninit(&q_gh);

 out:
	qd_put(qd);

	return error;
}
1124
/**
 * gfs2_quota_init - scan the quota-change file at mount time
 * @sdp: the filesystem
 *
 * Sizes and allocates the quota slot bitmap, then walks every quota
 * change record in the quota-change file.  For each record with a
 * non-zero change, a quota-data entry is created, marked QDF_CHANGE,
 * bound to its slot, and added to the filesystem's quota list.
 *
 * Returns: errno
 */
int gfs2_quota_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = sdp->sd_qc_inode->u.generic_ip;
	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
	unsigned int x, slot = 0;
	unsigned int found = 0;
	uint64_t dblock;
	uint32_t extlen = 0;
	int error;

	/* Sanity-check the quota-change file: non-empty, at most 64MB,
	   and a whole number of filesystem blocks */
	if (!ip->i_di.di_size ||
	    ip->i_di.di_size > (64 << 20) ||
	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
		gfs2_consist_inode(ip);
		return -EIO;
	}
	sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
	sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);

	error = -ENOMEM;

	/* One bit per slot, allocated in page-sized chunks */
	sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
				       sizeof(unsigned char *), GFP_KERNEL);
	if (!sdp->sd_quota_bitmap)
		return error;

	for (x = 0; x < sdp->sd_quota_chunks; x++) {
		sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!sdp->sd_quota_bitmap[x])
			goto fail;
	}

	for (x = 0; x < blocks; x++) {
		struct buffer_head *bh;
		unsigned int y;

		/* Map the next extent of the file when the last runs out */
		if (!extlen) {
			int new = 0;
			error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
			if (error)
				goto fail;
		}
		gfs2_meta_ra(ip->i_gl, dblock, extlen);
		error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
				       &bh);
		if (error)
			goto fail;
		error = -EIO;
		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
			brelse(bh);
			goto fail;
		}

		/* Walk every change record in this block */
		for (y = 0;
		     y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
		     y++, slot++) {
			struct gfs2_quota_change qc;
			struct gfs2_quota_data *qd;

			gfs2_quota_change_in(&qc, bh->b_data +
					  sizeof(struct gfs2_meta_header) +
					  y * sizeof(struct gfs2_quota_change));
			if (!qc.qc_change)
				continue;

			error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
					 qc.qc_id, &qd);
			if (error) {
				brelse(bh);
				goto fail;
			}

			set_bit(QDF_CHANGE, &qd->qd_flags);
			qd->qd_change = qc.qc_change;
			qd->qd_slot = slot;
			qd->qd_slot_count = 1;
			qd->qd_last_touched = jiffies;

			/* Publish the entry under the quota spinlock */
			spin_lock(&sdp->sd_quota_spin);
			gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
			list_add(&qd->qd_list, &sdp->sd_quota_list);
			atomic_inc(&sdp->sd_quota_count);
			spin_unlock(&sdp->sd_quota_spin);

			found++;
		}

		brelse(bh);
		dblock++;
		extlen--;
	}

	if (found)
		fs_info(sdp, "found %u quota changes\n", found);

	return 0;

 fail:
	gfs2_quota_cleanup(sdp);
	return error;
}
1226
/**
 * gfs2_quota_scan - reclaim quota-data entries that have gone stale
 * @sdp: the filesystem
 *
 * Moves unreferenced entries untouched for longer than the
 * gt_quota_cache_secs tunable onto a private list under the quota
 * spinlock, then frees them outside the lock.
 */
void gfs2_quota_scan(struct gfs2_sbd *sdp)
{
	struct gfs2_quota_data *qd, *safe;
	LIST_HEAD(dead);

	spin_lock(&sdp->sd_quota_spin);
	list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
		if (!qd->qd_count &&
		    time_after_eq(jiffies, qd->qd_last_touched +
			        gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
			list_move(&qd->qd_list, &dead);
			gfs2_assert_warn(sdp,
					 atomic_read(&sdp->sd_quota_count) > 0);
			atomic_dec(&sdp->sd_quota_count);
		}
	}
	spin_unlock(&sdp->sd_quota_spin);

	/* Free outside the spinlock; entries on `dead` are ours alone */
	while (!list_empty(&dead)) {
		qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
		list_del(&qd->qd_list);

		gfs2_assert_warn(sdp, !qd->qd_change);
		gfs2_assert_warn(sdp, !qd->qd_slot_count);
		gfs2_assert_warn(sdp, !qd->qd_bh_count);

		gfs2_lvb_unhold(qd->qd_gl);
		kfree(qd);
	}
}
1257
/**
 * gfs2_quota_cleanup - tear down all quota-data entries at unmount
 * @sdp: the filesystem
 *
 * Drains the quota list, waiting (by yielding the CPU) for entries that
 * are still in use elsewhere, then frees each entry and finally the slot
 * bitmap.  An entry held only because of a pending change (QDF_CHANGE
 * with qd_count == 1) is torn down immediately.
 */
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_quota_list;
	struct gfs2_quota_data *qd;
	unsigned int x;

	spin_lock(&sdp->sd_quota_spin);
	while (!list_empty(head)) {
		qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);

		/* Still referenced beyond its own change record: rotate it
		   to the other end of the list and let other users finish */
		if (qd->qd_count > 1 ||
		    (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
			list_move(&qd->qd_list, head);
			spin_unlock(&sdp->sd_quota_spin);
			schedule();
			spin_lock(&sdp->sd_quota_spin);
			continue;
		}

		list_del(&qd->qd_list);
		atomic_dec(&sdp->sd_quota_count);
		spin_unlock(&sdp->sd_quota_spin);

		if (!qd->qd_count) {
			gfs2_assert_warn(sdp, !qd->qd_change);
			gfs2_assert_warn(sdp, !qd->qd_slot_count);
		} else
			gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
		gfs2_assert_warn(sdp, !qd->qd_bh_count);

		gfs2_lvb_unhold(qd->qd_gl);
		kfree(qd);

		spin_lock(&sdp->sd_quota_spin);
	}
	spin_unlock(&sdp->sd_quota_spin);

	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));

	if (sdp->sd_quota_bitmap) {
		for (x = 0; x < sdp->sd_quota_chunks; x++)
			kfree(sdp->sd_quota_bitmap[x]);
		kfree(sdp->sd_quota_bitmap);
	}
}
1303
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
new file mode 100644
index 000000000000..005529f6895d
--- /dev/null
+++ b/fs/gfs2/quota.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __QUOTA_DOT_H__
11#define __QUOTA_DOT_H__
12
13#define NO_QUOTA_CHANGE ((uint32_t)-1)
14
15int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
16void gfs2_quota_unhold(struct gfs2_inode *ip);
17
18int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
19void gfs2_quota_unlock(struct gfs2_inode *ip);
20
21int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
22void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
23 uint32_t uid, uint32_t gid);
24
25int gfs2_quota_sync(struct gfs2_sbd *sdp);
26int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id);
27int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
28 struct gfs2_quota *q);
29
30int gfs2_quota_init(struct gfs2_sbd *sdp);
31void gfs2_quota_scan(struct gfs2_sbd *sdp);
32void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
33
34#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
new file mode 100644
index 000000000000..68c85610fb5b
--- /dev/null
+++ b/fs/gfs2/recovery.c
@@ -0,0 +1,580 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "glock.h"
24#include "glops.h"
25#include "lm.h"
26#include "lops.h"
27#include "meta_io.h"
28#include "recovery.h"
29#include "super.h"
30#include "util.h"
31#include "dir.h"
32
33int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
34 struct buffer_head **bh)
35{
36 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
37 struct gfs2_glock *gl = ip->i_gl;
38 int new = 0;
39 uint64_t dblock;
40 uint32_t extlen;
41 int error;
42
43 error = gfs2_block_map(ip, blk, &new, &dblock,
44 &extlen);
45 if (error)
46 return error;
47 if (!dblock) {
48 gfs2_consist_inode(ip);
49 return -EIO;
50 }
51
52 gfs2_meta_ra(gl, dblock, extlen);
53 error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
54
55 return error;
56}
57
58int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
59{
60 struct list_head *head = &sdp->sd_revoke_list;
61 struct gfs2_revoke_replay *rr;
62 int found = 0;
63
64 list_for_each_entry(rr, head, rr_list) {
65 if (rr->rr_blkno == blkno) {
66 found = 1;
67 break;
68 }
69 }
70
71 if (found) {
72 rr->rr_where = where;
73 return 0;
74 }
75
76 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
77 if (!rr)
78 return -ENOMEM;
79
80 rr->rr_blkno = blkno;
81 rr->rr_where = where;
82 list_add(&rr->rr_list, head);
83
84 return 1;
85}
86
87int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
88{
89 struct gfs2_revoke_replay *rr;
90 int wrap, a, b, revoke;
91 int found = 0;
92
93 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
94 if (rr->rr_blkno == blkno) {
95 found = 1;
96 break;
97 }
98 }
99
100 if (!found)
101 return 0;
102
103 wrap = (rr->rr_where < sdp->sd_replay_tail);
104 a = (sdp->sd_replay_tail < where);
105 b = (where < rr->rr_where);
106 revoke = (wrap) ? (a || b) : (a && b);
107
108 return revoke;
109}
110
111void gfs2_revoke_clean(struct gfs2_sbd *sdp)
112{
113 struct list_head *head = &sdp->sd_revoke_list;
114 struct gfs2_revoke_replay *rr;
115
116 while (!list_empty(head)) {
117 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
118 list_del(&rr->rr_list);
119 kfree(rr);
120 }
121}
122
/**
 * get_log_header - read the log header for a given segment
 * @jd: the journal
 * @blk: the block to look at
 * @lh: the log header to return
 *
 * Read the log header for a given segment in a given journal.  Do a few
 * sanity checks on it.
 *
 * Returns: 0 on success,
 *          1 if the header was invalid or incomplete,
 *          errno on error
 */

static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
			  struct gfs2_log_header *head)
{
	struct buffer_head *bh;
	struct gfs2_log_header lh;
	uint32_t hash;
	int error;

	error = gfs2_replay_read_block(jd, blk, &bh);
	if (error)
		return error;

	/* The hash was computed over the on-disk header with lh_hash
	   zeroed, so reproduce that from the raw bytes before the
	   endianness conversion. */
	memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
	lh.lh_hash = 0;
	hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
	gfs2_log_header_in(&lh, bh->b_data);

	brelse(bh);

	/* Bad magic/type, wrong block number or checksum mismatch all
	   mean "not a valid header here" rather than a hard error. */
	if (lh.lh_header.mh_magic != GFS2_MAGIC ||
	    lh.lh_header.mh_type != GFS2_METATYPE_LH ||
	    lh.lh_blkno != blk ||
	    lh.lh_hash != hash)
		return 1;

	*head = lh;

	return 0;
}
166
/**
 * find_good_lh - find a good log header
 * @jd: the journal
 * @blk: the segment to start searching from (updated to the block the
 *	good header was found at)
 * @head: the log header to fill in
 *
 * Call get_log_header() to get a log header for a segment, but if the
 * segment is bad, scan forward (wrapping at the end of the journal)
 * until we find a good one.
 *
 * Returns: errno
 */

static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
			struct gfs2_log_header *head)
{
	unsigned int orig_blk = *blk;
	int error;

	for (;;) {
		/* 0 = found, <0 = I/O error; 1 = bad header, keep going */
		error = get_log_header(jd, *blk, head);
		if (error <= 0)
			return error;

		if (++*blk == jd->jd_blocks)
			*blk = 0;

		/* Wrapped all the way around: no valid header at all */
		if (*blk == orig_blk) {
			gfs2_consist_inode(jd->jd_inode->u.generic_ip);
			return -EIO;
		}
	}
}
200
/**
 * jhead_scan - make sure we've found the head of the log
 * @jd: the journal
 * @head: this is filled in with the log descriptor of the head
 *
 * At this point, seg and lh should be either the head of the log or just
 * before.  Scan forward until we find the head.
 *
 * Returns: errno
 */

static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
{
	unsigned int blk = head->lh_blkno;
	struct gfs2_log_header lh;
	int error;

	for (;;) {
		if (++blk == jd->jd_blocks)
			blk = 0;

		error = get_log_header(jd, blk, &lh);
		if (error < 0)
			return error;
		if (error == 1)
			/* Bad header; skip it and keep scanning */
			continue;

		/* Sequence numbers must be strictly monotonic; a repeat
		   means the journal is corrupt. */
		if (lh.lh_sequence == head->lh_sequence) {
			gfs2_consist_inode(jd->jd_inode->u.generic_ip);
			return -EIO;
		}
		/* A lower sequence means we just passed the head */
		if (lh.lh_sequence < head->lh_sequence)
			break;

		*head = lh;
	}

	return 0;
}
240
/**
 * gfs2_find_jhead - find the head of a log
 * @jd: the journal
 * @head: the log descriptor for the head of the log is returned here
 *
 * Do a binary search of a journal and find the valid log entry with the
 * highest sequence number.  (i.e. the log head)  The search relies on
 * sequence numbers increasing monotonically from the tail around the
 * circular log; jhead_scan() then walks forward to pin down the exact
 * head.
 *
 * Returns: errno
 */

int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
{
	struct gfs2_log_header lh_1, lh_m;
	uint32_t blk_1, blk_2, blk_m;
	int error;

	blk_1 = 0;
	blk_2 = jd->jd_blocks - 1;

	for (;;) {
		blk_m = (blk_1 + blk_2) / 2;

		error = find_good_lh(jd, &blk_1, &lh_1);
		if (error)
			return error;

		error = find_good_lh(jd, &blk_m, &lh_m);
		if (error)
			return error;

		if (blk_1 == blk_m || blk_m == blk_2)
			break;

		/* Head is in whichever half still has rising sequences */
		if (lh_1.lh_sequence <= lh_m.lh_sequence)
			blk_1 = blk_m;
		else
			blk_2 = blk_m;
	}

	error = jhead_scan(jd, &lh_1);
	if (error)
		return error;

	*head = lh_1;

	return error;
}
289
290/**
291 * foreach_descriptor - go through the active part of the log
292 * @jd: the journal
293 * @start: the first log header in the active region
294 * @end: the last log header (don't process the contents of this entry))
295 *
296 * Call a given function once for every log descriptor in the active
297 * portion of the log.
298 *
299 * Returns: errno
300 */
301
302static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
303 unsigned int end, int pass)
304{
305 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
306 struct gfs2_sbd *sdp = ip->i_sbd;
307 struct buffer_head *bh;
308 struct gfs2_log_descriptor *ld;
309 int error = 0;
310 u32 length;
311 __be64 *ptr;
312 unsigned int offset = sizeof(struct gfs2_log_descriptor);
313 offset += (sizeof(__be64)-1);
314 offset &= ~(sizeof(__be64)-1);
315
316 while (start != end) {
317 error = gfs2_replay_read_block(jd, start, &bh);
318 if (error)
319 return error;
320 if (gfs2_meta_check(sdp, bh)) {
321 brelse(bh);
322 return -EIO;
323 }
324 ld = (struct gfs2_log_descriptor *)bh->b_data;
325 length = be32_to_cpu(ld->ld_length);
326
327 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
328 struct gfs2_log_header lh;
329 error = get_log_header(jd, start, &lh);
330 if (!error) {
331 gfs2_replay_incr_blk(sdp, &start);
332 continue;
333 }
334 if (error == 1) {
335 gfs2_consist_inode(jd->jd_inode->u.generic_ip);
336 error = -EIO;
337 }
338 brelse(bh);
339 return error;
340 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
341 brelse(bh);
342 return -EIO;
343 }
344 ptr = (__be64 *)(bh->b_data + offset);
345 error = lops_scan_elements(jd, start, ld, ptr, pass);
346 if (error) {
347 brelse(bh);
348 return error;
349 }
350
351 while (length--)
352 gfs2_replay_incr_blk(sdp, &start);
353
354 brelse(bh);
355 }
356
357 return 0;
358}
359
360/**
361 * clean_journal - mark a dirty journal as being clean
362 * @sdp: the filesystem
363 * @jd: the journal
364 * @gl: the journal's glock
365 * @head: the head journal to start from
366 *
367 * Returns: errno
368 */
369
370static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
371{
372 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
373 struct gfs2_sbd *sdp = ip->i_sbd;
374 unsigned int lblock;
375 int new = 0;
376 uint64_t dblock;
377 struct gfs2_log_header *lh;
378 uint32_t hash;
379 struct buffer_head *bh;
380 int error;
381
382 lblock = head->lh_blkno;
383 gfs2_replay_incr_blk(sdp, &lblock);
384 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
385 if (error)
386 return error;
387 if (!dblock) {
388 gfs2_consist_inode(ip);
389 return -EIO;
390 }
391
392 bh = sb_getblk(sdp->sd_vfs, dblock);
393 lock_buffer(bh);
394 memset(bh->b_data, 0, bh->b_size);
395 set_buffer_uptodate(bh);
396 clear_buffer_dirty(bh);
397 unlock_buffer(bh);
398
399 lh = (struct gfs2_log_header *)bh->b_data;
400 memset(lh, 0, sizeof(struct gfs2_log_header));
401 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
402 lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
403 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
404 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
405 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
406 lh->lh_blkno = cpu_to_be32(lblock);
407 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
408 lh->lh_hash = cpu_to_be32(hash);
409
410 set_buffer_dirty(bh);
411 if (sync_dirty_buffer(bh))
412 gfs2_io_error_bh(sdp, bh);
413 brelse(bh);
414
415 return error;
416}
417
/**
 * gfs2_recover_journal - recovery a given journal
 * @jd: the struct gfs2_jdesc describing the journal
 * @wait: Don't return until the journal is clean (or an error is encountered)
 *
 * Acquire the journal's lock, check to see if the journal is clean, and
 * do recovery if necessary.
 *
 * Returns: errno
 */

int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
{
	struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_log_header head;
	struct gfs2_holder j_gh, ji_gh, t_gh;
	unsigned long t;
	int ro = 0;
	unsigned int pass;
	int error;

	fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid);

	/* Acquire the journal lock so we can do recovery */

	error = gfs2_glock_nq_num(sdp,
				  jd->jd_jid, &gfs2_journal_glops,
				  LM_ST_EXCLUSIVE,
				  LM_FLAG_NOEXP |
				  ((wait) ? 0 : LM_FLAG_TRY) |
				  GL_NOCACHE, &j_gh);
	switch (error) {
	case 0:
		break;

	case GLR_TRYFAILED:
		/* Another node is recovering it; report "gave up"
		   without an error code. */
		fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
		error = 0;
		/* fall through */

	default:
		goto fail;
	};

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
				   LM_FLAG_NOEXP, &ji_gh);
	if (error)
		goto fail_gunlock_j;

	fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);

	error = gfs2_jdesc_check(jd);
	if (error)
		goto fail_gunlock_ji;

	error = gfs2_find_jhead(jd, &head);
	if (error)
		goto fail_gunlock_ji;

	/* Only replay if the journal wasn't cleanly unmounted */
	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
			jd->jd_jid);

		t = jiffies;

		/* Acquire a shared hold on the transaction lock */

		error = gfs2_glock_nq_init(sdp->sd_trans_gl,
					   LM_ST_SHARED,
					   LM_FLAG_NOEXP |
					   LM_FLAG_PRIORITY |
					   GL_NEVER_RECURSE |
					   GL_NOCANCEL |
					   GL_NOCACHE,
					   &t_gh);
		if (error)
			goto fail_gunlock_ji;

		/* Can't replay onto a filesystem mounted read-only */
		if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
			if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
				ro = 1;
		} else {
			if (sdp->sd_vfs->s_flags & MS_RDONLY)
				ro = 1;
		}

		if (ro) {
			fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
				jd->jd_jid);
			error = -EROFS;
			goto fail_gunlock_tr;
		}

		fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);

		/* Two passes over the active region (see lops_*_scan) */
		for (pass = 0; pass < 2; pass++) {
			lops_before_scan(jd, &head, pass);
			error = foreach_descriptor(jd, head.lh_tail,
						   head.lh_blkno, pass);
			lops_after_scan(jd, error, pass);
			if (error)
				goto fail_gunlock_tr;
		}

		error = clean_journal(jd, &head);
		if (error)
			goto fail_gunlock_tr;

		gfs2_glock_dq_uninit(&t_gh);

		t = DIV_ROUND_UP(jiffies - t, HZ);

		fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
			jd->jd_jid, t);
	}

	gfs2_glock_dq_uninit(&ji_gh);

	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);

	gfs2_glock_dq_uninit(&j_gh);

	fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);

	return 0;

 fail_gunlock_tr:
	gfs2_glock_dq_uninit(&t_gh);

 fail_gunlock_ji:
	gfs2_glock_dq_uninit(&ji_gh);

 fail_gunlock_j:
	gfs2_glock_dq_uninit(&j_gh);

	fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");

 fail:
	gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);

	return error;
}
560
561/**
562 * gfs2_check_journals - Recover any dirty journals
563 * @sdp: the filesystem
564 *
565 */
566
567void gfs2_check_journals(struct gfs2_sbd *sdp)
568{
569 struct gfs2_jdesc *jd;
570
571 for (;;) {
572 jd = gfs2_jdesc_find_dirty(sdp);
573 if (!jd)
574 break;
575
576 if (jd != sdp->sd_jdesc)
577 gfs2_recover_journal(jd, NO_WAIT);
578 }
579}
580
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
new file mode 100644
index 000000000000..50d7eb57881c
--- /dev/null
+++ b/fs/gfs2/recovery.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RECOVERY_DOT_H__
11#define __RECOVERY_DOT_H__
12
/* Advance *blk by one journal block, wrapping back to 0 at the end of
   this node's journal. */
static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
{
	if (++*blk == sdp->sd_jdesc->jd_blocks)
		*blk = 0;
}
18
19int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
20 struct buffer_head **bh);
21
22int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
23int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
24void gfs2_revoke_clean(struct gfs2_sbd *sdp);
25
26int gfs2_find_jhead(struct gfs2_jdesc *jd,
27 struct gfs2_log_header *head);
28int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, int wait);
29void gfs2_check_journals(struct gfs2_sbd *sdp);
30
31#endif /* __RECOVERY_DOT_H__ */
32
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
new file mode 100644
index 000000000000..4ae559694396
--- /dev/null
+++ b/fs/gfs2/rgrp.c
@@ -0,0 +1,1369 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/fs.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bits.h"
23#include "glock.h"
24#include "glops.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "quota.h"
28#include "rgrp.h"
29#include "super.h"
30#include "trans.h"
31#include "ops_file.h"
32#include "util.h"
33
/**
 * gfs2_rgrp_verify - Verify that a resource group is consistent
 * @rgd: the rgrp
 *
 * Recounts the blocks in each of the four allocation states directly from
 * the on-disk bitmaps and cross-checks the totals against the cached rgrp
 * header.  On the first mismatch, the fs is flagged inconsistent via
 * gfs2_consist_rgrpd() and the function returns without further checks.
 */

void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi = NULL;
	uint32_t length = rgd->rd_ri.ri_length;
	uint32_t count[4], tmp;
	int buf, x;

	memset(count, 0, 4 * sizeof(uint32_t));

	/* Count # blocks in each of 4 possible allocation states */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		for (x = 0; x < 4; x++)
			count[x] += gfs2_bitcount(rgd,
						  bi->bi_bh->b_data +
						  bi->bi_offset,
						  bi->bi_len, x);
	}

	/* state 0: free data blocks */
	if (count[0] != rgd->rd_rg.rg_free) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free data mismatch: %u != %u\n",
			       count[0], rgd->rd_rg.rg_free);
		return;
	}

	/* state 1: used blocks = total data - free - dinodes */
	tmp = rgd->rd_ri.ri_data -
		rgd->rd_rg.rg_free -
		rgd->rd_rg.rg_dinodes;
	if (count[1] != tmp) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used data mismatch: %u != %u\n",
			       count[1], tmp);
		return;
	}

	/* state 2: unused in GFS2, so any count at all is corruption */
	if (count[2]) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "free metadata mismatch: %u != 0\n",
			       count[2]);
		return;
	}

	/* state 3: dinode blocks */
	if (count[3] != rgd->rd_rg.rg_dinodes) {
		if (gfs2_consist_rgrpd(rgd))
			fs_err(sdp, "used metadata mismatch: %u != %u\n",
			       count[3], rgd->rd_rg.rg_dinodes);
		return;
	}
}
92
93static inline int rgrp_contains_block(struct gfs2_rindex *ri, uint64_t block)
94{
95 uint64_t first = ri->ri_data0;
96 uint64_t last = first + ri->ri_data;
97 return !!(first <= block && block < last);
98}
99
100/**
101 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
102 * @sdp: The GFS2 superblock
103 * @n: The data block number
104 *
105 * Returns: The resource group, or NULL if not found
106 */
107
108struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
109{
110 struct gfs2_rgrpd *rgd;
111
112 spin_lock(&sdp->sd_rindex_spin);
113
114 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
115 if (rgrp_contains_block(&rgd->rd_ri, blk)) {
116 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
117 spin_unlock(&sdp->sd_rindex_spin);
118 return rgd;
119 }
120 }
121
122 spin_unlock(&sdp->sd_rindex_spin);
123
124 return NULL;
125}
126
127/**
128 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
129 * @sdp: The GFS2 superblock
130 *
131 * Returns: The first rgrp in the filesystem
132 */
133
134struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
135{
136 gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
137 return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
138}
139
140/**
141 * gfs2_rgrpd_get_next - get the next RG
142 * @rgd: A RG
143 *
144 * Returns: The next rgrp
145 */
146
147struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
148{
149 if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
150 return NULL;
151 return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
152}
153
/*
 * clear_rgrpdi - tear down all in-core resource group state
 * @sdp: the filesystem
 *
 * First empties the "recent" list (which only borrows rgrps, so entries
 * are merely unlinked), then frees every rgrp descriptor, dropping each
 * one's glock reference.  Callers serialize via sd_rindex_mutex
 * (see gfs2_clear_rgrpd / gfs2_ri_update).
 */
static void clear_rgrpdi(struct gfs2_sbd *sdp)
{
	struct list_head *head;
	struct gfs2_rgrpd *rgd;
	struct gfs2_glock *gl;

	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = NULL;
	head = &sdp->sd_rindex_recent_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
		list_del(&rgd->rd_recent);
	}
	spin_unlock(&sdp->sd_rindex_spin);

	/* Now free the descriptors themselves (main list + MRU list) */
	head = &sdp->sd_rindex_list;
	while (!list_empty(head)) {
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
		gl = rgd->rd_gl;

		list_del(&rgd->rd_list);
		list_del(&rgd->rd_list_mru);

		if (gl) {
			/* break glock -> rgrp back-pointer before dropping ref */
			gl->gl_object = NULL;
			gfs2_glock_put(gl);
		}

		kfree(rgd->rd_bits);
		kfree(rgd);
	}
}
186
/* Free all in-core resource group data, serialized against rindex updates. */
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
	mutex_lock(&sdp->sd_rindex_mutex);
	clear_rgrpdi(sdp);
	mutex_unlock(&sdp->sd_rindex_mutex);
}
193
/**
 * compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
 *
 * Calculates bitmap descriptors, one for each block that contains bitmap data
 *
 * Returns: errno
 */

static int compute_bitstructs(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_bitmap *bi;
	uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
	uint32_t bytes_left, bytes;
	int x;

	rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_KERNEL);
	if (!rgd->rd_bits)
		return -ENOMEM;

	bytes_left = rgd->rd_ri.ri_bitbytes;

	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;

		/* small rgrp; bitmap stored completely in header block */
		if (length == 1) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* header block */
		} else if (x == 0) {
			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
			bi->bi_offset = sizeof(struct gfs2_rgrp);
			bi->bi_start = 0;
			bi->bi_len = bytes;
		/* last block */
		} else if (x + 1 == length) {
			bytes = bytes_left;
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
			bi->bi_len = bytes;
		/* other blocks */
		} else {
			bytes = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header);
			bi->bi_offset = sizeof(struct gfs2_meta_header);
			bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
			bi->bi_len = bytes;
		}

		bytes_left -= bytes;
	}

	/* every bitmap byte must be accounted for, else rindex is corrupt */
	if (bytes_left) {
		gfs2_consist_rgrpd(rgd);
		return -EIO;
	}
	bi = rgd->rd_bits + (length - 1);
	/* last bitmap must end exactly at the rgrp's data block count */
	if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
		if (gfs2_consist_rgrpd(rgd)) {
			gfs2_rindex_print(&rgd->rd_ri);
			fs_err(sdp, "start=%u len=%u offset=%u\n",
			       bi->bi_start, bi->bi_len, bi->bi_offset);
		}
		return -EIO;
	}

	return 0;
}
266
/**
 * gfs2_ri_update - Pull in a new resource index from the disk
 * @ip: the rindex inode
 *
 * Drops all current rgrp state, then reads rindex entries one at a time,
 * building a gfs2_rgrpd (with bitmap layout and glock) for each.  A short
 * read of 0 bytes marks the end of the file; any other short read is
 * treated as corruption.  Caller holds sd_rindex_mutex.
 *
 * Returns: 0 on successful update, error code otherwise
 */

static int gfs2_ri_update(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct inode *inode = ip->i_vnode;
	struct gfs2_rgrpd *rgd;
	char buf[sizeof(struct gfs2_rindex)];
	struct file_ra_state ra_state;
	uint64_t junk = ip->i_di.di_size;
	int error;

	/* rindex size must be a whole number of on-disk entries */
	if (do_div(junk, sizeof(struct gfs2_rindex))) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	clear_rgrpdi(sdp);

	file_ra_state_init(&ra_state, inode->i_mapping);
	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
		loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
		error = gfs2_internal_read(ip, &ra_state, buf, &pos,
					    sizeof(struct gfs2_rindex));
		/* 0 bytes read == end of rindex file */
		if (!error)
			break;
		if (error != sizeof(struct gfs2_rindex)) {
			/* partial entry is corruption; negatives pass through */
			if (error > 0)
				error = -EIO;
			goto fail;
		}

		rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_KERNEL);
		error = -ENOMEM;
		if (!rgd)
			goto fail;

		mutex_init(&rgd->rd_mutex);
		lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
		rgd->rd_sbd = sdp;

		/* link in before any failure, so clear_rgrpdi() frees it */
		list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
		list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);

		gfs2_rindex_in(&rgd->rd_ri, buf);

		error = compute_bitstructs(rgd);
		if (error)
			goto fail;

		error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
				       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
		if (error)
			goto fail;

		rgd->rd_gl->gl_object = rgd;
		/* force a header (re)read on first gfs2_rgrp_bh_get() */
		rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
	}

	sdp->sd_rindex_vn = ip->i_gl->gl_vn;

	return 0;

 fail:
	clear_rgrpdi(sdp);

	return error;
}
340
/**
 * gfs2_rindex_hold - Grab a lock on the rindex
 * @sdp: The GFS2 superblock
 * @ri_gh: the glock holder
 *
 * We grab a lock on the rindex inode to make sure that it doesn't
 * change whilst we are performing an operation. We keep this lock
 * for quite long periods of time compared to other locks. This
 * doesn't matter, since it is shared and it is very, very rarely
 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
 *
 * This makes sure that we're using the latest copy of the resource index
 * special file, which might have been updated if someone expanded the
 * filesystem (via gfs2_grow utility), which adds new resource groups.
 *
 * Returns: 0 on success, error code otherwise
 */

int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
{
	struct gfs2_inode *ip = sdp->sd_rindex->u.generic_ip;
	struct gfs2_glock *gl = ip->i_gl;
	int error;

	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
	if (error)
		return error;

	/* Read new copy from disk if we don't have the latest.
	   Version is re-checked under the mutex (double-checked locking)
	   since another task may have refreshed it while we waited. */
	if (sdp->sd_rindex_vn != gl->gl_vn) {
		mutex_lock(&sdp->sd_rindex_mutex);
		if (sdp->sd_rindex_vn != gl->gl_vn) {
			error = gfs2_ri_update(ip);
			/* on failure the caller gets no lock back */
			if (error)
				gfs2_glock_dq_uninit(ri_gh);
		}
		mutex_unlock(&sdp->sd_rindex_mutex);
	}

	return error;
}
382
/**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Reads are reference-counted (rd_bh_count): only the first caller does
 * I/O, later callers just bump the count.  Caller must eventually call
 * gfs2_rgrp_bh_put() to free the bitmaps.
 *
 * Returns: errno
 */

int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_glock *gl = rgd->rd_gl;
	unsigned int length = rgd->rd_ri.ri_length;
	struct gfs2_bitmap *bi;
	unsigned int x, y;
	int error;

	mutex_lock(&rgd->rd_mutex);

	/* fast path: buffers already held, just take another ref */
	spin_lock(&sdp->sd_rindex_spin);
	if (rgd->rd_bh_count) {
		rgd->rd_bh_count++;
		spin_unlock(&sdp->sd_rindex_spin);
		mutex_unlock(&rgd->rd_mutex);
		return 0;
	}
	spin_unlock(&sdp->sd_rindex_spin);

	/* phase 1: start async reads on all bitmap blocks */
	for (x = 0; x < length; x++) {
		bi = rgd->rd_bits + x;
		error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, DIO_START,
				       &bi->bi_bh);
		if (error)
			goto fail;
	}

	/* phase 2: wait for completion and verify metadata types
	   (block 0 is the RG header, the rest are RB bitmap blocks) */
	for (y = length; y--;) {
		bi = rgd->rd_bits + y;
		error = gfs2_meta_reread(sdp, bi->bi_bh, DIO_WAIT);
		if (error)
			goto fail;
		if (gfs2_metatype_check(sdp, bi->bi_bh,
					(y) ? GFS2_METATYPE_RB :
					      GFS2_METATYPE_RG)) {
			error = -EIO;
			goto fail;
		}
	}

	/* refresh cached header if the glock version moved on */
	if (rgd->rd_rg_vn != gl->gl_vn) {
		gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
		rgd->rd_rg_vn = gl->gl_vn;
	}

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone = rgd->rd_rg.rg_free;
	rgd->rd_bh_count++;
	spin_unlock(&sdp->sd_rindex_spin);

	mutex_unlock(&rgd->rd_mutex);

	return 0;

	/* x holds the number of buffers obtained in phase 1; a failure in
	   phase 2 arrives with x == length, so all buffers get released */
 fail:
	while (x--) {
		bi = rgd->rd_bits + x;
		brelse(bi->bi_bh);
		bi->bi_bh = NULL;
		gfs2_assert_warn(sdp, !bi->bi_clone);
	}
	mutex_unlock(&rgd->rd_mutex);

	return error;
}
459
460void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
461{
462 struct gfs2_sbd *sdp = rgd->rd_sbd;
463
464 spin_lock(&sdp->sd_rindex_spin);
465 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
466 rgd->rd_bh_count++;
467 spin_unlock(&sdp->sd_rindex_spin);
468}
469
470/**
471 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
472 * @rgd: the struct gfs2_rgrpd describing the RG to read in
473 *
474 */
475
476void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
477{
478 struct gfs2_sbd *sdp = rgd->rd_sbd;
479 int x, length = rgd->rd_ri.ri_length;
480
481 spin_lock(&sdp->sd_rindex_spin);
482 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
483 if (--rgd->rd_bh_count) {
484 spin_unlock(&sdp->sd_rindex_spin);
485 return;
486 }
487
488 for (x = 0; x < length; x++) {
489 struct gfs2_bitmap *bi = rgd->rd_bits + x;
490 kfree(bi->bi_clone);
491 bi->bi_clone = NULL;
492 brelse(bi->bi_bh);
493 bi->bi_bh = NULL;
494 }
495
496 spin_unlock(&sdp->sd_rindex_spin);
497}
498
499void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
500{
501 struct gfs2_sbd *sdp = rgd->rd_sbd;
502 unsigned int length = rgd->rd_ri.ri_length;
503 unsigned int x;
504
505 for (x = 0; x < length; x++) {
506 struct gfs2_bitmap *bi = rgd->rd_bits + x;
507 if (!bi->bi_clone)
508 continue;
509 memcpy(bi->bi_clone + bi->bi_offset,
510 bi->bi_bh->b_data + bi->bi_offset,
511 bi->bi_len);
512 }
513
514 spin_lock(&sdp->sd_rindex_spin);
515 rgd->rd_free_clone = rgd->rd_rg.rg_free;
516 spin_unlock(&sdp->sd_rindex_spin);
517}
518
519/**
520 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
521 * @ip: the incore GFS2 inode structure
522 *
523 * Returns: the struct gfs2_alloc
524 */
525
526struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
527{
528 struct gfs2_alloc *al = &ip->i_alloc;
529
530 /* FIXME: Should assert that the correct locks are held here... */
531 memset(al, 0, sizeof(*al));
532 return al;
533}
534
/**
 * gfs2_alloc_put - throw away the struct gfs2_alloc for an inode
 * @ip: the inode
 *
 * Nothing to release: the gfs2_alloc lives embedded in the inode.
 */

void gfs2_alloc_put(struct gfs2_inode *ip)
{
}
545
546/**
547 * try_rgrp_fit - See if a given reservation will fit in a given RG
548 * @rgd: the RG data
549 * @al: the struct gfs2_alloc structure describing the reservation
550 *
551 * If there's room for the requested blocks to be allocated from the RG:
552 * Sets the $al_reserved_data field in @al.
553 * Sets the $al_reserved_meta field in @al.
554 * Sets the $al_rgd field in @al.
555 *
556 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
557 */
558
559static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
560{
561 struct gfs2_sbd *sdp = rgd->rd_sbd;
562 int ret = 0;
563
564 spin_lock(&sdp->sd_rindex_spin);
565 if (rgd->rd_free_clone >= al->al_requested) {
566 al->al_rgd = rgd;
567 ret = 1;
568 }
569 spin_unlock(&sdp->sd_rindex_spin);
570
571 return ret;
572}
573
/**
 * recent_rgrp_first - get first RG from "recent" list
 * @sdp: The GFS2 superblock
 * @rglast: address of the rgrp used last (0 if none)
 *
 * If @rglast is still on the recent list, continue from it; otherwise
 * start over from the head of the list.
 *
 * Returns: The chosen rgrp, or NULL if the recent list is empty
 */

static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
					    uint64_t rglast)
{
	struct gfs2_rgrpd *rgd = NULL;

	spin_lock(&sdp->sd_rindex_spin);

	if (list_empty(&sdp->sd_rindex_recent_list))
		goto out;

	if (!rglast)
		goto first;

	/* a match leaves rgd pointing at the last-used rgrp;
	   no match falls through to the "first" label below */
	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
		if (rgd->rd_ri.ri_addr == rglast)
			goto out;
	}

 first:
	rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
			 rd_recent);

 out:
	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}
609
/**
 * recent_rgrp_next - get next RG from "recent" list
 * @cur_rgd: current rgrp
 * @remove: if set, also unlink @cur_rgd from the recent list
 *
 * Finds @cur_rgd on the recent list and returns its successor (NULL if it
 * was last).  If @cur_rgd is no longer on the list (e.g. removed by a
 * concurrent caller), falls back to the head of the list.
 *
 * Returns: The next rgrp in the recent list, or NULL
 */

static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
					   int remove)
{
	struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
	struct list_head *head;
	struct gfs2_rgrpd *rgd;

	spin_lock(&sdp->sd_rindex_spin);

	head = &sdp->sd_rindex_recent_list;

	list_for_each_entry(rgd, head, rd_recent) {
		if (rgd == cur_rgd) {
			/* capture the successor before possibly unlinking */
			if (cur_rgd->rd_recent.next != head)
				rgd = list_entry(cur_rgd->rd_recent.next,
						 struct gfs2_rgrpd, rd_recent);
			else
				rgd = NULL;

			if (remove)
				list_del(&cur_rgd->rd_recent);

			goto out;
		}
	}

	/* cur_rgd not found: restart from the head of the list */
	rgd = NULL;
	if (!list_empty(head))
		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);

 out:
	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}
653
/**
 * recent_rgrp_add - add an RG to tail of "recent" list
 * @new_rgd: The rgrp to add
 *
 * The list is capped at roughly (total rgrps / number of journals) entries
 * so each node tends to keep its own working set; a full list, or an rgrp
 * already present, results in no change.
 */

static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
{
	struct gfs2_sbd *sdp = new_rgd->rd_sbd;
	struct gfs2_rgrpd *rgd;
	unsigned int count = 0;
	unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);

	spin_lock(&sdp->sd_rindex_spin);

	list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
		if (rgd == new_rgd)
			goto out;

		if (++count >= max)
			goto out;
	}
	list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);

 out:
	spin_unlock(&sdp->sd_rindex_spin);
}
681
/**
 * forward_rgrp_get - get an rgrp to try next from full list
 * @sdp: The GFS2 superblock
 *
 * If no forward pointer is cached, seed it with an rgrp offset by this
 * node's journal id, so different nodes start their full-list scans in
 * different regions of the filesystem.
 *
 * Returns: The rgrp to try next
 */

static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
{
	struct gfs2_rgrpd *rgd;
	unsigned int journals = gfs2_jindex_size(sdp);
	unsigned int rg = 0, x;

	spin_lock(&sdp->sd_rindex_spin);

	rgd = sdp->sd_rindex_forward;
	if (!rgd) {
		if (sdp->sd_rgrps >= journals)
			rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;

		/* walk forward rg entries from the head of the rgrp list */
		for (x = 0, rgd = gfs2_rgrpd_get_first(sdp);
		     x < rg;
		     x++, rgd = gfs2_rgrpd_get_next(rgd))
			/* Do Nothing */;

		sdp->sd_rindex_forward = rgd;
	}

	spin_unlock(&sdp->sd_rindex_spin);

	return rgd;
}
714
/**
 * forward_rgrp_set - set the forward rgrp pointer
 * @sdp: the filesystem
 * @rgd: The new forward rgrp (start point for the next full-list scan)
 *
 */

static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
{
	spin_lock(&sdp->sd_rindex_spin);
	sdp->sd_rindex_forward = rgd;
	spin_unlock(&sdp->sd_rindex_spin);
}
728
/**
 * get_local_rgrp - Choose and lock a rgrp for allocation
 * @ip: the inode to reserve space for
 *
 * Try to acquire rgrp in way which avoids contending with others.
 *
 * Two passes: first the per-node "recent" list with TRY locks only, then
 * the full rgrp list.  The full-list pass also starts with TRY locks; if a
 * complete lap found contended rgrps (skipped > 0), a second lap is made
 * with blocking locks.  On success, al->al_rgd_gh holds the chosen rgrp's
 * glock in EX.
 *
 * Returns: errno (-ENOSPC if nothing fits anywhere)
 */

static int get_local_rgrp(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_rgrpd *rgd, *begin = NULL;
	struct gfs2_alloc *al = &ip->i_alloc;
	int flags = LM_FLAG_TRY;
	int skipped = 0;
	int loops = 0;
	int error;

	/* Try recently successful rgrps */

	rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);

	while (rgd) {
		error = gfs2_glock_nq_init(rgd->rd_gl,
					   LM_ST_EXCLUSIVE, LM_FLAG_TRY,
					   &al->al_rgd_gh);
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			/* full rgrp: drop it from the recent list */
			gfs2_glock_dq_uninit(&al->al_rgd_gh);
			rgd = recent_rgrp_next(rgd, 1);
			break;

		case GLR_TRYFAILED:
			/* contended: keep it on the list, try the next one */
			rgd = recent_rgrp_next(rgd, 0);
			break;

		default:
			return error;
		}
	}

	/* Go through full list of rgrps */

	begin = rgd = forward_rgrp_get(sdp);

	for (;;) {
		error = gfs2_glock_nq_init(rgd->rd_gl,
					  LM_ST_EXCLUSIVE, flags,
					  &al->al_rgd_gh);
		switch (error) {
		case 0:
			if (try_rgrp_fit(rgd, al))
				goto out;
			gfs2_glock_dq_uninit(&al->al_rgd_gh);
			break;

		case GLR_TRYFAILED:
			skipped++;
			break;

		default:
			return error;
		}

		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);

		/* completed a lap: give up after a blocking lap, or
		   immediately if no rgrp was skipped due to contention */
		if (rgd == begin) {
			if (++loops >= 2 || !skipped)
				return -ENOSPC;
			flags = 0;
		}
	}

 out:
	ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;

	/* success on the full-list pass: remember this rgrp and advance
	   the shared forward pointer past it */
	if (begin) {
		recent_rgrp_add(rgd);
		rgd = gfs2_rgrpd_get_next(rgd);
		if (!rgd)
			rgd = gfs2_rgrpd_get_first(sdp);
		forward_rgrp_set(sdp, rgd);
	}

	return 0;
}
821
822/**
823 * gfs2_inplace_reserve_i - Reserve space in the filesystem
824 * @ip: the inode to reserve space for
825 *
826 * Returns: errno
827 */
828
829int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
830{
831 struct gfs2_sbd *sdp = ip->i_sbd;
832 struct gfs2_alloc *al = &ip->i_alloc;
833 int error;
834
835 if (gfs2_assert_warn(sdp, al->al_requested))
836 return -EINVAL;
837
838 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
839 if (error)
840 return error;
841
842 error = get_local_rgrp(ip);
843 if (error) {
844 gfs2_glock_dq_uninit(&al->al_ri_gh);
845 return error;
846 }
847
848 al->al_file = file;
849 al->al_line = line;
850
851 return 0;
852}
853
/**
 * gfs2_inplace_release - release an inplace reservation
 * @ip: the inode the reservation was taken out on
 *
 * Release a reservation made by gfs2_inplace_reserve().  Warns (with the
 * reserving call site) if more blocks were allocated than were reserved.
 */

void gfs2_inplace_release(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;

	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
			"al_file = %s, al_line = %u\n",
		        al->al_alloced, al->al_requested, al->al_file,
			al->al_line);

	al->al_rgd = NULL;
	gfs2_glock_dq_uninit(&al->al_rgd_gh);
	gfs2_glock_dq_uninit(&al->al_ri_gh);
}
876
877/**
878 * gfs2_get_block_type - Check a block in a RG is of given type
879 * @rgd: the resource group holding the block
880 * @block: the block number
881 *
882 * Returns: The block type (GFS2_BLKST_*)
883 */
884
885unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block)
886{
887 struct gfs2_bitmap *bi = NULL;
888 uint32_t length, rgrp_block, buf_block;
889 unsigned int buf;
890 unsigned char type;
891
892 length = rgd->rd_ri.ri_length;
893 rgrp_block = block - rgd->rd_ri.ri_data0;
894
895 for (buf = 0; buf < length; buf++) {
896 bi = rgd->rd_bits + buf;
897 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
898 break;
899 }
900
901 gfs2_assert(rgd->rd_sbd, buf < length);
902 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
903
904 type = gfs2_testbit(rgd,
905 bi->bi_bh->b_data + bi->bi_offset,
906 bi->bi_len, buf_block);
907
908 return type;
909}
910
/**
 * rgblk_search - find a block in @old_state, change allocation
 *           state to @new_state
 * @rgd: the resource group descriptor
 * @goal: the goal block within the RG (start here to search for avail block)
 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
 * Add the found bitmap buffer to the transaction.
 * Set the found bits to @new_state to change block's allocation state.
 * When a clone bitmap exists for the slice, the search runs against the
 * clone and the state change is applied to both clone and real bitmap.
 *
 * This function never fails, because we wouldn't call it unless we
 * know (from reservation results, etc.) that a block is available.
 *
 * Scope of @goal and returned block is just within rgrp, not the whole
 * filesystem.
 *
 * Returns: the block number allocated
 */

static uint32_t rgblk_search(struct gfs2_rgrpd *rgd, uint32_t goal,
			     unsigned char old_state, unsigned char new_state)
{
	struct gfs2_bitmap *bi = NULL;
	uint32_t length = rgd->rd_ri.ri_length;
	uint32_t blk = 0;
	unsigned int buf, x;

	/* Find bitmap block that contains bits for goal block */
	for (buf = 0; buf < length; buf++) {
		bi = rgd->rd_bits + buf;
		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
			break;
	}

	gfs2_assert(rgd->rd_sbd, buf < length);

	/* Convert scope of "goal" from rgrp-wide to within found bit block */
	goal -= bi->bi_start * GFS2_NBBY;

	/* Search (up to entire) bitmap in this rgrp for allocatable block.
	   "x <= length", instead of "x < length", because we typically start
	   the search in the middle of a bit block, but if we can't find an
	   allocatable block anywhere else, we want to be able wrap around and
	   search in the first part of our first-searched bit block. */
	for (x = 0; x <= length; x++) {
		if (bi->bi_clone)
			blk = gfs2_bitfit(rgd,
					  bi->bi_clone + bi->bi_offset,
					  bi->bi_len, goal, old_state);
		else
			blk = gfs2_bitfit(rgd,
					  bi->bi_bh->b_data + bi->bi_offset,
					  bi->bi_len, goal, old_state);
		if (blk != BFITNOENT)
			break;

		/* Try next bitmap block (wrap back to rgrp header if at end) */
		buf = (buf + 1) % length;
		bi = rgd->rd_bits + buf;
		goal = 0;
	}

	/* "never fails" contract: if the search somehow found nothing, the
	   fs is withdrawn and block 0 of the slice is (mis)used */
	if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
		blk = 0;

	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
	gfs2_setbit(rgd,
		    bi->bi_bh->b_data + bi->bi_offset,
		    bi->bi_len, blk, new_state);
	if (bi->bi_clone)
		gfs2_setbit(rgd,
			    bi->bi_clone + bi->bi_offset,
			    bi->bi_len, blk, new_state);

	return bi->bi_start * GFS2_NBBY + blk;
}
989
/**
 * rgblk_free - Change alloc state of given block(s)
 * @sdp: the filesystem
 * @bstart: the start of a run of blocks to free
 * @blen: the length of the block run (all must lie within ONE RG!)
 * @new_state: GFS2_BLKST_XXX the after-allocation block state
 *
 * Only the real bitmap is changed; a copy-on-write clone of each touched
 * bitmap slice is created first (if not already present) so in-flight
 * allocation searches keep seeing the pre-free state.
 *
 * Returns: Resource group containing the block(s), or NULL (after marking
 *          the fs inconsistent) if @bstart maps to no known rgrp
 */

static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, uint64_t bstart,
				     uint32_t blen, unsigned char new_state)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_bitmap *bi = NULL;
	uint32_t length, rgrp_blk, buf_blk;
	unsigned int buf;

	rgd = gfs2_blk2rgrpd(sdp, bstart);
	if (!rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", bstart);
		return NULL;
	}

	length = rgd->rd_ri.ri_length;

	rgrp_blk = bstart - rgd->rd_ri.ri_data0;

	while (blen--) {
		/* locate the bitmap slice covering this block */
		for (buf = 0; buf < length; buf++) {
			bi = rgd->rd_bits + buf;
			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
				break;
		}

		gfs2_assert(rgd->rd_sbd, buf < length);

		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
		rgrp_blk++;

		/* snapshot the slice before the first modification */
		if (!bi->bi_clone) {
			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
					       GFP_KERNEL | __GFP_NOFAIL);
			memcpy(bi->bi_clone + bi->bi_offset,
			       bi->bi_bh->b_data + bi->bi_offset,
			       bi->bi_len);
		}
		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
		gfs2_setbit(rgd,
			    bi->bi_bh->b_data + bi->bi_offset,
			    bi->bi_len, buf_blk, new_state);
	}

	return rgd;
}
1046
/**
 * gfs2_alloc_data - Allocate a data block
 * @ip: the inode to allocate the data block for
 *
 * Searches from the inode's data goal block (or the rgrp's last data
 * allocation) and marks the found block USED, updating the rgrp header,
 * statfs, quota, and the clone free count.  Caller must already hold a
 * reservation (al_rgd set by gfs2_inplace_reserve).
 *
 * Returns: the allocated block
 */

uint64_t gfs2_alloc_data(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	uint32_t goal, blk;
	uint64_t block;

	/* prefer the inode's own goal if it lies in this rgrp */
	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
		goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
	else
		goal = rgd->rd_last_alloc_data;

	blk = rgblk_search(rgd, goal,
			   GFS2_BLKST_FREE, GFS2_BLKST_USED);
	rgd->rd_last_alloc_data = blk;

	block = rgd->rd_ri.ri_data0 + blk;
	ip->i_di.di_goal_data = block;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	gfs2_statfs_change(sdp, 0, -1, 0);
	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
1091
/**
 * gfs2_alloc_meta - Allocate a metadata block
 * @ip: the inode to allocate the metadata block for
 *
 * Like gfs2_alloc_data() but uses the metadata goal and additionally
 * marks the new block unrevokable in the current transaction.
 *
 * Returns: the allocated block
 */

uint64_t gfs2_alloc_meta(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_alloc *al = &ip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	uint32_t goal, blk;
	uint64_t block;

	/* prefer the inode's own goal if it lies in this rgrp */
	if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
		goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
	else
		goal = rgd->rd_last_alloc_meta;

	blk = rgblk_search(rgd, goal,
			   GFS2_BLKST_FREE, GFS2_BLKST_USED);
	rgd->rd_last_alloc_meta = blk;

	block = rgd->rd_ri.ri_data0 + blk;
	ip->i_di.di_goal_meta = block;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	gfs2_statfs_change(sdp, 0, -1, 0);
	gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
	gfs2_trans_add_unrevoke(sdp, block);

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
1137
/**
 * gfs2_alloc_di - Allocate a dinode
 * @dip: the directory that the inode is going in
 *
 * Allocates a block in the DINODE state, adjusting both the free and
 * dinode counts in the rgrp header.  Quota is charged by the caller,
 * not here.
 *
 * Returns: the block allocated
 */

uint64_t gfs2_alloc_di(struct gfs2_inode *dip)
{
	struct gfs2_sbd *sdp = dip->i_sbd;
	struct gfs2_alloc *al = &dip->i_alloc;
	struct gfs2_rgrpd *rgd = al->al_rgd;
	uint32_t blk;
	uint64_t block;

	blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE);

	rgd->rd_last_alloc_meta = blk;

	block = rgd->rd_ri.ri_data0 + blk;

	gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
	rgd->rd_rg.rg_free--;
	rgd->rd_rg.rg_dinodes++;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	al->al_alloced++;

	gfs2_statfs_change(sdp, 0, -1, +1);
	gfs2_trans_add_unrevoke(sdp, block);

	spin_lock(&sdp->sd_rindex_spin);
	rgd->rd_free_clone--;
	spin_unlock(&sdp->sd_rindex_spin);

	return block;
}
1178
1179/**
1180 * gfs2_free_data - free a contiguous run of data block(s)
1181 * @ip: the inode these blocks are being freed from
1182 * @bstart: first block of a run of contiguous blocks
1183 * @blen: the length of the block run
1184 *
1185 */
1186
1187void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1188{
1189 struct gfs2_sbd *sdp = ip->i_sbd;
1190 struct gfs2_rgrpd *rgd;
1191
1192 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1193 if (!rgd)
1194 return;
1195
1196 rgd->rd_rg.rg_free += blen;
1197
1198 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1199 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1200
1201 gfs2_trans_add_rg(rgd);
1202
1203 gfs2_statfs_change(sdp, 0, +blen, 0);
1204 gfs2_quota_change(ip, -(int64_t)blen,
1205 ip->i_di.di_uid, ip->i_di.di_gid);
1206}
1207
/**
 * gfs2_free_meta - free a contiguous run of metadata block(s)
 * @ip: the inode these blocks are being freed from
 * @bstart: first block of a run of contiguous blocks
 * @blen: the length of the block run
 *
 * Like gfs2_free_data(), but additionally wipes the blocks' buffers from
 * the journal/cache via gfs2_meta_wipe().
 */

void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
{
	struct gfs2_sbd *sdp = ip->i_sbd;
	struct gfs2_rgrpd *rgd;

	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
	if (!rgd)
		return;

	rgd->rd_rg.rg_free += blen;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	gfs2_trans_add_rg(rgd);

	gfs2_statfs_change(sdp, 0, +blen, 0);
	gfs2_quota_change(ip, -(int64_t)blen,
			  ip->i_di.di_uid, ip->i_di.di_gid);
	gfs2_meta_wipe(ip, bstart, blen);
}
1237
/*
 * gfs2_free_uninit_di - free a dinode block without quota/wipe processing
 * @rgd: the resource group expected to contain the dinode
 * @blkno: the dinode's block number
 *
 * Marks the block FREE and moves it from the dinode count to the free
 * count; withdraws if @blkno turns out to live in a different rgrp.
 */
void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	struct gfs2_rgrpd *tmp_rgd;

	tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
	if (!tmp_rgd)
		return;
	gfs2_assert_withdraw(sdp, rgd == tmp_rgd);

	/* freeing a dinode from an rgrp that claims to have none */
	if (!rgd->rd_rg.rg_dinodes)
		gfs2_consist_rgrpd(rgd);
	rgd->rd_rg.rg_dinodes--;
	rgd->rd_rg.rg_free++;

	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
	gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);

	gfs2_statfs_change(sdp, 0, +1, -1);
	gfs2_trans_add_rg(rgd);
}
1259
/**
 * gfs2_free_di - free a dinode block
 * @rgd: the resource group that contains the dinode
 * @ip: the inode
 *
 * Frees the dinode block, returns its quota to the owner, and wipes the
 * block's buffers from the journal/cache.
 */

void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
	gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
	gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
	gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
}
1273
/**
 * gfs2_rlist_add - add a RG to a list of RGs
 * @sdp: the filesystem
 * @rlist: the list of resource groups
 * @block: the block
 *
 * Figure out what RG a block belongs to and add that RG to the list
 * (duplicates are ignored).  Must not be called once gfs2_rlist_alloc()
 * has created the holder array.
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
		    uint64_t block)
{
	struct gfs2_rgrpd *rgd;
	struct gfs2_rgrpd **tmp;
	unsigned int new_space;
	unsigned int x;

	/* adding after the holders were allocated would desync the arrays */
	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
		return;

	rgd = gfs2_blk2rgrpd(sdp, block);
	if (!rgd) {
		if (gfs2_consist(sdp))
			fs_err(sdp, "block = %llu\n", block);
		return;
	}

	/* already listed? nothing to do */
	for (x = 0; x < rlist->rl_rgrps; x++)
		if (rlist->rl_rgd[x] == rgd)
			return;

	/* grow the array in chunks of 10 entries */
	if (rlist->rl_rgrps == rlist->rl_space) {
		new_space = rlist->rl_space + 10;

		tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
			      GFP_KERNEL | __GFP_NOFAIL);

		if (rlist->rl_rgd) {
			memcpy(tmp, rlist->rl_rgd,
			       rlist->rl_space * sizeof(struct gfs2_rgrpd *));
			kfree(rlist->rl_rgd);
		}

		rlist->rl_space = new_space;
		rlist->rl_rgd = tmp;
	}

	rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
}
1326
/**
 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
 *      and initialize an array of glock holders for them
 * @rlist: the list of resource groups
 * @state: the lock state to acquire the RG lock in
 * @flags: the modifier flags for the holder structures
 *
 * Holders are initialized (not enqueued); gfs2_rlist_free() uninits them.
 *
 * FIXME: Don't use NOFAIL
 *
 */

void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
		      int flags)
{
	unsigned int x;

	rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
				GFP_KERNEL | __GFP_NOFAIL);
	for (x = 0; x < rlist->rl_rgrps; x++)
		gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
				 state, flags,
				 &rlist->rl_ghs[x]);
}
1350
1351/**
1352 * gfs2_rlist_free - free a resource group list
1353 * @list: the list of resource groups
1354 *
1355 */
1356
1357void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1358{
1359 unsigned int x;
1360
1361 kfree(rlist->rl_rgd);
1362
1363 if (rlist->rl_ghs) {
1364 for (x = 0; x < rlist->rl_rgrps; x++)
1365 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1366 kfree(rlist->rl_ghs);
1367 }
1368}
1369
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
new file mode 100644
index 000000000000..4c44a191b1c1
--- /dev/null
+++ b/fs/gfs2/rgrp.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RGRP_DOT_H__
11#define __RGRP_DOT_H__
12
13void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
14
15struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
16struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
17struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
18
19void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
20int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
21
22int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
23void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
24void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
25
26void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
27
28struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
29void gfs2_alloc_put(struct gfs2_inode *ip);
30
31int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
32 char *file, unsigned int line);
33#define gfs2_inplace_reserve(ip) \
34gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
35
36void gfs2_inplace_release(struct gfs2_inode *ip);
37
38unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block);
39
40uint64_t gfs2_alloc_data(struct gfs2_inode *ip);
41uint64_t gfs2_alloc_meta(struct gfs2_inode *ip);
42uint64_t gfs2_alloc_di(struct gfs2_inode *ip);
43
44void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
45void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
46void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno);
47void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
48
/* A dynamically grown set of resource groups touched by one operation,
 * plus (after gfs2_rlist_alloc()) a matching array of glock holders. */
struct gfs2_rgrp_list {
	unsigned int rl_rgrps;		/* number of RGs currently in rl_rgd */
	unsigned int rl_space;		/* allocated capacity of rl_rgd */
	struct gfs2_rgrpd **rl_rgd;	/* the resource groups themselves */
	struct gfs2_holder *rl_ghs;	/* one holder per RG; NULL until allocated */
};
55
56void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
57 uint64_t block);
58void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
59 int flags);
60void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
61
62#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
new file mode 100644
index 000000000000..a4da649d086f
--- /dev/null
+++ b/fs/gfs2/super.c
@@ -0,0 +1,950 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "dir.h"
24#include "format.h"
25#include "glock.h"
26#include "glops.h"
27#include "inode.h"
28#include "log.h"
29#include "meta_io.h"
30#include "quota.h"
31#include "recovery.h"
32#include "rgrp.h"
33#include "super.h"
34#include "trans.h"
35#include "unlinked.h"
36#include "util.h"
37
/**
 * gfs2_tune_init - Fill a gfs2_tune structure with default values
 * @gt: tune
 *
 */

void gfs2_tune_init(struct gfs2_tune *gt)
{
	spin_lock_init(&gt->gt_spin);

	gt->gt_ilimit = 100;
	gt->gt_ilimit_tries = 3;
	gt->gt_ilimit_min = 1;
	gt->gt_demote_secs = 300;
	gt->gt_incore_log_blocks = 1024;
	gt->gt_log_flush_secs = 60;
	gt->gt_jindex_refresh_secs = 60;
	/* daemon wakeup intervals, in seconds */
	gt->gt_scand_secs = 15;
	gt->gt_recoverd_secs = 60;
	gt->gt_logd_secs = 1;
	gt->gt_quotad_secs = 5;
	gt->gt_inoded_secs = 15;
	/* quota tunables */
	gt->gt_quota_simul_sync = 64;
	gt->gt_quota_warn_period = 10;
	gt->gt_quota_scale_num = 1;
	gt->gt_quota_scale_den = 1;
	gt->gt_quota_cache_secs = 300;
	gt->gt_quota_quantum = 60;
	gt->gt_atime_quantum = 3600;
	/* new files default to neither journaled data nor direct I/O */
	gt->gt_new_files_jdata = 0;
	gt->gt_new_files_directio = 0;
	gt->gt_max_atomic_write = 4 << 20;	/* 4 MiB */
	gt->gt_max_readahead = 1 << 18;		/* 256 KiB */
	gt->gt_lockdump_size = 131072;
	gt->gt_stall_secs = 600;
	gt->gt_complain_secs = 10;
	gt->gt_reclaim_limit = 5000;
	gt->gt_entries_per_readdir = 32;
	gt->gt_prefetch_secs = 10;
	/* "greedy" glock hold times, in jiffies */
	gt->gt_greedy_default = HZ / 10;
	gt->gt_greedy_quantum = HZ / 40;
	gt->gt_greedy_max = HZ / 4;
	gt->gt_statfs_quantum = 30;
	gt->gt_statfs_slow = 0;
}
83
84/**
85 * gfs2_check_sb - Check superblock
86 * @sdp: the filesystem
87 * @sb: The superblock
88 * @silent: Don't print a message if the check fails
89 *
90 * Checks the version code of the FS is one that we understand how to
91 * read and that the sizes of the various on-disk structures have not
92 * changed.
93 */
94
95int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
96{
97 unsigned int x;
98
99 if (sb->sb_header.mh_magic != GFS2_MAGIC ||
100 sb->sb_header.mh_type != GFS2_METATYPE_SB) {
101 if (!silent)
102 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
103 return -EINVAL;
104 }
105
106 /* If format numbers match exactly, we're done. */
107
108 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
109 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
110 return 0;
111
112 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
113 for (x = 0; gfs2_old_fs_formats[x]; x++)
114 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
115 break;
116
117 if (!gfs2_old_fs_formats[x]) {
118 printk(KERN_WARNING
119 "GFS2: code version (%u, %u) is incompatible "
120 "with ondisk format (%u, %u)\n",
121 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
122 sb->sb_fs_format, sb->sb_multihost_format);
123 printk(KERN_WARNING
124 "GFS2: I don't know how to upgrade this FS\n");
125 return -EINVAL;
126 }
127 }
128
129 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
130 for (x = 0; gfs2_old_multihost_formats[x]; x++)
131 if (gfs2_old_multihost_formats[x] ==
132 sb->sb_multihost_format)
133 break;
134
135 if (!gfs2_old_multihost_formats[x]) {
136 printk(KERN_WARNING
137 "GFS2: code version (%u, %u) is incompatible "
138 "with ondisk format (%u, %u)\n",
139 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
140 sb->sb_fs_format, sb->sb_multihost_format);
141 printk(KERN_WARNING
142 "GFS2: I don't know how to upgrade this FS\n");
143 return -EINVAL;
144 }
145 }
146
147 if (!sdp->sd_args.ar_upgrade) {
148 printk(KERN_WARNING
149 "GFS2: code version (%u, %u) is incompatible "
150 "with ondisk format (%u, %u)\n",
151 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
152 sb->sb_fs_format, sb->sb_multihost_format);
153 printk(KERN_INFO
154 "GFS2: Use the \"upgrade\" mount option to upgrade "
155 "the FS\n");
156 printk(KERN_INFO "GFS2: See the manual for more details\n");
157 return -EINVAL;
158 }
159
160 return 0;
161}
162
/**
 * gfs2_read_sb - Read super block
 * @sdp: The GFS2 superblock
 * @gl: the glock for the superblock (assumed to be held)
 * @silent: Don't print message if mount fails
 *
 * Reads and validates the on-disk superblock, then derives the size and
 * geometry constants (block shifts, pointers per block, metadata tree
 * height tables) the rest of the code depends on.
 */

int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
{
	struct buffer_head *bh;
	uint32_t hash_blocks, ind_blocks, leaf_blocks;
	uint32_t tmp_blocks;
	unsigned int x;
	int error;

	/* The superblock lives at a fixed basic-block address; convert
	   to an FS block number with the current shift. */
	error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift,
			       DIO_FORCE | DIO_START | DIO_WAIT, &bh);
	if (error) {
		if (!silent)
			fs_err(sdp, "can't read superblock\n");
		return error;
	}

	gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size);
	gfs2_sb_in(&sdp->sd_sb, bh->b_data);
	brelse(bh);

	error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
	if (error)
		return error;

	/* Derived per-block counts and shifts */
	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
			       GFS2_BASIC_BLOCK_SHIFT;
	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_dinode)) / sizeof(uint64_t);
	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_meta_header)) / sizeof(uint64_t);
	sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
	sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
	sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
	sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
	sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			       sizeof(struct gfs2_unlinked_tag);
	sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_meta_header)) /
			       sizeof(struct gfs2_quota_change);

	/* Compute maximum reservation required to add a entry to a directory */

	hash_blocks = DIV_ROUND_UP(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH),
				   sdp->sd_jbsize);

	ind_blocks = 0;
	for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
		tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
		ind_blocks += tmp_blocks;
	}

	leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;

	sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;

	/* Build sd_heightsize[]: capacity at each metadata tree height.
	   Each extra height multiplies capacity by sd_inptrs; the loop
	   stops when the 64-bit multiply would overflow (detected by
	   dividing back and comparing). */
	sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
				sizeof(struct gfs2_dinode);
	sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		uint64_t space, d;
		uint32_t m;

		space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		if (d != sdp->sd_heightsize[x - 1] || m)
			break;
		sdp->sd_heightsize[x] = space;
	}
	sdp->sd_max_height = x;
	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);

	/* Same table for journaled files, which use the smaller
	   sd_jbsize payload at height 1. */
	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
				 sizeof(struct gfs2_dinode);
	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
	for (x = 2;; x++) {
		uint64_t space, d;
		uint32_t m;

		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
		d = space;
		m = do_div(d, sdp->sd_inptrs);

		if (d != sdp->sd_jheightsize[x - 1] || m)
			break;
		sdp->sd_jheightsize[x] = space;
	}
	sdp->sd_max_jheight = x;
	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);

	return 0;
}
266
/* Placeholder: on-disk format upgrade is not implemented yet, so mounting
 * with the "upgrade" option is currently a successful no-op. */
int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *sb_gl)
{
	return 0;
}
271
/**
 * gfs2_jindex_hold - Grab a lock on the jindex
 * @sdp: The GFS2 superblock
 * @ji_gh: the holder for the jindex glock
 *
 * This is very similar to the gfs2_rindex_hold() function, except that
 * in general we hold the jindex lock for longer periods of time and
 * we grab it far less frequently (in general) then the rgrp lock.
 *
 * Each pass of the loop looks up "journal<N>" for the next journal
 * number and builds a descriptor for it; the loop ends when the lookup
 * returns -ENOENT (no more journals) or an error occurs.  On success
 * (-ENOENT case) the jindex glock is left held in @ji_gh; on any error
 * it has been released.
 *
 * Returns: errno
 */

int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
{
	struct gfs2_inode *dip = sdp->sd_jindex->u.generic_ip;
	struct qstr name;
	char buf[20];
	struct gfs2_jdesc *jd;
	int error;

	name.name = buf;

	mutex_lock(&sdp->sd_jindex_mutex);

	for (;;) {
		/* (Re)acquire the jindex glock for this pass */
		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
					   GL_LOCAL_EXCL, ji_gh);
		if (error)
			break;

		name.len = sprintf(buf, "journal%u", sdp->sd_journals);
		name.hash = gfs2_disk_hash(name.name, name.len);

		error = gfs2_dir_search(sdp->sd_jindex,
					&name, NULL, NULL);
		if (error == -ENOENT) {
			/* No more journals: done, glock stays held */
			error = 0;
			break;
		}

		/* Drop the glock before the lookup below */
		gfs2_glock_dq_uninit(ji_gh);

		if (error)
			break;

		error = -ENOMEM;
		jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
		if (!jd)
			break;

		jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
		if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
			if (!jd->jd_inode)
				error = -ENOENT;
			else
				error = PTR_ERR(jd->jd_inode);
			kfree(jd);
			break;
		}

		/* Publish the new descriptor and bump the journal count */
		spin_lock(&sdp->sd_jindex_spin);
		jd->jd_jid = sdp->sd_journals++;
		list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
		spin_unlock(&sdp->sd_jindex_spin);
	}

	mutex_unlock(&sdp->sd_jindex_mutex);

	return error;
}
342
/**
 * gfs2_jindex_free - Clear all the journal index information
 * @sdp: The GFS2 superblock
 *
 */

void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
	struct list_head list;
	struct gfs2_jdesc *jd;

	/* Splice sd_jindex_list onto the local head: insert "list" into
	   the chain, then unlink the original head, leaving every
	   descriptor reachable only through "list". */
	spin_lock(&sdp->sd_jindex_spin);
	list_add(&list, &sdp->sd_jindex_list);
	list_del_init(&sdp->sd_jindex_list);
	sdp->sd_journals = 0;
	spin_unlock(&sdp->sd_jindex_spin);

	/* Tear the descriptors down outside the spinlock */
	while (!list_empty(&list)) {
		jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
		list_del(&jd->jd_list);
		iput(jd->jd_inode);
		kfree(jd);
	}
}
367
368static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
369{
370 struct gfs2_jdesc *jd;
371 int found = 0;
372
373 list_for_each_entry(jd, head, jd_list) {
374 if (jd->jd_jid == jid) {
375 found = 1;
376 break;
377 }
378 }
379
380 if (!found)
381 jd = NULL;
382
383 return jd;
384}
385
386struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
387{
388 struct gfs2_jdesc *jd;
389
390 spin_lock(&sdp->sd_jindex_spin);
391 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
392 spin_unlock(&sdp->sd_jindex_spin);
393
394 return jd;
395}
396
397void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
398{
399 struct gfs2_jdesc *jd;
400
401 spin_lock(&sdp->sd_jindex_spin);
402 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
403 if (jd)
404 jd->jd_dirty = 1;
405 spin_unlock(&sdp->sd_jindex_spin);
406}
407
408struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
409{
410 struct gfs2_jdesc *jd;
411 int found = 0;
412
413 spin_lock(&sdp->sd_jindex_spin);
414
415 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
416 if (jd->jd_dirty) {
417 jd->jd_dirty = 0;
418 found = 1;
419 break;
420 }
421 }
422 spin_unlock(&sdp->sd_jindex_spin);
423
424 if (!found)
425 jd = NULL;
426
427 return jd;
428}
429
/**
 * gfs2_jdesc_check - sanity-check a journal's size and allocation
 * @jd: the journal descriptor
 *
 * The journal must be between 8 MiB and 1 GiB, a whole number of FS
 * blocks, and fully allocated on disk; otherwise the inode is marked
 * inconsistent and -EIO is returned.  Also caches the journal's block
 * count in jd->jd_blocks.
 *
 * Returns: errno
 */

int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
	struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
	struct gfs2_sbd *sdp = ip->i_sbd;
	int ar;
	int error;

	/* 8 MiB <= size <= 1 GiB, and block-aligned */
	if (ip->i_di.di_size < (8 << 20) ||
	    ip->i_di.di_size > (1 << 30) ||
	    (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
		gfs2_consist_inode(ip);
		return -EIO;
	}
	jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;

	/* The journal must have no holes: writing anywhere within it
	   must never require new block allocation. */
	error = gfs2_write_alloc_required(ip,
					  0, ip->i_di.di_size,
					  &ar);
	if (!error && ar) {
		gfs2_consist_inode(ip);
		error = -EIO;
	}

	return error;
}
455
/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Verifies that our own journal was cleanly unmounted, initializes the
 * log position and the unlinked/quota subsystems from it, and marks the
 * journal live.
 *
 * Returns: errno
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = sdp->sd_jdesc->jd_inode->u.generic_ip;
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_holder t_gh;
	struct gfs2_log_header head;
	int error;

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
				   GL_LOCAL_EXCL | GL_NEVER_RECURSE, &t_gh);
	if (error)
		return error;

	/* Drop any stale cached copies of our journal's metadata */
	gfs2_meta_cache_flush(ip);
	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);

	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
	if (error)
		goto fail;

	/* The journal head must show a clean unmount */
	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
		gfs2_consist(sdp);
		error = -EIO;
		goto fail;
	}

	/* Initialize some head of the log stuff */
	sdp->sd_log_sequence = head.lh_sequence + 1;
	gfs2_log_pointers_init(sdp, head.lh_blkno);

	error = gfs2_unlinked_init(sdp);
	if (error)
		goto fail;
	error = gfs2_quota_init(sdp);
	if (error)
		goto fail_unlinked;

	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	gfs2_glock_dq_uninit(&t_gh);

	return 0;

 fail_unlinked:
	gfs2_unlinked_cleanup(sdp);

 fail:
	/* Don't leave a cached copy of the trans glock on failure */
	t_gh.gh_flags |= GL_NOCACHE;
	gfs2_glock_dq_uninit(&t_gh);

	return error;
}
515
/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Flushes outstanding state to disk, shuts the log down, and clears the
 * journal-live flag.
 *
 * Returns: errno
 */

int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
	struct gfs2_holder t_gh;
	int error;

	/* Push out everything that would otherwise dirty the log */
	gfs2_unlinked_dealloc(sdp);
	gfs2_quota_sync(sdp);
	gfs2_statfs_sync(sdp);

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
				   GL_LOCAL_EXCL | GL_NEVER_RECURSE | GL_NOCACHE,
				   &t_gh);
	/* If the fs has already been withdrawn, carry on tearing down */
	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return error;

	gfs2_meta_syncfs(sdp);
	gfs2_log_shutdown(sdp);

	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	/* The holder may be empty if we continued past a failure above */
	if (t_gh.gh_gl)
		gfs2_glock_dq_uninit(&t_gh);

	gfs2_unlinked_cleanup(sdp);
	gfs2_quota_cleanup(sdp);

	return error;
}
551
552int gfs2_statfs_init(struct gfs2_sbd *sdp)
553{
554 struct gfs2_inode *m_ip = sdp->sd_statfs_inode->u.generic_ip;
555 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
556 struct gfs2_inode *l_ip = sdp->sd_sc_inode->u.generic_ip;
557 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
558 struct buffer_head *m_bh, *l_bh;
559 struct gfs2_holder gh;
560 int error;
561
562 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
563 &gh);
564 if (error)
565 return error;
566
567 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
568 if (error)
569 goto out;
570
571 if (sdp->sd_args.ar_spectator) {
572 spin_lock(&sdp->sd_statfs_spin);
573 gfs2_statfs_change_in(m_sc, m_bh->b_data +
574 sizeof(struct gfs2_dinode));
575 spin_unlock(&sdp->sd_statfs_spin);
576 } else {
577 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
578 if (error)
579 goto out_m_bh;
580
581 spin_lock(&sdp->sd_statfs_spin);
582 gfs2_statfs_change_in(m_sc, m_bh->b_data +
583 sizeof(struct gfs2_dinode));
584 gfs2_statfs_change_in(l_sc, l_bh->b_data +
585 sizeof(struct gfs2_dinode));
586 spin_unlock(&sdp->sd_statfs_spin);
587
588 brelse(l_bh);
589 }
590
591 out_m_bh:
592 brelse(m_bh);
593
594 out:
595 gfs2_glock_dq_uninit(&gh);
596
597 return 0;
598}
599
/**
 * gfs2_statfs_change - apply a delta to this node's local statfs counters
 * @sdp: the filesystem
 * @total: delta to the total-blocks count
 * @free: delta to the free-blocks count
 * @dinodes: delta to the dinode count
 *
 * The delta is applied both in-core and to the local statfs file's
 * buffer.  If the buffer cannot be read the change is silently dropped.
 * NOTE(review): the caller appears to be expected to already be inside a
 * transaction (only gfs2_trans_add_bh is called here) — confirm at call
 * sites.
 */

void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free,
			int64_t dinodes)
{
	struct gfs2_inode *l_ip = sdp->sd_sc_inode->u.generic_ip;
	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *l_bh;
	int error;

	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
	if (error)
		return;

	/* sd_statfs_mutex is held only around gfs2_trans_add_bh(), here
	   and in gfs2_statfs_sync() */
	mutex_lock(&sdp->sd_statfs_mutex);
	gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
	mutex_unlock(&sdp->sd_statfs_mutex);

	/* Update the in-core counters and the buffer under the spinlock */
	spin_lock(&sdp->sd_statfs_spin);
	l_sc->sc_total += total;
	l_sc->sc_free += free;
	l_sc->sc_dinodes += dinodes;
	gfs2_statfs_change_out(l_sc, l_bh->b_data +
			       sizeof(struct gfs2_dinode));
	spin_unlock(&sdp->sd_statfs_spin);

	brelse(l_bh);
}
626
/**
 * gfs2_statfs_sync - fold this node's local statfs deltas into the
 *                    master statfs file
 * @sdp: the filesystem
 *
 * Done under the master statfs inode's exclusive glock and inside a
 * transaction; afterwards the local counters (in-core and on-disk) are
 * zeroed.  A no-op if the local deltas are all zero.
 *
 * Returns: errno
 */

int gfs2_statfs_sync(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *m_ip = sdp->sd_statfs_inode->u.generic_ip;
	struct gfs2_inode *l_ip = sdp->sd_sc_inode->u.generic_ip;
	struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
	struct gfs2_holder gh;
	struct buffer_head *m_bh, *l_bh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out;

	/* Refresh the in-core master counters; bail early if there are
	   no local changes to fold in. */
	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
		spin_unlock(&sdp->sd_statfs_spin);
		goto out_bh;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
	if (error)
		goto out_bh;

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out_bh2;

	/* sd_statfs_mutex is held only around gfs2_trans_add_bh(), here
	   and in gfs2_statfs_change() */
	mutex_lock(&sdp->sd_statfs_mutex);
	gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
	mutex_unlock(&sdp->sd_statfs_mutex);

	/* Move the local deltas into the master counters and zero the
	   local copy, in-core and in the buffer. */
	spin_lock(&sdp->sd_statfs_spin);
	m_sc->sc_total += l_sc->sc_total;
	m_sc->sc_free += l_sc->sc_free;
	m_sc->sc_dinodes += l_sc->sc_dinodes;
	memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
	memset(l_bh->b_data + sizeof(struct gfs2_dinode),
	       0, sizeof(struct gfs2_statfs_change));
	spin_unlock(&sdp->sd_statfs_spin);

	gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));

	gfs2_trans_end(sdp);

 out_bh2:
	brelse(l_bh);

 out_bh:
	brelse(m_bh);

 out:
	gfs2_glock_dq_uninit(&gh);

	return error;
}
692
693/**
694 * gfs2_statfs_i - Do a statfs
695 * @sdp: the filesystem
696 * @sg: the sg structure
697 *
698 * Returns: errno
699 */
700
701int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
702{
703 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
704 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
705
706 spin_lock(&sdp->sd_statfs_spin);
707
708 *sc = *m_sc;
709 sc->sc_total += l_sc->sc_total;
710 sc->sc_free += l_sc->sc_free;
711 sc->sc_dinodes += l_sc->sc_dinodes;
712
713 spin_unlock(&sdp->sd_statfs_spin);
714
715 if (sc->sc_free < 0)
716 sc->sc_free = 0;
717 if (sc->sc_free > sc->sc_total)
718 sc->sc_free = sc->sc_total;
719 if (sc->sc_dinodes < 0)
720 sc->sc_dinodes = 0;
721
722 return 0;
723}
724
/**
 * statfs_slow_fill - accumulate one RG's counts into the statfs totals
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Returns: 0 on success, -ESTALE if the LVB is invalid
 * NOTE(review): as written this always returns 0; the -ESTALE case
 * described above never occurs here.
 */

static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
			    struct gfs2_statfs_change *sc)
{
	gfs2_rgrp_verify(rgd);
	/* Add this RG's data-block, free-block, and dinode counts */
	sc->sc_total += rgd->rd_ri.ri_data;
	sc->sc_free += rgd->rd_rg.rg_free;
	sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
	return 0;
}
742
/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */

int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
{
	struct gfs2_holder ri_gh;
	struct gfs2_rgrpd *rgd_next;
	struct gfs2_holder *gha, *gh;
	unsigned int slots = 64;	/* max async RG lock requests in flight */
	unsigned int x;
	int done;
	int error = 0, err;

	memset(sc, 0, sizeof(struct gfs2_statfs_change));
	gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
	if (!gha)
		return -ENOMEM;

	error = gfs2_rindex_hold(sdp, &ri_gh);
	if (error)
		goto out;

	rgd_next = gfs2_rgrpd_get_first(sdp);

	/* Pipeline: each slot carries one async RG lock request.
	   Completed requests are tallied via statfs_slow_fill() and the
	   slot is refilled with the next RG, until all RGs are visited
	   and every slot has drained. */
	for (;;) {
		done = 1;

		for (x = 0; x < slots; x++) {
			gh = gha + x;

			/* Harvest a completed request in this slot */
			if (gh->gh_gl && gfs2_glock_poll(gh)) {
				err = gfs2_glock_wait(gh);
				if (err) {
					gfs2_holder_uninit(gh);
					error = err;
				} else {
					if (!error)
						error = statfs_slow_fill(
							gh->gh_gl->gl_object, sc);
					gfs2_glock_dq_uninit(gh);
				}
			}

			if (gh->gh_gl)
				done = 0;	/* still waiting on this slot */
			else if (rgd_next && !error) {
				/* Refill the slot with the next RG */
				error = gfs2_glock_nq_init(rgd_next->rd_gl,
							   LM_ST_SHARED,
							   GL_ASYNC,
							   gh);
				rgd_next = gfs2_rgrpd_get_next(rgd_next);
				done = 0;
			}

			if (signal_pending(current))
				error = -ERESTARTSYS;
		}

		if (done)
			break;

		yield();
	}

	gfs2_glock_dq_uninit(&ri_gh);

 out:
	kfree(gha);

	return error;
}
824
/* Per-journal bookkeeping for gfs2_lock_fs_check_clean(): one held
 * shared glock per journal inode, chained on a local list. */
struct lfcc {
	struct list_head list;
	struct gfs2_holder gh;
};
829
830/**
831 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
832 * journals are clean
833 * @sdp: the file system
834 * @state: the state to put the transaction lock into
835 * @t_gh: the hold on the transaction lock
836 *
837 * Returns: errno
838 */
839
840int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh)
841{
842 struct gfs2_inode *ip;
843 struct gfs2_holder ji_gh;
844 struct gfs2_jdesc *jd;
845 struct lfcc *lfcc;
846 LIST_HEAD(list);
847 struct gfs2_log_header lh;
848 int error;
849
850 error = gfs2_jindex_hold(sdp, &ji_gh);
851 if (error)
852 return error;
853
854 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
855 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
856 if (!lfcc) {
857 error = -ENOMEM;
858 goto out;
859 }
860 ip = jd->jd_inode->u.generic_ip;
861 error = gfs2_glock_nq_init(ip->i_gl,
862 LM_ST_SHARED, 0,
863 &lfcc->gh);
864 if (error) {
865 kfree(lfcc);
866 goto out;
867 }
868 list_add(&lfcc->list, &list);
869 }
870
871 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
872 LM_FLAG_PRIORITY | GL_NEVER_RECURSE | GL_NOCACHE,
873 t_gh);
874
875 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
876 error = gfs2_jdesc_check(jd);
877 if (error)
878 break;
879 error = gfs2_find_jhead(jd, &lh);
880 if (error)
881 break;
882 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
883 error = -EBUSY;
884 break;
885 }
886 }
887
888 if (error)
889 gfs2_glock_dq_uninit(t_gh);
890
891 out:
892 while (!list_empty(&list)) {
893 lfcc = list_entry(list.next, struct lfcc, list);
894 list_del(&lfcc->list);
895 gfs2_glock_dq_uninit(&lfcc->gh);
896 kfree(lfcc);
897 }
898 gfs2_glock_dq_uninit(&ji_gh);
899
900 return error;
901}
902
903/**
904 * gfs2_freeze_fs - freezes the file system
905 * @sdp: the file system
906 *
907 * This function flushes data and meta data for all machines by
908 * aquiring the transaction log exclusively. All journals are
909 * ensured to be in a clean state as well.
910 *
911 * Returns: errno
912 */
913
914int gfs2_freeze_fs(struct gfs2_sbd *sdp)
915{
916 int error = 0;
917
918 mutex_lock(&sdp->sd_freeze_lock);
919
920 if (!sdp->sd_freeze_count++) {
921 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
922 if (error)
923 sdp->sd_freeze_count--;
924 }
925
926 mutex_unlock(&sdp->sd_freeze_lock);
927
928 return error;
929}
930
931/**
932 * gfs2_unfreeze_fs - unfreezes the file system
933 * @sdp: the file system
934 *
935 * This function allows the file system to proceed by unlocking
936 * the exclusively held transaction lock. Other GFS2 nodes are
937 * now free to acquire the lock shared and go on with their lives.
938 *
939 */
940
941void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
942{
943 mutex_lock(&sdp->sd_freeze_lock);
944
945 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
946 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
947
948 mutex_unlock(&sdp->sd_freeze_lock);
949}
950
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
new file mode 100644
index 000000000000..6abb7b5c8828
--- /dev/null
+++ b/fs/gfs2/super.h
@@ -0,0 +1,54 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SUPER_DOT_H__
11#define __SUPER_DOT_H__
12
13void gfs2_tune_init(struct gfs2_tune *gt);
14
15int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
16int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
17int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *gl_sb);
18
19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
20{
21 unsigned int x;
22 spin_lock(&sdp->sd_jindex_spin);
23 x = sdp->sd_journals;
24 spin_unlock(&sdp->sd_jindex_spin);
25 return x;
26}
27
28int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
29void gfs2_jindex_free(struct gfs2_sbd *sdp);
30
31struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
32void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
33struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
34int gfs2_jdesc_check(struct gfs2_jdesc *jd);
35
36int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
37 struct gfs2_inode **ipp);
38
39int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
40int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
41
42int gfs2_statfs_init(struct gfs2_sbd *sdp);
43void gfs2_statfs_change(struct gfs2_sbd *sdp,
44 int64_t total, int64_t free, int64_t dinodes);
45int gfs2_statfs_sync(struct gfs2_sbd *sdp);
46int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
47int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
48
49int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh);
50int gfs2_freeze_fs(struct gfs2_sbd *sdp);
51void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
52
53#endif /* __SUPER_DOT_H__ */
54
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
new file mode 100644
index 000000000000..f05ba8f69132
--- /dev/null
+++ b/fs/gfs2/sys.c
@@ -0,0 +1,582 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/kobject.h>
17#include <linux/gfs2_ondisk.h>
18#include <asm/semaphore.h>
19#include <asm/uaccess.h>
20
21#include "gfs2.h"
22#include "lm_interface.h"
23#include "incore.h"
24#include "lm.h"
25#include "sys.h"
26#include "super.h"
27#include "glock.h"
28#include "quota.h"
29#include "util.h"
30
/* Mount arguments handed to GFS2 when booting from an initial ram disk
 * (see sys.h); reads/writes are serialized by gfs2_sys_margs_lock. */
char *gfs2_sys_margs;
spinlock_t gfs2_sys_margs_lock;
33
34static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
35{
36 return sprintf(buf, "%s\n", sdp->sd_vfs->s_id);
37}
38
39static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
40{
41 return sprintf(buf, "%s\n", sdp->sd_fsname);
42}
43
44static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
45{
46 unsigned int count;
47
48 mutex_lock(&sdp->sd_freeze_lock);
49 count = sdp->sd_freeze_count;
50 mutex_unlock(&sdp->sd_freeze_lock);
51
52 return sprintf(buf, "%u\n", count);
53}
54
55static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
56{
57 ssize_t ret = len;
58 int error = 0;
59 int n = simple_strtol(buf, NULL, 0);
60
61 if (!capable(CAP_SYS_ADMIN))
62 return -EACCES;
63
64 switch (n) {
65 case 0:
66 gfs2_unfreeze_fs(sdp);
67 break;
68 case 1:
69 error = gfs2_freeze_fs(sdp);
70 break;
71 default:
72 ret = -EINVAL;
73 }
74
75 if (error)
76 fs_warn(sdp, "freeze %d error %d", n, error);
77
78 return ret;
79}
80
81static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
82{
83 unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
84 return sprintf(buf, "%u\n", b);
85}
86
87static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
88{
89 if (!capable(CAP_SYS_ADMIN))
90 return -EACCES;
91
92 if (simple_strtol(buf, NULL, 0) != 1)
93 return -EINVAL;
94
95 gfs2_lm_withdraw(sdp,
96 "GFS2: fsid=%s: withdrawing from cluster at user's request\n",
97 sdp->sd_fsname);
98 return len;
99}
100
101static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
102 size_t len)
103{
104 if (!capable(CAP_SYS_ADMIN))
105 return -EACCES;
106
107 if (simple_strtol(buf, NULL, 0) != 1)
108 return -EINVAL;
109
110 gfs2_statfs_sync(sdp);
111 return len;
112}
113
114static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
115{
116 if (!capable(CAP_SYS_ADMIN))
117 return -EACCES;
118
119 if (simple_strtol(buf, NULL, 0) != 1)
120 return -EINVAL;
121
122 gfs2_gl_hash_clear(sdp, NO_WAIT);
123 return len;
124}
125
126static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
127 size_t len)
128{
129 if (!capable(CAP_SYS_ADMIN))
130 return -EACCES;
131
132 if (simple_strtol(buf, NULL, 0) != 1)
133 return -EINVAL;
134
135 gfs2_quota_sync(sdp);
136 return len;
137}
138
139static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
140 size_t len)
141{
142 uint32_t id;
143
144 if (!capable(CAP_SYS_ADMIN))
145 return -EACCES;
146
147 id = simple_strtoul(buf, NULL, 0);
148
149 gfs2_quota_refresh(sdp, 1, id);
150 return len;
151}
152
153static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
154 size_t len)
155{
156 uint32_t id;
157
158 if (!capable(CAP_SYS_ADMIN))
159 return -EACCES;
160
161 id = simple_strtoul(buf, NULL, 0);
162
163 gfs2_quota_refresh(sdp, 0, id);
164 return len;
165}
166
/* A sysfs attribute that dispatches to per-filesystem show/store
 * callbacks taking the gfs2_sbd directly. */
struct gfs2_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};

/* Declare one top-level attribute; `mode` is the sysfs file mode */
#define GFS2_ATTR(name, mode, show, store) \
static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)

GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
GFS2_ATTR(shrink, 0200, NULL, shrink_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);

/* Default attributes created in the per-fs directory at registration */
static struct attribute *gfs2_attrs[] = {
	&gfs2_attr_id.attr,
	&gfs2_attr_fsname.attr,
	&gfs2_attr_freeze.attr,
	&gfs2_attr_shrink.attr,
	&gfs2_attr_withdraw.attr,
	&gfs2_attr_statfs_sync.attr,
	&gfs2_attr_quota_sync.attr,
	&gfs2_attr_quota_refresh_user.attr,
	&gfs2_attr_quota_refresh_group.attr,
	NULL,
};

/* sysfs show dispatch: recover the sbd from the embedded kobject and
 * forward to the attribute's show callback (read of a write-only file
 * yields an empty result) */
static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
	return a->show ? a->show(sdp, buf) : 0;
}

/* sysfs store dispatch; a write to a read-only file is silently
 * accepted (returns len) */
static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t len)
{
	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
	return a->store ? a->store(sdp, buf, len) : len;
}

static struct sysfs_ops gfs2_attr_ops = {
	.show = gfs2_attr_show,
	.store = gfs2_attr_store,
};

static struct kobj_type gfs2_ktype = {
	.default_attrs = gfs2_attrs,
	.sysfs_ops = &gfs2_attr_ops,
};

/* The /sys/fs/gfs2 kset; each mounted fs registers its kobject here */
static struct kset gfs2_kset = {
	.subsys = &fs_subsys,
	.kobj = {.name = "gfs2",},
	.ktype = &gfs2_ktype,
};
230
/*
 * Read-only sysfs views of struct lm_lockstruct fields, grouped under
 * the "lockstruct" subdirectory (see lockstruct_group below).
 */

struct lockstruct_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
};

/* Generate a show function printing sd_lockstruct.ls_<name> with `fmt` */
#define LOCKSTRUCT_ATTR(name, fmt) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
	return sprintf(buf, fmt, sdp->sd_lockstruct.ls_##name); \
} \
static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)

LOCKSTRUCT_ATTR(jid, "%u\n");
LOCKSTRUCT_ATTR(first, "%u\n");
LOCKSTRUCT_ATTR(lvb_size, "%u\n");
LOCKSTRUCT_ATTR(flags, "%d\n");

static struct attribute *lockstruct_attrs[] = {
	&lockstruct_attr_jid.attr,
	&lockstruct_attr_first.attr,
	&lockstruct_attr_lvb_size.attr,
	&lockstruct_attr_flags.attr,
	NULL
};
259
/*
 * Read-only sysfs views of the mount arguments (struct gfs2_args),
 * grouped under the "args" subdirectory.
 */

struct args_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
};

/* Generate a show function printing sd_args.ar_<name> with `fmt` */
#define ARGS_ATTR(name, fmt) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
	return sprintf(buf, fmt, sdp->sd_args.ar_##name); \
} \
static struct args_attr args_attr_##name = __ATTR_RO(name)

ARGS_ATTR(lockproto, "%s\n");
ARGS_ATTR(locktable, "%s\n");
ARGS_ATTR(hostdata, "%s\n");
ARGS_ATTR(spectator, "%d\n");
ARGS_ATTR(ignore_local_fs, "%d\n");
ARGS_ATTR(localcaching, "%d\n");
ARGS_ATTR(localflocks, "%d\n");
ARGS_ATTR(debug, "%d\n");
ARGS_ATTR(upgrade, "%d\n");
ARGS_ATTR(num_glockd, "%u\n");
ARGS_ATTR(posix_acl, "%d\n");
ARGS_ATTR(quota, "%u\n");
ARGS_ATTR(suiddir, "%d\n");
ARGS_ATTR(data, "%d\n");

/* noatime doesn't live in sd_args; it is tracked as a flag bit, so it
 * can't use the macro above */
static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
{
	return sprintf(buf, "%d\n", !!test_bit(SDF_NOATIME, &sdp->sd_flags));
}
static struct args_attr args_attr_noatime = __ATTR_RO(noatime);

static struct attribute *args_attrs[] = {
	&args_attr_lockproto.attr,
	&args_attr_locktable.attr,
	&args_attr_hostdata.attr,
	&args_attr_spectator.attr,
	&args_attr_ignore_local_fs.attr,
	&args_attr_localcaching.attr,
	&args_attr_localflocks.attr,
	&args_attr_debug.attr,
	&args_attr_upgrade.attr,
	&args_attr_num_glockd.attr,
	&args_attr_posix_acl.attr,
	&args_attr_quota.attr,
	&args_attr_suiddir.attr,
	&args_attr_data.attr,
	&args_attr_noatime.attr,
	NULL
};
316
/*
 * Read-only sysfs views of per-superblock atomic counters, grouped
 * under the "counters" subdirectory.
 */

struct counters_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
};

/* Generate a show function printing atomic counter sd_<name> with `fmt` */
#define COUNTERS_ATTR(name, fmt) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
	return sprintf(buf, fmt, (unsigned int)atomic_read(&sdp->sd_##name)); \
} \
static struct counters_attr counters_attr_##name = __ATTR_RO(name)

COUNTERS_ATTR(glock_count, "%u\n");
COUNTERS_ATTR(glock_held_count, "%u\n");
COUNTERS_ATTR(inode_count, "%u\n");
COUNTERS_ATTR(reclaimed, "%u\n");

static struct attribute *counters_attrs[] = {
	&counters_attr_glock_count.attr,
	&counters_attr_glock_held_count.attr,
	&counters_attr_inode_count.attr,
	&counters_attr_reclaimed.attr,
	NULL
};
345
346/*
347 * get and set struct gfs2_tune fields
348 */
349
350static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
351{
352 return sprintf(buf, "%u %u\n", sdp->sd_tune.gt_quota_scale_num,
353 sdp->sd_tune.gt_quota_scale_den);
354}
355
356static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
357 size_t len)
358{
359 struct gfs2_tune *gt = &sdp->sd_tune;
360 unsigned int x, y;
361
362 if (!capable(CAP_SYS_ADMIN))
363 return -EACCES;
364
365 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
366 return -EINVAL;
367
368 spin_lock(&gt->gt_spin);
369 gt->gt_quota_scale_num = x;
370 gt->gt_quota_scale_den = y;
371 spin_unlock(&gt->gt_spin);
372 return len;
373}
374
375static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
376 int check_zero, const char *buf, size_t len)
377{
378 struct gfs2_tune *gt = &sdp->sd_tune;
379 unsigned int x;
380
381 if (!capable(CAP_SYS_ADMIN))
382 return -EACCES;
383
384 x = simple_strtoul(buf, NULL, 0);
385
386 if (check_zero && !x)
387 return -EINVAL;
388
389 spin_lock(&gt->gt_spin);
390 *field = x;
391 spin_unlock(&gt->gt_spin);
392 return len;
393}
394
/* Read-write sysfs attribute over a struct gfs2_tune field */
struct tune_attr {
	struct attribute attr;
	ssize_t (*show)(struct gfs2_sbd *, char *);
	ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};

/* Level 3: declare the attribute from explicit show/store callbacks */
#define TUNE_ATTR_3(name, show, store) \
static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)

/* Level 2: also generate the show function (prints gt_<name> as %u) */
#define TUNE_ATTR_2(name, store) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
	return sprintf(buf, "%u\n", sdp->sd_tune.gt_##name); \
} \
TUNE_ATTR_3(name, name##_show, store)

/* Level 1: also generate the store function via tune_set();
 * check_zero rejects writes of 0 */
#define TUNE_ATTR(name, check_zero) \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{ \
	return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
} \
TUNE_ATTR_2(name, name##_store)

/* Like TUNE_ATTR (zero rejected), but also wakes the daemon whose
 * sleep interval just changed so it picks the new value up promptly */
#define TUNE_ATTR_DAEMON(name, process) \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{ \
	ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
	wake_up_process(sdp->sd_##process); \
	return r; \
} \
TUNE_ATTR_2(name, name##_store)

TUNE_ATTR(ilimit, 0);
TUNE_ATTR(ilimit_tries, 0);
TUNE_ATTR(ilimit_min, 0);
TUNE_ATTR(demote_secs, 0);
TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
TUNE_ATTR(jindex_refresh_secs, 0);
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(reclaim_limit, 0);
TUNE_ATTR(prefetch_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(new_files_directio, 0);
TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(max_atomic_write, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(entries_per_readdir, 1);
TUNE_ATTR(greedy_default, 1);
TUNE_ATTR(greedy_quantum, 1);
TUNE_ATTR(greedy_max, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
TUNE_ATTR_DAEMON(logd_secs, logd_process);
TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
TUNE_ATTR_DAEMON(inoded_secs, inoded_process);
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
459
/* All tunables exposed in the "tune" subdirectory */
static struct attribute *tune_attrs[] = {
	&tune_attr_ilimit.attr,
	&tune_attr_ilimit_tries.attr,
	&tune_attr_ilimit_min.attr,
	&tune_attr_demote_secs.attr,
	&tune_attr_incore_log_blocks.attr,
	&tune_attr_log_flush_secs.attr,
	&tune_attr_jindex_refresh_secs.attr,
	&tune_attr_quota_warn_period.attr,
	&tune_attr_quota_quantum.attr,
	&tune_attr_atime_quantum.attr,
	&tune_attr_max_readahead.attr,
	&tune_attr_complain_secs.attr,
	&tune_attr_reclaim_limit.attr,
	&tune_attr_prefetch_secs.attr,
	&tune_attr_statfs_slow.attr,
	&tune_attr_quota_simul_sync.attr,
	&tune_attr_quota_cache_secs.attr,
	&tune_attr_max_atomic_write.attr,
	&tune_attr_stall_secs.attr,
	&tune_attr_entries_per_readdir.attr,
	&tune_attr_greedy_default.attr,
	&tune_attr_greedy_quantum.attr,
	&tune_attr_greedy_max.attr,
	&tune_attr_statfs_quantum.attr,
	&tune_attr_scand_secs.attr,
	&tune_attr_recoverd_secs.attr,
	&tune_attr_logd_secs.attr,
	&tune_attr_quotad_secs.attr,
	&tune_attr_inoded_secs.attr,
	&tune_attr_quota_scale.attr,
	&tune_attr_new_files_jdata.attr,
	&tune_attr_new_files_directio.attr,
	NULL
};

/* Attribute groups: each becomes a named subdirectory below the
 * per-filesystem kobject (see gfs2_sys_fs_add/del) */
static struct attribute_group lockstruct_group = {
	.name = "lockstruct",
	.attrs = lockstruct_attrs
};

static struct attribute_group counters_group = {
	.name = "counters",
	.attrs = counters_attrs
};

static struct attribute_group args_group = {
	.name = "args",
	.attrs = args_attrs
};

static struct attribute_group tune_group = {
	.name = "tune",
	.attrs = tune_attrs
};
515
/**
 * gfs2_sys_fs_add - register this filesystem under /sys/fs/gfs2
 * @sdp: the filesystem
 *
 * Registers sdp->sd_kobj (named after the lock table) in the gfs2 kset,
 * then creates the lockstruct/counters/args/tune attribute groups.
 * On any failure, everything created so far is torn down via the
 * cascading labels below (each label undoes the step before it).
 *
 * Returns: 0 on success, negative errno on failure
 */
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
	int error;

	sdp->sd_kobj.kset = &gfs2_kset;
	sdp->sd_kobj.ktype = &gfs2_ktype;

	error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
	if (error)
		goto fail;

	error = kobject_register(&sdp->sd_kobj);
	if (error)
		goto fail;

	error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
	if (error)
		goto fail_reg;

	error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
	if (error)
		goto fail_lockstruct;

	error = sysfs_create_group(&sdp->sd_kobj, &args_group);
	if (error)
		goto fail_counters;

	error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
	if (error)
		goto fail_args;

	return 0;

 fail_args:
	sysfs_remove_group(&sdp->sd_kobj, &args_group);
 fail_counters:
	sysfs_remove_group(&sdp->sd_kobj, &counters_group);
 fail_lockstruct:
	sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
 fail_reg:
	kobject_unregister(&sdp->sd_kobj);
 fail:
	return error;
}
560
561void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
562{
563 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
564 sysfs_remove_group(&sdp->sd_kobj, &args_group);
565 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
566 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
567 kobject_unregister(&sdp->sd_kobj);
568}
569
570int gfs2_sys_init(void)
571{
572 gfs2_sys_margs = NULL;
573 spin_lock_init(&gfs2_sys_margs_lock);
574 return kset_register(&gfs2_kset);
575}
576
577void gfs2_sys_uninit(void)
578{
579 kfree(gfs2_sys_margs);
580 kset_unregister(&gfs2_kset);
581}
582
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
new file mode 100644
index 000000000000..62c8ed89ab9c
--- /dev/null
+++ b/fs/gfs2/sys.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __SYS_DOT_H__
#define __SYS_DOT_H__

/* Allow args to be passed to GFS2 when using an initial ram disk;
 * protected by gfs2_sys_margs_lock (defined in sys.c) */
extern char *gfs2_sys_margs;
extern spinlock_t gfs2_sys_margs_lock;

/* Per-filesystem sysfs registration/removal under /sys/fs/gfs2 */
int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
void gfs2_sys_fs_del(struct gfs2_sbd *sdp);

/* Module-wide sysfs setup/teardown (kset registration) */
int gfs2_sys_init(void);
void gfs2_sys_uninit(void);

#endif /* __SYS_DOT_H__ */
24
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
new file mode 100644
index 000000000000..d72f79e67c94
--- /dev/null
+++ b/fs/gfs2/trans.c
@@ -0,0 +1,193 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/kallsyms.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "glock.h"
23#include "log.h"
24#include "lops.h"
25#include "meta_io.h"
26#include "trans.h"
27#include "util.h"
28
/**
 * gfs2_trans_begin - begin a new transaction for the current task
 * @sdp: the filesystem
 * @blocks: number of metadata/data blocks the transaction may dirty
 * @revokes: number of block revokes the transaction may issue
 *
 * Allocates a gfs2_trans, acquires the transaction glock in shared
 * mode, reserves log space, and parks the transaction in
 * current->journal_info.  Must not be called with a transaction
 * already open (BUG_ON), and at least one of @blocks/@revokes must be
 * non-zero.
 *
 * Returns: 0 on success, negative errno on failure
 */
int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
		     unsigned int revokes)
{
	struct gfs2_trans *tr;
	int error;

	BUG_ON(current->journal_info);
	BUG_ON(blocks == 0 && revokes == 0);

	tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
	if (!tr)
		return -ENOMEM;

	/* Remember the caller for diagnostics printed at trans_end time */
	tr->tr_ip = (unsigned long)__builtin_return_address(0);
	tr->tr_blocks = blocks;
	tr->tr_revokes = revokes;
	/* Reservation: 1 for the commit, plus a header + the data blocks,
	   plus however many blocks the revokes need */
	tr->tr_reserved = 1;
	if (blocks)
		tr->tr_reserved += 1 + blocks;
	if (revokes)
		tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
						   sizeof(uint64_t));
	INIT_LIST_HEAD(&tr->tr_list_buf);

	gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED,
			 GL_NEVER_RECURSE, &tr->tr_t_gh);

	error = gfs2_glock_nq(&tr->tr_t_gh);
	if (error)
		goto fail_holder_uninit;

	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		/* Journal is dead: don't cache the glock on release */
		tr->tr_t_gh.gh_flags |= GL_NOCACHE;
		error = -EROFS;
		goto fail_gunlock;
	}

	error = gfs2_log_reserve(sdp, tr->tr_reserved);
	if (error)
		goto fail_gunlock;

	current->journal_info = tr;

	return 0;

fail_gunlock:
	gfs2_glock_dq(&tr->tr_t_gh);

fail_holder_uninit:
	gfs2_holder_uninit(&tr->tr_t_gh);
	kfree(tr);

	return error;
}
83
/**
 * gfs2_trans_end - end the current task's transaction
 * @sdp: the filesystem
 *
 * Detaches the transaction from current->journal_info.  An untouched
 * transaction simply releases its log reservation; a touched one is
 * committed to the log.  Overruns of the declared block/revoke budget
 * trigger a withdraw and print the transaction's creation site.
 */
void gfs2_trans_end(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;

	tr = current->journal_info;
	current->journal_info = NULL;

	if (gfs2_assert_warn(sdp, tr))
		return;

	if (!tr->tr_touched) {
		/* Nothing was dirtied: just give back the reserved space */
		gfs2_log_release(sdp, tr->tr_reserved);

		gfs2_glock_dq(&tr->tr_t_gh);
		gfs2_holder_uninit(&tr->tr_t_gh);

		kfree(tr);
		return;
	}

	if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) {
		fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ",
		       tr->tr_num_buf, tr->tr_blocks);
		print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
	}
	if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) {
		fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ",
		       tr->tr_num_revoke, tr->tr_revokes);
		print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
	}

	gfs2_log_commit(sdp, tr);

	gfs2_glock_dq(&tr->tr_t_gh);
	gfs2_holder_uninit(&tr->tr_t_gh);

	kfree(tr);

	/* Synchronous mounts flush the log on every transaction end */
	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
		gfs2_log_flush(sdp);
}
125
/* Add a glock's log element to the current transaction */
void gfs2_trans_add_gl(struct gfs2_glock *gl)
{
	lops_add(gl->gl_sbd, &gl->gl_le);
}

/**
 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
 * @gl: the glock the buffer belongs to
 * @bh: The buffer to add
 * @meta: True in the case of adding metadata
 *
 * Attaches bufdata to @bh if it has none yet; an existing bufdata must
 * already belong to @gl (asserted).  The buffer's log element is then
 * queued on the transaction via lops_add().
 */
void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_bufdata *bd;

	bd = bh->b_private;
	if (bd)
		gfs2_assert(sdp, bd->bd_gl == gl);
	else {
		gfs2_attach_bufdata(gl, bh, meta);
		bd = bh->b_private;
	}
	lops_add(sdp, &bd->bd_le);
}
153
/* Queue a revoke for @blkno on the current transaction.  Allocation
 * uses __GFP_NOFAIL, so this cannot fail (it may block). */
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
{
	struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
					 GFP_NOFS | __GFP_NOFAIL);
	lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
	rv->rv_blkno = blkno;
	lops_add(sdp, &rv->rv_le);
}

/* Cancel a pending (not yet logged) revoke for @blkno, if one exists.
 * The search and unlink happen under the log lock; the free and the
 * transaction accounting happen after it is dropped. */
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno)
{
	struct gfs2_revoke *rv;
	int found = 0;

	gfs2_log_lock(sdp);

	list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
		if (rv->rv_blkno == blkno) {
			list_del(&rv->rv_le.le_list);
			gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
			sdp->sd_log_num_revoke--;
			found = 1;
			break;
		}
	}

	gfs2_log_unlock(sdp);

	if (found) {
		struct gfs2_trans *tr = current->journal_info;
		kfree(rv);
		tr->tr_num_revoke_rm++;
	}
}

/* Add a resource group's log element to the current transaction */
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
{
	lops_add(rgd->rd_sbd, &rgd->rd_le);
}
193
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
new file mode 100644
index 000000000000..6b5e9e8bf561
--- /dev/null
+++ b/fs/gfs2/trans.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __TRANS_DOT_H__
#define __TRANS_DOT_H__

/* Per-object block-reservation sizes, summed by callers when computing
 * the `blocks` argument to gfs2_trans_begin() */
#define RES_DINODE	1
#define RES_INDIRECT	1
#define RES_JDATA	1
#define RES_DATA	1
#define RES_LEAF	1
#define RES_RG_BIT	2
#define RES_EATTR	1
#define RES_UNLINKED	1
#define RES_STATFS	1
#define RES_QUOTA	2

/* Open a transaction on the current task; pair with gfs2_trans_end() */
int gfs2_trans_begin(struct gfs2_sbd *sdp,
		     unsigned int blocks, unsigned int revokes);

void gfs2_trans_end(struct gfs2_sbd *sdp);

/* Attach objects (glock, buffer, revoke, rgrp) to the open transaction */
void gfs2_trans_add_gl(struct gfs2_glock *gl);
void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);

#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/unlinked.c b/fs/gfs2/unlinked.c
new file mode 100644
index 000000000000..24b91c23bc2d
--- /dev/null
+++ b/fs/gfs2/unlinked.c
@@ -0,0 +1,458 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "inode.h"
24#include "meta_io.h"
25#include "trans.h"
26#include "unlinked.h"
27#include "util.h"
28
/**
 * munge_ondisk - write one unlinked tag into its slot in the ut file
 * @sdp: the filesystem
 * @slot: index of the tag within the unlinked-tag file
 * @ut: the tag contents to write
 *
 * Maps the slot to a (block, offset) pair in sd_ut_inode, reads the
 * block, verifies its metatype, and writes the tag through the current
 * transaction.  Caller must have a transaction open (gfs2_trans_add_bh).
 *
 * Returns: 0 on success, negative errno on failure
 */
static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot,
			struct gfs2_unlinked_tag *ut)
{
	struct gfs2_inode *ip = sdp->sd_ut_inode->u.generic_ip;
	unsigned int block, offset;
	uint64_t dblock;
	int new = 0;
	struct buffer_head *bh;
	int error;

	block = slot / sdp->sd_ut_per_block;
	offset = slot % sdp->sd_ut_per_block;

	error = gfs2_block_map(ip, block, &new, &dblock, NULL);
	if (error)
		return error;
	error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
	if (error)
		return error;
	if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
		error = -EIO;
		goto out;
	}

	/* Serialize tag writers on this file's blocks */
	mutex_lock(&sdp->sd_unlinked_mutex);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
	gfs2_unlinked_tag_out(ut, bh->b_data +
			      sizeof(struct gfs2_meta_header) +
			      offset * sizeof(struct gfs2_unlinked_tag));
	mutex_unlock(&sdp->sd_unlinked_mutex);

 out:
	brelse(bh);

	return error;
}
65
/* Put @ul on the filesystem's unlinked list.  The list takes its own
 * reference, so ul_count is bumped; the caller must already hold a
 * reference (asserted non-zero). */
static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	spin_lock(&sdp->sd_unlinked_spin);
	list_add(&ul->ul_list, &sdp->sd_unlinked_list);
	gfs2_assert(sdp, ul->ul_count);
	ul->ul_count++;
	atomic_inc(&sdp->sd_unlinked_count);
	spin_unlock(&sdp->sd_unlinked_spin);
}

/* Remove @ul from the unlinked list, dropping the list's reference.
 * ul_count must be > 1: the caller still holds one. */
static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	spin_lock(&sdp->sd_unlinked_spin);
	list_del_init(&ul->ul_list);
	gfs2_assert(sdp, ul->ul_count > 1);
	ul->ul_count--;
	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0);
	atomic_dec(&sdp->sd_unlinked_count);
	spin_unlock(&sdp->sd_unlinked_spin);
}
86
/**
 * ul_fish - grab an unlocked entry from the unlinked list for dealloc
 * @sdp: the filesystem
 *
 * Skips entries already locked by someone else.  A claimed entry is
 * moved to the tail (so repeated calls rotate through the list),
 * gains a reference, and is marked ULF_LOCKED for the caller.
 *
 * Returns: a locked, referenced entry, or NULL if none is available
 * (or the fs is mounted read-only)
 */
static struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp)
{
	struct list_head *head;
	struct gfs2_unlinked *ul;
	int found = 0;

	if (sdp->sd_vfs->s_flags & MS_RDONLY)
		return NULL;

	spin_lock(&sdp->sd_unlinked_spin);

	head = &sdp->sd_unlinked_list;

	list_for_each_entry(ul, head, ul_list) {
		if (test_bit(ULF_LOCKED, &ul->ul_flags))
			continue;

		list_move_tail(&ul->ul_list, head);
		ul->ul_count++;
		set_bit(ULF_LOCKED, &ul->ul_flags);
		found = 1;

		break;
	}

	if (!found)
		ul = NULL;

	spin_unlock(&sdp->sd_unlinked_spin);

	return ul;
}
119
/**
 * enforce_limit - limit the number of inodes waiting to be deallocated
 * @sdp: the filesystem
 *
 * If the unlinked count has reached gt_ilimit, make up to
 * gt_ilimit_tries deallocation attempts, stopping early once
 * gt_ilimit_min inodes have been deallocated or a hard error occurs
 * (gfs2_inode_dealloc returning 1 means "try again later" and only
 * counts against the tries budget).
 */
static void enforce_limit(struct gfs2_sbd *sdp)
{
	unsigned int tries = 0, min = 0;
	int error;

	if (atomic_read(&sdp->sd_unlinked_count) >=
	    gfs2_tune_get(sdp, gt_ilimit)) {
		tries = gfs2_tune_get(sdp, gt_ilimit_tries);
		min = gfs2_tune_get(sdp, gt_ilimit_min);
	}

	while (tries--) {
		struct gfs2_unlinked *ul = ul_fish(sdp);
		if (!ul)
			break;
		error = gfs2_inode_dealloc(sdp, ul);
		gfs2_unlinked_put(sdp, ul);

		if (!error) {
			/* NOTE(review): if gt_ilimit_min is tuned to 0,
			   --min wraps to UINT_MAX here and the early break
			   is effectively never taken — confirm whether a
			   zero minimum is a supported setting */
			if (!--min)
				break;
		} else if (error != 1)
			break;
	}
}
152
153static struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp)
154{
155 struct gfs2_unlinked *ul;
156
157 ul = kzalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL);
158 if (ul) {
159 INIT_LIST_HEAD(&ul->ul_list);
160 ul->ul_count = 1;
161 set_bit(ULF_LOCKED, &ul->ul_flags);
162 }
163
164 return ul;
165}
166
/**
 * gfs2_unlinked_get - allocate an unlinked tracker and claim a free slot
 * @sdp: the filesystem
 * @ul: returned tracker (locked, referenced) on success
 *
 * First applies backpressure via enforce_limit(), then scans the
 * in-core bitmap (one bit per on-disk tag slot, PAGE_SIZE bytes per
 * chunk) for a clear bit, marks it used, and records the slot number.
 *
 * Returns: 0 on success, -ENOMEM, or -ENOSPC if every slot is in use.
 * NOTE(review): on the fail path *ul is freed but left dangling —
 * callers must not touch *ul after an error; confirm none do.
 */
int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul)
{
	unsigned int c, o = 0, b;
	unsigned char byte = 0;

	enforce_limit(sdp);

	*ul = ul_alloc(sdp);
	if (!*ul)
		return -ENOMEM;

	spin_lock(&sdp->sd_unlinked_spin);

	/* Find the first byte with a clear bit */
	for (c = 0; c < sdp->sd_unlinked_chunks; c++)
		for (o = 0; o < PAGE_SIZE; o++) {
			byte = sdp->sd_unlinked_bitmap[c][o];
			if (byte != 0xFF)
				goto found;
		}

	goto fail;

 found:
	/* Locate the clear bit within the byte */
	for (b = 0; b < 8; b++)
		if (!(byte & (1 << b)))
			break;
	(*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b;

	/* The last bitmap chunk may extend past the real slot count */
	if ((*ul)->ul_slot >= sdp->sd_unlinked_slots)
		goto fail;

	sdp->sd_unlinked_bitmap[c][o] |= 1 << b;

	spin_unlock(&sdp->sd_unlinked_spin);

	return 0;

 fail:
	spin_unlock(&sdp->sd_unlinked_spin);
	kfree(*ul);
	return -ENOSPC;
}
209
/**
 * gfs2_unlinked_put - unlock and drop a reference on an unlinked tracker
 * @sdp: the filesystem
 * @ul: the tracker (must be held ULF_LOCKED by the caller)
 *
 * When the last reference goes away, the tracker's bitmap slot is
 * released and the structure freed.
 */
void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags));

	spin_lock(&sdp->sd_unlinked_spin);
	gfs2_assert(sdp, ul->ul_count);
	ul->ul_count--;
	if (!ul->ul_count) {
		/* Last reference: return the slot to the bitmap and free */
		gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0);
		spin_unlock(&sdp->sd_unlinked_spin);
		kfree(ul);
	} else
		spin_unlock(&sdp->sd_unlinked_spin);
}
224
/* Write a new tracker's tag to disk and, on success, add it to the
 * in-core unlinked list.  @ul must be locked and not yet listed. */
int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	int error;

	gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
	gfs2_assert_warn(sdp, list_empty(&ul->ul_list));

	error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
	if (!error)
		ul_hash(sdp, ul);

	return error;
}

/* Rewrite an already-listed tracker's tag on disk (in-core state is
 * unchanged).  @ul must be locked and on the unlinked list. */
int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	int error;

	gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
	gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));

	error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);

	return error;
}

/* Clear a tracker's on-disk tag (write zeros) and, on success, remove
 * it from the in-core list.  @ul must be locked and listed. */
int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
{
	struct gfs2_unlinked_tag ut;
	int error;

	gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
	gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));

	memset(&ut, 0, sizeof(struct gfs2_unlinked_tag));

	error = munge_ondisk(sdp, ul->ul_slot, &ut);
	if (error)
		return error;

	ul_unhash(sdp, ul);

	return 0;
}
269
/**
 * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated
 * @sdp: the filesystem
 *
 * Repeatedly fishes entries off the unlinked list and tries to
 * deallocate them.  A gfs2_inode_dealloc() return of 1 means "can't do
 * this one right now"; once every remaining entry has struck out in a
 * row the inner loop gives up.  The outer loop retries as long as the
 * previous pass made progress and the thread isn't being stopped.
 *
 * Returns: 0 when the list is drained (or no more progress is
 * possible), otherwise a negative errno
 */
int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp)
{
	unsigned int hits, strikes;
	int error;

	for (;;) {
		hits = 0;
		strikes = 0;

		for (;;) {
			struct gfs2_unlinked *ul = ul_fish(sdp);
			if (!ul)
				return 0;
			error = gfs2_inode_dealloc(sdp, ul);
			gfs2_unlinked_put(sdp, ul);

			if (!error) {
				hits++;
				if (strikes)
					strikes--;
			} else if (error == 1) {
				strikes++;
				if (strikes >=
				    atomic_read(&sdp->sd_unlinked_count)) {
					error = 0;
					break;
				}
			} else
				return error;
		}

		if (!hits || kthread_should_stop())
			break;

		cond_resched();
	}

	return 0;
}
316
/**
 * gfs2_unlinked_init - read the unlinked-tag file and build in-core state
 * @sdp: the filesystem
 *
 * Validates the ut inode's size (non-empty, at most 64MB, a whole
 * number of fs blocks), sizes the slot bitmap, then walks every tag
 * block: each non-zero tag becomes an in-core gfs2_unlinked entry,
 * its bitmap bit is set, and it is added to the unlinked list so a
 * later gfs2_unlinked_dealloc() can reclaim the inode.
 *
 * Returns: 0 on success, negative errno on failure (partial state is
 * torn down via gfs2_unlinked_cleanup())
 */
int gfs2_unlinked_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = sdp->sd_ut_inode->u.generic_ip;
	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
	unsigned int x, slot = 0;
	unsigned int found = 0;
	uint64_t dblock;
	uint32_t extlen = 0;
	int error;

	if (!ip->i_di.di_size ||
	    ip->i_di.di_size > (64 << 20) ||
	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
		gfs2_consist_inode(ip);
		return -EIO;
	}
	sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block;
	/* One bit per slot, PAGE_SIZE bytes (8 * PAGE_SIZE bits) per chunk */
	sdp->sd_unlinked_chunks = DIV_ROUND_UP(sdp->sd_unlinked_slots,
					       8 * PAGE_SIZE);

	error = -ENOMEM;

	sdp->sd_unlinked_bitmap = kcalloc(sdp->sd_unlinked_chunks,
					  sizeof(unsigned char *),
					  GFP_KERNEL);
	if (!sdp->sd_unlinked_bitmap)
		return error;

	for (x = 0; x < sdp->sd_unlinked_chunks; x++) {
		sdp->sd_unlinked_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!sdp->sd_unlinked_bitmap[x])
			goto fail;
	}

	for (x = 0; x < blocks; x++) {
		struct buffer_head *bh;
		unsigned int y;

		/* Re-map when the previously mapped extent is exhausted */
		if (!extlen) {
			int new = 0;
			error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
			if (error)
				goto fail;
		}
		gfs2_meta_ra(ip->i_gl, dblock, extlen);
		error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
				       &bh);
		if (error)
			goto fail;
		error = -EIO;
		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
			brelse(bh);
			goto fail;
		}

		for (y = 0;
		     y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots;
		     y++, slot++) {
			struct gfs2_unlinked_tag ut;
			struct gfs2_unlinked *ul;

			gfs2_unlinked_tag_in(&ut, bh->b_data +
					  sizeof(struct gfs2_meta_header) +
					  y * sizeof(struct gfs2_unlinked_tag));
			/* A zeroed tag means the slot is free */
			if (!ut.ut_inum.no_addr)
				continue;

			error = -ENOMEM;
			ul = ul_alloc(sdp);
			if (!ul) {
				brelse(bh);
				goto fail;
			}
			ul->ul_ut = ut;
			ul->ul_slot = slot;

			spin_lock(&sdp->sd_unlinked_spin);
			gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1);
			spin_unlock(&sdp->sd_unlinked_spin);
			ul_hash(sdp, ul);

			/* Drop ul_alloc's reference; the list keeps its own */
			gfs2_unlinked_put(sdp, ul);
			found++;
		}

		brelse(bh);
		dblock++;
		extlen--;
	}

	if (found)
		fs_info(sdp, "found %u unlinked inodes\n", found);

	return 0;

 fail:
	gfs2_unlinked_cleanup(sdp);
	return error;
}
416
/**
 * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures
 * @sdp: the filesystem
 *
 * Frees every entry on the unlinked list (busy entries — ul_count > 1 —
 * are rotated to the tail and waited on by dropping the lock and
 * scheduling), then frees the slot bitmap.  Called at unmount and from
 * the gfs2_unlinked_init() failure path.
 */
void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp)
{
	struct list_head *head = &sdp->sd_unlinked_list;
	struct gfs2_unlinked *ul;
	unsigned int x;

	spin_lock(&sdp->sd_unlinked_spin);
	while (!list_empty(head)) {
		ul = list_entry(head->next, struct gfs2_unlinked, ul_list);

		if (ul->ul_count > 1) {
			/* Someone else still holds a reference; let them
			   finish before freeing */
			list_move_tail(&ul->ul_list, head);
			spin_unlock(&sdp->sd_unlinked_spin);
			schedule();
			spin_lock(&sdp->sd_unlinked_spin);
			continue;
		}

		list_del_init(&ul->ul_list);
		atomic_dec(&sdp->sd_unlinked_count);

		gfs2_assert_warn(sdp, ul->ul_count == 1);
		gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags));
		kfree(ul);
	}
	spin_unlock(&sdp->sd_unlinked_spin);

	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count));

	if (sdp->sd_unlinked_bitmap) {
		for (x = 0; x < sdp->sd_unlinked_chunks; x++)
			kfree(sdp->sd_unlinked_bitmap[x]);
		kfree(sdp->sd_unlinked_bitmap);
	}
}
458
diff --git a/fs/gfs2/unlinked.h b/fs/gfs2/unlinked.h
new file mode 100644
index 000000000000..51e77f88d74f
--- /dev/null
+++ b/fs/gfs2/unlinked.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UNLINKED_DOT_H__
11#define __UNLINKED_DOT_H__
12
/* Get/put a reference on an in-core struct gfs2_unlinked. */
int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul);
void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);

/* Add, update, or remove an unlinked tag in its on-disk slot.
   NOTE(review): semantics inferred from names — confirm in unlinked.c. */
int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);

int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp);

/* Mount-time setup / unmount-time teardown of the unlinked subsystem. */
int gfs2_unlinked_init(struct gfs2_sbd *sdp);
void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp);
24
25#endif /* __UNLINKED_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
new file mode 100644
index 000000000000..7cd9e25639c4
--- /dev/null
+++ b/fs/gfs2/util.c
@@ -0,0 +1,245 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/semaphore.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "glock.h"
24#include "lm.h"
25#include "util.h"
26
/* Slab caches shared by the whole filesystem; declared extern in util.h.
   NOTE(review): the kmem_cache_create() calls are not visible in this
   file — presumably done at module init; confirm in main.c. */
kmem_cache_t *gfs2_glock_cachep __read_mostly;
kmem_cache_t *gfs2_inode_cachep __read_mostly;
kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
30
/**
 * gfs2_assert_i - log that a fatal assertion failed
 * @sdp: the filesystem
 *
 * Only prints the message; the gfs2_assert() macro in util.h calls
 * this and then BUG()s.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
	printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
	       sdp->sd_fsname);
}
36
37/**
38 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
39 * Returns: -1 if this call withdrew the machine,
40 * -2 if it was already withdrawn
41 */
42
43int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
44 const char *function, char *file, unsigned int line)
45{
46 int me;
47 me = gfs2_lm_withdraw(sdp,
48 "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
49 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
50 sdp->sd_fsname, assertion,
51 sdp->sd_fsname, function, file, line);
52 dump_stack();
53 return (me) ? -1 : -2;
54}
55
/**
 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 * @sdp: the filesystem
 * @assertion: the stringified assertion text
 * @function, @file, @line: location of the failed assertion
 *
 * Returns: -1 if we printed something
 *          -2 if we didn't
 */

int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
		       const char *function, char *file, unsigned int line)
{
	/* Rate-limit: stay silent until gt_complain_secs seconds have
	   elapsed since the last warning we printed. */
	if (time_before(jiffies,
			sdp->sd_last_warning +
			gfs2_tune_get(sdp, gt_complain_secs) * HZ))
		return -2;

	printk(KERN_WARNING
	       "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
	       "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
	       sdp->sd_fsname, assertion,
	       sdp->sd_fsname, function, file, line);

	/* ar_debug (presumably the "debug" mount option — confirm in
	   mount.c) escalates warnings to a BUG() instead of a backtrace. */
	if (sdp->sd_args.ar_debug)
		BUG();
	else
		dump_stack();

	sdp->sd_last_warning = jiffies;

	return -1;
}
85
86/**
87 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
88 * Returns: -1 if this call withdrew the machine,
89 * 0 if it was already withdrawn
90 */
91
92int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
93 char *file, unsigned int line)
94{
95 int rv;
96 rv = gfs2_lm_withdraw(sdp,
97 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
98 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
99 sdp->sd_fsname,
100 sdp->sd_fsname, function, file, line);
101 return rv;
102}
103
104/**
105 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
106 * Returns: -1 if this call withdrew the machine,
107 * 0 if it was already withdrawn
108 */
109
110int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
111 const char *function, char *file, unsigned int line)
112{
113 struct gfs2_sbd *sdp = ip->i_sbd;
114 int rv;
115 rv = gfs2_lm_withdraw(sdp,
116 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
117 "GFS2: fsid=%s: inode = %llu %llu\n"
118 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
119 sdp->sd_fsname,
120 sdp->sd_fsname, ip->i_num.no_formal_ino, ip->i_num.no_addr,
121 sdp->sd_fsname, function, file, line);
122 return rv;
123}
124
125/**
126 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
127 * Returns: -1 if this call withdrew the machine,
128 * 0 if it was already withdrawn
129 */
130
131int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
132 const char *function, char *file, unsigned int line)
133{
134 struct gfs2_sbd *sdp = rgd->rd_sbd;
135 int rv;
136 rv = gfs2_lm_withdraw(sdp,
137 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
138 "GFS2: fsid=%s: RG = %llu\n"
139 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
140 sdp->sd_fsname,
141 sdp->sd_fsname, rgd->rd_ri.ri_addr,
142 sdp->sd_fsname, function, file, line);
143 return rv;
144}
145
146/**
147 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
148 * Returns: -1 if this call withdrew the machine,
149 * -2 if it was already withdrawn
150 */
151
152int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
153 const char *type, const char *function, char *file,
154 unsigned int line)
155{
156 int me;
157 me = gfs2_lm_withdraw(sdp,
158 "GFS2: fsid=%s: fatal: invalid metadata block\n"
159 "GFS2: fsid=%s: bh = %llu (%s)\n"
160 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
161 sdp->sd_fsname,
162 sdp->sd_fsname, (uint64_t)bh->b_blocknr, type,
163 sdp->sd_fsname, function, file, line);
164 return (me) ? -1 : -2;
165}
166
167/**
168 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
169 * Returns: -1 if this call withdrew the machine,
170 * -2 if it was already withdrawn
171 */
172
173int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
174 uint16_t type, uint16_t t, const char *function,
175 char *file, unsigned int line)
176{
177 int me;
178 me = gfs2_lm_withdraw(sdp,
179 "GFS2: fsid=%s: fatal: invalid metadata block\n"
180 "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
181 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
182 sdp->sd_fsname,
183 sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, t,
184 sdp->sd_fsname, function, file, line);
185 return (me) ? -1 : -2;
186}
187
188/**
189 * gfs2_io_error_i - Flag an I/O error and withdraw
190 * Returns: -1 if this call withdrew the machine,
191 * 0 if it was already withdrawn
192 */
193
194int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
195 unsigned int line)
196{
197 int rv;
198 rv = gfs2_lm_withdraw(sdp,
199 "GFS2: fsid=%s: fatal: I/O error\n"
200 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
201 sdp->sd_fsname,
202 sdp->sd_fsname, function, file, line);
203 return rv;
204}
205
206/**
207 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
208 * Returns: -1 if this call withdrew the machine,
209 * 0 if it was already withdrawn
210 */
211
212int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
213 const char *function, char *file, unsigned int line)
214{
215 int rv;
216 rv = gfs2_lm_withdraw(sdp,
217 "GFS2: fsid=%s: fatal: I/O error\n"
218 "GFS2: fsid=%s: block = %llu\n"
219 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
220 sdp->sd_fsname,
221 sdp->sd_fsname, (uint64_t)bh->b_blocknr,
222 sdp->sd_fsname, function, file, line);
223 return rv;
224}
225
226void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
227 unsigned int bit, int new_value)
228{
229 unsigned int c, o, b = bit;
230 int old_value;
231
232 c = b / (8 * PAGE_SIZE);
233 b %= 8 * PAGE_SIZE;
234 o = b / 8;
235 b %= 8;
236
237 old_value = (bitmap[c][o] & (1 << b));
238 gfs2_assert_withdraw(sdp, !old_value != !new_value);
239
240 if (new_value)
241 bitmap[c][o] |= 1 << b;
242 else
243 bitmap[c][o] &= ~(1 << b);
244}
245
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
new file mode 100644
index 000000000000..4532dbab0a2c
--- /dev/null
+++ b/fs/gfs2/util.h
@@ -0,0 +1,169 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__
12
13
/* printk() helpers that prefix every message with the filesystem id. */
#define fs_printk(level, fs, fmt, arg...) \
	printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg)

#define fs_info(fs, fmt, arg...) \
	fs_printk(KERN_INFO , fs , fmt , ## arg)

#define fs_warn(fs, fmt, arg...) \
	fs_printk(KERN_WARNING , fs , fmt , ## arg)

#define fs_err(fs, fmt, arg...) \
	fs_printk(KERN_ERR, fs , fmt , ## arg)


void gfs2_assert_i(struct gfs2_sbd *sdp);

/* Hard assertion: log via gfs2_assert_i() and BUG() if @assertion fails. */
#define gfs2_assert(sdp, assertion) \
do { \
	if (unlikely(!(assertion))) { \
		gfs2_assert_i(sdp); \
		BUG(); \
	} \
} while (0)


int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
			   const char *function, char *file, unsigned int line);

/* Withdraw the filesystem if @assertion fails; evaluates to 0 when it
   holds, or the (negative) return of gfs2_assert_withdraw_i(). */
#define gfs2_assert_withdraw(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
					__FUNCTION__, __FILE__, __LINE__))


int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
		       const char *function, char *file, unsigned int line);

/* Warn (rate-limited) if @assertion fails; 0 when it holds. */
#define gfs2_assert_warn(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
					__FUNCTION__, __FILE__, __LINE__))


int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
		   const char *function, char *file, unsigned int line);

/* Flag a filesystem consistency error at the current source location. */
#define gfs2_consist(sdp) \
gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)


int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
			 const char *function, char *file, unsigned int line);

/* Flag an inode consistency error at the current source location. */
#define gfs2_consist_inode(ip) \
gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)


int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
			 const char *function, char *file, unsigned int line);

/* Flag a resource-group consistency error at the current source location. */
#define gfs2_consist_rgrpd(rgd) \
gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
73
74
75int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
76 const char *type, const char *function,
77 char *file, unsigned int line);
78
79static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
80 struct buffer_head *bh,
81 const char *function,
82 char *file, unsigned int line)
83{
84 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
85 uint32_t magic = mh->mh_magic;
86 magic = be32_to_cpu(magic);
87 if (unlikely(magic != GFS2_MAGIC))
88 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
89 file, line);
90 return 0;
91}
92
93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
95
96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
98 uint16_t type, uint16_t t,
99 const char *function,
100 char *file, unsigned int line);
101
102static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
103 struct buffer_head *bh,
104 uint16_t type,
105 const char *function,
106 char *file, unsigned int line)
107{
108 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
109 uint32_t magic = mh->mh_magic;
110 uint16_t t = be32_to_cpu(mh->mh_type);
111 magic = be32_to_cpu(magic);
112 if (unlikely(magic != GFS2_MAGIC))
113 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
114 file, line);
115 if (unlikely(t != type))
116 return gfs2_metatype_check_ii(sdp, bh, type, t, function,
117 file, line);
118 return 0;
119}
120
121#define gfs2_metatype_check(sdp, bh, type) \
122gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
123
/* Stamp the type and format fields of the metadata header in @bh.
   NOTE(review): mh_magic is left untouched — presumably set separately
   when the block is first initialized; confirm at call sites. */
static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type,
				     uint16_t format)
{
	struct gfs2_meta_header *mh;
	mh = (struct gfs2_meta_header *)bh->b_data;
	mh->mh_type = cpu_to_be32(type);
	mh->mh_format = cpu_to_be32(format);
}
132
133
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);

/* Flag an I/O error at the current source location.
   No trailing semicolon in the expansion: the original definitions
   ended in ';', so "gfs2_io_error(sdp);" expanded to two statements,
   which breaks unbraced if/else bodies. */
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)


int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);

/* Flag a buffer I/O error at the current source location. */
#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
146
147
148extern kmem_cache_t *gfs2_glock_cachep;
149extern kmem_cache_t *gfs2_inode_cachep;
150extern kmem_cache_t *gfs2_bufdata_cachep;
151
/* Read one tunable under gt_spin so a concurrent update cannot be
   observed half-written. */
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
					   unsigned int *p)
{
	unsigned int x;
	spin_lock(&gt->gt_spin);
	x = *p;
	spin_unlock(&gt->gt_spin);
	return x;
}

/* gfs2_tune_get(sdp, field) - fetch sd_tune.field under the tune lock. */
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
164
165void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
166 unsigned int bit, int new_value);
167
168#endif /* __UTIL_DOT_H__ */
169