aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2')
-rw-r--r--fs/gfs2/Kconfig46
-rw-r--r--fs/gfs2/Makefile42
-rw-r--r--fs/gfs2/acl.c312
-rw-r--r--fs/gfs2/acl.h37
-rw-r--r--fs/gfs2/bits.c178
-rw-r--r--fs/gfs2/bits.h28
-rw-r--r--fs/gfs2/bmap.c1093
-rw-r--r--fs/gfs2/bmap.h35
-rw-r--r--fs/gfs2/daemon.c225
-rw-r--r--fs/gfs2/daemon.h20
-rw-r--r--fs/gfs2/dir.c2366
-rw-r--r--fs/gfs2/dir.h51
-rw-r--r--fs/gfs2/eaops.c185
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/eattr.c1563
-rw-r--r--fs/gfs2/eattr.h88
-rw-r--r--fs/gfs2/format.h21
-rw-r--r--fs/gfs2/gfs2.h60
-rw-r--r--fs/gfs2/glock.c2492
-rw-r--r--fs/gfs2/glock.h143
-rw-r--r--fs/gfs2/glops.c487
-rw-r--r--fs/gfs2/glops.h23
-rw-r--r--fs/gfs2/incore.h682
-rw-r--r--fs/gfs2/inode.c1838
-rw-r--r--fs/gfs2/inode.h82
-rw-r--r--fs/gfs2/lm.c238
-rw-r--r--fs/gfs2/lm.h42
-rw-r--r--fs/gfs2/lm_interface.h295
-rw-r--r--fs/gfs2/locking.c191
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c538
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h191
-rw-r--r--fs/gfs2/locking/dlm/main.c64
-rw-r--r--fs/gfs2/locking/dlm/mount.c247
-rw-r--r--fs/gfs2/locking/dlm/plock.c298
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c218
-rw-r--r--fs/gfs2/locking/dlm/thread.c352
-rw-r--r--fs/gfs2/locking/nolock/Makefile3
-rw-r--r--fs/gfs2/locking/nolock/main.c264
-rw-r--r--fs/gfs2/log.c643
-rw-r--r--fs/gfs2/log.h65
-rw-r--r--fs/gfs2/lops.c788
-rw-r--r--fs/gfs2/lops.h96
-rw-r--r--fs/gfs2/lvb.c49
-rw-r--r--fs/gfs2/lvb.h28
-rw-r--r--fs/gfs2/main.c103
-rw-r--r--fs/gfs2/meta_io.c882
-rw-r--r--fs/gfs2/meta_io.h89
-rw-r--r--fs/gfs2/mount.c211
-rw-r--r--fs/gfs2/mount.h15
-rw-r--r--fs/gfs2/ondisk.c528
-rw-r--r--fs/gfs2/ops_address.c632
-rw-r--r--fs/gfs2/ops_address.h17
-rw-r--r--fs/gfs2/ops_dentry.c114
-rw-r--r--fs/gfs2/ops_dentry.h15
-rw-r--r--fs/gfs2/ops_export.c290
-rw-r--r--fs/gfs2/ops_export.h15
-rw-r--r--fs/gfs2/ops_file.c943
-rw-r--r--fs/gfs2/ops_file.h20
-rw-r--r--fs/gfs2/ops_fstype.c884
-rw-r--r--fs/gfs2/ops_fstype.h15
-rw-r--r--fs/gfs2/ops_inode.c1198
-rw-r--r--fs/gfs2/ops_inode.h18
-rw-r--r--fs/gfs2/ops_super.c384
-rw-r--r--fs/gfs2/ops_super.h15
-rw-r--r--fs/gfs2/ops_vm.c194
-rw-r--r--fs/gfs2/ops_vm.h16
-rw-r--r--fs/gfs2/page.c279
-rw-r--r--fs/gfs2/page.h23
-rw-r--r--fs/gfs2/quota.c1297
-rw-r--r--fs/gfs2/quota.h34
-rw-r--r--fs/gfs2/recovery.c571
-rw-r--r--fs/gfs2/recovery.h32
-rw-r--r--fs/gfs2/rgrp.c1365
-rw-r--r--fs/gfs2/rgrp.h62
-rw-r--r--fs/gfs2/super.c955
-rw-r--r--fs/gfs2/super.h55
-rw-r--r--fs/gfs2/sys.c578
-rw-r--r--fs/gfs2/sys.h24
-rw-r--r--fs/gfs2/trans.c198
-rw-r--r--fs/gfs2/trans.h39
-rw-r--r--fs/gfs2/unlinked.c453
-rw-r--r--fs/gfs2/unlinked.h25
-rw-r--r--fs/gfs2/util.c246
-rw-r--r--fs/gfs2/util.h172
85 files changed, 29746 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
new file mode 100644
index 000000000000..17cb44bea1c0
--- /dev/null
+++ b/fs/gfs2/Kconfig
@@ -0,0 +1,46 @@
1config GFS2_FS
2 tristate "GFS2 file system support"
3 default m
4 depends on EXPERIMENTAL
5 select FS_POSIX_ACL
6 select SYSFS
7 help
8 A cluster filesystem.
9
10 Allows a cluster of computers to simultaneously use a block device
11 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
12 and writes to the block device like a local filesystem, but also uses
13 a lock module to allow the computers to coordinate their I/O so
14 filesystem consistency is maintained. One of the nifty features of
15 GFS is perfect consistency -- changes made to the filesystem on one
16 machine show up immediately on all other machines in the cluster.
17
18 To use the GFS2 filesystem, you will need to enable one or more of
19 the below locking modules. Documentation and utilities for GFS2 can
20 be found here: http://sources.redhat.com/cluster/gfs/
21
22config GFS2_FS_LOCKING_NOLOCK
23 tristate "GFS2 \"nolock\" locking module"
24 depends on GFS2_FS
25 help
26 Single node locking module for GFS2.
27
28 Use this module if you want to use GFS2 on a single node without
29 its clustering features. You can still take advantage of the
30 large file support, and upgrade to running a full cluster later on
31 if required.
32
33 If you will only be using GFS2 in cluster mode, you do not need this
34 module.
35
36config GFS2_FS_LOCKING_DLM
37 tristate "GFS2 DLM locking module"
38 depends on GFS2_FS
39 select DLM
40 help
41 Multiple node locking module for GFS2
42
43 Most users of GFS2 will require this module. It provides the locking
44 interface between GFS2 and the DLM, which is required to use GFS2
45 in a cluster environment.
46
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
new file mode 100644
index 000000000000..88f927948113
--- /dev/null
+++ b/fs/gfs2/Makefile
@@ -0,0 +1,42 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := \
3 acl.o \
4 bits.o \
5 bmap.o \
6 daemon.o \
7 dir.o \
8 eaops.o \
9 eattr.o \
10 glock.o \
11 glops.o \
12 inode.o \
13 lm.o \
14 log.o \
15 lops.o \
16 locking.o \
17 lvb.o \
18 main.o \
19 meta_io.o \
20 mount.o \
21 ondisk.o \
22 ops_address.o \
23 ops_dentry.o \
24 ops_export.o \
25 ops_file.o \
26 ops_fstype.o \
27 ops_inode.o \
28 ops_super.o \
29 ops_vm.o \
30 page.o \
31 quota.o \
32 recovery.o \
33 rgrp.o \
34 super.o \
35 sys.o \
36 trans.o \
37 unlinked.o \
38 util.o
39
40obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
41obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
42
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
new file mode 100644
index 000000000000..9482a677ea47
--- /dev/null
+++ b/fs/gfs2/acl.c
@@ -0,0 +1,312 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "eaops.h"
22#include "eattr.h"
23#include "glock.h"
24#include "inode.h"
25#include "meta_io.h"
26#include "trans.h"
27
28#define ACL_ACCESS 1
29#define ACL_DEFAULT 0
30
31int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
32 struct gfs2_ea_request *er,
33 int *remove, mode_t *mode)
34{
35 struct posix_acl *acl;
36 int error;
37
38 error = gfs2_acl_validate_remove(ip, access);
39 if (error)
40 return error;
41
42 if (!er->er_data)
43 return -EINVAL;
44
45 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
46 if (IS_ERR(acl))
47 return PTR_ERR(acl);
48 if (!acl) {
49 *remove = 1;
50 return 0;
51 }
52
53 error = posix_acl_valid(acl);
54 if (error)
55 goto out;
56
57 if (access) {
58 error = posix_acl_equiv_mode(acl, mode);
59 if (!error)
60 *remove = 1;
61 else if (error > 0)
62 error = 0;
63 }
64
65 out:
66 posix_acl_release(acl);
67
68 return error;
69}
70
71int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
72{
73 if (!ip->i_sbd->sd_args.ar_posix_acl)
74 return -EOPNOTSUPP;
75 if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
76 return -EPERM;
77 if (S_ISLNK(ip->i_di.di_mode))
78 return -EOPNOTSUPP;
79 if (!access && !S_ISDIR(ip->i_di.di_mode))
80 return -EACCES;
81
82 return 0;
83}
84
85static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
86 struct gfs2_ea_location *el, char **data, unsigned int *len)
87{
88 struct gfs2_ea_request er;
89 struct gfs2_ea_location el_this;
90 int error;
91
92 if (!ip->i_di.di_eattr)
93 return 0;
94
95 memset(&er, 0, sizeof(struct gfs2_ea_request));
96 if (access) {
97 er.er_name = GFS2_POSIX_ACL_ACCESS;
98 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
99 } else {
100 er.er_name = GFS2_POSIX_ACL_DEFAULT;
101 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
102 }
103 er.er_type = GFS2_EATYPE_SYS;
104
105 if (!el)
106 el = &el_this;
107
108 error = gfs2_ea_find(ip, &er, el);
109 if (error)
110 return error;
111 if (!el->el_ea)
112 return 0;
113 if (!GFS2_EA_DATA_LEN(el->el_ea))
114 goto out;
115
116 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
117 er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
118 error = -ENOMEM;
119 if (!er.er_data)
120 goto out;
121
122 error = gfs2_ea_get_copy(ip, el, er.er_data);
123 if (error)
124 goto out_kfree;
125
126 if (acl) {
127 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
128 if (IS_ERR(*acl))
129 error = PTR_ERR(*acl);
130 }
131
132 out_kfree:
133 if (error || !data)
134 kfree(er.er_data);
135 else {
136 *data = er.er_data;
137 *len = er.er_data_len;
138 }
139
140 out:
141 if (error || el == &el_this)
142 brelse(el->el_bh);
143
144 return error;
145}
146
147/**
148 * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
149 * @inode: the file we want to do something to
150 * @mask: what we want to do
151 *
152 * Returns: errno
153 */
154
155int gfs2_check_acl_locked(struct inode *inode, int mask)
156{
157 struct posix_acl *acl = NULL;
158 int error;
159
160 error = acl_get(get_v2ip(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
161 if (error)
162 return error;
163
164 if (acl) {
165 error = posix_acl_permission(inode, acl, mask);
166 posix_acl_release(acl);
167 return error;
168 }
169
170 return -EAGAIN;
171}
172
173int gfs2_check_acl(struct inode *inode, int mask)
174{
175 struct gfs2_inode *ip = get_v2ip(inode);
176 struct gfs2_holder i_gh;
177 int error;
178
179 error = gfs2_glock_nq_init(ip->i_gl,
180 LM_ST_SHARED, LM_FLAG_ANY,
181 &i_gh);
182 if (!error) {
183 error = gfs2_check_acl_locked(inode, mask);
184 gfs2_glock_dq_uninit(&i_gh);
185 }
186
187 return error;
188}
189
190static int munge_mode(struct gfs2_inode *ip, mode_t mode)
191{
192 struct gfs2_sbd *sdp = ip->i_sbd;
193 struct buffer_head *dibh;
194 int error;
195
196 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
197 if (error)
198 return error;
199
200 error = gfs2_meta_inode_buffer(ip, &dibh);
201 if (!error) {
202 gfs2_assert_withdraw(sdp,
203 (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
204 ip->i_di.di_mode = mode;
205 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
206 gfs2_dinode_out(&ip->i_di, dibh->b_data);
207 brelse(dibh);
208 }
209
210 gfs2_trans_end(sdp);
211
212 return 0;
213}
214
215int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
216{
217 struct gfs2_sbd *sdp = dip->i_sbd;
218 struct posix_acl *acl = NULL, *clone;
219 struct gfs2_ea_request er;
220 mode_t mode = ip->i_di.di_mode;
221 int error;
222
223 if (!sdp->sd_args.ar_posix_acl)
224 return 0;
225 if (S_ISLNK(ip->i_di.di_mode))
226 return 0;
227
228 memset(&er, 0, sizeof(struct gfs2_ea_request));
229 er.er_type = GFS2_EATYPE_SYS;
230
231 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
232 &er.er_data, &er.er_data_len);
233 if (error)
234 return error;
235 if (!acl) {
236 mode &= ~current->fs->umask;
237 if (mode != ip->i_di.di_mode)
238 error = munge_mode(ip, mode);
239 return error;
240 }
241
242 clone = posix_acl_clone(acl, GFP_KERNEL);
243 error = -ENOMEM;
244 if (!clone)
245 goto out;
246 posix_acl_release(acl);
247 acl = clone;
248
249 if (S_ISDIR(ip->i_di.di_mode)) {
250 er.er_name = GFS2_POSIX_ACL_DEFAULT;
251 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
252 error = gfs2_system_eaops.eo_set(ip, &er);
253 if (error)
254 goto out;
255 }
256
257 error = posix_acl_create_masq(acl, &mode);
258 if (error < 0)
259 goto out;
260 if (error > 0) {
261 er.er_name = GFS2_POSIX_ACL_ACCESS;
262 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
263 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
264 er.er_mode = mode;
265 er.er_flags = GFS2_ERF_MODE;
266 error = gfs2_system_eaops.eo_set(ip, &er);
267 if (error)
268 goto out;
269 } else
270 munge_mode(ip, mode);
271
272 out:
273 posix_acl_release(acl);
274 kfree(er.er_data);
275 return error;
276}
277
278int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
279{
280 struct posix_acl *acl = NULL, *clone;
281 struct gfs2_ea_location el;
282 char *data;
283 unsigned int len;
284 int error;
285
286 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
287 if (error)
288 return error;
289 if (!acl)
290 return gfs2_setattr_simple(ip, attr);
291
292 clone = posix_acl_clone(acl, GFP_KERNEL);
293 error = -ENOMEM;
294 if (!clone)
295 goto out;
296 posix_acl_release(acl);
297 acl = clone;
298
299 error = posix_acl_chmod_masq(acl, attr->ia_mode);
300 if (!error) {
301 posix_acl_to_xattr(acl, data, len);
302 error = gfs2_ea_acl_chmod(ip, &el, attr, data);
303 }
304
305 out:
306 posix_acl_release(acl);
307 brelse(el.el_bh);
308 kfree(data);
309
310 return error;
311}
312
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
new file mode 100644
index 000000000000..a174b4f6bcc2
--- /dev/null
+++ b/fs/gfs2/acl.h
@@ -0,0 +1,37 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __ACL_DOT_H__
11#define __ACL_DOT_H__
12
/*
 * Names of the two POSIX ACL extended attributes and their lengths
 * (string length, not counting the terminating NUL).
 */
#define GFS2_POSIX_ACL_ACCESS		"posix_acl_access"
#define GFS2_POSIX_ACL_ACCESS_LEN	16
#define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
#define GFS2_POSIX_ACL_DEFAULT_LEN	17

/* True when the (name, len) pair is exactly the access ACL attribute name */
#define GFS2_ACL_IS_ACCESS(name, len) \
	(((len) == GFS2_POSIX_ACL_ACCESS_LEN) && \
	 (memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)) == 0))

/* True when the (name, len) pair is exactly the default ACL attribute name */
#define GFS2_ACL_IS_DEFAULT(name, len) \
	(((len) == GFS2_POSIX_ACL_DEFAULT_LEN) && \
	 (memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)) == 0))
25
/*
 * Forward declarations for every structure that appears only as a pointer
 * in the prototypes below, so this header does not depend on the order in
 * which other headers were included.
 */
struct gfs2_inode;
struct gfs2_ea_request;
struct inode;
struct iattr;

/* Validation of requests to set or remove an ACL attribute */
int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode);
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);

/* Permission checks against the access ACL (without / with glock) */
int gfs2_check_acl_locked(struct inode *inode, int mask);
int gfs2_check_acl(struct inode *inode, int mask);

/* ACL maintenance at inode creation and on mode change */
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
36
37#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bits.c b/fs/gfs2/bits.c
new file mode 100644
index 000000000000..57d420a86adf
--- /dev/null
+++ b/fs/gfs2/bits.c
@@ -0,0 +1,178 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * These routines are used by the resource group routines (rgrp.c)
12 * to keep track of block allocation. Each block is represented by two
13 * bits. One bit indicates whether or not the block is used. (1=used,
14 * 0=free) The other bit indicates whether or not the block contains a
15 * dinode or not. (1=dinode, 0=not-dinode) So, each byte represents
16 * GFS2_NBBY (i.e. 4) blocks.
17 */
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/spinlock.h>
22#include <linux/completion.h>
23#include <linux/buffer_head.h>
24#include <asm/semaphore.h>
25
26#include "gfs2.h"
27#include "bits.h"
28
/*
 * Permitted block state transitions, indexed by
 * (new_state * 4 + current_state); a non-zero entry means the change is
 * allowed.  States are the two-bit values stored in the bitmap.
 */
static const char valid_change[16] = {
	/* current:    0  1  2  3 */
	/* new = 0 */  0, 1, 0, 1,
	/* new = 1 */  1, 0, 0, 0,
	/* new = 2 */  0, 0, 0, 0,
	/* new = 3 */  1, 0, 0, 0
};
36
37/**
38 * gfs2_setbit - Set a bit in the bitmaps
39 * @buffer: the buffer that holds the bitmaps
40 * @buflen: the length (in bytes) of the buffer
41 * @block: the block to set
42 * @new_state: the new state of the block
43 *
44 */
45
46void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
47 unsigned int buflen, uint32_t block, unsigned char new_state)
48{
49 unsigned char *byte, *end, cur_state;
50 unsigned int bit;
51
52 byte = buffer + (block / GFS2_NBBY);
53 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
54 end = buffer + buflen;
55
56 gfs2_assert(rgd->rd_sbd, byte < end);
57
58 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
59
60 if (valid_change[new_state * 4 + cur_state]) {
61 *byte ^= cur_state << bit;
62 *byte |= new_state << bit;
63 } else
64 gfs2_consist_rgrpd(rgd);
65}
66
67/**
68 * gfs2_testbit - test a bit in the bitmaps
69 * @buffer: the buffer that holds the bitmaps
70 * @buflen: the length (in bytes) of the buffer
71 * @block: the block to read
72 *
73 */
74
75unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
76 unsigned int buflen, uint32_t block)
77{
78 unsigned char *byte, *end, cur_state;
79 unsigned int bit;
80
81 byte = buffer + (block / GFS2_NBBY);
82 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
83 end = buffer + buflen;
84
85 gfs2_assert(rgd->rd_sbd, byte < end);
86
87 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
88
89 return cur_state;
90}
91
92/**
93 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
94 * a block in a given allocation state.
95 * @buffer: the buffer that holds the bitmaps
96 * @buflen: the length (in bytes) of the buffer
97 * @goal: start search at this block's bit-pair (within @buffer)
98 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
99 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
100 *
101 * Scope of @goal and returned block number is only within this bitmap buffer,
102 * not entire rgrp or filesystem. @buffer will be offset from the actual
103 * beginning of a bitmap block buffer, skipping any header structures.
104 *
105 * Return: the block number (bitmap buffer scope) that was found
106 */
107
108uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
109 unsigned int buflen, uint32_t goal,
110 unsigned char old_state)
111{
112 unsigned char *byte, *end, alloc;
113 uint32_t blk = goal;
114 unsigned int bit;
115
116 byte = buffer + (goal / GFS2_NBBY);
117 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
118 end = buffer + buflen;
119 alloc = (old_state & 1) ? 0 : 0x55;
120
121 while (byte < end) {
122 if ((*byte & 0x55) == alloc) {
123 blk += (8 - bit) >> 1;
124
125 bit = 0;
126 byte++;
127
128 continue;
129 }
130
131 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
132 return blk;
133
134 bit += GFS2_BIT_SIZE;
135 if (bit >= 8) {
136 bit = 0;
137 byte++;
138 }
139
140 blk++;
141 }
142
143 return BFITNOENT;
144}
145
/**
 * gfs2_bitcount - count the bit-pairs that are in a certain state
 * @rgd: the resource group (not used by the count itself)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Each byte holds four two-bit fields; every field equal to @state
 * counts once.
 *
 * Returns: The number of matching bit-pairs
 */

uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		       unsigned int buflen, unsigned char state)
{
	uint32_t total = 0;
	unsigned int i, shift;

	for (i = 0; i < buflen; i++) {
		for (shift = 0; shift < 8; shift += 2) {
			/* the shifted state is deliberately kept to 8 bits */
			unsigned char want = state << shift;

			if ((buffer[i] & (0x03 << shift)) == want)
				total++;
		}
	}

	return total;
}
178
diff --git a/fs/gfs2/bits.h b/fs/gfs2/bits.h
new file mode 100644
index 000000000000..36ccbdcb1eef
--- /dev/null
+++ b/fs/gfs2/bits.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __BITS_DOT_H__
11#define __BITS_DOT_H__
12
/* Returned by gfs2_bitfit() when no bit-pair in the wanted state is found */
#define BFITNOENT 0xFFFFFFFF

/* Change the state of @block in the bitmap held in @buffer */
void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		 unsigned int buflen, uint32_t block,
		 unsigned char new_state);

/* Read the state of @block from the bitmap held in @buffer */
unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
			   unsigned int buflen, uint32_t block);

/* Find the first block at or after @goal that is in state @old_state */
uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		     unsigned int buflen, uint32_t goal,
		     unsigned char old_state);

/* Count the bit-pairs in @buffer that are in state @state */
uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
		       unsigned int buflen, unsigned char state);
27
28#endif /* __BITS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
new file mode 100644
index 000000000000..e132d8a41008
--- /dev/null
+++ b/fs/gfs2/bmap.c
@@ -0,0 +1,1093 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "inode.h"
21#include "meta_io.h"
22#include "page.h"
23#include "quota.h"
24#include "rgrp.h"
25#include "trans.h"
26#include "dir.h"
27
28/* This doesn't need to be that large as max 64 bit pointers in a 4k
29 * block is 512, so __u16 is fine for that. It saves stack space to
30 * keep it small.
31 */
32struct metapath {
33 __u16 mp_list[GFS2_MAX_META_HEIGHT];
34};
35
36typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
37 struct buffer_head *bh, uint64_t *top,
38 uint64_t *bottom, unsigned int height,
39 void *data);
40
41struct strip_mine {
42 int sm_first;
43 unsigned int sm_height;
44};
45
46/**
47 * @gfs2_unstuffer_sync - Synchronously unstuff a dinode
48 * @ip:
49 * @dibh:
50 * @block:
51 * @private:
52 *
53 * Cheat and use a metadata buffer instead of a data page.
54 *
55 * Returns: errno
56 */
57
58int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
59 uint64_t block, void *private)
60{
61 struct buffer_head *bh;
62 int error;
63
64 bh = gfs2_meta_new(ip->i_gl, block);
65
66 gfs2_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs2_dinode));
67
68 set_buffer_dirty(bh);
69 error = sync_dirty_buffer(bh);
70
71 brelse(bh);
72
73 return error;
74}
75
76/**
77 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
78 * @ip: The GFS2 inode to unstuff
79 * @unstuffer: the routine that handles unstuffing a non-zero length file
80 * @private: private data for the unstuffer
81 *
82 * This routine unstuffs a dinode and returns it to a "normal" state such
83 * that the height can be grown in the traditional way.
84 *
85 * Returns: errno
86 */
87
88int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
89 void *private)
90{
91 struct buffer_head *bh, *dibh;
92 uint64_t block = 0;
93 int isdir = gfs2_is_dir(ip);
94 int error;
95
96 down_write(&ip->i_rw_mutex);
97
98 error = gfs2_meta_inode_buffer(ip, &dibh);
99 if (error)
100 goto out;
101
102 if (ip->i_di.di_size) {
103 /* Get a free block, fill it with the stuffed data,
104 and write it out to disk */
105
106 if (isdir) {
107 block = gfs2_alloc_meta(ip);
108
109 error = gfs2_dir_get_buffer(ip, block, 1, &bh);
110 if (error)
111 goto out_brelse;
112 gfs2_buffer_copy_tail(bh,
113 sizeof(struct gfs2_meta_header),
114 dibh, sizeof(struct gfs2_dinode));
115 brelse(bh);
116 } else {
117 block = gfs2_alloc_data(ip);
118
119 error = unstuffer(ip, dibh, block, private);
120 if (error)
121 goto out_brelse;
122 }
123 }
124
125 /* Set up the pointer to the new block */
126
127 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
128
129 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
130
131 if (ip->i_di.di_size) {
132 *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) =
133 cpu_to_be64(block);
134 ip->i_di.di_blocks++;
135 }
136
137 ip->i_di.di_height = 1;
138
139 gfs2_dinode_out(&ip->i_di, dibh->b_data);
140
141 out_brelse:
142 brelse(dibh);
143
144 out:
145 up_write(&ip->i_rw_mutex);
146
147 return error;
148}
149
150/**
151 * calc_tree_height - Calculate the height of a metadata tree
152 * @ip: The GFS2 inode
153 * @size: The proposed size of the file
154 *
155 * Work out how tall a metadata tree needs to be in order to accommodate a
156 * file of a particular size. If size is less than the current size of
157 * the inode, then the current size of the inode is used instead of the
158 * supplied one.
159 *
160 * Returns: the height the tree should be
161 */
162
163static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
164{
165 struct gfs2_sbd *sdp = ip->i_sbd;
166 uint64_t *arr;
167 unsigned int max, height;
168
169 if (ip->i_di.di_size > size)
170 size = ip->i_di.di_size;
171
172 if (gfs2_is_dir(ip)) {
173 arr = sdp->sd_jheightsize;
174 max = sdp->sd_max_jheight;
175 } else {
176 arr = sdp->sd_heightsize;
177 max = sdp->sd_max_height;
178 }
179
180 for (height = 0; height < max; height++)
181 if (arr[height] >= size)
182 break;
183
184 return height;
185}
186
187/**
188 * build_height - Build a metadata tree of the requested height
189 * @ip: The GFS2 inode
190 * @height: The height to build to
191 *
192 * This routine makes sure that the metadata tree is tall enough to hold
193 * "size" bytes of data.
194 *
195 * Returns: errno
196 */
197
198static int build_height(struct gfs2_inode *ip, int height)
199{
200 struct gfs2_sbd *sdp = ip->i_sbd;
201 struct buffer_head *bh, *dibh;
202 uint64_t block = 0, *bp;
203 unsigned int x;
204 int new_block;
205 int error;
206
207 while (ip->i_di.di_height < height) {
208 error = gfs2_meta_inode_buffer(ip, &dibh);
209 if (error)
210 return error;
211
212 new_block = 0;
213 bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
214 for (x = 0; x < sdp->sd_diptrs; x++, bp++)
215 if (*bp) {
216 new_block = 1;
217 break;
218 }
219
220 if (new_block) {
221 /* Get a new block, fill it with the old direct
222 pointers, and write it out */
223
224 block = gfs2_alloc_meta(ip);
225
226 bh = gfs2_meta_new(ip->i_gl, block);
227 gfs2_trans_add_bh(ip->i_gl, bh, 1);
228 gfs2_metatype_set(bh,
229 GFS2_METATYPE_IN,
230 GFS2_FORMAT_IN);
231 gfs2_buffer_copy_tail(bh,
232 sizeof(struct gfs2_meta_header),
233 dibh, sizeof(struct gfs2_dinode));
234
235 brelse(bh);
236 }
237
238 /* Set up the new direct pointer and write it out to disk */
239
240 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
241
242 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
243
244 if (new_block) {
245 *(uint64_t *)(dibh->b_data +
246 sizeof(struct gfs2_dinode)) =
247 cpu_to_be64(block);
248 ip->i_di.di_blocks++;
249 }
250
251 ip->i_di.di_height++;
252
253 gfs2_dinode_out(&ip->i_di, dibh->b_data);
254 brelse(dibh);
255 }
256
257 return 0;
258}
259
260/**
261 * find_metapath - Find path through the metadata tree
262 * @ip: The inode pointer
263 * @mp: The metapath to return the result in
264 * @block: The disk block to look up
265 *
266 * This routine returns a struct metapath structure that defines a path
267 * through the metadata of inode "ip" to get to block "block".
268 *
269 * Example:
270 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
271 * filesystem with a blocksize of 4096.
272 *
273 * find_metapath() would return a struct metapath structure set to:
274 * mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165 (struct metapath
275 * holds only mp_list[]; the offset and the height are not stored in it).
276 *
277 * That means that in order to get to the block containing the byte at
278 * offset 101342453, we would load the indirect block pointed to by pointer
279 * 0 in the dinode. We would then load the indirect block pointed to by
280 * pointer 48 in that indirect block. We would then load the data block
281 * pointed to by pointer 165 in that indirect block.
282 *
283 * ----------------------------------------
284 * | Dinode | |
285 * | | 4|
286 * | |0 1 2 3 4 5 9|
287 * | | 6|
288 * ----------------------------------------
289 * |
290 * |
291 * V
292 * ----------------------------------------
293 * | Indirect Block |
294 * | 5|
295 * | 4 4 4 4 4 5 5 1|
296 * |0 5 6 7 8 9 0 1 2|
297 * ----------------------------------------
298 * |
299 * |
300 * V
301 * ----------------------------------------
302 * | Indirect Block |
303 * | 1 1 1 1 1 5|
304 * | 6 6 6 6 6 1|
305 * |0 3 4 5 6 7 2|
306 * ----------------------------------------
307 * |
308 * |
309 * V
310 * ----------------------------------------
311 * | Data block containing offset |
312 * | 101342453 |
313 * | |
314 * | |
315 * ----------------------------------------
316 *
317 */
318
319static void find_metapath(struct gfs2_inode *ip, uint64_t block,
320 struct metapath *mp)
321{
322 struct gfs2_sbd *sdp = ip->i_sbd;
323 uint64_t b = block;
324 unsigned int i;
325
326 for (i = ip->i_di.di_height; i--;)
327 mp->mp_list[i] = (__u16)do_div(b, sdp->sd_inptrs);
328
329}
330
331/**
332 * metapointer - Return pointer to start of metadata in a buffer
333 * @bh: The buffer
334 * @height: The metadata height (0 = dinode)
335 * @mp: The metapath
336 *
337 * Return a pointer to the block number of the next height of the metadata
338 * tree given a buffer containing the pointer to the current height of the
339 * metadata tree.
340 */
341
342static inline uint64_t *metapointer(struct buffer_head *bh,
343 unsigned int height, struct metapath *mp)
344{
345 unsigned int head_size = (height > 0) ?
346 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
347
348 return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height];
349}
350
351/**
352 * lookup_block - Get the next metadata block in metadata tree
353 * @ip: The GFS2 inode
354 * @bh: Buffer containing the pointers to metadata blocks
355 * @height: The height of the tree (0 = dinode)
356 * @mp: The metapath
357 * @create: Non-zero if we may create a new meatdata block
358 * @new: Used to indicate if we did create a new metadata block
359 * @block: the returned disk block number
360 *
361 * Given a metatree, complete to a particular height, checks to see if the next
362 * height of the tree exists. If not the next height of the tree is created.
363 * The block number of the next height of the metadata tree is returned.
364 *
365 */
366
367static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
368 unsigned int height, struct metapath *mp, int create,
369 int *new, uint64_t *block)
370{
371 uint64_t *ptr = metapointer(bh, height, mp);
372
373 if (*ptr) {
374 *block = be64_to_cpu(*ptr);
375 return;
376 }
377
378 *block = 0;
379
380 if (!create)
381 return;
382
383 if (height == ip->i_di.di_height - 1 &&
384 !gfs2_is_dir(ip))
385 *block = gfs2_alloc_data(ip);
386 else
387 *block = gfs2_alloc_meta(ip);
388
389 gfs2_trans_add_bh(ip->i_gl, bh, 1);
390
391 *ptr = cpu_to_be64(*block);
392 ip->i_di.di_blocks++;
393
394 *new = 1;
395}
396
397/**
398 * gfs2_block_map - Map a block from an inode to a disk block
399 * @ip: The GFS2 inode
400 * @lblock: The logical block number
401 * @new: Value/Result argument (1 = may create/did create new blocks)
402 * @dblock: the disk block number of the start of an extent
403 * @extlen: the size of the extent
404 *
405 * Find the block number on the current device which corresponds to an
406 * inode's block. If the block had to be created, "new" will be set.
407 *
408 * Returns: errno
409 */
410
411int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new,
412 uint64_t *dblock, uint32_t *extlen)
413{
414 struct gfs2_sbd *sdp = ip->i_sbd;
415 struct buffer_head *bh;
416 struct metapath mp;
417 int create = *new;
418 unsigned int bsize;
419 unsigned int height;
420 unsigned int end_of_metadata;
421 unsigned int x;
422 int error = 0;
423
424 *new = 0;
425 *dblock = 0;
426 if (extlen)
427 *extlen = 0;
428
429 if (create)
430 down_write(&ip->i_rw_mutex);
431 else
432 down_read(&ip->i_rw_mutex);
433
434 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
435 goto out;
436
437 bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
438
439 height = calc_tree_height(ip, (lblock + 1) * bsize);
440 if (ip->i_di.di_height < height) {
441 if (!create)
442 goto out;
443
444 error = build_height(ip, height);
445 if (error)
446 goto out;
447 }
448
449 find_metapath(ip, lblock, &mp);
450 end_of_metadata = ip->i_di.di_height - 1;
451
452 error = gfs2_meta_inode_buffer(ip, &bh);
453 if (error)
454 goto out;
455
456 for (x = 0; x < end_of_metadata; x++) {
457 lookup_block(ip, bh, x, &mp, create, new, dblock);
458 brelse(bh);
459 if (!*dblock)
460 goto out;
461
462 error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh);
463 if (error)
464 goto out;
465 }
466
467 lookup_block(ip, bh, end_of_metadata, &mp, create, new, dblock);
468
469 if (extlen && *dblock) {
470 *extlen = 1;
471
472 if (!*new) {
473 uint64_t tmp_dblock;
474 int tmp_new;
475 unsigned int nptrs;
476
477 nptrs = (end_of_metadata) ? sdp->sd_inptrs :
478 sdp->sd_diptrs;
479
480 while (++mp.mp_list[end_of_metadata] < nptrs) {
481 lookup_block(ip, bh, end_of_metadata, &mp,
482 0, &tmp_new, &tmp_dblock);
483
484 if (*dblock + *extlen != tmp_dblock)
485 break;
486
487 (*extlen)++;
488 }
489 }
490 }
491
492 brelse(bh);
493
494 if (*new) {
495 error = gfs2_meta_inode_buffer(ip, &bh);
496 if (!error) {
497 gfs2_trans_add_bh(ip->i_gl, bh, 1);
498 gfs2_dinode_out(&ip->i_di, bh->b_data);
499 brelse(bh);
500 }
501 }
502
503 out:
504 if (create)
505 up_write(&ip->i_rw_mutex);
506 else
507 up_read(&ip->i_rw_mutex);
508
509 return error;
510}
511
512/**
513 * recursive_scan - recursively scan through the end of a file
514 * @ip: the inode
515 * @dibh: the dinode buffer
516 * @mp: the path through the metadata to the point to start
517 * @height: the height the recursion is at
518 * @block: the indirect block to look at
519 * @first: 1 if this is the first block
520 * @bc: the call to make for each piece of metadata
521 * @data: data opaque to this function to pass to @bc
522 *
523 * When this is first called @height and @block should be zero and
524 * @first should be 1.
525 *
526 * Returns: errno
527 */
528
529static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
530 struct metapath *mp, unsigned int height,
531 uint64_t block, int first, block_call_t bc,
532 void *data)
533{
534 struct gfs2_sbd *sdp = ip->i_sbd;
535 struct buffer_head *bh = NULL;
536 uint64_t *top, *bottom;
537 uint64_t bn;
538 int error;
539 int mh_size = sizeof(struct gfs2_meta_header);
540
541 if (!height) {
542 error = gfs2_meta_inode_buffer(ip, &bh);
543 if (error)
544 return error;
545 dibh = bh;
546
547 top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
548 mp->mp_list[0];
549 bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
550 sdp->sd_diptrs;
551 } else {
552 error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
553 if (error)
554 return error;
555
556 top = (uint64_t *)(bh->b_data + mh_size) +
557 ((first) ? mp->mp_list[height] : 0);
558
559 bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs;
560 }
561
562 error = bc(ip, dibh, bh, top, bottom, height, data);
563 if (error)
564 goto out;
565
566 if (height < ip->i_di.di_height - 1)
567 for (; top < bottom; top++, first = 0) {
568 if (!*top)
569 continue;
570
571 bn = be64_to_cpu(*top);
572
573 error = recursive_scan(ip, dibh, mp, height + 1, bn,
574 first, bc, data);
575 if (error)
576 break;
577 }
578
579 out:
580 brelse(bh);
581
582 return error;
583}
584
585/**
586 * do_strip - Look for a layer a particular layer of the file and strip it off
587 * @ip: the inode
588 * @dibh: the dinode buffer
589 * @bh: A buffer of pointers
590 * @top: The first pointer in the buffer
591 * @bottom: One more than the last pointer
592 * @height: the height this buffer is at
593 * @data: a pointer to a struct strip_mine
594 *
595 * Returns: errno
596 */
597
598static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
599 struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
600 unsigned int height, void *data)
601{
602 struct strip_mine *sm = (struct strip_mine *)data;
603 struct gfs2_sbd *sdp = ip->i_sbd;
604 struct gfs2_rgrp_list rlist;
605 uint64_t bn, bstart;
606 uint32_t blen;
607 uint64_t *p;
608 unsigned int rg_blocks = 0;
609 int metadata;
610 unsigned int revokes = 0;
611 int x;
612 int error;
613
614 if (!*top)
615 sm->sm_first = 0;
616
617 if (height != sm->sm_height)
618 return 0;
619
620 if (sm->sm_first) {
621 top++;
622 sm->sm_first = 0;
623 }
624
625 metadata = (height != ip->i_di.di_height - 1);
626 if (metadata)
627 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
628
629 error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
630 if (error)
631 return error;
632
633 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
634 bstart = 0;
635 blen = 0;
636
637 for (p = top; p < bottom; p++) {
638 if (!*p)
639 continue;
640
641 bn = be64_to_cpu(*p);
642
643 if (bstart + blen == bn)
644 blen++;
645 else {
646 if (bstart)
647 gfs2_rlist_add(sdp, &rlist, bstart);
648
649 bstart = bn;
650 blen = 1;
651 }
652 }
653
654 if (bstart)
655 gfs2_rlist_add(sdp, &rlist, bstart);
656 else
657 goto out; /* Nothing to do */
658
659 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
660
661 for (x = 0; x < rlist.rl_rgrps; x++) {
662 struct gfs2_rgrpd *rgd;
663 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
664 rg_blocks += rgd->rd_ri.ri_length;
665 }
666
667 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
668 if (error)
669 goto out_rlist;
670
671 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
672 RES_INDIRECT + RES_STATFS + RES_QUOTA,
673 revokes);
674 if (error)
675 goto out_rg_gunlock;
676
677 down_write(&ip->i_rw_mutex);
678
679 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
680 gfs2_trans_add_bh(ip->i_gl, bh, 1);
681
682 bstart = 0;
683 blen = 0;
684
685 for (p = top; p < bottom; p++) {
686 if (!*p)
687 continue;
688
689 bn = be64_to_cpu(*p);
690
691 if (bstart + blen == bn)
692 blen++;
693 else {
694 if (bstart) {
695 if (metadata)
696 gfs2_free_meta(ip, bstart, blen);
697 else
698 gfs2_free_data(ip, bstart, blen);
699 }
700
701 bstart = bn;
702 blen = 1;
703 }
704
705 *p = 0;
706 if (!ip->i_di.di_blocks)
707 gfs2_consist_inode(ip);
708 ip->i_di.di_blocks--;
709 }
710 if (bstart) {
711 if (metadata)
712 gfs2_free_meta(ip, bstart, blen);
713 else
714 gfs2_free_data(ip, bstart, blen);
715 }
716
717 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
718
719 gfs2_dinode_out(&ip->i_di, dibh->b_data);
720
721 up_write(&ip->i_rw_mutex);
722
723 gfs2_trans_end(sdp);
724
725 out_rg_gunlock:
726 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
727
728 out_rlist:
729 gfs2_rlist_free(&rlist);
730
731 out:
732 gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
733
734 return error;
735}
736
737/**
738 * do_grow - Make a file look bigger than it is
739 * @ip: the inode
740 * @size: the size to set the file to
741 *
742 * Called with an exclusive lock on @ip.
743 *
744 * Returns: errno
745 */
746
747static int do_grow(struct gfs2_inode *ip, uint64_t size)
748{
749 struct gfs2_sbd *sdp = ip->i_sbd;
750 struct gfs2_alloc *al;
751 struct buffer_head *dibh;
752 unsigned int h;
753 int error;
754
755 al = gfs2_alloc_get(ip);
756
757 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
758 if (error)
759 goto out;
760
761 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
762 if (error)
763 goto out_gunlock_q;
764
765 al->al_requested = sdp->sd_max_height + RES_DATA;
766
767 error = gfs2_inplace_reserve(ip);
768 if (error)
769 goto out_gunlock_q;
770
771 error = gfs2_trans_begin(sdp,
772 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
773 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
774 if (error)
775 goto out_ipres;
776
777 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
778 if (gfs2_is_stuffed(ip)) {
779 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
780 NULL);
781 if (error)
782 goto out_end_trans;
783 }
784
785 h = calc_tree_height(ip, size);
786 if (ip->i_di.di_height < h) {
787 down_write(&ip->i_rw_mutex);
788 error = build_height(ip, h);
789 up_write(&ip->i_rw_mutex);
790 if (error)
791 goto out_end_trans;
792 }
793 }
794
795 ip->i_di.di_size = size;
796 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
797
798 error = gfs2_meta_inode_buffer(ip, &dibh);
799 if (error)
800 goto out_end_trans;
801
802 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
803 gfs2_dinode_out(&ip->i_di, dibh->b_data);
804 brelse(dibh);
805
806 out_end_trans:
807 gfs2_trans_end(sdp);
808
809 out_ipres:
810 gfs2_inplace_release(ip);
811
812 out_gunlock_q:
813 gfs2_quota_unlock(ip);
814
815 out:
816 gfs2_alloc_put(ip);
817
818 return error;
819}
820
821static int trunc_start(struct gfs2_inode *ip, uint64_t size)
822{
823 struct gfs2_sbd *sdp = ip->i_sbd;
824 struct buffer_head *dibh;
825 int journaled = gfs2_is_jdata(ip);
826 int error;
827
828 error = gfs2_trans_begin(sdp,
829 RES_DINODE + ((journaled) ? RES_JDATA : 0), 0);
830 if (error)
831 return error;
832
833 error = gfs2_meta_inode_buffer(ip, &dibh);
834 if (error)
835 goto out;
836
837 if (gfs2_is_stuffed(ip)) {
838 ip->i_di.di_size = size;
839 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
840 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
841 gfs2_dinode_out(&ip->i_di, dibh->b_data);
842 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
843 error = 1;
844
845 } else {
846 if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
847 error = gfs2_block_truncate_page(ip->i_vnode->i_mapping);
848
849 if (!error) {
850 ip->i_di.di_size = size;
851 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
852 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
853 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
854 gfs2_dinode_out(&ip->i_di, dibh->b_data);
855 }
856 }
857
858 brelse(dibh);
859
860 out:
861 gfs2_trans_end(sdp);
862
863 return error;
864}
865
866static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size)
867{
868 unsigned int height = ip->i_di.di_height;
869 uint64_t lblock;
870 struct metapath mp;
871 int error;
872
873 if (!size)
874 lblock = 0;
875 else
876 lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
877
878 find_metapath(ip, lblock, &mp);
879 gfs2_alloc_get(ip);
880
881 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
882 if (error)
883 goto out;
884
885 while (height--) {
886 struct strip_mine sm;
887 sm.sm_first = !!size;
888 sm.sm_height = height;
889
890 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
891 if (error)
892 break;
893 }
894
895 gfs2_quota_unhold(ip);
896
897 out:
898 gfs2_alloc_put(ip);
899 return error;
900}
901
902static int trunc_end(struct gfs2_inode *ip)
903{
904 struct gfs2_sbd *sdp = ip->i_sbd;
905 struct buffer_head *dibh;
906 int error;
907
908 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
909 if (error)
910 return error;
911
912 down_write(&ip->i_rw_mutex);
913
914 error = gfs2_meta_inode_buffer(ip, &dibh);
915 if (error)
916 goto out;
917
918 if (!ip->i_di.di_size) {
919 ip->i_di.di_height = 0;
920 ip->i_di.di_goal_meta =
921 ip->i_di.di_goal_data =
922 ip->i_num.no_addr;
923 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
924 }
925 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
926 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
927
928 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
929 gfs2_dinode_out(&ip->i_di, dibh->b_data);
930 brelse(dibh);
931
932 out:
933 up_write(&ip->i_rw_mutex);
934
935 gfs2_trans_end(sdp);
936
937 return error;
938}
939
940/**
941 * do_shrink - make a file smaller
942 * @ip: the inode
943 * @size: the size to make the file
944 * @truncator: function to truncate the last partial block
945 *
946 * Called with an exclusive lock on @ip.
947 *
948 * Returns: errno
949 */
950
951static int do_shrink(struct gfs2_inode *ip, uint64_t size)
952{
953 int error;
954
955 error = trunc_start(ip, size);
956 if (error < 0)
957 return error;
958 if (error > 0)
959 return 0;
960
961 error = trunc_dealloc(ip, size);
962 if (!error)
963 error = trunc_end(ip);
964
965 return error;
966}
967
968/**
969 * gfs2_truncatei - make a file a given size
970 * @ip: the inode
971 * @size: the size to make the file
972 * @truncator: function to truncate the last partial block
973 *
974 * The file size can grow, shrink, or stay the same size.
975 *
976 * Returns: errno
977 */
978
979int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size)
980{
981 int error;
982
983 if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode)))
984 return -EINVAL;
985
986 if (size > ip->i_di.di_size)
987 error = do_grow(ip, size);
988 else
989 error = do_shrink(ip, size);
990
991 return error;
992}
993
994int gfs2_truncatei_resume(struct gfs2_inode *ip)
995{
996 int error;
997 error = trunc_dealloc(ip, ip->i_di.di_size);
998 if (!error)
999 error = trunc_end(ip);
1000 return error;
1001}
1002
1003int gfs2_file_dealloc(struct gfs2_inode *ip)
1004{
1005 return trunc_dealloc(ip, 0);
1006}
1007
1008/**
1009 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1010 * @ip: the file
1011 * @len: the number of bytes to be written to the file
1012 * @data_blocks: returns the number of data blocks required
1013 * @ind_blocks: returns the number of indirect blocks required
1014 *
1015 */
1016
1017void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1018 unsigned int *data_blocks, unsigned int *ind_blocks)
1019{
1020 struct gfs2_sbd *sdp = ip->i_sbd;
1021 unsigned int tmp;
1022
1023 if (gfs2_is_dir(ip)) {
1024 *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
1025 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1026 } else {
1027 *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1028 *ind_blocks = 3 * (sdp->sd_max_height - 1);
1029 }
1030
1031 for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1032 tmp = DIV_RU(tmp, sdp->sd_inptrs);
1033 *ind_blocks += tmp;
1034 }
1035}
1036
1037/**
1038 * gfs2_write_alloc_required - figure out if a write will require an allocation
1039 * @ip: the file being written to
1040 * @offset: the offset to write to
1041 * @len: the number of bytes being written
1042 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1043 *
1044 * Returns: errno
1045 */
1046
1047int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
1048 unsigned int len, int *alloc_required)
1049{
1050 struct gfs2_sbd *sdp = ip->i_sbd;
1051 uint64_t lblock, lblock_stop, dblock;
1052 uint32_t extlen;
1053 int new = 0;
1054 int error = 0;
1055
1056 *alloc_required = 0;
1057
1058 if (!len)
1059 return 0;
1060
1061 if (gfs2_is_stuffed(ip)) {
1062 if (offset + len >
1063 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1064 *alloc_required = 1;
1065 return 0;
1066 }
1067
1068 if (gfs2_is_dir(ip)) {
1069 unsigned int bsize = sdp->sd_jbsize;
1070 lblock = offset;
1071 do_div(lblock, bsize);
1072 lblock_stop = offset + len + bsize - 1;
1073 do_div(lblock_stop, bsize);
1074 } else {
1075 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1076 lblock = offset >> shift;
1077 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1078 }
1079
1080 for (; lblock < lblock_stop; lblock += extlen) {
1081 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
1082 if (error)
1083 return error;
1084
1085 if (!dblock) {
1086 *alloc_required = 1;
1087 return 0;
1088 }
1089 }
1090
1091 return 0;
1092}
1093
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
new file mode 100644
index 000000000000..ee9ec8d7515c
--- /dev/null
+++ b/fs/gfs2/bmap.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __BMAP_DOT_H__
11#define __BMAP_DOT_H__
12
13typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip,
14 struct buffer_head * dibh, uint64_t block,
15 void *private);
16int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh,
17 uint64_t block, void *private);
18int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
19 void *private);
20
21int gfs2_block_map(struct gfs2_inode *ip,
22 uint64_t lblock, int *new,
23 uint64_t *dblock, uint32_t *extlen);
24
25int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size);
26int gfs2_truncatei_resume(struct gfs2_inode *ip);
27int gfs2_file_dealloc(struct gfs2_inode *ip);
28
29void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
30 unsigned int *data_blocks,
31 unsigned int *ind_blocks);
32int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
33 unsigned int len, int *alloc_required);
34
35#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
new file mode 100644
index 000000000000..cff8d5368d21
--- /dev/null
+++ b/fs/gfs2/daemon.c
@@ -0,0 +1,225 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/delay.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "daemon.h"
21#include "glock.h"
22#include "log.h"
23#include "quota.h"
24#include "recovery.h"
25#include "super.h"
26#include "unlinked.h"
27
28/* This uses schedule_timeout() instead of msleep() because it's good for
29 the daemons to wake up more often than the timeout when unmounting so
30 the user's unmount doesn't sit there forever.
31
32 The kthread functions used to start these daemons block and flush signals. */
33
34/**
35 * gfs2_scand - Look for cached glocks and inodes to toss from memory
36 * @sdp: Pointer to GFS2 superblock
37 *
38 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
39 * See gfs2_glockd()
40 */
41
42int gfs2_scand(void *data)
43{
44 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
45 unsigned long t;
46
47 while (!kthread_should_stop()) {
48 gfs2_scand_internal(sdp);
49 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
50 schedule_timeout_interruptible(t);
51 }
52
53 return 0;
54}
55
56/**
57 * gfs2_glockd - Reclaim unused glock structures
58 * @sdp: Pointer to GFS2 superblock
59 *
60 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
61 * Number of daemons can be set by user, with num_glockd mount option.
62 */
63
64int gfs2_glockd(void *data)
65{
66 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
67 DECLARE_WAITQUEUE(wait_chan, current);
68
69 while (!kthread_should_stop()) {
70 while (atomic_read(&sdp->sd_reclaim_count))
71 gfs2_reclaim_glock(sdp);
72
73 set_current_state(TASK_INTERRUPTIBLE);
74 add_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
75 if (!atomic_read(&sdp->sd_reclaim_count) &&
76 !kthread_should_stop())
77 schedule();
78 remove_wait_queue(&sdp->sd_reclaim_wq, &wait_chan);
79 set_current_state(TASK_RUNNING);
80 }
81
82 return 0;
83}
84
85/**
86 * gfs2_recoverd - Recover dead machine's journals
87 * @sdp: Pointer to GFS2 superblock
88 *
89 */
90
91int gfs2_recoverd(void *data)
92{
93 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
94 unsigned long t;
95
96 while (!kthread_should_stop()) {
97 gfs2_check_journals(sdp);
98 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
99 schedule_timeout_interruptible(t);
100 }
101
102 return 0;
103}
104
105/**
106 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
107 * @sdp: Pointer to GFS2 superblock
108 *
109 * Also, periodically check to make sure that we're using the most recent
110 * journal index.
111 */
112
113int gfs2_logd(void *data)
114{
115 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
116 struct gfs2_holder ji_gh;
117 unsigned long t;
118
119 while (!kthread_should_stop()) {
120 /* Advance the log tail */
121
122 t = sdp->sd_log_flush_time +
123 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
124
125 gfs2_ail1_empty(sdp, DIO_ALL);
126
127 if (time_after_eq(jiffies, t)) {
128 gfs2_log_flush(sdp);
129 sdp->sd_log_flush_time = jiffies;
130 }
131
132 /* Check for latest journal index */
133
134 t = sdp->sd_jindex_refresh_time +
135 gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
136
137 if (time_after_eq(jiffies, t)) {
138 if (!gfs2_jindex_hold(sdp, &ji_gh))
139 gfs2_glock_dq_uninit(&ji_gh);
140 sdp->sd_jindex_refresh_time = jiffies;
141 }
142
143 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
144 schedule_timeout_interruptible(t);
145 }
146
147 return 0;
148}
149
150/**
151 * gfs2_quotad - Write cached quota changes into the quota file
152 * @sdp: Pointer to GFS2 superblock
153 *
154 */
155
156int gfs2_quotad(void *data)
157{
158 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
159 unsigned long t;
160 int error;
161
162 while (!kthread_should_stop()) {
163 /* Update the master statfs file */
164
165 t = sdp->sd_statfs_sync_time +
166 gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
167
168 if (time_after_eq(jiffies, t)) {
169 error = gfs2_statfs_sync(sdp);
170 if (error &&
171 error != -EROFS &&
172 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
173 fs_err(sdp, "quotad: (1) error=%d\n", error);
174 sdp->sd_statfs_sync_time = jiffies;
175 }
176
177 /* Update quota file */
178
179 t = sdp->sd_quota_sync_time +
180 gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
181
182 if (time_after_eq(jiffies, t)) {
183 error = gfs2_quota_sync(sdp);
184 if (error &&
185 error != -EROFS &&
186 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
187 fs_err(sdp, "quotad: (2) error=%d\n", error);
188 sdp->sd_quota_sync_time = jiffies;
189 }
190
191 gfs2_quota_scan(sdp);
192
193 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
194 schedule_timeout_interruptible(t);
195 }
196
197 return 0;
198}
199
200/**
201 * gfs2_inoded - Deallocate unlinked inodes
202 * @sdp: Pointer to GFS2 superblock
203 *
204 */
205
206int gfs2_inoded(void *data)
207{
208 struct gfs2_sbd *sdp = (struct gfs2_sbd *)data;
209 unsigned long t;
210 int error;
211
212 while (!kthread_should_stop()) {
213 error = gfs2_unlinked_dealloc(sdp);
214 if (error &&
215 error != -EROFS &&
216 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
217 fs_err(sdp, "inoded: error = %d\n", error);
218
219 t = gfs2_tune_get(sdp, gt_inoded_secs) * HZ;
220 schedule_timeout_interruptible(t);
221 }
222
223 return 0;
224}
225
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
new file mode 100644
index 000000000000..a27fdeda5fbb
--- /dev/null
+++ b/fs/gfs2/daemon.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__

/*
 * Daemon thread entry points.  Each takes the struct gfs2_sbd pointer as
 * its void * argument and returns an int.
 */
int gfs2_scand(void *data);
int gfs2_glockd(void *data);
int gfs2_recoverd(void *data);
int gfs2_logd(void *data);
int gfs2_quotad(void *data);
int gfs2_inoded(void *data);

#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
new file mode 100644
index 000000000000..56683788a6cf
--- /dev/null
+++ b/fs/gfs2/dir.c
@@ -0,0 +1,2366 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11* Implements Extendible Hashing as described in:
12* "Extendible Hashing" by Fagin, et al in
13* __ACM Trans. on Database Systems__, Sept 1979.
14*
15*
16* Here's the layout of dirents which is essentially the same as that of ext2
17* within a single block. The field de_name_len is the number of bytes
18* actually required for the name (no null terminator). The field de_rec_len
19* is the number of bytes allocated to the dirent. The offset of the next
20* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21* deleted, the preceding dirent inherits its allocated space, ie
22* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23* by adding de_rec_len to the current dirent, this essentially causes the
24* deleted dirent to get jumped over when iterating through all the dirents.
25*
26* When deleting the first dirent in a block, there is no previous dirent so
27* the field de_ino is set to zero to designate it as deleted. When allocating
28* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30* dirent is allocated. Otherwise it must go through all the 'used' dirents
31* searching for one in which the amount of total space minus the amount of
32* used space will provide enough space for the new dirent.
33*
34* There are two types of blocks in which dirents reside. In a stuffed dinode,
35* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37* beginning of the leaf block. The dirents reside in leaves when
38*
39* dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40*
41* Otherwise, the dirents are "linear", within a single stuffed dinode block.
42*
43* When the dirents are in leaves, the actual contents of the directory file are
44* used as an array of 64-bit block pointers pointing to the leaf blocks. The
45* dirents are NOT in the directory file itself. There can be more than one block
46* pointer in the array that points to the same leaf. In fact, when a directory
47* is first converted from linear to exhash, all of the pointers point to the
48* same leaf.
49*
50* When a leaf is completely full, the size of the hash table can be
51* doubled unless it is already at the maximum size which is hard coded into
52* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53* but never before the maximum hash table size has been reached.
54*/
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/completion.h>
60#include <linux/buffer_head.h>
61#include <linux/sort.h>
62#include <asm/semaphore.h>
63
64#include "gfs2.h"
65#include "dir.h"
66#include "glock.h"
67#include "inode.h"
68#include "meta_io.h"
69#include "quota.h"
70#include "rgrp.h"
71#include "trans.h"
72#include "bmap.h"
73
#define IS_LEAF		1	/* Hashed (leaf) directory */
#define IS_DINODE	2	/* Linear (stuffed dinode block) directory */

/*
 * Conversion between a 32-bit hash and a directory offset.  The enabled
 * variant shifts by one bit: the offset is the hash shifted right, and the
 * hash is the offset shifted left and truncated to 32 bits.  The disabled
 * variant is the plain identity mapping.
 */
#if 1
#define gfs2_disk_hash2offset(h)	((uint64_t)(h) >> 1)
#define gfs2_dir_offset2hash(p)		((uint32_t)((uint64_t)(p) << 1))
#else
#define gfs2_disk_hash2offset(h)	((uint64_t)(h))
#define gfs2_dir_offset2hash(p)		((uint32_t)((uint64_t)(p)))
#endif
84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip,
86 uint32_t index, uint32_t len, uint64_t leaf_no,
87 void *data);
88
89int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
90 struct buffer_head **bhp)
91{
92 struct buffer_head *bh;
93 int error = 0;
94
95 if (new) {
96 bh = gfs2_meta_new(ip->i_gl, block);
97 gfs2_trans_add_bh(ip->i_gl, bh, 1);
98 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
99 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
100 } else {
101 error = gfs2_meta_read(ip->i_gl, block, DIO_START | DIO_WAIT,
102 &bh);
103 if (error)
104 return error;
105 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
106 brelse(bh);
107 return -EIO;
108 }
109 }
110
111 *bhp = bh;
112 return 0;
113}
114
115
116
117static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
118 unsigned int offset, unsigned int size)
119
120{
121 struct buffer_head *dibh;
122 int error;
123
124 error = gfs2_meta_inode_buffer(ip, &dibh);
125 if (error)
126 return error;
127
128 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
129 memcpy(dibh->b_data + offset + sizeof(struct gfs2_inode), buf, size);
130 if (ip->i_di.di_size < offset + size)
131 ip->i_di.di_size = offset + size;
132 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
133 gfs2_dinode_out(&ip->i_di, dibh->b_data);
134
135 brelse(dibh);
136
137 return size;
138}
139
140
141
142/**
143 * gfs2_dir_write_data - Write directory information to the inode
144 * @ip: The GFS2 inode
145 * @buf: The buffer containing information to be written
146 * @offset: The file offset to start writing at
147 * @size: The amount of data to write
148 *
149 * Returns: The number of bytes correctly written or error code
150 */
151static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
152 uint64_t offset, unsigned int size)
153{
154 struct gfs2_sbd *sdp = ip->i_sbd;
155 struct buffer_head *dibh;
156 uint64_t lblock, dblock;
157 uint32_t extlen = 0;
158 unsigned int o;
159 int copied = 0;
160 int error = 0;
161
162 if (!size)
163 return 0;
164
165 if (gfs2_is_stuffed(ip) &&
166 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
167 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
168 size);
169
170 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
171 return -EINVAL;
172
173 if (gfs2_is_stuffed(ip)) {
174 error = gfs2_unstuff_dinode(ip, NULL, NULL);
175 if (error)
176 return error;
177 }
178
179 lblock = offset;
180 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
181
182 while (copied < size) {
183 unsigned int amount;
184 struct buffer_head *bh;
185 int new;
186
187 amount = size - copied;
188 if (amount > sdp->sd_sb.sb_bsize - o)
189 amount = sdp->sd_sb.sb_bsize - o;
190
191 if (!extlen) {
192 new = 1;
193 error = gfs2_block_map(ip, lblock, &new, &dblock,
194 &extlen);
195 if (error)
196 goto fail;
197 error = -EIO;
198 if (gfs2_assert_withdraw(sdp, dblock))
199 goto fail;
200 }
201
202 error = gfs2_dir_get_buffer(ip, dblock,
203 (amount == sdp->sd_jbsize) ?
204 1 : new, &bh);
205 if (error)
206 goto fail;
207
208 gfs2_trans_add_bh(ip->i_gl, bh, 1);
209 memcpy(bh->b_data + o, buf, amount);
210 brelse(bh);
211 if (error)
212 goto fail;
213
214 copied += amount;
215 lblock++;
216 dblock++;
217 extlen--;
218
219 o = sizeof(struct gfs2_meta_header);
220 }
221
222out:
223 error = gfs2_meta_inode_buffer(ip, &dibh);
224 if (error)
225 return error;
226
227 if (ip->i_di.di_size < offset + copied)
228 ip->i_di.di_size = offset + copied;
229 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
230
231 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
232 gfs2_dinode_out(&ip->i_di, dibh->b_data);
233 brelse(dibh);
234
235 return copied;
236fail:
237 if (copied)
238 goto out;
239 return error;
240}
241
242static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
243 unsigned int offset, unsigned int size)
244{
245 struct buffer_head *dibh;
246 int error;
247
248 error = gfs2_meta_inode_buffer(ip, &dibh);
249 if (!error) {
250 offset += sizeof(struct gfs2_dinode);
251 memcpy(buf, dibh->b_data + offset, size);
252 brelse(dibh);
253 }
254
255 return (error) ? error : size;
256}
257
258
259/**
260 * gfs2_dir_read_data - Read a data from a directory inode
261 * @ip: The GFS2 Inode
262 * @buf: The buffer to place result into
263 * @offset: File offset to begin jdata_readng from
264 * @size: Amount of data to transfer
265 *
266 * Returns: The amount of data actually copied or the error
267 */
268static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf,
269 uint64_t offset, unsigned int size)
270{
271 struct gfs2_sbd *sdp = ip->i_sbd;
272 uint64_t lblock, dblock;
273 uint32_t extlen = 0;
274 unsigned int o;
275 int copied = 0;
276 int error = 0;
277
278 if (offset >= ip->i_di.di_size)
279 return 0;
280
281 if ((offset + size) > ip->i_di.di_size)
282 size = ip->i_di.di_size - offset;
283
284 if (!size)
285 return 0;
286
287 if (gfs2_is_stuffed(ip))
288 return gfs2_dir_read_stuffed(ip, buf, (unsigned int)offset,
289 size);
290
291 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
292 return -EINVAL;
293
294 lblock = offset;
295 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
296
297 while (copied < size) {
298 unsigned int amount;
299 struct buffer_head *bh;
300 int new;
301
302 amount = size - copied;
303 if (amount > sdp->sd_sb.sb_bsize - o)
304 amount = sdp->sd_sb.sb_bsize - o;
305
306 if (!extlen) {
307 new = 0;
308 error = gfs2_block_map(ip, lblock, &new, &dblock,
309 &extlen);
310 if (error)
311 goto fail;
312 }
313
314 if (extlen > 1)
315 gfs2_meta_ra(ip->i_gl, dblock, extlen);
316
317 if (dblock) {
318 error = gfs2_dir_get_buffer(ip, dblock, new, &bh);
319 if (error)
320 goto fail;
321 dblock++;
322 extlen--;
323 } else
324 bh = NULL;
325
326 memcpy(buf, bh->b_data + o, amount);
327 brelse(bh);
328 if (error)
329 goto fail;
330
331 copied += amount;
332 lblock++;
333
334 o = sizeof(struct gfs2_meta_header);
335 }
336
337 return copied;
338fail:
339 return (copied) ? copied : error;
340}
341
342/**
343 * int gfs2_filecmp - Compare two filenames
344 * @file1: The first filename
345 * @file2: The second filename
346 * @len_of_file2: The length of the second file
347 *
348 * This routine compares two filenames and returns 1 if they are equal.
349 *
350 * Returns: 1 if the files are the same, otherwise 0.
351 */
352
353int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2)
354{
355 if (file1->len != len_of_file2)
356 return 0;
357 if (memcmp(file1->name, file2, file1->len))
358 return 0;
359 return 1;
360}
361
362/**
363 * dirent_first - Return the first dirent
364 * @dip: the directory
365 * @bh: The buffer
366 * @dent: Pointer to list of dirents
367 *
368 * return first dirent whether bh points to leaf or stuffed dinode
369 *
370 * Returns: IS_LEAF, IS_DINODE, or -errno
371 */
372
373static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
374 struct gfs2_dirent **dent)
375{
376 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
377
378 if (be16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
379 if (gfs2_meta_check(dip->i_sbd, bh))
380 return -EIO;
381 *dent = (struct gfs2_dirent *)(bh->b_data +
382 sizeof(struct gfs2_leaf));
383 return IS_LEAF;
384 } else {
385 if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
386 return -EIO;
387 *dent = (struct gfs2_dirent *)(bh->b_data +
388 sizeof(struct gfs2_dinode));
389 return IS_DINODE;
390 }
391}
392
393/**
394 * dirent_next - Next dirent
395 * @dip: the directory
396 * @bh: The buffer
397 * @dent: Pointer to list of dirents
398 *
399 * Returns: 0 on success, error code otherwise
400 */
401
402static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
403 struct gfs2_dirent **dent)
404{
405 struct gfs2_dirent *tmp, *cur;
406 char *bh_end;
407 uint16_t cur_rec_len;
408
409 cur = *dent;
410 bh_end = bh->b_data + bh->b_size;
411 cur_rec_len = be16_to_cpu(cur->de_rec_len);
412
413 if ((char *)cur + cur_rec_len >= bh_end) {
414 if ((char *)cur + cur_rec_len > bh_end) {
415 gfs2_consist_inode(dip);
416 return -EIO;
417 }
418 return -ENOENT;
419 }
420
421 tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
422
423 if ((char *)tmp + be16_to_cpu(tmp->de_rec_len) > bh_end) {
424 gfs2_consist_inode(dip);
425 return -EIO;
426 }
427
428 if (cur_rec_len == 0) {
429 gfs2_consist_inode(dip);
430 return -EIO;
431 }
432
433 /* Only the first dent could ever have de_inum.no_addr == 0 */
434 if (!tmp->de_inum.no_addr) {
435 gfs2_consist_inode(dip);
436 return -EIO;
437 }
438
439 *dent = tmp;
440
441 return 0;
442}
443
444/**
445 * dirent_del - Delete a dirent
446 * @dip: The GFS2 inode
447 * @bh: The buffer
448 * @prev: The previous dirent
449 * @cur: The current dirent
450 *
451 */
452
453static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
454 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
455{
456 uint16_t cur_rec_len, prev_rec_len;
457
458 if (!cur->de_inum.no_addr) {
459 gfs2_consist_inode(dip);
460 return;
461 }
462
463 gfs2_trans_add_bh(dip->i_gl, bh, 1);
464
465 /* If there is no prev entry, this is the first entry in the block.
466 The de_rec_len is already as big as it needs to be. Just zero
467 out the inode number and return. */
468
469 if (!prev) {
470 cur->de_inum.no_addr = 0; /* No endianess worries */
471 return;
472 }
473
474 /* Combine this dentry with the previous one. */
475
476 prev_rec_len = be16_to_cpu(prev->de_rec_len);
477 cur_rec_len = be16_to_cpu(cur->de_rec_len);
478
479 if ((char *)prev + prev_rec_len != (char *)cur)
480 gfs2_consist_inode(dip);
481 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
482 gfs2_consist_inode(dip);
483
484 prev_rec_len += cur_rec_len;
485 prev->de_rec_len = cpu_to_be16(prev_rec_len);
486}
487
488/**
489 * gfs2_dirent_alloc - Allocate a directory entry
490 * @dip: The GFS2 inode
491 * @bh: The buffer
492 * @name_len: The length of the name
493 * @dent_out: Pointer to list of dirents
494 *
495 * Returns: 0 on success, error code otherwise
496 */
497
498int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
499 int name_len, struct gfs2_dirent **dent_out)
500{
501 struct gfs2_dirent *dent, *new;
502 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
503 unsigned int entries = 0, offset = 0;
504 int type;
505
506 type = dirent_first(dip, bh, &dent);
507 if (type < 0)
508 return type;
509
510 if (type == IS_LEAF) {
511 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
512 entries = be16_to_cpu(leaf->lf_entries);
513 offset = sizeof(struct gfs2_leaf);
514 } else {
515 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
516 entries = be32_to_cpu(dinode->di_entries);
517 offset = sizeof(struct gfs2_dinode);
518 }
519
520 if (!entries) {
521 if (dent->de_inum.no_addr) {
522 gfs2_consist_inode(dip);
523 return -EIO;
524 }
525
526 gfs2_trans_add_bh(dip->i_gl, bh, 1);
527
528 dent->de_rec_len = cpu_to_be16(bh->b_size - offset);
529 dent->de_name_len = cpu_to_be16(name_len);
530
531 *dent_out = dent;
532 return 0;
533 }
534
535 do {
536 uint16_t cur_rec_len;
537 uint16_t cur_name_len;
538
539 cur_rec_len = be16_to_cpu(dent->de_rec_len);
540 cur_name_len = be16_to_cpu(dent->de_name_len);
541
542 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
543 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) {
544 gfs2_trans_add_bh(dip->i_gl, bh, 1);
545
546 if (dent->de_inum.no_addr) {
547 new = (struct gfs2_dirent *)((char *)dent +
548 GFS2_DIRENT_SIZE(cur_name_len));
549 memset(new, 0, sizeof(struct gfs2_dirent));
550
551 new->de_rec_len = cpu_to_be16(cur_rec_len -
552 GFS2_DIRENT_SIZE(cur_name_len));
553 new->de_name_len = cpu_to_be16(name_len);
554
555 dent->de_rec_len = cpu_to_be16(cur_rec_len -
556 be16_to_cpu(new->de_rec_len));
557
558 *dent_out = new;
559 return 0;
560 }
561
562 dent->de_name_len = cpu_to_be16(name_len);
563
564 *dent_out = dent;
565 return 0;
566 }
567 } while (dirent_next(dip, bh, &dent) == 0);
568
569 return -ENOSPC;
570}
571
572/**
573 * dirent_fits - See if we can fit a entry in this buffer
574 * @dip: The GFS2 inode
575 * @bh: The buffer
576 * @name_len: The length of the name
577 *
578 * Returns: 1 if it can fit, 0 otherwise
579 */
580
581static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh,
582 int name_len)
583{
584 struct gfs2_dirent *dent;
585 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
586 unsigned int entries = 0;
587 int type;
588
589 type = dirent_first(dip, bh, &dent);
590 if (type < 0)
591 return type;
592
593 if (type == IS_LEAF) {
594 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
595 entries = be16_to_cpu(leaf->lf_entries);
596 } else {
597 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
598 entries = be32_to_cpu(dinode->di_entries);
599 }
600
601 if (!entries)
602 return 1;
603
604 do {
605 uint16_t cur_rec_len;
606 uint32_t cur_name_len;
607
608 cur_rec_len = be16_to_cpu(dent->de_rec_len);
609 cur_name_len = be16_to_cpu(dent->de_name_len);
610
611 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
612 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len))
613 return 1;
614 } while (dirent_next(dip, bh, &dent) == 0);
615
616 return 0;
617}
618
619static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh,
620 struct qstr *filename, struct gfs2_dirent **dent_out,
621 struct gfs2_dirent **dent_prev)
622{
623 uint32_t hash;
624 struct gfs2_dirent *dent, *prev = NULL;
625 unsigned int entries = 0;
626 int type;
627
628 type = dirent_first(dip, bh, &dent);
629 if (type < 0)
630 return type;
631
632 if (type == IS_LEAF) {
633 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
634 entries = be16_to_cpu(leaf->lf_entries);
635 } else if (type == IS_DINODE) {
636 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
637 entries = be32_to_cpu(dinode->di_entries);
638 }
639
640 hash = gfs2_disk_hash(filename->name, filename->len);
641
642 do {
643 if (!dent->de_inum.no_addr) {
644 prev = dent;
645 continue;
646 }
647
648 if (be32_to_cpu(dent->de_hash) == hash &&
649 gfs2_filecmp(filename, (char *)(dent + 1),
650 be16_to_cpu(dent->de_name_len))) {
651 *dent_out = dent;
652 if (dent_prev)
653 *dent_prev = prev;
654
655 return 0;
656 }
657
658 prev = dent;
659 } while (dirent_next(dip, bh, &dent) == 0);
660
661 return -ENOENT;
662}
663
664static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
665 struct buffer_head **bhp)
666{
667 int error;
668
669 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
670 if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
671 error = -EIO;
672
673 return error;
674}
675
676/**
677 * get_leaf_nr - Get a leaf number associated with the index
678 * @dip: The GFS2 inode
679 * @index:
680 * @leaf_out:
681 *
682 * Returns: 0 on success, error code otherwise
683 */
684
685static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
686 uint64_t *leaf_out)
687{
688 uint64_t leaf_no;
689 int error;
690
691 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
692 index * sizeof(uint64_t),
693 sizeof(uint64_t));
694 if (error != sizeof(uint64_t))
695 return (error < 0) ? error : -EIO;
696
697 *leaf_out = be64_to_cpu(leaf_no);
698
699 return 0;
700}
701
702static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
703 struct buffer_head **bh_out)
704{
705 uint64_t leaf_no;
706 int error;
707
708 error = get_leaf_nr(dip, index, &leaf_no);
709 if (!error)
710 error = get_leaf(dip, leaf_no, bh_out);
711
712 return error;
713}
714
715static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in,
716 struct buffer_head **bh_out)
717{
718 struct gfs2_leaf *leaf;
719 int error;
720
721 leaf = (struct gfs2_leaf *)bh_in->b_data;
722
723 if (!leaf->lf_next)
724 error = -ENOENT;
725 else
726 error = get_leaf(dip, be64_to_cpu(leaf->lf_next), bh_out);
727
728 return error;
729}
730
731static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename,
732 struct gfs2_dirent **dent_out,
733 struct gfs2_dirent **dent_prev,
734 struct buffer_head **bh_out)
735{
736 struct buffer_head *bh = NULL, *bh_next;
737 uint32_t hsize, index;
738 uint32_t hash;
739 int error;
740
741 hsize = 1 << dip->i_di.di_depth;
742 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
743 gfs2_consist_inode(dip);
744 return -EIO;
745 }
746
747 /* Figure out the address of the leaf node. */
748
749 hash = gfs2_disk_hash(filename->name, filename->len);
750 index = hash >> (32 - dip->i_di.di_depth);
751
752 error = get_first_leaf(dip, index, &bh_next);
753 if (error)
754 return error;
755
756 /* Find the entry */
757
758 do {
759 brelse(bh);
760
761 bh = bh_next;
762
763 error = leaf_search(dip, bh, filename, dent_out, dent_prev);
764 switch (error) {
765 case 0:
766 *bh_out = bh;
767 return 0;
768
769 case -ENOENT:
770 break;
771
772 default:
773 brelse(bh);
774 return error;
775 }
776
777 error = get_next_leaf(dip, bh, &bh_next);
778 }
779 while (!error);
780
781 brelse(bh);
782
783 return error;
784}
785
786/**
787 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
788 * @dip: The GFS2 inode
789 *
790 * Returns: 0 on success, error code otherwise
791 */
792
793static int dir_make_exhash(struct gfs2_inode *dip)
794{
795 struct gfs2_sbd *sdp = dip->i_sbd;
796 struct gfs2_dirent *dent;
797 struct buffer_head *bh, *dibh;
798 struct gfs2_leaf *leaf;
799 int y;
800 uint32_t x;
801 uint64_t *lp, bn;
802 int error;
803
804 error = gfs2_meta_inode_buffer(dip, &dibh);
805 if (error)
806 return error;
807
808 /* Allocate a new block for the first leaf node */
809
810 bn = gfs2_alloc_meta(dip);
811
812 /* Turn over a new leaf */
813
814 bh = gfs2_meta_new(dip->i_gl, bn);
815 gfs2_trans_add_bh(dip->i_gl, bh, 1);
816 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
817 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
818
819 /* Fill in the leaf structure */
820
821 leaf = (struct gfs2_leaf *)bh->b_data;
822
823 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
824
825 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
826 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
827
828 /* Copy dirents */
829
830 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
831 sizeof(struct gfs2_dinode));
832
833 /* Find last entry */
834
835 x = 0;
836 dirent_first(dip, bh, &dent);
837
838 do {
839 if (!dent->de_inum.no_addr)
840 continue;
841 if (++x == dip->i_di.di_entries)
842 break;
843 }
844 while (dirent_next(dip, bh, &dent) == 0);
845
846 /* Adjust the last dirent's record length
847 (Remember that dent still points to the last entry.) */
848
849 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
850 sizeof(struct gfs2_dinode) -
851 sizeof(struct gfs2_leaf));
852
853 brelse(bh);
854
855 /* We're done with the new leaf block, now setup the new
856 hash table. */
857
858 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
859 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
860
861 lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
862
863 for (x = sdp->sd_hash_ptrs; x--; lp++)
864 *lp = cpu_to_be64(bn);
865
866 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
867 dip->i_di.di_blocks++;
868 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
869 dip->i_di.di_payload_format = 0;
870
871 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
872 dip->i_di.di_depth = y;
873
874 gfs2_dinode_out(&dip->i_di, dibh->b_data);
875
876 brelse(dibh);
877
878 return 0;
879}
880
881/**
882 * dir_split_leaf - Split a leaf block into two
883 * @dip: The GFS2 inode
884 * @index:
885 * @leaf_no:
886 *
887 * Returns: 0 on success, error code on failure
888 */
889
890static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index,
891 uint64_t leaf_no)
892{
893 struct buffer_head *nbh, *obh, *dibh;
894 struct gfs2_leaf *nleaf, *oleaf;
895 struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
896 uint32_t start, len, half_len, divider;
897 uint64_t bn, *lp;
898 uint32_t name_len;
899 int x, moved = 0;
900 int error;
901
902 /* Allocate the new leaf block */
903
904 bn = gfs2_alloc_meta(dip);
905
906 /* Get the new leaf block */
907
908 nbh = gfs2_meta_new(dip->i_gl, bn);
909 gfs2_trans_add_bh(dip->i_gl, nbh, 1);
910 gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
911 gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header));
912
913 nleaf = (struct gfs2_leaf *)nbh->b_data;
914
915 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
916
917 /* Get the old leaf block */
918
919 error = get_leaf(dip, leaf_no, &obh);
920 if (error)
921 goto fail;
922
923 gfs2_trans_add_bh(dip->i_gl, obh, 1);
924
925 oleaf = (struct gfs2_leaf *)obh->b_data;
926
927 /* Compute the start and len of leaf pointers in the hash table. */
928
929 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
930 half_len = len >> 1;
931 if (!half_len) {
932 gfs2_consist_inode(dip);
933 error = -EIO;
934 goto fail_brelse;
935 }
936
937 start = (index & ~(len - 1));
938
939 /* Change the pointers.
940 Don't bother distinguishing stuffed from non-stuffed.
941 This code is complicated enough already. */
942
943 lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL);
944
945 error = gfs2_dir_read_data(dip, (char *)lp, start * sizeof(uint64_t),
946 half_len * sizeof(uint64_t));
947 if (error != half_len * sizeof(uint64_t)) {
948 if (error >= 0)
949 error = -EIO;
950 goto fail_lpfree;
951 }
952
953 /* Change the pointers */
954
955 for (x = 0; x < half_len; x++)
956 lp[x] = cpu_to_be64(bn);
957
958 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(uint64_t),
959 half_len * sizeof(uint64_t));
960 if (error != half_len * sizeof(uint64_t)) {
961 if (error >= 0)
962 error = -EIO;
963 goto fail_lpfree;
964 }
965
966 kfree(lp);
967
968 /* Compute the divider */
969
970 divider = (start + half_len) << (32 - dip->i_di.di_depth);
971
972 /* Copy the entries */
973
974 dirent_first(dip, obh, &dent);
975
976 do {
977 next = dent;
978 if (dirent_next(dip, obh, &next))
979 next = NULL;
980
981 if (dent->de_inum.no_addr &&
982 be32_to_cpu(dent->de_hash) < divider) {
983 name_len = be16_to_cpu(dent->de_name_len);
984
985 gfs2_dirent_alloc(dip, nbh, name_len, &new);
986
987 new->de_inum = dent->de_inum; /* No endian worries */
988 new->de_hash = dent->de_hash; /* No endian worries */
989 new->de_type = dent->de_type; /* No endian worries */
990 memcpy((char *)(new + 1), (char *)(dent + 1),
991 name_len);
992
993 nleaf->lf_entries = be16_to_cpu(nleaf->lf_entries)+1;
994 nleaf->lf_entries = cpu_to_be16(nleaf->lf_entries);
995
996 dirent_del(dip, obh, prev, dent);
997
998 if (!oleaf->lf_entries)
999 gfs2_consist_inode(dip);
1000 oleaf->lf_entries = be16_to_cpu(oleaf->lf_entries)-1;
1001 oleaf->lf_entries = cpu_to_be16(oleaf->lf_entries);
1002
1003 if (!prev)
1004 prev = dent;
1005
1006 moved = 1;
1007 } else
1008 prev = dent;
1009
1010 dent = next;
1011 }
1012 while (dent);
1013
1014 /* If none of the entries got moved into the new leaf,
1015 artificially fill in the first entry. */
1016
1017 if (!moved) {
1018 gfs2_dirent_alloc(dip, nbh, 0, &new);
1019 new->de_inum.no_addr = 0;
1020 }
1021
1022 oleaf->lf_depth = be16_to_cpu(oleaf->lf_depth) + 1;
1023 oleaf->lf_depth = cpu_to_be16(oleaf->lf_depth);
1024 nleaf->lf_depth = oleaf->lf_depth;
1025
1026 error = gfs2_meta_inode_buffer(dip, &dibh);
1027 if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
1028 dip->i_di.di_blocks++;
1029 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1030 brelse(dibh);
1031 }
1032
1033 brelse(obh);
1034 brelse(nbh);
1035
1036 return error;
1037
1038 fail_lpfree:
1039 kfree(lp);
1040
1041 fail_brelse:
1042 brelse(obh);
1043
1044 fail:
1045 brelse(nbh);
1046 return error;
1047}
1048
1049/**
1050 * dir_double_exhash - Double size of ExHash table
1051 * @dip: The GFS2 dinode
1052 *
1053 * Returns: 0 on success, error code on failure
1054 */
1055
1056static int dir_double_exhash(struct gfs2_inode *dip)
1057{
1058 struct gfs2_sbd *sdp = dip->i_sbd;
1059 struct buffer_head *dibh;
1060 uint32_t hsize;
1061 uint64_t *buf;
1062 uint64_t *from, *to;
1063 uint64_t block;
1064 int x;
1065 int error = 0;
1066
1067 hsize = 1 << dip->i_di.di_depth;
1068 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1069 gfs2_consist_inode(dip);
1070 return -EIO;
1071 }
1072
1073 /* Allocate both the "from" and "to" buffers in one big chunk */
1074
1075 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
1076
1077 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1078 error = gfs2_dir_read_data(dip, (char *)buf,
1079 block * sdp->sd_hash_bsize,
1080 sdp->sd_hash_bsize);
1081 if (error != sdp->sd_hash_bsize) {
1082 if (error >= 0)
1083 error = -EIO;
1084 goto fail;
1085 }
1086
1087 from = buf;
1088 to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
1089
1090 for (x = sdp->sd_hash_ptrs; x--; from++) {
1091 *to++ = *from; /* No endianess worries */
1092 *to++ = *from;
1093 }
1094
1095 error = gfs2_dir_write_data(dip,
1096 (char *)buf + sdp->sd_hash_bsize,
1097 block * sdp->sd_sb.sb_bsize,
1098 sdp->sd_sb.sb_bsize);
1099 if (error != sdp->sd_sb.sb_bsize) {
1100 if (error >= 0)
1101 error = -EIO;
1102 goto fail;
1103 }
1104 }
1105
1106 kfree(buf);
1107
1108 error = gfs2_meta_inode_buffer(dip, &dibh);
1109 if (!gfs2_assert_withdraw(sdp, !error)) {
1110 dip->i_di.di_depth++;
1111 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1112 brelse(dibh);
1113 }
1114
1115 return error;
1116
1117 fail:
1118 kfree(buf);
1119
1120 return error;
1121}
1122
1123/**
1124 * compare_dents - compare directory entries by hash value
1125 * @a: first dent
1126 * @b: second dent
1127 *
1128 * When comparing the hash entries of @a to @b:
1129 * gt: returns 1
1130 * lt: returns -1
1131 * eq: returns 0
1132 */
1133
1134static int compare_dents(const void *a, const void *b)
1135{
1136 struct gfs2_dirent *dent_a, *dent_b;
1137 uint32_t hash_a, hash_b;
1138 int ret = 0;
1139
1140 dent_a = *(struct gfs2_dirent **)a;
1141 hash_a = dent_a->de_hash;
1142 hash_a = be32_to_cpu(hash_a);
1143
1144 dent_b = *(struct gfs2_dirent **)b;
1145 hash_b = dent_b->de_hash;
1146 hash_b = be32_to_cpu(hash_b);
1147
1148 if (hash_a > hash_b)
1149 ret = 1;
1150 else if (hash_a < hash_b)
1151 ret = -1;
1152 else {
1153 unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1154 unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1155
1156 if (len_a > len_b)
1157 ret = 1;
1158 else if (len_a < len_b)
1159 ret = -1;
1160 else
1161 ret = memcmp((char *)(dent_a + 1),
1162 (char *)(dent_b + 1),
1163 len_a);
1164 }
1165
1166 return ret;
1167}
1168
1169/**
1170 * do_filldir_main - read out directory entries
1171 * @dip: The GFS2 inode
1172 * @offset: The offset in the file to read from
1173 * @opaque: opaque data to pass to filldir
1174 * @filldir: The function to pass entries to
1175 * @darr: an array of struct gfs2_dirent pointers to read
1176 * @entries: the number of entries in darr
1177 * @copied: pointer to int that's non-zero if a entry has been copied out
1178 *
1179 * Jump through some hoops to make sure that if there are hash collsions,
1180 * they are read out at the beginning of a buffer. We want to minimize
1181 * the possibility that they will fall into different readdir buffers or
1182 * that someone will want to seek to that location.
1183 *
1184 * Returns: errno, >0 on exception from filldir
1185 */
1186
1187static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
1188 void *opaque, gfs2_filldir_t filldir,
1189 struct gfs2_dirent **darr, uint32_t entries,
1190 int *copied)
1191{
1192 struct gfs2_dirent *dent, *dent_next;
1193 struct gfs2_inum inum;
1194 uint64_t off, off_next;
1195 unsigned int x, y;
1196 int run = 0;
1197 int error = 0;
1198
1199 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1200
1201 dent_next = darr[0];
1202 off_next = be32_to_cpu(dent_next->de_hash);
1203 off_next = gfs2_disk_hash2offset(off_next);
1204
1205 for (x = 0, y = 1; x < entries; x++, y++) {
1206 dent = dent_next;
1207 off = off_next;
1208
1209 if (y < entries) {
1210 dent_next = darr[y];
1211 off_next = be32_to_cpu(dent_next->de_hash);
1212 off_next = gfs2_disk_hash2offset(off_next);
1213
1214 if (off < *offset)
1215 continue;
1216 *offset = off;
1217
1218 if (off_next == off) {
1219 if (*copied && !run)
1220 return 1;
1221 run = 1;
1222 } else
1223 run = 0;
1224 } else {
1225 if (off < *offset)
1226 continue;
1227 *offset = off;
1228 }
1229
1230 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1231
1232 error = filldir(opaque, (char *)(dent + 1),
1233 be16_to_cpu(dent->de_name_len),
1234 off, &inum,
1235 be16_to_cpu(dent->de_type));
1236 if (error)
1237 return 1;
1238
1239 *copied = 1;
1240 }
1241
1242 /* Increment the *offset by one, so the next time we come into the
1243 do_filldir fxn, we get the next entry instead of the last one in the
1244 current leaf */
1245
1246 (*offset)++;
1247
1248 return 0;
1249}
1250
1251/**
1252 * do_filldir_single - Read directory entries out of a single block
1253 * @dip: The GFS2 inode
1254 * @offset: The offset in the file to read from
1255 * @opaque: opaque data to pass to filldir
1256 * @filldir: The function to pass entries to
1257 * @bh: the block
1258 * @entries: the number of entries in the block
1259 * @copied: pointer to int that's non-zero if a entry has been copied out
1260 *
1261 * Returns: errno, >0 on exception from filldir
1262 */
1263
1264static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset,
1265 void *opaque, gfs2_filldir_t filldir,
1266 struct buffer_head *bh, uint32_t entries,
1267 int *copied)
1268{
1269 struct gfs2_dirent **darr;
1270 struct gfs2_dirent *de;
1271 unsigned int e = 0;
1272 int error;
1273
1274 if (!entries)
1275 return 0;
1276
1277 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1278 if (!darr)
1279 return -ENOMEM;
1280
1281 dirent_first(dip, bh, &de);
1282 do {
1283 if (!de->de_inum.no_addr)
1284 continue;
1285 if (e >= entries) {
1286 gfs2_consist_inode(dip);
1287 error = -EIO;
1288 goto out;
1289 }
1290 darr[e++] = de;
1291 }
1292 while (dirent_next(dip, bh, &de) == 0);
1293
1294 if (e != entries) {
1295 gfs2_consist_inode(dip);
1296 error = -EIO;
1297 goto out;
1298 }
1299
1300 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1301 entries, copied);
1302
1303 out:
1304 kfree(darr);
1305
1306 return error;
1307}
1308
1309/**
1310 * do_filldir_multi - Read directory entries out of a linked leaf list
1311 * @dip: The GFS2 inode
1312 * @offset: The offset in the file to read from
1313 * @opaque: opaque data to pass to filldir
1314 * @filldir: The function to pass entries to
1315 * @bh: the first leaf in the list
1316 * @copied: pointer to int that's non-zero if a entry has been copied out
1317 *
1318 * Returns: errno, >0 on exception from filldir
1319 */
1320
1321static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
1322 void *opaque, gfs2_filldir_t filldir,
1323 struct buffer_head *bh, int *copied)
1324{
1325 struct buffer_head **larr = NULL;
1326 struct gfs2_dirent **darr;
1327 struct gfs2_leaf *leaf;
1328 struct buffer_head *tmp_bh;
1329 struct gfs2_dirent *de;
1330 unsigned int entries, e = 0;
1331 unsigned int leaves = 0, l = 0;
1332 unsigned int x;
1333 uint64_t ln;
1334 int error = 0;
1335
1336 /* Count leaves and entries */
1337
1338 leaf = (struct gfs2_leaf *)bh->b_data;
1339 entries = be16_to_cpu(leaf->lf_entries);
1340 ln = leaf->lf_next;
1341
1342 while (ln) {
1343 ln = be64_to_cpu(ln);
1344
1345 error = get_leaf(dip, ln, &tmp_bh);
1346 if (error)
1347 return error;
1348
1349 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1350 if (leaf->lf_entries) {
1351 entries += be16_to_cpu(leaf->lf_entries);
1352 leaves++;
1353 }
1354 ln = leaf->lf_next;
1355
1356 brelse(tmp_bh);
1357 }
1358
1359 if (!entries)
1360 return 0;
1361
1362 if (leaves) {
1363 larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL);
1364 if (!larr)
1365 return -ENOMEM;
1366 }
1367
1368 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1369 if (!darr) {
1370 kfree(larr);
1371 return -ENOMEM;
1372 }
1373
1374 leaf = (struct gfs2_leaf *)bh->b_data;
1375 if (leaf->lf_entries) {
1376 dirent_first(dip, bh, &de);
1377 do {
1378 if (!de->de_inum.no_addr)
1379 continue;
1380 if (e >= entries) {
1381 gfs2_consist_inode(dip);
1382 error = -EIO;
1383 goto out;
1384 }
1385 darr[e++] = de;
1386 }
1387 while (dirent_next(dip, bh, &de) == 0);
1388 }
1389 ln = leaf->lf_next;
1390
1391 while (ln) {
1392 ln = be64_to_cpu(ln);
1393
1394 error = get_leaf(dip, ln, &tmp_bh);
1395 if (error)
1396 goto out;
1397
1398 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1399 if (leaf->lf_entries) {
1400 dirent_first(dip, tmp_bh, &de);
1401 do {
1402 if (!de->de_inum.no_addr)
1403 continue;
1404 if (e >= entries) {
1405 gfs2_consist_inode(dip);
1406 error = -EIO;
1407 goto out;
1408 }
1409 darr[e++] = de;
1410 }
1411 while (dirent_next(dip, tmp_bh, &de) == 0);
1412
1413 larr[l++] = tmp_bh;
1414
1415 ln = leaf->lf_next;
1416 } else {
1417 ln = leaf->lf_next;
1418 brelse(tmp_bh);
1419 }
1420 }
1421
1422 if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) {
1423 error = -EIO;
1424 goto out;
1425 }
1426 if (e != entries) {
1427 gfs2_consist_inode(dip);
1428 error = -EIO;
1429 goto out;
1430 }
1431
1432 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1433 entries, copied);
1434
1435 out:
1436 kfree(darr);
1437 for (x = 0; x < l; x++)
1438 brelse(larr[x]);
1439 kfree(larr);
1440
1441 return error;
1442}
1443
1444/**
1445 * dir_e_search - Search exhash (leaf) dir for inode matching name
1446 * @dip: The GFS2 inode
1447 * @filename: Filename string
1448 * @inode: If non-NULL, function fills with formal inode # and block address
1449 * @type: If non-NULL, function fills with DT_... dinode type
1450 *
1451 * Returns:
1452 */
1453
1454static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename,
1455 struct gfs2_inum *inum, unsigned int *type)
1456{
1457 struct buffer_head *bh;
1458 struct gfs2_dirent *dent;
1459 int error;
1460
1461 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1462 if (error)
1463 return error;
1464
1465 if (inum)
1466 gfs2_inum_in(inum, (char *)&dent->de_inum);
1467 if (type)
1468 *type = be16_to_cpu(dent->de_type);
1469
1470 brelse(bh);
1471
1472 return 0;
1473}
1474
1475static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename,
1476 struct gfs2_inum *inum, unsigned int type)
1477{
1478 struct buffer_head *bh, *nbh, *dibh;
1479 struct gfs2_leaf *leaf, *nleaf;
1480 struct gfs2_dirent *dent;
1481 uint32_t hsize, index;
1482 uint32_t hash;
1483 uint64_t leaf_no, bn;
1484 int error;
1485
1486 restart:
1487 hsize = 1 << dip->i_di.di_depth;
1488 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1489 gfs2_consist_inode(dip);
1490 return -EIO;
1491 }
1492
1493 /* Figure out the address of the leaf node. */
1494
1495 hash = gfs2_disk_hash(filename->name, filename->len);
1496 index = hash >> (32 - dip->i_di.di_depth);
1497
1498 error = get_leaf_nr(dip, index, &leaf_no);
1499 if (error)
1500 return error;
1501
1502 /* Add entry to the leaf */
1503
1504 for (;;) {
1505 error = get_leaf(dip, leaf_no, &bh);
1506 if (error)
1507 return error;
1508
1509 leaf = (struct gfs2_leaf *)bh->b_data;
1510
1511 if (gfs2_dirent_alloc(dip, bh, filename->len, &dent)) {
1512
1513 if (be16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
1514 /* Can we split the leaf? */
1515
1516 brelse(bh);
1517
1518 error = dir_split_leaf(dip, index, leaf_no);
1519 if (error)
1520 return error;
1521
1522 goto restart;
1523
1524 } else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1525 /* Can we double the hash table? */
1526
1527 brelse(bh);
1528
1529 error = dir_double_exhash(dip);
1530 if (error)
1531 return error;
1532
1533 goto restart;
1534
1535 } else if (leaf->lf_next) {
1536 /* Can we try the next leaf in the list? */
1537 leaf_no = be64_to_cpu(leaf->lf_next);
1538 brelse(bh);
1539 continue;
1540
1541 } else {
1542 /* Create a new leaf and add it to the list. */
1543
1544 bn = gfs2_alloc_meta(dip);
1545
1546 nbh = gfs2_meta_new(dip->i_gl, bn);
1547 gfs2_trans_add_bh(dip->i_gl, nbh, 1);
1548 gfs2_metatype_set(nbh,
1549 GFS2_METATYPE_LF,
1550 GFS2_FORMAT_LF);
1551 gfs2_buffer_clear_tail(nbh,
1552 sizeof(struct gfs2_meta_header));
1553
1554 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1555 leaf->lf_next = cpu_to_be64(bn);
1556
1557 nleaf = (struct gfs2_leaf *)nbh->b_data;
1558 nleaf->lf_depth = leaf->lf_depth;
1559 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
1560
1561 gfs2_dirent_alloc(dip, nbh, filename->len,
1562 &dent);
1563
1564 dip->i_di.di_blocks++;
1565
1566 brelse(bh);
1567
1568 bh = nbh;
1569 leaf = nleaf;
1570 }
1571 }
1572
1573 /* If the gfs2_dirent_alloc() succeeded, it pinned the "bh" */
1574
1575 gfs2_inum_out(inum, (char *)&dent->de_inum);
1576 dent->de_hash = cpu_to_be32(hash);
1577 dent->de_type = cpu_to_be16(type);
1578 memcpy((char *)(dent + 1), filename->name, filename->len);
1579
1580 leaf->lf_entries = be16_to_cpu(leaf->lf_entries) + 1;
1581 leaf->lf_entries = cpu_to_be16(leaf->lf_entries);
1582
1583 brelse(bh);
1584
1585 error = gfs2_meta_inode_buffer(dip, &dibh);
1586 if (error)
1587 return error;
1588
1589 dip->i_di.di_entries++;
1590 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1591
1592 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1593 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1594 brelse(dibh);
1595
1596 return 0;
1597 }
1598
1599 return -ENOENT;
1600}
1601
1602static int dir_e_del(struct gfs2_inode *dip, struct qstr *filename)
1603{
1604 struct buffer_head *bh, *dibh;
1605 struct gfs2_dirent *dent, *prev;
1606 struct gfs2_leaf *leaf;
1607 unsigned int entries;
1608 int error;
1609
1610 error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
1611 if (error == -ENOENT) {
1612 gfs2_consist_inode(dip);
1613 return -EIO;
1614 }
1615 if (error)
1616 return error;
1617
1618 dirent_del(dip, bh, prev, dent); /* Pins bh */
1619
1620 leaf = (struct gfs2_leaf *)bh->b_data;
1621 entries = be16_to_cpu(leaf->lf_entries);
1622 if (!entries)
1623 gfs2_consist_inode(dip);
1624 entries--;
1625 leaf->lf_entries = cpu_to_be16(entries);
1626
1627 brelse(bh);
1628
1629 error = gfs2_meta_inode_buffer(dip, &dibh);
1630 if (error)
1631 return error;
1632
1633 if (!dip->i_di.di_entries)
1634 gfs2_consist_inode(dip);
1635 dip->i_di.di_entries--;
1636 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1637
1638 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1639 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1640 brelse(dibh);
1641
1642 return 0;
1643}
1644
1645/**
1646 * dir_e_read - Reads the entries from a directory into a filldir buffer
1647 * @dip: dinode pointer
1648 * @offset: the hash of the last entry read shifted to the right once
1649 * @opaque: buffer for the filldir function to fill
1650 * @filldir: points to the filldir function to use
1651 *
1652 * Returns: errno
1653 */
1654
1655static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1656 gfs2_filldir_t filldir)
1657{
1658 struct gfs2_sbd *sdp = dip->i_sbd;
1659 struct buffer_head *bh;
1660 struct gfs2_leaf leaf;
1661 uint32_t hsize, len;
1662 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1663 uint32_t hash, index;
1664 uint64_t *lp;
1665 int copied = 0;
1666 int error = 0;
1667
1668 hsize = 1 << dip->i_di.di_depth;
1669 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1670 gfs2_consist_inode(dip);
1671 return -EIO;
1672 }
1673
1674 hash = gfs2_dir_offset2hash(*offset);
1675 index = hash >> (32 - dip->i_di.di_depth);
1676
1677 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1678 if (!lp)
1679 return -ENOMEM;
1680
1681 while (index < hsize) {
1682 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1683 ht_offset = index - lp_offset;
1684
1685 if (ht_offset_cur != ht_offset) {
1686 error = gfs2_dir_read_data(dip, (char *)lp,
1687 ht_offset * sizeof(uint64_t),
1688 sdp->sd_hash_bsize);
1689 if (error != sdp->sd_hash_bsize) {
1690 if (error >= 0)
1691 error = -EIO;
1692 goto out;
1693 }
1694 ht_offset_cur = ht_offset;
1695 }
1696
1697 error = get_leaf(dip, be64_to_cpu(lp[lp_offset]), &bh);
1698 if (error)
1699 goto out;
1700
1701 gfs2_leaf_in(&leaf, bh->b_data);
1702
1703 if (leaf.lf_next)
1704 error = do_filldir_multi(dip, offset, opaque, filldir,
1705 bh, &copied);
1706 else
1707 error = do_filldir_single(dip, offset, opaque, filldir,
1708 bh, leaf.lf_entries, &copied);
1709
1710 brelse(bh);
1711
1712 if (error) {
1713 if (error > 0)
1714 error = 0;
1715 goto out;
1716 }
1717
1718 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
1719 index = (index & ~(len - 1)) + len;
1720 }
1721
1722 out:
1723 kfree(lp);
1724
1725 return error;
1726}
1727
1728static int dir_e_mvino(struct gfs2_inode *dip, struct qstr *filename,
1729 struct gfs2_inum *inum, unsigned int new_type)
1730{
1731 struct buffer_head *bh, *dibh;
1732 struct gfs2_dirent *dent;
1733 int error;
1734
1735 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1736 if (error == -ENOENT) {
1737 gfs2_consist_inode(dip);
1738 return -EIO;
1739 }
1740 if (error)
1741 return error;
1742
1743 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1744
1745 gfs2_inum_out(inum, (char *)&dent->de_inum);
1746 dent->de_type = cpu_to_be16(new_type);
1747
1748 brelse(bh);
1749
1750 error = gfs2_meta_inode_buffer(dip, &dibh);
1751 if (error)
1752 return error;
1753
1754 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1755
1756 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1757 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1758 brelse(dibh);
1759
1760 return 0;
1761}
1762
1763/**
1764 * dir_l_search - Search linear (stuffed dinode) dir for inode matching name
1765 * @dip: The GFS2 inode
1766 * @filename: Filename string
1767 * @inode: If non-NULL, function fills with formal inode # and block address
1768 * @type: If non-NULL, function fills with DT_... dinode type
1769 *
1770 * Returns:
1771 */
1772
1773static int dir_l_search(struct gfs2_inode *dip, struct qstr *filename,
1774 struct gfs2_inum *inum, unsigned int *type)
1775{
1776 struct buffer_head *dibh;
1777 struct gfs2_dirent *dent;
1778 int error;
1779
1780 if (!gfs2_is_stuffed(dip)) {
1781 gfs2_consist_inode(dip);
1782 return -EIO;
1783 }
1784
1785 error = gfs2_meta_inode_buffer(dip, &dibh);
1786 if (error)
1787 return error;
1788
1789 error = leaf_search(dip, dibh, filename, &dent, NULL);
1790 if (!error) {
1791 if (inum)
1792 gfs2_inum_in(inum, (char *)&dent->de_inum);
1793 if (type)
1794 *type = be16_to_cpu(dent->de_type);
1795 }
1796
1797 brelse(dibh);
1798
1799 return error;
1800}
1801
1802static int dir_l_add(struct gfs2_inode *dip, struct qstr *filename,
1803 struct gfs2_inum *inum, unsigned int type)
1804{
1805 struct buffer_head *dibh;
1806 struct gfs2_dirent *dent;
1807 int error;
1808
1809 if (!gfs2_is_stuffed(dip)) {
1810 gfs2_consist_inode(dip);
1811 return -EIO;
1812 }
1813
1814 error = gfs2_meta_inode_buffer(dip, &dibh);
1815 if (error)
1816 return error;
1817
1818 if (gfs2_dirent_alloc(dip, dibh, filename->len, &dent)) {
1819 brelse(dibh);
1820
1821 error = dir_make_exhash(dip);
1822 if (!error)
1823 error = dir_e_add(dip, filename, inum, type);
1824
1825 return error;
1826 }
1827
1828 /* gfs2_dirent_alloc() pins */
1829
1830 gfs2_inum_out(inum, (char *)&dent->de_inum);
1831 dent->de_hash = gfs2_disk_hash(filename->name, filename->len);
1832 dent->de_hash = cpu_to_be32(dent->de_hash);
1833 dent->de_type = cpu_to_be16(type);
1834 memcpy((char *)(dent + 1), filename->name, filename->len);
1835
1836 dip->i_di.di_entries++;
1837 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1838
1839 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1840 brelse(dibh);
1841
1842 return 0;
1843}
1844
1845static int dir_l_del(struct gfs2_inode *dip, struct qstr *filename)
1846{
1847 struct buffer_head *dibh;
1848 struct gfs2_dirent *dent, *prev;
1849 int error;
1850
1851 if (!gfs2_is_stuffed(dip)) {
1852 gfs2_consist_inode(dip);
1853 return -EIO;
1854 }
1855
1856 error = gfs2_meta_inode_buffer(dip, &dibh);
1857 if (error)
1858 return error;
1859
1860 error = leaf_search(dip, dibh, filename, &dent, &prev);
1861 if (error == -ENOENT) {
1862 gfs2_consist_inode(dip);
1863 error = -EIO;
1864 goto out;
1865 }
1866 if (error)
1867 goto out;
1868
1869 dirent_del(dip, dibh, prev, dent);
1870
1871 /* dirent_del() pins */
1872
1873 if (!dip->i_di.di_entries)
1874 gfs2_consist_inode(dip);
1875 dip->i_di.di_entries--;
1876
1877 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1878
1879 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1880
1881 out:
1882 brelse(dibh);
1883
1884 return error;
1885}
1886
1887static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1888 gfs2_filldir_t filldir)
1889{
1890 struct buffer_head *dibh;
1891 int copied = 0;
1892 int error;
1893
1894 if (!gfs2_is_stuffed(dip)) {
1895 gfs2_consist_inode(dip);
1896 return -EIO;
1897 }
1898
1899 if (!dip->i_di.di_entries)
1900 return 0;
1901
1902 error = gfs2_meta_inode_buffer(dip, &dibh);
1903 if (error)
1904 return error;
1905
1906 error = do_filldir_single(dip, offset,
1907 opaque, filldir,
1908 dibh, dip->i_di.di_entries,
1909 &copied);
1910 if (error > 0)
1911 error = 0;
1912
1913 brelse(dibh);
1914
1915 return error;
1916}
1917
1918static int dir_l_mvino(struct gfs2_inode *dip, struct qstr *filename,
1919 struct gfs2_inum *inum, unsigned int new_type)
1920{
1921 struct buffer_head *dibh;
1922 struct gfs2_dirent *dent;
1923 int error;
1924
1925 if (!gfs2_is_stuffed(dip)) {
1926 gfs2_consist_inode(dip);
1927 return -EIO;
1928 }
1929
1930 error = gfs2_meta_inode_buffer(dip, &dibh);
1931 if (error)
1932 return error;
1933
1934 error = leaf_search(dip, dibh, filename, &dent, NULL);
1935 if (error == -ENOENT) {
1936 gfs2_consist_inode(dip);
1937 error = -EIO;
1938 goto out;
1939 }
1940 if (error)
1941 goto out;
1942
1943 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1944
1945 gfs2_inum_out(inum, (char *)&dent->de_inum);
1946 dent->de_type = cpu_to_be16(new_type);
1947
1948 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1949
1950 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1951
1952 out:
1953 brelse(dibh);
1954
1955 return error;
1956}
1957
1958/**
1959 * gfs2_dir_search - Search a directory
1960 * @dip: The GFS2 inode
1961 * @filename:
1962 * @inode:
1963 *
1964 * This routine searches a directory for a file or another directory.
1965 * Assumes a glock is held on dip.
1966 *
1967 * Returns: errno
1968 */
1969
1970int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
1971 struct gfs2_inum *inum, unsigned int *type)
1972{
1973 int error;
1974
1975 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1976 error = dir_e_search(dip, filename, inum, type);
1977 else
1978 error = dir_l_search(dip, filename, inum, type);
1979
1980 return error;
1981}
1982
1983/**
1984 * gfs2_dir_add - Add new filename into directory
1985 * @dip: The GFS2 inode
1986 * @filename: The new name
1987 * @inode: The inode number of the entry
1988 * @type: The type of the entry
1989 *
1990 * Returns: 0 on success, error code on failure
1991 */
1992
1993int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
1994 struct gfs2_inum *inum, unsigned int type)
1995{
1996 int error;
1997
1998 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1999 error = dir_e_add(dip, filename, inum, type);
2000 else
2001 error = dir_l_add(dip, filename, inum, type);
2002
2003 return error;
2004}
2005
2006/**
2007 * gfs2_dir_del - Delete a directory entry
2008 * @dip: The GFS2 inode
2009 * @filename: The filename
2010 *
2011 * Returns: 0 on success, error code on failure
2012 */
2013
2014int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename)
2015{
2016 int error;
2017
2018 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
2019 error = dir_e_del(dip, filename);
2020 else
2021 error = dir_l_del(dip, filename);
2022
2023 return error;
2024}
2025
2026int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
2027 gfs2_filldir_t filldir)
2028{
2029 int error;
2030
2031 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
2032 error = dir_e_read(dip, offset, opaque, filldir);
2033 else
2034 error = dir_l_read(dip, offset, opaque, filldir);
2035
2036 return error;
2037}
2038
2039/**
2040 * gfs2_dir_mvino - Change inode number of directory entry
2041 * @dip: The GFS2 inode
2042 * @filename:
2043 * @new_inode:
2044 *
2045 * This routine changes the inode number of a directory entry. It's used
2046 * by rename to change ".." when a directory is moved.
2047 * Assumes a glock is held on dvp.
2048 *
2049 * Returns: errno
2050 */
2051
2052int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
2053 struct gfs2_inum *inum, unsigned int new_type)
2054{
2055 int error;
2056
2057 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
2058 error = dir_e_mvino(dip, filename, inum, new_type);
2059 else
2060 error = dir_l_mvino(dip, filename, inum, new_type);
2061
2062 return error;
2063}
2064
2065/**
2066 * foreach_leaf - call a function for each leaf in a directory
2067 * @dip: the directory
2068 * @lc: the function to call for each each
2069 * @data: private data to pass to it
2070 *
2071 * Returns: errno
2072 */
2073
2074static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
2075{
2076 struct gfs2_sbd *sdp = dip->i_sbd;
2077 struct buffer_head *bh;
2078 struct gfs2_leaf leaf;
2079 uint32_t hsize, len;
2080 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
2081 uint32_t index = 0;
2082 uint64_t *lp;
2083 uint64_t leaf_no;
2084 int error = 0;
2085
2086 hsize = 1 << dip->i_di.di_depth;
2087 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
2088 gfs2_consist_inode(dip);
2089 return -EIO;
2090 }
2091
2092 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
2093 if (!lp)
2094 return -ENOMEM;
2095
2096 while (index < hsize) {
2097 lp_offset = index & (sdp->sd_hash_ptrs - 1);
2098 ht_offset = index - lp_offset;
2099
2100 if (ht_offset_cur != ht_offset) {
2101 error = gfs2_dir_read_data(dip, (char *)lp,
2102 ht_offset * sizeof(uint64_t),
2103 sdp->sd_hash_bsize);
2104 if (error != sdp->sd_hash_bsize) {
2105 if (error >= 0)
2106 error = -EIO;
2107 goto out;
2108 }
2109 ht_offset_cur = ht_offset;
2110 }
2111
2112 leaf_no = be64_to_cpu(lp[lp_offset]);
2113 if (leaf_no) {
2114 error = get_leaf(dip, leaf_no, &bh);
2115 if (error)
2116 goto out;
2117 gfs2_leaf_in(&leaf, bh->b_data);
2118 brelse(bh);
2119
2120 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
2121
2122 error = lc(dip, index, len, leaf_no, data);
2123 if (error)
2124 goto out;
2125
2126 index = (index & ~(len - 1)) + len;
2127 } else
2128 index++;
2129 }
2130
2131 if (index != hsize) {
2132 gfs2_consist_inode(dip);
2133 error = -EIO;
2134 }
2135
2136 out:
2137 kfree(lp);
2138
2139 return error;
2140}
2141
2142/**
2143 * leaf_dealloc - Deallocate a directory leaf
2144 * @dip: the directory
2145 * @index: the hash table offset in the directory
2146 * @len: the number of pointers to this leaf
2147 * @leaf_no: the leaf number
2148 * @data: not used
2149 *
2150 * Returns: errno
2151 */
2152
2153static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
2154 uint64_t leaf_no, void *data)
2155{
2156 struct gfs2_sbd *sdp = dip->i_sbd;
2157 struct gfs2_leaf tmp_leaf;
2158 struct gfs2_rgrp_list rlist;
2159 struct buffer_head *bh, *dibh;
2160 uint64_t blk;
2161 unsigned int rg_blocks = 0, l_blocks = 0;
2162 char *ht;
2163 unsigned int x, size = len * sizeof(uint64_t);
2164 int error;
2165
2166 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
2167
2168 ht = kzalloc(size, GFP_KERNEL);
2169 if (!ht)
2170 return -ENOMEM;
2171
2172 gfs2_alloc_get(dip);
2173
2174 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
2175 if (error)
2176 goto out;
2177
2178 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
2179 if (error)
2180 goto out_qs;
2181
2182 /* Count the number of leaves */
2183
2184 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
2185 error = get_leaf(dip, blk, &bh);
2186 if (error)
2187 goto out_rlist;
2188 gfs2_leaf_in(&tmp_leaf, (bh)->b_data);
2189 brelse(bh);
2190
2191 gfs2_rlist_add(sdp, &rlist, blk);
2192 l_blocks++;
2193 }
2194
2195 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
2196
2197 for (x = 0; x < rlist.rl_rgrps; x++) {
2198 struct gfs2_rgrpd *rgd;
2199 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
2200 rg_blocks += rgd->rd_ri.ri_length;
2201 }
2202
2203 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
2204 if (error)
2205 goto out_rlist;
2206
2207 error = gfs2_trans_begin(sdp,
2208 rg_blocks + (DIV_RU(size, sdp->sd_jbsize) + 1) +
2209 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
2210 if (error)
2211 goto out_rg_gunlock;
2212
2213 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
2214 error = get_leaf(dip, blk, &bh);
2215 if (error)
2216 goto out_end_trans;
2217 gfs2_leaf_in(&tmp_leaf, bh->b_data);
2218 brelse(bh);
2219
2220 gfs2_free_meta(dip, blk, 1);
2221
2222 if (!dip->i_di.di_blocks)
2223 gfs2_consist_inode(dip);
2224 dip->i_di.di_blocks--;
2225 }
2226
2227 error = gfs2_dir_write_data(dip, ht, index * sizeof(uint64_t), size);
2228 if (error != size) {
2229 if (error >= 0)
2230 error = -EIO;
2231 goto out_end_trans;
2232 }
2233
2234 error = gfs2_meta_inode_buffer(dip, &dibh);
2235 if (error)
2236 goto out_end_trans;
2237
2238 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
2239 gfs2_dinode_out(&dip->i_di, dibh->b_data);
2240 brelse(dibh);
2241
2242 out_end_trans:
2243 gfs2_trans_end(sdp);
2244
2245 out_rg_gunlock:
2246 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2247
2248 out_rlist:
2249 gfs2_rlist_free(&rlist);
2250 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
2251
2252 out_qs:
2253 gfs2_quota_unhold(dip);
2254
2255 out:
2256 gfs2_alloc_put(dip);
2257 kfree(ht);
2258
2259 return error;
2260}
2261
2262/**
2263 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
2264 * @dip: the directory
2265 *
2266 * Dealloc all on-disk directory leaves to FREEMETA state
2267 * Change on-disk inode type to "regular file"
2268 *
2269 * Returns: errno
2270 */
2271
2272int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
2273{
2274 struct gfs2_sbd *sdp = dip->i_sbd;
2275 struct buffer_head *bh;
2276 int error;
2277
2278 /* Dealloc on-disk leaves to FREEMETA state */
2279 error = foreach_leaf(dip, leaf_dealloc, NULL);
2280 if (error)
2281 return error;
2282
2283 /* Make this a regular file in case we crash.
2284 (We don't want to free these blocks a second time.) */
2285
2286 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2287 if (error)
2288 return error;
2289
2290 error = gfs2_meta_inode_buffer(dip, &bh);
2291 if (!error) {
2292 gfs2_trans_add_bh(dip->i_gl, bh, 1);
2293 ((struct gfs2_dinode *)bh->b_data)->di_mode =
2294 cpu_to_be32(S_IFREG);
2295 brelse(bh);
2296 }
2297
2298 gfs2_trans_end(sdp);
2299
2300 return error;
2301}
2302
2303/**
2304 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
2305 * @ip: the file being written to
2306 * @filname: the filename that's going to be added
2307 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
2308 *
2309 * Returns: errno
2310 */
2311
2312int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
2313 int *alloc_required)
2314{
2315 struct buffer_head *bh = NULL, *bh_next;
2316 uint32_t hsize, hash, index;
2317 int error = 0;
2318
2319 *alloc_required = 0;
2320
2321 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
2322 hsize = 1 << dip->i_di.di_depth;
2323 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
2324 gfs2_consist_inode(dip);
2325 return -EIO;
2326 }
2327
2328 hash = gfs2_disk_hash(filename->name, filename->len);
2329 index = hash >> (32 - dip->i_di.di_depth);
2330
2331 error = get_first_leaf(dip, index, &bh_next);
2332 if (error)
2333 return error;
2334
2335 do {
2336 brelse(bh);
2337
2338 bh = bh_next;
2339
2340 if (dirent_fits(dip, bh, filename->len))
2341 break;
2342
2343 error = get_next_leaf(dip, bh, &bh_next);
2344 if (error == -ENOENT) {
2345 *alloc_required = 1;
2346 error = 0;
2347 break;
2348 }
2349 }
2350 while (!error);
2351
2352 brelse(bh);
2353 } else {
2354 error = gfs2_meta_inode_buffer(dip, &bh);
2355 if (error)
2356 return error;
2357
2358 if (!dirent_fits(dip, bh, filename->len))
2359 *alloc_required = 1;
2360
2361 brelse(bh);
2362 }
2363
2364 return error;
2365}
2366
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
new file mode 100644
index 000000000000..5b01497b3ab3
--- /dev/null
+++ b/fs/gfs2/dir.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIR_DOT_H__
11#define __DIR_DOT_H__
12
13/**
14 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
15 * @opaque: opaque data used by the function
16 * @name: the name of the directory entry
17 * @length: the length of the name
18 * @offset: the entry's offset in the directory
19 * @inum: the inode number the entry points to
20 * @type: the type of inode the entry points to
21 *
22 * Returns: 0 on success, 1 if buffer full
23 */
24
25typedef int (*gfs2_filldir_t) (void *opaque,
26 const char *name, unsigned int length,
27 uint64_t offset,
28 struct gfs2_inum *inum, unsigned int type);
29
30int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2);
31int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
32 int name_len, struct gfs2_dirent **dent_out);
33
34int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
35 struct gfs2_inum *inum, unsigned int *type);
36int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
37 struct gfs2_inum *inum, unsigned int type);
38int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename);
39int gfs2_dir_read(struct gfs2_inode *dip, uint64_t * offset, void *opaque,
40 gfs2_filldir_t filldir);
41int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
42 struct gfs2_inum *new_inum, unsigned int new_type);
43
44int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
45
46int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
47 int *alloc_required);
48int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
49 struct buffer_head **bhp);
50
51#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
new file mode 100644
index 000000000000..2914731250c5
--- /dev/null
+++ b/fs/gfs2/eaops.c
@@ -0,0 +1,185 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <asm/semaphore.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "eaops.h"
22#include "eattr.h"
23
24/**
25 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
26 * @namep: ea name, possibly with type appended
27 *
28 * Returns: GFS2_EATYPE_XXX
29 */
30
31unsigned int gfs2_ea_name2type(const char *name, char **truncated_name)
32{
33 unsigned int type;
34
35 if (strncmp(name, "system.", 7) == 0) {
36 type = GFS2_EATYPE_SYS;
37 if (truncated_name)
38 *truncated_name = strchr(name, '.') + 1;
39 } else if (strncmp(name, "user.", 5) == 0) {
40 type = GFS2_EATYPE_USR;
41 if (truncated_name)
42 *truncated_name = strchr(name, '.') + 1;
43 } else {
44 type = GFS2_EATYPE_UNUSED;
45 if (truncated_name)
46 *truncated_name = NULL;
47 }
48
49 return type;
50}
51
52static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
53{
54 struct inode *inode = ip->i_vnode;
55 int error = permission(inode, MAY_READ, NULL);
56 if (error)
57 return error;
58
59 return gfs2_ea_get_i(ip, er);
60}
61
62static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
63{
64 struct inode *inode = ip->i_vnode;
65
66 if (S_ISREG(inode->i_mode) ||
67 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
68 int error = permission(inode, MAY_WRITE, NULL);
69 if (error)
70 return error;
71 } else
72 return -EPERM;
73
74 return gfs2_ea_set_i(ip, er);
75}
76
77static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
78{
79 struct inode *inode = ip->i_vnode;
80
81 if (S_ISREG(inode->i_mode) ||
82 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
83 int error = permission(inode, MAY_WRITE, NULL);
84 if (error)
85 return error;
86 } else
87 return -EPERM;
88
89 return gfs2_ea_remove_i(ip, er);
90}
91
92static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
93{
94 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
95 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
96 !capable(CAP_SYS_ADMIN))
97 return -EPERM;
98
99 if (ip->i_sbd->sd_args.ar_posix_acl == 0 &&
100 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
101 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
102 return -EOPNOTSUPP;
103
104
105
106 return gfs2_ea_get_i(ip, er);
107}
108
109static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
110{
111 int remove = 0;
112 int error;
113
114 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
115 if (!(er->er_flags & GFS2_ERF_MODE)) {
116 er->er_mode = ip->i_di.di_mode;
117 er->er_flags |= GFS2_ERF_MODE;
118 }
119 error = gfs2_acl_validate_set(ip, 1, er,
120 &remove, &er->er_mode);
121 if (error)
122 return error;
123 error = gfs2_ea_set_i(ip, er);
124 if (error)
125 return error;
126 if (remove)
127 gfs2_ea_remove_i(ip, er);
128 return 0;
129
130 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
131 error = gfs2_acl_validate_set(ip, 0, er,
132 &remove, NULL);
133 if (error)
134 return error;
135 if (!remove)
136 error = gfs2_ea_set_i(ip, er);
137 else {
138 error = gfs2_ea_remove_i(ip, er);
139 if (error == -ENODATA)
140 error = 0;
141 }
142 return error;
143 }
144
145 return -EPERM;
146}
147
148static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
149{
150 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
151 int error = gfs2_acl_validate_remove(ip, 1);
152 if (error)
153 return error;
154
155 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
156 int error = gfs2_acl_validate_remove(ip, 0);
157 if (error)
158 return error;
159
160 } else
161 return -EPERM;
162
163 return gfs2_ea_remove_i(ip, er);
164}
165
166struct gfs2_eattr_operations gfs2_user_eaops = {
167 .eo_get = user_eo_get,
168 .eo_set = user_eo_set,
169 .eo_remove = user_eo_remove,
170 .eo_name = "user",
171};
172
173struct gfs2_eattr_operations gfs2_system_eaops = {
174 .eo_get = system_eo_get,
175 .eo_set = system_eo_set,
176 .eo_remove = system_eo_remove,
177 .eo_name = "system",
178};
179
180struct gfs2_eattr_operations *gfs2_ea_ops[] = {
181 NULL,
182 &gfs2_user_eaops,
183 &gfs2_system_eaops,
184};
185
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
new file mode 100644
index 000000000000..f83c497eddca
--- /dev/null
+++ b/fs/gfs2/eaops.h
@@ -0,0 +1,30 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __EAOPS_DOT_H__
#define __EAOPS_DOT_H__

struct gfs2_ea_request;

/*
 * One set of get/set/remove handlers for a class of extended attributes,
 * together with the name of that class.
 */
struct gfs2_eattr_operations {
	int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
	int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
	int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
	char *eo_name;
};

/* Returns GFS2_EATYPE_XXX for a prefixed attribute name */
unsigned int gfs2_ea_name2type(const char *name, char **truncated_name);

extern struct gfs2_eattr_operations gfs2_user_eaops;
extern struct gfs2_eattr_operations gfs2_system_eaops;

extern struct gfs2_eattr_operations *gfs2_ea_ops[];

#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
new file mode 100644
index 000000000000..146995d9cd65
--- /dev/null
+++ b/fs/gfs2/eattr.c
@@ -0,0 +1,1563 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <asm/semaphore.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "eaops.h"
22#include "eattr.h"
23#include "glock.h"
24#include "inode.h"
25#include "meta_io.h"
26#include "quota.h"
27#include "rgrp.h"
28#include "trans.h"
29
30/**
31 * ea_calc_size - returns the acutal number of bytes the request will take up
32 * (not counting any unstuffed data blocks)
33 * @sdp:
34 * @er:
35 * @size:
36 *
37 * Returns: 1 if the EA should be stuffed
38 */
39
40static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
41 unsigned int *size)
42{
43 *size = GFS2_EAREQ_SIZE_STUFFED(er);
44 if (*size <= sdp->sd_jbsize)
45 return 1;
46
47 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
48
49 return 0;
50}
51
52static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
53{
54 unsigned int size;
55
56 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
57 return -ERANGE;
58
59 ea_calc_size(sdp, er, &size);
60
61 /* This can only happen with 512 byte blocks */
62 if (size > sdp->sd_jbsize)
63 return -ERANGE;
64
65 return 0;
66}
67
/*
 * Callback invoked by ea_foreach_i() for every EA header in a block.
 * @prev is the header visited just before @ea (NULL for the first one).
 * A non-zero return value stops the walk and is passed back to the caller.
 */
typedef int (*ea_call_t)(struct gfs2_inode *ip, struct buffer_head *bh,
			 struct gfs2_ea_header *ea,
			 struct gfs2_ea_header *prev, void *private);
73
74static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
75 ea_call_t ea_call, void *data)
76{
77 struct gfs2_ea_header *ea, *prev = NULL;
78 int error = 0;
79
80 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_EA))
81 return -EIO;
82
83 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
84 if (!GFS2_EA_REC_LEN(ea))
85 goto fail;
86 if (!(bh->b_data <= (char *)ea &&
87 (char *)GFS2_EA2NEXT(ea) <=
88 bh->b_data + bh->b_size))
89 goto fail;
90 if (!GFS2_EATYPE_VALID(ea->ea_type))
91 goto fail;
92
93 error = ea_call(ip, bh, ea, prev, data);
94 if (error)
95 return error;
96
97 if (GFS2_EA_IS_LAST(ea)) {
98 if ((char *)GFS2_EA2NEXT(ea) !=
99 bh->b_data + bh->b_size)
100 goto fail;
101 break;
102 }
103 }
104
105 return error;
106
107 fail:
108 gfs2_consist_inode(ip);
109 return -EIO;
110}
111
112static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
113{
114 struct buffer_head *bh, *eabh;
115 uint64_t *eablk, *end;
116 int error;
117
118 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
119 DIO_START | DIO_WAIT, &bh);
120 if (error)
121 return error;
122
123 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
124 error = ea_foreach_i(ip, bh, ea_call, data);
125 goto out;
126 }
127
128 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
129 error = -EIO;
130 goto out;
131 }
132
133 eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs2_meta_header));
134 end = eablk + ip->i_sbd->sd_inptrs;
135
136 for (; eablk < end; eablk++) {
137 uint64_t bn;
138
139 if (!*eablk)
140 break;
141 bn = be64_to_cpu(*eablk);
142
143 error = gfs2_meta_read(ip->i_gl, bn, DIO_START | DIO_WAIT,
144 &eabh);
145 if (error)
146 break;
147 error = ea_foreach_i(ip, eabh, ea_call, data);
148 brelse(eabh);
149 if (error)
150 break;
151 }
152 out:
153 brelse(bh);
154
155 return error;
156}
157
/* Arguments handed to ea_find_i() through ea_foreach()'s private pointer. */
struct ea_find {
	struct gfs2_ea_request *ef_er;		/* type and name to look for */
	struct gfs2_ea_location *ef_el;		/* filled in on a match */
};
162
163static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
164 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
165 void *private)
166{
167 struct ea_find *ef = private;
168 struct gfs2_ea_request *er = ef->ef_er;
169
170 if (ea->ea_type == GFS2_EATYPE_UNUSED)
171 return 0;
172
173 if (ea->ea_type == er->er_type) {
174 if (ea->ea_name_len == er->er_name_len &&
175 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
176 struct gfs2_ea_location *el = ef->ef_el;
177 get_bh(bh);
178 el->el_bh = bh;
179 el->el_ea = ea;
180 el->el_prev = prev;
181 return 1;
182 }
183 }
184
185#if 0
186 else if ((ip->i_di.di_flags & GFS2_DIF_EA_PACKED) &&
187 er->er_type == GFS2_EATYPE_SYS)
188 return 1;
189#endif
190
191 return 0;
192}
193
194int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
195 struct gfs2_ea_location *el)
196{
197 struct ea_find ef;
198 int error;
199
200 ef.ef_er = er;
201 ef.ef_el = el;
202
203 memset(el, 0, sizeof(struct gfs2_ea_location));
204
205 error = ea_foreach(ip, ea_find_i, &ef);
206 if (error > 0)
207 return 0;
208
209 return error;
210}
211
212/**
213 * ea_dealloc_unstuffed -
214 * @ip:
215 * @bh:
216 * @ea:
217 * @prev:
218 * @private:
219 *
220 * Take advantage of the fact that all unstuffed blocks are
221 * allocated from the same RG. But watch, this may not always
222 * be true.
223 *
224 * Returns: errno
225 */
226
227static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
228 struct gfs2_ea_header *ea,
229 struct gfs2_ea_header *prev, void *private)
230{
231 int *leave = private;
232 struct gfs2_sbd *sdp = ip->i_sbd;
233 struct gfs2_rgrpd *rgd;
234 struct gfs2_holder rg_gh;
235 struct buffer_head *dibh;
236 uint64_t *dataptrs, bn = 0;
237 uint64_t bstart = 0;
238 unsigned int blen = 0;
239 unsigned int blks = 0;
240 unsigned int x;
241 int error;
242
243 if (GFS2_EA_IS_STUFFED(ea))
244 return 0;
245
246 dataptrs = GFS2_EA2DATAPTRS(ea);
247 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++)
248 if (*dataptrs) {
249 blks++;
250 bn = be64_to_cpu(*dataptrs);
251 }
252 if (!blks)
253 return 0;
254
255 rgd = gfs2_blk2rgrpd(sdp, bn);
256 if (!rgd) {
257 gfs2_consist_inode(ip);
258 return -EIO;
259 }
260
261 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
262 if (error)
263 return error;
264
265 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length +
266 RES_DINODE + RES_EATTR + RES_STATFS +
267 RES_QUOTA, blks);
268 if (error)
269 goto out_gunlock;
270
271 gfs2_trans_add_bh(ip->i_gl, bh, 1);
272
273 dataptrs = GFS2_EA2DATAPTRS(ea);
274 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
275 if (!*dataptrs)
276 break;
277 bn = be64_to_cpu(*dataptrs);
278
279 if (bstart + blen == bn)
280 blen++;
281 else {
282 if (bstart)
283 gfs2_free_meta(ip, bstart, blen);
284 bstart = bn;
285 blen = 1;
286 }
287
288 *dataptrs = 0;
289 if (!ip->i_di.di_blocks)
290 gfs2_consist_inode(ip);
291 ip->i_di.di_blocks--;
292 }
293 if (bstart)
294 gfs2_free_meta(ip, bstart, blen);
295
296 if (prev && !leave) {
297 uint32_t len;
298
299 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
300 prev->ea_rec_len = cpu_to_be32(len);
301
302 if (GFS2_EA_IS_LAST(ea))
303 prev->ea_flags |= GFS2_EAFLAG_LAST;
304 } else {
305 ea->ea_type = GFS2_EATYPE_UNUSED;
306 ea->ea_num_ptrs = 0;
307 }
308
309 error = gfs2_meta_inode_buffer(ip, &dibh);
310 if (!error) {
311 ip->i_di.di_ctime = get_seconds();
312 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
313 gfs2_dinode_out(&ip->i_di, dibh->b_data);
314 brelse(dibh);
315 }
316
317 gfs2_trans_end(sdp);
318
319 out_gunlock:
320 gfs2_glock_dq_uninit(&rg_gh);
321
322 return error;
323}
324
325static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
326 struct gfs2_ea_header *ea,
327 struct gfs2_ea_header *prev, int leave)
328{
329 struct gfs2_alloc *al;
330 int error;
331
332 al = gfs2_alloc_get(ip);
333
334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
335 if (error)
336 goto out_alloc;
337
338 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
339 if (error)
340 goto out_quota;
341
342 error = ea_dealloc_unstuffed(ip,
343 bh, ea, prev,
344 (leave) ? &error : NULL);
345
346 gfs2_glock_dq_uninit(&al->al_ri_gh);
347
348 out_quota:
349 gfs2_quota_unhold(ip);
350
351 out_alloc:
352 gfs2_alloc_put(ip);
353
354 return error;
355}
356
357
358static int gfs2_ea_repack_i(struct gfs2_inode *ip)
359{
360 return -EOPNOTSUPP;
361}
362
363int gfs2_ea_repack(struct gfs2_inode *ip)
364{
365 struct gfs2_holder gh;
366 int error;
367
368 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
369 if (error)
370 return error;
371
372 /* Some sort of permissions checking would be nice */
373
374 error = gfs2_ea_repack_i(ip);
375
376 gfs2_glock_dq_uninit(&gh);
377
378 return error;
379}
380
/* State carried through ea_foreach() while listing attribute names. */
struct ea_list {
	struct gfs2_ea_request *ei_er;	/* supplies the output buffer, if any */
	unsigned int ei_size;		/* bytes accumulated so far */
};
385
386static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
387 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
388 void *private)
389{
390 struct ea_list *ei = private;
391 struct gfs2_ea_request *er = ei->ei_er;
392 unsigned int ea_size = GFS2_EA_STRLEN(ea);
393
394 if (ea->ea_type == GFS2_EATYPE_UNUSED)
395 return 0;
396
397 if (er->er_data_len) {
398 char *prefix;
399 unsigned int l;
400 char c = 0;
401
402 if (ei->ei_size + ea_size > er->er_data_len)
403 return -ERANGE;
404
405 if (ea->ea_type == GFS2_EATYPE_USR) {
406 prefix = "user.";
407 l = 5;
408 } else {
409 prefix = "system.";
410 l = 7;
411 }
412
413 memcpy(er->er_data + ei->ei_size,
414 prefix, l);
415 memcpy(er->er_data + ei->ei_size + l,
416 GFS2_EA2NAME(ea),
417 ea->ea_name_len);
418 memcpy(er->er_data + ei->ei_size +
419 ea_size - 1,
420 &c, 1);
421 }
422
423 ei->ei_size += ea_size;
424
425 return 0;
426}
427
428/**
429 * gfs2_ea_list -
430 * @ip:
431 * @er:
432 *
433 * Returns: actual size of data on success, -errno on error
434 */
435
436int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
437{
438 struct gfs2_holder i_gh;
439 int error;
440
441 if (!er->er_data || !er->er_data_len) {
442 er->er_data = NULL;
443 er->er_data_len = 0;
444 }
445
446 error = gfs2_glock_nq_init(ip->i_gl,
447 LM_ST_SHARED, LM_FLAG_ANY,
448 &i_gh);
449 if (error)
450 return error;
451
452 if (ip->i_di.di_eattr) {
453 struct ea_list ei = { .ei_er = er, .ei_size = 0 };
454
455 error = ea_foreach(ip, ea_list_i, &ei);
456 if (!error)
457 error = ei.ei_size;
458 }
459
460 gfs2_glock_dq_uninit(&i_gh);
461
462 return error;
463}
464
465/**
466 * ea_get_unstuffed - actually copies the unstuffed data into the
467 * request buffer
468 * @ip:
469 * @ea:
470 * @data:
471 *
472 * Returns: errno
473 */
474
475static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
476 char *data)
477{
478 struct gfs2_sbd *sdp = ip->i_sbd;
479 struct buffer_head **bh;
480 unsigned int amount = GFS2_EA_DATA_LEN(ea);
481 unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
482 uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
483 unsigned int x;
484 int error = 0;
485
486 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
487 if (!bh)
488 return -ENOMEM;
489
490 for (x = 0; x < nptrs; x++) {
491 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
492 DIO_START, bh + x);
493 if (error) {
494 while (x--)
495 brelse(bh[x]);
496 goto out;
497 }
498 dataptrs++;
499 }
500
501 for (x = 0; x < nptrs; x++) {
502 error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
503 if (error) {
504 for (; x < nptrs; x++)
505 brelse(bh[x]);
506 goto out;
507 }
508 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
509 for (; x < nptrs; x++)
510 brelse(bh[x]);
511 error = -EIO;
512 goto out;
513 }
514
515 memcpy(data,
516 bh[x]->b_data + sizeof(struct gfs2_meta_header),
517 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
518
519 amount -= sdp->sd_jbsize;
520 data += sdp->sd_jbsize;
521
522 brelse(bh[x]);
523 }
524
525 out:
526 kfree(bh);
527
528 return error;
529}
530
531int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
532 char *data)
533{
534 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
535 memcpy(data,
536 GFS2_EA2DATA(el->el_ea),
537 GFS2_EA_DATA_LEN(el->el_ea));
538 return 0;
539 } else
540 return ea_get_unstuffed(ip, el->el_ea, data);
541}
542
543/**
544 * gfs2_ea_get_i -
545 * @ip:
546 * @er:
547 *
548 * Returns: actual size of data on success, -errno on error
549 */
550
551int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
552{
553 struct gfs2_ea_location el;
554 int error;
555
556 if (!ip->i_di.di_eattr)
557 return -ENODATA;
558
559 error = gfs2_ea_find(ip, er, &el);
560 if (error)
561 return error;
562 if (!el.el_ea)
563 return -ENODATA;
564
565 if (er->er_data_len) {
566 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
567 error = -ERANGE;
568 else
569 error = gfs2_ea_get_copy(ip, &el, er->er_data);
570 }
571 if (!error)
572 error = GFS2_EA_DATA_LEN(el.el_ea);
573
574 brelse(el.el_bh);
575
576 return error;
577}
578
579/**
580 * gfs2_ea_get -
581 * @ip:
582 * @er:
583 *
584 * Returns: actual size of data on success, -errno on error
585 */
586
587int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
588{
589 struct gfs2_holder i_gh;
590 int error;
591
592 if (!er->er_name_len ||
593 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
594 return -EINVAL;
595 if (!er->er_data || !er->er_data_len) {
596 er->er_data = NULL;
597 er->er_data_len = 0;
598 }
599
600 error = gfs2_glock_nq_init(ip->i_gl,
601 LM_ST_SHARED, LM_FLAG_ANY,
602 &i_gh);
603 if (error)
604 return error;
605
606 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
607
608 gfs2_glock_dq_uninit(&i_gh);
609
610 return error;
611}
612
613/**
614 * ea_alloc_blk - allocates a new block for extended attributes.
615 * @ip: A pointer to the inode that's getting extended attributes
616 * @bhp:
617 *
618 * Returns: errno
619 */
620
621static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
622{
623 struct gfs2_sbd *sdp = ip->i_sbd;
624 struct gfs2_ea_header *ea;
625 uint64_t block;
626
627 block = gfs2_alloc_meta(ip);
628
629 *bhp = gfs2_meta_new(ip->i_gl, block);
630 gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
631 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
632 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
633
634 ea = GFS2_EA_BH2FIRST(*bhp);
635 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
636 ea->ea_type = GFS2_EATYPE_UNUSED;
637 ea->ea_flags = GFS2_EAFLAG_LAST;
638 ea->ea_num_ptrs = 0;
639
640 ip->i_di.di_blocks++;
641
642 return 0;
643}
644
645/**
646 * ea_write - writes the request info to an ea, creating new blocks if
647 * necessary
648 * @ip: inode that is being modified
649 * @ea: the location of the new ea in a block
650 * @er: the write request
651 *
652 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
653 *
654 * returns : errno
655 */
656
657static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
658 struct gfs2_ea_request *er)
659{
660 struct gfs2_sbd *sdp = ip->i_sbd;
661
662 ea->ea_data_len = cpu_to_be32(er->er_data_len);
663 ea->ea_name_len = er->er_name_len;
664 ea->ea_type = er->er_type;
665 ea->__pad = 0;
666
667 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
668
669 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
670 ea->ea_num_ptrs = 0;
671 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
672 } else {
673 uint64_t *dataptr = GFS2_EA2DATAPTRS(ea);
674 const char *data = er->er_data;
675 unsigned int data_len = er->er_data_len;
676 unsigned int copy;
677 unsigned int x;
678
679 ea->ea_num_ptrs = DIV_RU(er->er_data_len, sdp->sd_jbsize);
680 for (x = 0; x < ea->ea_num_ptrs; x++) {
681 struct buffer_head *bh;
682 uint64_t block;
683 int mh_size = sizeof(struct gfs2_meta_header);
684
685 block = gfs2_alloc_meta(ip);
686
687 bh = gfs2_meta_new(ip->i_gl, block);
688 gfs2_trans_add_bh(ip->i_gl, bh, 1);
689 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
690
691 ip->i_di.di_blocks++;
692
693 copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize :
694 data_len;
695 memcpy(bh->b_data + mh_size, data, copy);
696 if (copy < sdp->sd_jbsize)
697 memset(bh->b_data + mh_size + copy, 0,
698 sdp->sd_jbsize - copy);
699
700 *dataptr++ = cpu_to_be64((uint64_t)bh->b_blocknr);
701 data += copy;
702 data_len -= copy;
703
704 brelse(bh);
705 }
706
707 gfs2_assert_withdraw(sdp, !data_len);
708 }
709
710 return 0;
711}
712
/*
 * Worker run by ea_alloc_skeleton() once the reservation and the
 * transaction are in place.  A non-zero return aborts the operation.
 */
typedef int (*ea_skeleton_call_t)(struct gfs2_inode *ip,
				  struct gfs2_ea_request *er, void *private);
716
717static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
718 unsigned int blks,
719 ea_skeleton_call_t skeleton_call,
720 void *private)
721{
722 struct gfs2_alloc *al;
723 struct buffer_head *dibh;
724 int error;
725
726 al = gfs2_alloc_get(ip);
727
728 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
729 if (error)
730 goto out;
731
732 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
733 if (error)
734 goto out_gunlock_q;
735
736 al->al_requested = blks;
737
738 error = gfs2_inplace_reserve(ip);
739 if (error)
740 goto out_gunlock_q;
741
742 error = gfs2_trans_begin(ip->i_sbd,
743 blks + al->al_rgd->rd_ri.ri_length +
744 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
745 if (error)
746 goto out_ipres;
747
748 error = skeleton_call(ip, er, private);
749 if (error)
750 goto out_end_trans;
751
752 error = gfs2_meta_inode_buffer(ip, &dibh);
753 if (!error) {
754 if (er->er_flags & GFS2_ERF_MODE) {
755 gfs2_assert_withdraw(ip->i_sbd,
756 (ip->i_di.di_mode & S_IFMT) ==
757 (er->er_mode & S_IFMT));
758 ip->i_di.di_mode = er->er_mode;
759 }
760 ip->i_di.di_ctime = get_seconds();
761 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
762 gfs2_dinode_out(&ip->i_di, dibh->b_data);
763 brelse(dibh);
764 }
765
766 out_end_trans:
767 gfs2_trans_end(ip->i_sbd);
768
769 out_ipres:
770 gfs2_inplace_release(ip);
771
772 out_gunlock_q:
773 gfs2_quota_unlock(ip);
774
775 out:
776 gfs2_alloc_put(ip);
777
778 return error;
779}
780
781static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
782 void *private)
783{
784 struct buffer_head *bh;
785 int error;
786
787 error = ea_alloc_blk(ip, &bh);
788 if (error)
789 return error;
790
791 ip->i_di.di_eattr = bh->b_blocknr;
792 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
793
794 brelse(bh);
795
796 return error;
797}
798
799/**
800 * ea_init - initializes a new eattr block
801 * @ip:
802 * @er:
803 *
804 * Returns: errno
805 */
806
807static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
808{
809 unsigned int jbsize = ip->i_sbd->sd_jbsize;
810 unsigned int blks = 1;
811
812 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
813 blks += DIV_RU(er->er_data_len, jbsize);
814
815 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
816}
817
818static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
819{
820 uint32_t ea_size = GFS2_EA_SIZE(ea);
821 struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea +
822 ea_size);
823 uint32_t new_size = GFS2_EA_REC_LEN(ea) - ea_size;
824 int last = ea->ea_flags & GFS2_EAFLAG_LAST;
825
826 ea->ea_rec_len = cpu_to_be32(ea_size);
827 ea->ea_flags ^= last;
828
829 new->ea_rec_len = cpu_to_be32(new_size);
830 new->ea_flags = last;
831
832 return new;
833}
834
835static void ea_set_remove_stuffed(struct gfs2_inode *ip,
836 struct gfs2_ea_location *el)
837{
838 struct gfs2_ea_header *ea = el->el_ea;
839 struct gfs2_ea_header *prev = el->el_prev;
840 uint32_t len;
841
842 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
843
844 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
845 ea->ea_type = GFS2_EATYPE_UNUSED;
846 return;
847 } else if (GFS2_EA2NEXT(prev) != ea) {
848 prev = GFS2_EA2NEXT(prev);
849 gfs2_assert_withdraw(ip->i_sbd, GFS2_EA2NEXT(prev) == ea);
850 }
851
852 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
853 prev->ea_rec_len = cpu_to_be32(len);
854
855 if (GFS2_EA_IS_LAST(ea))
856 prev->ea_flags |= GFS2_EAFLAG_LAST;
857}
858
/* State shared between ea_set_i(), ea_set_simple() and its helpers. */
struct ea_set {
	/* non-zero: carve the new EA out of the slack behind an existing one */
	int ea_split;

	/* the request being stored */
	struct gfs2_ea_request *es_er;
	/* location of the EA being replaced, or NULL when creating */
	struct gfs2_ea_location *es_el;

	/* buffer and header chosen by ea_set_simple() for the alloc path */
	struct buffer_head *es_bh;
	struct gfs2_ea_header *es_ea;
};
868
869static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
870 struct gfs2_ea_header *ea, struct ea_set *es)
871{
872 struct gfs2_ea_request *er = es->es_er;
873 struct buffer_head *dibh;
874 int error;
875
876 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + 2 * RES_EATTR, 0);
877 if (error)
878 return error;
879
880 gfs2_trans_add_bh(ip->i_gl, bh, 1);
881
882 if (es->ea_split)
883 ea = ea_split_ea(ea);
884
885 ea_write(ip, ea, er);
886
887 if (es->es_el)
888 ea_set_remove_stuffed(ip, es->es_el);
889
890 error = gfs2_meta_inode_buffer(ip, &dibh);
891 if (error)
892 goto out;
893
894 if (er->er_flags & GFS2_ERF_MODE) {
895 gfs2_assert_withdraw(ip->i_sbd,
896 (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
897 ip->i_di.di_mode = er->er_mode;
898 }
899 ip->i_di.di_ctime = get_seconds();
900 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
901 gfs2_dinode_out(&ip->i_di, dibh->b_data);
902 brelse(dibh);
903 out:
904 gfs2_trans_end(ip->i_sbd);
905
906 return error;
907}
908
909static int ea_set_simple_alloc(struct gfs2_inode *ip,
910 struct gfs2_ea_request *er, void *private)
911{
912 struct ea_set *es = private;
913 struct gfs2_ea_header *ea = es->es_ea;
914 int error;
915
916 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);
917
918 if (es->ea_split)
919 ea = ea_split_ea(ea);
920
921 error = ea_write(ip, ea, er);
922 if (error)
923 return error;
924
925 if (es->es_el)
926 ea_set_remove_stuffed(ip, es->es_el);
927
928 return 0;
929}
930
931static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
932 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
933 void *private)
934{
935 struct ea_set *es = private;
936 unsigned int size;
937 int stuffed;
938 int error;
939
940 stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size);
941
942 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
943 if (GFS2_EA_REC_LEN(ea) < size)
944 return 0;
945 if (!GFS2_EA_IS_STUFFED(ea)) {
946 error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
947 if (error)
948 return error;
949 }
950 es->ea_split = 0;
951 } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
952 es->ea_split = 1;
953 else
954 return 0;
955
956 if (stuffed) {
957 error = ea_set_simple_noalloc(ip, bh, ea, es);
958 if (error)
959 return error;
960 } else {
961 unsigned int blks;
962
963 es->es_bh = bh;
964 es->es_ea = ea;
965 blks = 2 + DIV_RU(es->es_er->er_data_len, ip->i_sbd->sd_jbsize);
966
967 error = ea_alloc_skeleton(ip, es->es_er, blks,
968 ea_set_simple_alloc, es);
969 if (error)
970 return error;
971 }
972
973 return 1;
974}
975
976static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
977 void *private)
978{
979 struct gfs2_sbd *sdp = ip->i_sbd;
980 struct buffer_head *indbh, *newbh;
981 uint64_t *eablk;
982 int error;
983 int mh_size = sizeof(struct gfs2_meta_header);
984
985 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
986 uint64_t *end;
987
988 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
989 DIO_START | DIO_WAIT, &indbh);
990 if (error)
991 return error;
992
993 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
994 error = -EIO;
995 goto out;
996 }
997
998 eablk = (uint64_t *)(indbh->b_data + mh_size);
999 end = eablk + sdp->sd_inptrs;
1000
1001 for (; eablk < end; eablk++)
1002 if (!*eablk)
1003 break;
1004
1005 if (eablk == end) {
1006 error = -ENOSPC;
1007 goto out;
1008 }
1009
1010 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1011 } else {
1012 uint64_t blk;
1013
1014 blk = gfs2_alloc_meta(ip);
1015
1016 indbh = gfs2_meta_new(ip->i_gl, blk);
1017 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1018 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1019 gfs2_buffer_clear_tail(indbh, mh_size);
1020
1021 eablk = (uint64_t *)(indbh->b_data + mh_size);
1022 *eablk = cpu_to_be64(ip->i_di.di_eattr);
1023 ip->i_di.di_eattr = blk;
1024 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
1025 ip->i_di.di_blocks++;
1026
1027 eablk++;
1028 }
1029
1030 error = ea_alloc_blk(ip, &newbh);
1031 if (error)
1032 goto out;
1033
1034 *eablk = cpu_to_be64((uint64_t)newbh->b_blocknr);
1035 error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
1036 brelse(newbh);
1037 if (error)
1038 goto out;
1039
1040 if (private)
1041 ea_set_remove_stuffed(ip, (struct gfs2_ea_location *)private);
1042
1043 out:
1044 brelse(indbh);
1045
1046 return error;
1047}
1048
1049static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1050 struct gfs2_ea_location *el)
1051{
1052 struct ea_set es;
1053 unsigned int blks = 2;
1054 int error;
1055
1056 memset(&es, 0, sizeof(struct ea_set));
1057 es.es_er = er;
1058 es.es_el = el;
1059
1060 error = ea_foreach(ip, ea_set_simple, &es);
1061 if (error > 0)
1062 return 0;
1063 if (error)
1064 return error;
1065
1066 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
1067 blks++;
1068 if (GFS2_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize)
1069 blks += DIV_RU(er->er_data_len, ip->i_sbd->sd_jbsize);
1070
1071 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
1072}
1073
1074static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1075 struct gfs2_ea_location *el)
1076{
1077 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1078 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1079 gfs2_assert_withdraw(ip->i_sbd,
1080 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1081 }
1082
1083 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
1084}
1085
1086int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1087{
1088 struct gfs2_ea_location el;
1089 int error;
1090
1091 if (!ip->i_di.di_eattr) {
1092 if (er->er_flags & XATTR_REPLACE)
1093 return -ENODATA;
1094 return ea_init(ip, er);
1095 }
1096
1097 error = gfs2_ea_find(ip, er, &el);
1098 if (error)
1099 return error;
1100
1101 if (el.el_ea) {
1102 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
1103 brelse(el.el_bh);
1104 return -EPERM;
1105 }
1106
1107 error = -EEXIST;
1108 if (!(er->er_flags & XATTR_CREATE)) {
1109 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1110 error = ea_set_i(ip, er, &el);
1111 if (!error && unstuffed)
1112 ea_set_remove_unstuffed(ip, &el);
1113 }
1114
1115 brelse(el.el_bh);
1116 } else {
1117 error = -ENODATA;
1118 if (!(er->er_flags & XATTR_REPLACE))
1119 error = ea_set_i(ip, er, NULL);
1120 }
1121
1122 return error;
1123}
1124
1125int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1126{
1127 struct gfs2_holder i_gh;
1128 int error;
1129
1130 if (!er->er_name_len ||
1131 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1132 return -EINVAL;
1133 if (!er->er_data || !er->er_data_len) {
1134 er->er_data = NULL;
1135 er->er_data_len = 0;
1136 }
1137 error = ea_check_size(ip->i_sbd, er);
1138 if (error)
1139 return error;
1140
1141 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1142 if (error)
1143 return error;
1144
1145 if (IS_IMMUTABLE(ip->i_vnode))
1146 error = -EPERM;
1147 else
1148 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1149
1150 gfs2_glock_dq_uninit(&i_gh);
1151
1152 return error;
1153}
1154
1155static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1156{
1157 struct gfs2_ea_header *ea = el->el_ea;
1158 struct gfs2_ea_header *prev = el->el_prev;
1159 struct buffer_head *dibh;
1160 int error;
1161
1162 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
1163 if (error)
1164 return error;
1165
1166 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1167
1168 if (prev) {
1169 uint32_t len;
1170
1171 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
1172 prev->ea_rec_len = cpu_to_be32(len);
1173
1174 if (GFS2_EA_IS_LAST(ea))
1175 prev->ea_flags |= GFS2_EAFLAG_LAST;
1176 } else
1177 ea->ea_type = GFS2_EATYPE_UNUSED;
1178
1179 error = gfs2_meta_inode_buffer(ip, &dibh);
1180 if (!error) {
1181 ip->i_di.di_ctime = get_seconds();
1182 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1183 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1184 brelse(dibh);
1185 }
1186
1187 gfs2_trans_end(ip->i_sbd);
1188
1189 return error;
1190}
1191
1192int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1193{
1194 struct gfs2_ea_location el;
1195 int error;
1196
1197 if (!ip->i_di.di_eattr)
1198 return -ENODATA;
1199
1200 error = gfs2_ea_find(ip, er, &el);
1201 if (error)
1202 return error;
1203 if (!el.el_ea)
1204 return -ENODATA;
1205
1206 if (GFS2_EA_IS_STUFFED(el.el_ea))
1207 error = ea_remove_stuffed(ip, &el);
1208 else
1209 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
1210 0);
1211
1212 brelse(el.el_bh);
1213
1214 return error;
1215}
1216
1217/**
1218 * gfs2_ea_remove - sets (or creates or replaces) an extended attribute
1219 * @ip: pointer to the inode of the target file
1220 * @er: request information
1221 *
1222 * Returns: errno
1223 */
1224
1225int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1226{
1227 struct gfs2_holder i_gh;
1228 int error;
1229
1230 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1231 return -EINVAL;
1232
1233 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1234 if (error)
1235 return error;
1236
1237 if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1238 error = -EPERM;
1239 else
1240 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
1241
1242 gfs2_glock_dq_uninit(&i_gh);
1243
1244 return error;
1245}
1246
1247static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1248 struct gfs2_ea_header *ea, char *data)
1249{
1250 struct gfs2_sbd *sdp = ip->i_sbd;
1251 struct buffer_head **bh;
1252 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1253 unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
1254 uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
1255 unsigned int x;
1256 int error;
1257
1258 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
1259 if (!bh)
1260 return -ENOMEM;
1261
1262 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1263 if (error)
1264 goto out;
1265
1266 for (x = 0; x < nptrs; x++) {
1267 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
1268 DIO_START, bh + x);
1269 if (error) {
1270 while (x--)
1271 brelse(bh[x]);
1272 goto fail;
1273 }
1274 dataptrs++;
1275 }
1276
1277 for (x = 0; x < nptrs; x++) {
1278 error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
1279 if (error) {
1280 for (; x < nptrs; x++)
1281 brelse(bh[x]);
1282 goto fail;
1283 }
1284 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1285 for (; x < nptrs; x++)
1286 brelse(bh[x]);
1287 error = -EIO;
1288 goto fail;
1289 }
1290
1291 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
1292
1293 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header),
1294 data,
1295 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1296
1297 amount -= sdp->sd_jbsize;
1298 data += sdp->sd_jbsize;
1299
1300 brelse(bh[x]);
1301 }
1302
1303 out:
1304 kfree(bh);
1305
1306 return error;
1307
1308 fail:
1309 gfs2_trans_end(sdp);
1310 kfree(bh);
1311
1312 return error;
1313}
1314
1315int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
1316 struct iattr *attr, char *data)
1317{
1318 struct buffer_head *dibh;
1319 int error;
1320
1321 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
1322 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
1323 if (error)
1324 return error;
1325
1326 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1327 memcpy(GFS2_EA2DATA(el->el_ea),
1328 data,
1329 GFS2_EA_DATA_LEN(el->el_ea));
1330 } else
1331 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
1332
1333 if (error)
1334 return error;
1335
1336 error = gfs2_meta_inode_buffer(ip, &dibh);
1337 if (!error) {
1338 error = inode_setattr(ip->i_vnode, attr);
1339 gfs2_assert_warn(ip->i_sbd, !error);
1340 gfs2_inode_attr_out(ip);
1341 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1342 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1343 brelse(dibh);
1344 }
1345
1346 gfs2_trans_end(ip->i_sbd);
1347
1348 return error;
1349}
1350
1351static int ea_dealloc_indirect(struct gfs2_inode *ip)
1352{
1353 struct gfs2_sbd *sdp = ip->i_sbd;
1354 struct gfs2_rgrp_list rlist;
1355 struct buffer_head *indbh, *dibh;
1356 uint64_t *eablk, *end;
1357 unsigned int rg_blocks = 0;
1358 uint64_t bstart = 0;
1359 unsigned int blen = 0;
1360 unsigned int blks = 0;
1361 unsigned int x;
1362 int error;
1363
1364 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1365
1366 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
1367 DIO_START | DIO_WAIT, &indbh);
1368 if (error)
1369 return error;
1370
1371 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
1372 error = -EIO;
1373 goto out;
1374 }
1375
1376 eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1377 end = eablk + sdp->sd_inptrs;
1378
1379 for (; eablk < end; eablk++) {
1380 uint64_t bn;
1381
1382 if (!*eablk)
1383 break;
1384 bn = be64_to_cpu(*eablk);
1385
1386 if (bstart + blen == bn)
1387 blen++;
1388 else {
1389 if (bstart)
1390 gfs2_rlist_add(sdp, &rlist, bstart);
1391 bstart = bn;
1392 blen = 1;
1393 }
1394 blks++;
1395 }
1396 if (bstart)
1397 gfs2_rlist_add(sdp, &rlist, bstart);
1398 else
1399 goto out;
1400
1401 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1402
1403 for (x = 0; x < rlist.rl_rgrps; x++) {
1404 struct gfs2_rgrpd *rgd;
1405 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
1406 rg_blocks += rgd->rd_ri.ri_length;
1407 }
1408
1409 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1410 if (error)
1411 goto out_rlist_free;
1412
1413 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
1414 RES_INDIRECT + RES_STATFS +
1415 RES_QUOTA, blks);
1416 if (error)
1417 goto out_gunlock;
1418
1419 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1420
1421 eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1422 bstart = 0;
1423 blen = 0;
1424
1425 for (; eablk < end; eablk++) {
1426 uint64_t bn;
1427
1428 if (!*eablk)
1429 break;
1430 bn = be64_to_cpu(*eablk);
1431
1432 if (bstart + blen == bn)
1433 blen++;
1434 else {
1435 if (bstart)
1436 gfs2_free_meta(ip, bstart, blen);
1437 bstart = bn;
1438 blen = 1;
1439 }
1440
1441 *eablk = 0;
1442 if (!ip->i_di.di_blocks)
1443 gfs2_consist_inode(ip);
1444 ip->i_di.di_blocks--;
1445 }
1446 if (bstart)
1447 gfs2_free_meta(ip, bstart, blen);
1448
1449 ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
1450
1451 error = gfs2_meta_inode_buffer(ip, &dibh);
1452 if (!error) {
1453 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1454 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1455 brelse(dibh);
1456 }
1457
1458 gfs2_trans_end(sdp);
1459
1460 out_gunlock:
1461 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1462
1463 out_rlist_free:
1464 gfs2_rlist_free(&rlist);
1465
1466 out:
1467 brelse(indbh);
1468
1469 return error;
1470}
1471
1472static int ea_dealloc_block(struct gfs2_inode *ip)
1473{
1474 struct gfs2_sbd *sdp = ip->i_sbd;
1475 struct gfs2_alloc *al = &ip->i_alloc;
1476 struct gfs2_rgrpd *rgd;
1477 struct buffer_head *dibh;
1478 int error;
1479
1480 rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
1481 if (!rgd) {
1482 gfs2_consist_inode(ip);
1483 return -EIO;
1484 }
1485
1486 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1487 &al->al_rgd_gh);
1488 if (error)
1489 return error;
1490
1491 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE +
1492 RES_STATFS + RES_QUOTA, 1);
1493 if (error)
1494 goto out_gunlock;
1495
1496 gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
1497
1498 ip->i_di.di_eattr = 0;
1499 if (!ip->i_di.di_blocks)
1500 gfs2_consist_inode(ip);
1501 ip->i_di.di_blocks--;
1502
1503 error = gfs2_meta_inode_buffer(ip, &dibh);
1504 if (!error) {
1505 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1506 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1507 brelse(dibh);
1508 }
1509
1510 gfs2_trans_end(sdp);
1511
1512 out_gunlock:
1513 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1514
1515 return error;
1516}
1517
1518/**
1519 * gfs2_ea_dealloc - deallocate the extended attribute fork
1520 * @ip: the inode
1521 *
1522 * Returns: errno
1523 */
1524
1525int gfs2_ea_dealloc(struct gfs2_inode *ip)
1526{
1527 struct gfs2_alloc *al;
1528 int error;
1529
1530 al = gfs2_alloc_get(ip);
1531
1532 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1533 if (error)
1534 goto out_alloc;
1535
1536 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
1537 if (error)
1538 goto out_quota;
1539
1540 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
1541 if (error)
1542 goto out_rindex;
1543
1544 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
1545 error = ea_dealloc_indirect(ip);
1546 if (error)
1547 goto out_rindex;
1548 }
1549
1550 error = ea_dealloc_block(ip);
1551
1552 out_rindex:
1553 gfs2_glock_dq_uninit(&al->al_ri_gh);
1554
1555 out_quota:
1556 gfs2_quota_unhold(ip);
1557
1558 out_alloc:
1559 gfs2_alloc_put(ip);
1560
1561 return error;
1562}
1563
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
new file mode 100644
index 000000000000..e5a42abf68a3
--- /dev/null
+++ b/fs/gfs2/eattr.h
@@ -0,0 +1,88 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EATTR_DOT_H__
11#define __EATTR_DOT_H__
12
/* Header length fields are converted from big-endian before use. */
#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)

/* An EA with no block pointers is "stuffed". */
#define GFS2_EA_IS_STUFFED(ea) ((ea)->ea_num_ptrs == 0)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)

/* Header + name + (inline data or block pointers), rounded up to 8. */
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
      (GFS2_EA_IS_STUFFED(ea) ? GFS2_EA_DATA_LEN(ea) : \
				(sizeof(uint64_t) * (ea)->ea_num_ptrs)), 8)

/* Name length plus a 5- or 7-byte prefix and one extra byte; presumably
   the prefixes are "user." / "system." and the extra byte a NUL. */
#define GFS2_EA_STRLEN(ea) \
((ea)->ea_name_len + 1 + (((ea)->ea_type == GFS2_EATYPE_USR) ? 5 : 7))

/* Sizes a request would occupy in stuffed / unstuffed form. */
#define GFS2_EAREQ_SIZE_STUFFED(er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)

#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
      sizeof(uint64_t) * DIV_RU((er)->er_data_len, (sdp)->sd_jbsize), 8)

/* The name starts right after the header; inline data follows the name. */
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)

/* Block pointers start after the name, padded to an 8-byte boundary. */
#define GFS2_EA2DATAPTRS(ea) \
((uint64_t *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))

/* The next record lies ea_rec_len bytes past the current one. */
#define GFS2_EA2NEXT(ea) \
((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))

/* The first record in a buffer follows the metadata header. */
#define GFS2_EA_BH2FIRST(bh) \
((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))

#define GFS2_ERF_MODE 0x80000000
47
48struct gfs2_ea_request {
49 char *er_name;
50 char *er_data;
51 unsigned int er_name_len;
52 unsigned int er_data_len;
53 unsigned int er_type; /* GFS2_EATYPE_... */
54 int er_flags;
55 mode_t er_mode;
56};
57
58struct gfs2_ea_location {
59 struct buffer_head *el_bh;
60 struct gfs2_ea_header *el_ea;
61 struct gfs2_ea_header *el_prev;
62};
63
int gfs2_ea_repack(struct gfs2_inode *ip);

/* Variants with an _i suffix */
int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);

int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);

int gfs2_ea_dealloc(struct gfs2_inode *ip);

/* Exported to acl.c */

int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
		 struct gfs2_ea_location *el);
int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		     char *data);
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		      struct iattr *attr, char *data);
87
88#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/format.h b/fs/gfs2/format.h
new file mode 100644
index 000000000000..c7bf32ce3eca
--- /dev/null
+++ b/fs/gfs2/format.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __FORMAT_DOT_H__
11#define __FORMAT_DOT_H__
12
/* Tables of older format numbers. Each currently holds a single 0 entry;
   presumably 0 terminates the list - confirm against the users. */
static const uint32_t gfs2_old_fs_formats[] = { 0 };

static const uint32_t gfs2_old_multihost_formats[] = { 0 };
20
21#endif /* __FORMAT_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
new file mode 100644
index 000000000000..6c53d080675c
--- /dev/null
+++ b/fs/gfs2/gfs2.h
@@ -0,0 +1,60 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GFS2_DOT_H__
11#define __GFS2_DOT_H__
12
13#include <linux/gfs2_ondisk.h>
14
15#include "lm_interface.h"
16#include "lvb.h"
17#include "incore.h"
18#include "util.h"
19
/* Named 0/1 values for create, wait and force arguments. */
enum { NO_CREATE = 0, CREATE = 1 };

enum { NO_WAIT = 0, WAIT = 1 };

enum { NO_FORCE = 0, FORCE = 1 };
34
/* Divide num by den. Round up if there is a remainder. */
#define DIV_RU(num, den) (((num) + (den) - 1) / (den))

#define GFS2_FAST_NAME_SIZE 8

/*
 * Accessors for the GFS2 private pointers stored in generic kernel
 * objects. Each get_ macro casts the stored pointer to its GFS2 type and
 * each set_ macro stores it. The set_ expansions are wrapped in
 * parentheses so they stay a single assignment expression when used
 * inside a larger expression.
 */
#define get_v2sdp(sb) ((struct gfs2_sbd *)(sb)->s_fs_info)
#define set_v2sdp(sb, sdp) ((sb)->s_fs_info = (sdp))
#define get_v2ip(inode) ((struct gfs2_inode *)(inode)->u.generic_ip)
#define set_v2ip(inode, ip) ((inode)->u.generic_ip = (ip))
#define get_v2fp(file) ((struct gfs2_file *)(file)->private_data)
#define set_v2fp(file, fp) ((file)->private_data = (fp))
#define get_v2bd(bh) ((struct gfs2_bufdata *)(bh)->b_private)
#define set_v2bd(bh, bd) ((bh)->b_private = (bd))

/* The transaction of the current task is kept in current->journal_info. */
#define get_transaction ((struct gfs2_trans *)(current->journal_info))
#define set_transaction(tr) ((current->journal_info) = (tr))

/* gl_object is viewed as an inode, a resource group or another glock. */
#define get_gl2ip(gl) ((struct gfs2_inode *)(gl)->gl_object)
#define set_gl2ip(gl, ip) ((gl)->gl_object = (ip))
#define get_gl2rgd(gl) ((struct gfs2_rgrpd *)(gl)->gl_object)
#define set_gl2rgd(gl, rgd) ((gl)->gl_object = (rgd))
#define get_gl2gl(gl) ((struct gfs2_glock *)(gl)->gl_object)
#define set_gl2gl(gl, gl2) ((gl)->gl_object = (gl2))
58
59#endif /* __GFS2_DOT_H__ */
60
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
new file mode 100644
index 000000000000..f30fde91d14a
--- /dev/null
+++ b/fs/gfs2/glock.c
@@ -0,0 +1,2492 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/sort.h>
17#include <linux/jhash.h>
18#include <linux/kref.h>
19#include <asm/semaphore.h>
20#include <asm/uaccess.h>
21
22#include "gfs2.h"
23#include "glock.h"
24#include "glops.h"
25#include "inode.h"
26#include "lm.h"
27#include "lops.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "super.h"
31
32/* Must be kept in sync with the beginning of struct gfs2_glock */
33struct glock_plug {
34 struct list_head gl_list;
35 unsigned long gl_flags;
36};
37
38struct greedy {
39 struct gfs2_holder gr_gh;
40 struct work_struct gr_work;
41};
42
43typedef void (*glock_examiner) (struct gfs2_glock * gl);
44
45/**
46 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
47 * @actual: the current state of the lock
48 * @requested: the lock state that was requested by the caller
49 * @flags: the modifier flags passed in by the caller
50 *
51 * Returns: 1 if the locks are compatible, 0 otherwise
52 */
53
54static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
55 int flags)
56{
57 if (actual == requested)
58 return 1;
59
60 if (flags & GL_EXACT)
61 return 0;
62
63 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
64 return 1;
65
66 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
67 return 1;
68
69 return 0;
70}
71
72/**
73 * gl_hash() - Turn glock number into hash bucket number
74 * @lock: The glock number
75 *
76 * Returns: The number of the corresponding hash bucket
77 */
78
79static unsigned int gl_hash(struct lm_lockname *name)
80{
81 unsigned int h;
82
83 h = jhash(&name->ln_number, sizeof(uint64_t), 0);
84 h = jhash(&name->ln_type, sizeof(unsigned int), h);
85 h &= GFS2_GL_HASH_MASK;
86
87 return h;
88}
89
90/**
91 * glock_free() - Perform a few checks and then release struct gfs2_glock
92 * @gl: The glock to release
93 *
94 * Also calls lock module to release its internal structure for this glock.
95 *
96 */
97
98static void glock_free(struct gfs2_glock *gl)
99{
100 struct gfs2_sbd *sdp = gl->gl_sbd;
101 struct inode *aspace = gl->gl_aspace;
102
103 gfs2_lm_put_lock(sdp, gl->gl_lock);
104
105 if (aspace)
106 gfs2_aspace_put(aspace);
107
108 kmem_cache_free(gfs2_glock_cachep, gl);
109}
110
111/**
112 * gfs2_glock_hold() - increment reference count on glock
113 * @gl: The glock to hold
114 *
115 */
116
117void gfs2_glock_hold(struct gfs2_glock *gl)
118{
119 kref_get(&gl->gl_ref);
120}
121
122/* All work is done after the return from kref_put() so we
123 can release the write_lock before the free. */
124
125static void kill_glock(struct kref *kref)
126{
127 struct gfs2_glock *gl = container_of(kref, struct gfs2_glock, gl_ref);
128 struct gfs2_sbd *sdp = gl->gl_sbd;
129
130 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
131 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
132 gfs2_assert(sdp, list_empty(&gl->gl_holders));
133 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
134 gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
135 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
136}
137
138/**
139 * gfs2_glock_put() - Decrement reference count on glock
140 * @gl: The glock to put
141 *
142 */
143
144int gfs2_glock_put(struct gfs2_glock *gl)
145{
146 struct gfs2_sbd *sdp = gl->gl_sbd;
147 struct gfs2_gl_hash_bucket *bucket = gl->gl_bucket;
148 int rv = 0;
149
150 mutex_lock(&sdp->sd_invalidate_inodes_mutex);
151
152 write_lock(&bucket->hb_lock);
153 if (kref_put(&gl->gl_ref, kill_glock)) {
154 list_del_init(&gl->gl_list);
155 write_unlock(&bucket->hb_lock);
156 glock_free(gl);
157 rv = 1;
158 goto out;
159 }
160 write_unlock(&bucket->hb_lock);
161 out:
162 mutex_unlock(&sdp->sd_invalidate_inodes_mutex);
163 return rv;
164}
165
166/**
167 * queue_empty - check to see if a glock's queue is empty
168 * @gl: the glock
169 * @head: the head of the queue to check
170 *
171 * This function protects the list in the event that a process already
172 * has a holder on the list and is adding a second holder for itself.
173 * The glmutex lock is what generally prevents processes from working
174 * on the same glock at once, but the special case of adding a second
175 * holder for yourself ("recursive" locking) doesn't involve locking
176 * glmutex, making the spin lock necessary.
177 *
178 * Returns: 1 if the queue is empty
179 */
180
181static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
182{
183 int empty;
184 spin_lock(&gl->gl_spin);
185 empty = list_empty(head);
186 spin_unlock(&gl->gl_spin);
187 return empty;
188}
189
190/**
191 * search_bucket() - Find struct gfs2_glock by lock number
192 * @bucket: the bucket to search
193 * @name: The lock name
194 *
195 * Returns: NULL, or the struct gfs2_glock with the requested number
196 */
197
198static struct gfs2_glock *search_bucket(struct gfs2_gl_hash_bucket *bucket,
199 struct lm_lockname *name)
200{
201 struct gfs2_glock *gl;
202
203 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
204 if (test_bit(GLF_PLUG, &gl->gl_flags))
205 continue;
206 if (!lm_name_equal(&gl->gl_name, name))
207 continue;
208
209 kref_get(&gl->gl_ref);
210
211 return gl;
212 }
213
214 return NULL;
215}
216
217/**
218 * gfs2_glock_find() - Find glock by lock number
219 * @sdp: The GFS2 superblock
220 * @name: The lock name
221 *
222 * Returns: NULL, or the struct gfs2_glock with the requested number
223 */
224
225struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
226 struct lm_lockname *name)
227{
228 struct gfs2_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
229 struct gfs2_glock *gl;
230
231 read_lock(&bucket->hb_lock);
232 gl = search_bucket(bucket, name);
233 read_unlock(&bucket->hb_lock);
234
235 return gl;
236}
237
238/**
239 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
240 * @sdp: The GFS2 superblock
241 * @number: the lock number
242 * @glops: The glock_operations to use
243 * @create: If 0, don't create the glock if it doesn't exist
244 * @glp: the glock is returned here
245 *
246 * This does not lock a glock, just finds/creates structures for one.
247 *
248 * Returns: errno
249 */
250
251int gfs2_glock_get(struct gfs2_sbd *sdp, uint64_t number,
252 struct gfs2_glock_operations *glops, int create,
253 struct gfs2_glock **glp)
254{
255 struct lm_lockname name;
256 struct gfs2_glock *gl, *tmp;
257 struct gfs2_gl_hash_bucket *bucket;
258 int error;
259
260 name.ln_number = number;
261 name.ln_type = glops->go_type;
262 bucket = &sdp->sd_gl_hash[gl_hash(&name)];
263
264 read_lock(&bucket->hb_lock);
265 gl = search_bucket(bucket, &name);
266 read_unlock(&bucket->hb_lock);
267
268 if (gl || !create) {
269 *glp = gl;
270 return 0;
271 }
272
273 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
274 if (!gl)
275 return -ENOMEM;
276
277 memset(gl, 0, sizeof(struct gfs2_glock));
278
279 INIT_LIST_HEAD(&gl->gl_list);
280 gl->gl_name = name;
281 kref_init(&gl->gl_ref);
282
283 spin_lock_init(&gl->gl_spin);
284
285 gl->gl_state = LM_ST_UNLOCKED;
286 INIT_LIST_HEAD(&gl->gl_holders);
287 INIT_LIST_HEAD(&gl->gl_waiters1);
288 INIT_LIST_HEAD(&gl->gl_waiters2);
289 INIT_LIST_HEAD(&gl->gl_waiters3);
290
291 gl->gl_ops = glops;
292
293 gl->gl_bucket = bucket;
294 INIT_LIST_HEAD(&gl->gl_reclaim);
295
296 gl->gl_sbd = sdp;
297
298 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
299 INIT_LIST_HEAD(&gl->gl_ail_list);
300
301 /* If this glock protects actual on-disk data or metadata blocks,
302 create a VFS inode to manage the pages/buffers holding them. */
303 if (glops == &gfs2_inode_glops ||
304 glops == &gfs2_rgrp_glops ||
305 glops == &gfs2_meta_glops) {
306 gl->gl_aspace = gfs2_aspace_get(sdp);
307 if (!gl->gl_aspace) {
308 error = -ENOMEM;
309 goto fail;
310 }
311 }
312
313 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
314 if (error)
315 goto fail_aspace;
316
317 write_lock(&bucket->hb_lock);
318 tmp = search_bucket(bucket, &name);
319 if (tmp) {
320 write_unlock(&bucket->hb_lock);
321 glock_free(gl);
322 gl = tmp;
323 } else {
324 list_add_tail(&gl->gl_list, &bucket->hb_list);
325 write_unlock(&bucket->hb_lock);
326 }
327
328 *glp = gl;
329
330 return 0;
331
332 fail_aspace:
333 if (gl->gl_aspace)
334 gfs2_aspace_put(gl->gl_aspace);
335
336 fail:
337 kmem_cache_free(gfs2_glock_cachep, gl);
338
339 return error;
340}
341
342/**
343 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
344 * @gl: the glock
345 * @state: the state we're requesting
346 * @flags: the modifier flags
347 * @gh: the holder structure
348 *
349 */
350
351void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
352 struct gfs2_holder *gh)
353{
354 INIT_LIST_HEAD(&gh->gh_list);
355 gh->gh_gl = gl;
356 gh->gh_owner = (flags & GL_NEVER_RECURSE) ? NULL : current;
357 gh->gh_state = state;
358 gh->gh_flags = flags;
359 gh->gh_error = 0;
360 gh->gh_iflags = 0;
361 init_completion(&gh->gh_wait);
362
363 if (gh->gh_state == LM_ST_EXCLUSIVE)
364 gh->gh_flags |= GL_LOCAL_EXCL;
365
366 gfs2_glock_hold(gl);
367}
368
369/**
370 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
371 * @state: the state we're requesting
372 * @flags: the modifier flags
373 * @gh: the holder structure
374 *
375 * Don't mess with the glock.
376 *
377 */
378
379void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh)
380{
381 gh->gh_state = state;
382 gh->gh_flags = flags;
383 if (gh->gh_state == LM_ST_EXCLUSIVE)
384 gh->gh_flags |= GL_LOCAL_EXCL;
385
386 gh->gh_iflags &= 1 << HIF_ALLOCED;
387}
388
389/**
390 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
391 * @gh: the holder structure
392 *
393 */
394
395void gfs2_holder_uninit(struct gfs2_holder *gh)
396{
397 gfs2_glock_put(gh->gh_gl);
398 gh->gh_gl = NULL;
399}
400
401/**
402 * gfs2_holder_get - get a struct gfs2_holder structure
403 * @gl: the glock
404 * @state: the state we're requesting
405 * @flags: the modifier flags
406 * @gfp_flags: __GFP_NOFAIL
407 *
408 * Figure out how big an impact this function has. Either:
409 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
410 * 2) Leave it like it is
411 *
412 * Returns: the holder structure, NULL on ENOMEM
413 */
414
415struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
416 int flags, gfp_t gfp_flags)
417{
418 struct gfs2_holder *gh;
419
420 gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
421 if (!gh)
422 return NULL;
423
424 gfs2_holder_init(gl, state, flags, gh);
425 set_bit(HIF_ALLOCED, &gh->gh_iflags);
426
427 return gh;
428}
429
/**
 * gfs2_holder_put - uninitialize and free a struct gfs2_holder
 * @gh: the holder structure
 *
 * Drops the holder's glock reference, then kfree()s the holder.
 *
 */

void gfs2_holder_put(struct gfs2_holder *gh)
{
	gfs2_holder_uninit(gh);

	kfree(gh);
}
441
442/**
443 * handle_recurse - put other holder structures (marked recursive)
444 * into the holders list
445 * @gh: the holder structure
446 *
447 */
448
449static void handle_recurse(struct gfs2_holder *gh)
450{
451 struct gfs2_glock *gl = gh->gh_gl;
452 struct gfs2_sbd *sdp = gl->gl_sbd;
453 struct gfs2_holder *tmp_gh, *safe;
454 int found = 0;
455
456 if (gfs2_assert_warn(sdp, gh->gh_owner))
457 return;
458
459 list_for_each_entry_safe(tmp_gh, safe, &gl->gl_waiters3, gh_list) {
460 if (tmp_gh->gh_owner != gh->gh_owner)
461 continue;
462
463 gfs2_assert_warn(sdp,
464 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));
465
466 list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
467 tmp_gh->gh_error = 0;
468 set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);
469
470 complete(&tmp_gh->gh_wait);
471
472 found = 1;
473 }
474
475 gfs2_assert_warn(sdp, found);
476}
477
478/**
479 * do_unrecurse - a recursive holder was just dropped of the waiters3 list
480 * @gh: the holder
481 *
482 * If there is only one other recursive holder, clear its HIF_RECURSE bit.
483 * If there is more than one, leave them alone.
484 *
485 */
486
487static void do_unrecurse(struct gfs2_holder *gh)
488{
489 struct gfs2_glock *gl = gh->gh_gl;
490 struct gfs2_sbd *sdp = gl->gl_sbd;
491 struct gfs2_holder *tmp_gh, *last_gh = NULL;
492 int found = 0;
493
494 if (gfs2_assert_warn(sdp, gh->gh_owner))
495 return;
496
497 list_for_each_entry(tmp_gh, &gl->gl_waiters3, gh_list) {
498 if (tmp_gh->gh_owner != gh->gh_owner)
499 continue;
500
501 gfs2_assert_warn(sdp,
502 test_bit(HIF_RECURSE, &tmp_gh->gh_iflags));
503
504 if (found)
505 return;
506
507 found = 1;
508 last_gh = tmp_gh;
509 }
510
511 if (!gfs2_assert_warn(sdp, found))
512 clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
513}
514
515/**
516 * rq_mutex - process a mutex request in the queue
517 * @gh: the glock holder
518 *
519 * Returns: 1 if the queue is blocked
520 */
521
522static int rq_mutex(struct gfs2_holder *gh)
523{
524 struct gfs2_glock *gl = gh->gh_gl;
525
526 list_del_init(&gh->gh_list);
527 /* gh->gh_error never examined. */
528 set_bit(GLF_LOCK, &gl->gl_flags);
529 complete(&gh->gh_wait);
530
531 return 1;
532}
533
534/**
535 * rq_promote - process a promote request in the queue
536 * @gh: the glock holder
537 *
538 * Acquire a new inter-node lock, or change a lock state to more restrictive.
539 *
540 * Returns: 1 if the queue is blocked
541 */
542
543static int rq_promote(struct gfs2_holder *gh)
544{
545 struct gfs2_glock *gl = gh->gh_gl;
546 struct gfs2_sbd *sdp = gl->gl_sbd;
547 struct gfs2_glock_operations *glops = gl->gl_ops;
548 int recurse;
549
550 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
551 if (list_empty(&gl->gl_holders)) {
552 gl->gl_req_gh = gh;
553 set_bit(GLF_LOCK, &gl->gl_flags);
554 spin_unlock(&gl->gl_spin);
555
556 if (atomic_read(&sdp->sd_reclaim_count) >
557 gfs2_tune_get(sdp, gt_reclaim_limit) &&
558 !(gh->gh_flags & LM_FLAG_PRIORITY)) {
559 gfs2_reclaim_glock(sdp);
560 gfs2_reclaim_glock(sdp);
561 }
562
563 glops->go_xmote_th(gl, gh->gh_state,
564 gh->gh_flags);
565
566 spin_lock(&gl->gl_spin);
567 }
568 return 1;
569 }
570
571 if (list_empty(&gl->gl_holders)) {
572 set_bit(HIF_FIRST, &gh->gh_iflags);
573 set_bit(GLF_LOCK, &gl->gl_flags);
574 recurse = 0;
575 } else {
576 struct gfs2_holder *next_gh;
577 if (gh->gh_flags & GL_LOCAL_EXCL)
578 return 1;
579 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
580 gh_list);
581 if (next_gh->gh_flags & GL_LOCAL_EXCL)
582 return 1;
583 recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
584 }
585
586 list_move_tail(&gh->gh_list, &gl->gl_holders);
587 gh->gh_error = 0;
588 set_bit(HIF_HOLDER, &gh->gh_iflags);
589
590 if (recurse)
591 handle_recurse(gh);
592
593 complete(&gh->gh_wait);
594
595 return 0;
596}
597
598/**
599 * rq_demote - process a demote request in the queue
600 * @gh: the glock holder
601 *
602 * Returns: 1 if the queue is blocked
603 */
604
605static int rq_demote(struct gfs2_holder *gh)
606{
607 struct gfs2_glock *gl = gh->gh_gl;
608 struct gfs2_glock_operations *glops = gl->gl_ops;
609
610 if (!list_empty(&gl->gl_holders))
611 return 1;
612
613 if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
614 list_del_init(&gh->gh_list);
615 gh->gh_error = 0;
616 spin_unlock(&gl->gl_spin);
617 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
618 gfs2_holder_put(gh);
619 else
620 complete(&gh->gh_wait);
621 spin_lock(&gl->gl_spin);
622 } else {
623 gl->gl_req_gh = gh;
624 set_bit(GLF_LOCK, &gl->gl_flags);
625 spin_unlock(&gl->gl_spin);
626
627 if (gh->gh_state == LM_ST_UNLOCKED ||
628 gl->gl_state != LM_ST_EXCLUSIVE)
629 glops->go_drop_th(gl);
630 else
631 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
632
633 spin_lock(&gl->gl_spin);
634 }
635
636 return 0;
637}
638
639/**
640 * rq_greedy - process a queued request to drop greedy status
641 * @gh: the glock holder
642 *
643 * Returns: 1 if the queue is blocked
644 */
645
646static int rq_greedy(struct gfs2_holder *gh)
647{
648 struct gfs2_glock *gl = gh->gh_gl;
649
650 list_del_init(&gh->gh_list);
651 /* gh->gh_error never examined. */
652 clear_bit(GLF_GREEDY, &gl->gl_flags);
653 spin_unlock(&gl->gl_spin);
654
655 gfs2_holder_uninit(gh);
656 kfree(container_of(gh, struct greedy, gr_gh));
657
658 spin_lock(&gl->gl_spin);
659
660 return 0;
661}
662
663/**
664 * run_queue - process holder structures on a glock
665 * @gl: the glock
666 *
667 */
668
669static void run_queue(struct gfs2_glock *gl)
670{
671 struct gfs2_holder *gh;
672 int blocked = 1;
673
674 for (;;) {
675 if (test_bit(GLF_LOCK, &gl->gl_flags))
676 break;
677
678 if (!list_empty(&gl->gl_waiters1)) {
679 gh = list_entry(gl->gl_waiters1.next,
680 struct gfs2_holder, gh_list);
681
682 if (test_bit(HIF_MUTEX, &gh->gh_iflags))
683 blocked = rq_mutex(gh);
684 else
685 gfs2_assert_warn(gl->gl_sbd, 0);
686
687 } else if (!list_empty(&gl->gl_waiters2) &&
688 !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
689 gh = list_entry(gl->gl_waiters2.next,
690 struct gfs2_holder, gh_list);
691
692 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
693 blocked = rq_demote(gh);
694 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
695 blocked = rq_greedy(gh);
696 else
697 gfs2_assert_warn(gl->gl_sbd, 0);
698
699 } else if (!list_empty(&gl->gl_waiters3)) {
700 gh = list_entry(gl->gl_waiters3.next,
701 struct gfs2_holder, gh_list);
702
703 if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
704 blocked = rq_promote(gh);
705 else
706 gfs2_assert_warn(gl->gl_sbd, 0);
707
708 } else
709 break;
710
711 if (blocked)
712 break;
713 }
714}
715
716/**
717 * gfs2_glmutex_lock - acquire a local lock on a glock
718 * @gl: the glock
719 *
720 * Gives caller exclusive access to manipulate a glock structure.
721 */
722
723void gfs2_glmutex_lock(struct gfs2_glock *gl)
724{
725 struct gfs2_holder gh;
726
727 gfs2_holder_init(gl, 0, 0, &gh);
728 set_bit(HIF_MUTEX, &gh.gh_iflags);
729
730 spin_lock(&gl->gl_spin);
731 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
732 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
733 else
734 complete(&gh.gh_wait);
735 spin_unlock(&gl->gl_spin);
736
737 wait_for_completion(&gh.gh_wait);
738 gfs2_holder_uninit(&gh);
739}
740
741/**
742 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
743 * @gl: the glock
744 *
745 * Returns: 1 if the glock is acquired
746 */
747
748int gfs2_glmutex_trylock(struct gfs2_glock *gl)
749{
750 int acquired = 1;
751
752 spin_lock(&gl->gl_spin);
753 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
754 acquired = 0;
755 spin_unlock(&gl->gl_spin);
756
757 return acquired;
758}
759
760/**
761 * gfs2_glmutex_unlock - release a local lock on a glock
762 * @gl: the glock
763 *
764 */
765
766void gfs2_glmutex_unlock(struct gfs2_glock *gl)
767{
768 spin_lock(&gl->gl_spin);
769 clear_bit(GLF_LOCK, &gl->gl_flags);
770 run_queue(gl);
771 spin_unlock(&gl->gl_spin);
772}
773
774/**
775 * handle_callback - add a demote request to a lock's queue
776 * @gl: the glock
777 * @state: the state the caller wants us to change to
778 *
779 */
780
781static void handle_callback(struct gfs2_glock *gl, unsigned int state)
782{
783 struct gfs2_holder *gh, *new_gh = NULL;
784
785 restart:
786 spin_lock(&gl->gl_spin);
787
788 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
789 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
790 gl->gl_req_gh != gh) {
791 if (gh->gh_state != state)
792 gh->gh_state = LM_ST_UNLOCKED;
793 goto out;
794 }
795 }
796
797 if (new_gh) {
798 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
799 new_gh = NULL;
800 } else {
801 spin_unlock(&gl->gl_spin);
802
803 new_gh = gfs2_holder_get(gl, state,
804 LM_FLAG_TRY | GL_NEVER_RECURSE,
805 GFP_KERNEL | __GFP_NOFAIL),
806 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
807 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
808
809 goto restart;
810 }
811
812 out:
813 spin_unlock(&gl->gl_spin);
814
815 if (new_gh)
816 gfs2_holder_put(new_gh);
817}
818
819/**
820 * state_change - record that the glock is now in a different state
821 * @gl: the glock
822 * @new_state the new state
823 *
824 */
825
826static void state_change(struct gfs2_glock *gl, unsigned int new_state)
827{
828 int held1, held2;
829
830 held1 = (gl->gl_state != LM_ST_UNLOCKED);
831 held2 = (new_state != LM_ST_UNLOCKED);
832
833 if (held1 != held2) {
834 if (held2)
835 gfs2_glock_hold(gl);
836 else
837 gfs2_glock_put(gl);
838 }
839
840 gl->gl_state = new_state;
841}
842
843/**
844 * xmote_bh - Called after the lock module is done acquiring a lock
845 * @gl: The glock in question
846 * @ret: the int returned from the lock module
847 *
848 */
849
850static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
851{
852 struct gfs2_sbd *sdp = gl->gl_sbd;
853 struct gfs2_glock_operations *glops = gl->gl_ops;
854 struct gfs2_holder *gh = gl->gl_req_gh;
855 int prev_state = gl->gl_state;
856 int op_done = 1;
857
858 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
859 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
860 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
861
862 state_change(gl, ret & LM_OUT_ST_MASK);
863
864 if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
865 if (glops->go_inval)
866 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
867 } else if (gl->gl_state == LM_ST_DEFERRED) {
868 /* We might not want to do this here.
869 Look at moving to the inode glops. */
870 if (glops->go_inval)
871 glops->go_inval(gl, DIO_DATA);
872 }
873
874 /* Deal with each possible exit condition */
875
876 if (!gh)
877 gl->gl_stamp = jiffies;
878
879 else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
880 spin_lock(&gl->gl_spin);
881 list_del_init(&gh->gh_list);
882 gh->gh_error = -EIO;
883 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
884 do_unrecurse(gh);
885 spin_unlock(&gl->gl_spin);
886
887 } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
888 spin_lock(&gl->gl_spin);
889 list_del_init(&gh->gh_list);
890 if (gl->gl_state == gh->gh_state ||
891 gl->gl_state == LM_ST_UNLOCKED)
892 gh->gh_error = 0;
893 else {
894 if (gfs2_assert_warn(sdp, gh->gh_flags &
895 (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
896 fs_warn(sdp, "ret = 0x%.8X\n", ret);
897 gh->gh_error = GLR_TRYFAILED;
898 }
899 spin_unlock(&gl->gl_spin);
900
901 if (ret & LM_OUT_CANCELED)
902 handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
903
904 } else if (ret & LM_OUT_CANCELED) {
905 spin_lock(&gl->gl_spin);
906 list_del_init(&gh->gh_list);
907 gh->gh_error = GLR_CANCELED;
908 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
909 do_unrecurse(gh);
910 spin_unlock(&gl->gl_spin);
911
912 } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
913 spin_lock(&gl->gl_spin);
914 list_move_tail(&gh->gh_list, &gl->gl_holders);
915 gh->gh_error = 0;
916 set_bit(HIF_HOLDER, &gh->gh_iflags);
917 spin_unlock(&gl->gl_spin);
918
919 set_bit(HIF_FIRST, &gh->gh_iflags);
920
921 op_done = 0;
922
923 } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
924 spin_lock(&gl->gl_spin);
925 list_del_init(&gh->gh_list);
926 gh->gh_error = GLR_TRYFAILED;
927 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
928 do_unrecurse(gh);
929 spin_unlock(&gl->gl_spin);
930
931 } else {
932 if (gfs2_assert_withdraw(sdp, 0) == -1)
933 fs_err(sdp, "ret = 0x%.8X\n", ret);
934 }
935
936 if (glops->go_xmote_bh)
937 glops->go_xmote_bh(gl);
938
939 if (op_done) {
940 spin_lock(&gl->gl_spin);
941 gl->gl_req_gh = NULL;
942 gl->gl_req_bh = NULL;
943 clear_bit(GLF_LOCK, &gl->gl_flags);
944 run_queue(gl);
945 spin_unlock(&gl->gl_spin);
946 }
947
948 gfs2_glock_put(gl);
949
950 if (gh) {
951 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
952 gfs2_holder_put(gh);
953 else
954 complete(&gh->gh_wait);
955 }
956}
957
958/**
959 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
960 * @gl: The glock in question
961 * @state: the requested state
962 * @flags: modifier flags to the lock call
963 *
964 */
965
966void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
967{
968 struct gfs2_sbd *sdp = gl->gl_sbd;
969 struct gfs2_glock_operations *glops = gl->gl_ops;
970 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
971 LM_FLAG_NOEXP | LM_FLAG_ANY |
972 LM_FLAG_PRIORITY);
973 unsigned int lck_ret;
974
975 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
976 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
977 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
978 gfs2_assert_warn(sdp, state != gl->gl_state);
979
980 if (gl->gl_state == LM_ST_EXCLUSIVE) {
981 if (glops->go_sync)
982 glops->go_sync(gl,
983 DIO_METADATA | DIO_DATA | DIO_RELEASE);
984 }
985
986 gfs2_glock_hold(gl);
987 gl->gl_req_bh = xmote_bh;
988
989 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state,
990 lck_flags);
991
992 if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
993 return;
994
995 if (lck_ret & LM_OUT_ASYNC)
996 gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
997 else
998 xmote_bh(gl, lck_ret);
999}
1000
1001/**
1002 * drop_bh - Called after a lock module unlock completes
1003 * @gl: the glock
1004 * @ret: the return status
1005 *
1006 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
1007 * Doesn't drop the reference on the glock the top half took out
1008 *
1009 */
1010
1011static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
1012{
1013 struct gfs2_sbd *sdp = gl->gl_sbd;
1014 struct gfs2_glock_operations *glops = gl->gl_ops;
1015 struct gfs2_holder *gh = gl->gl_req_gh;
1016
1017 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1018
1019 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1020 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1021 gfs2_assert_warn(sdp, !ret);
1022
1023 state_change(gl, LM_ST_UNLOCKED);
1024
1025 if (glops->go_inval)
1026 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
1027
1028 if (gh) {
1029 spin_lock(&gl->gl_spin);
1030 list_del_init(&gh->gh_list);
1031 gh->gh_error = 0;
1032 spin_unlock(&gl->gl_spin);
1033 }
1034
1035 if (glops->go_drop_bh)
1036 glops->go_drop_bh(gl);
1037
1038 spin_lock(&gl->gl_spin);
1039 gl->gl_req_gh = NULL;
1040 gl->gl_req_bh = NULL;
1041 clear_bit(GLF_LOCK, &gl->gl_flags);
1042 run_queue(gl);
1043 spin_unlock(&gl->gl_spin);
1044
1045 gfs2_glock_put(gl);
1046
1047 if (gh) {
1048 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
1049 gfs2_holder_put(gh);
1050 else
1051 complete(&gh->gh_wait);
1052 }
1053}
1054
1055/**
1056 * gfs2_glock_drop_th - call into the lock module to unlock a lock
1057 * @gl: the glock
1058 *
1059 */
1060
1061void gfs2_glock_drop_th(struct gfs2_glock *gl)
1062{
1063 struct gfs2_sbd *sdp = gl->gl_sbd;
1064 struct gfs2_glock_operations *glops = gl->gl_ops;
1065 unsigned int ret;
1066
1067 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1068 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1069 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1070
1071 if (gl->gl_state == LM_ST_EXCLUSIVE) {
1072 if (glops->go_sync)
1073 glops->go_sync(gl,
1074 DIO_METADATA | DIO_DATA | DIO_RELEASE);
1075 }
1076
1077 gfs2_glock_hold(gl);
1078 gl->gl_req_bh = drop_bh;
1079
1080 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
1081
1082 if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
1083 return;
1084
1085 if (!ret)
1086 drop_bh(gl, ret);
1087 else
1088 gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
1089}
1090
1091/**
1092 * do_cancels - cancel requests for locks stuck waiting on an expire flag
1093 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
1094 *
1095 * Don't cancel GL_NOCANCEL requests.
1096 */
1097
1098static void do_cancels(struct gfs2_holder *gh)
1099{
1100 struct gfs2_glock *gl = gh->gh_gl;
1101
1102 spin_lock(&gl->gl_spin);
1103
1104 while (gl->gl_req_gh != gh &&
1105 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1106 !list_empty(&gh->gh_list)) {
1107 if (gl->gl_req_bh &&
1108 !(gl->gl_req_gh &&
1109 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
1110 spin_unlock(&gl->gl_spin);
1111 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
1112 msleep(100);
1113 spin_lock(&gl->gl_spin);
1114 } else {
1115 spin_unlock(&gl->gl_spin);
1116 msleep(100);
1117 spin_lock(&gl->gl_spin);
1118 }
1119 }
1120
1121 spin_unlock(&gl->gl_spin);
1122}
1123
1124/**
1125 * glock_wait_internal - wait on a glock acquisition
1126 * @gh: the glock holder
1127 *
1128 * Returns: 0 on success
1129 */
1130
1131static int glock_wait_internal(struct gfs2_holder *gh)
1132{
1133 struct gfs2_glock *gl = gh->gh_gl;
1134 struct gfs2_sbd *sdp = gl->gl_sbd;
1135 struct gfs2_glock_operations *glops = gl->gl_ops;
1136
1137 if (test_bit(HIF_ABORTED, &gh->gh_iflags))
1138 return -EIO;
1139
1140 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1141 spin_lock(&gl->gl_spin);
1142 if (gl->gl_req_gh != gh &&
1143 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1144 !list_empty(&gh->gh_list)) {
1145 list_del_init(&gh->gh_list);
1146 gh->gh_error = GLR_TRYFAILED;
1147 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
1148 do_unrecurse(gh);
1149 run_queue(gl);
1150 spin_unlock(&gl->gl_spin);
1151 return gh->gh_error;
1152 }
1153 spin_unlock(&gl->gl_spin);
1154 }
1155
1156 if (gh->gh_flags & LM_FLAG_PRIORITY)
1157 do_cancels(gh);
1158
1159 wait_for_completion(&gh->gh_wait);
1160
1161 if (gh->gh_error)
1162 return gh->gh_error;
1163
1164 gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
1165 gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state,
1166 gh->gh_state,
1167 gh->gh_flags));
1168
1169 if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
1170 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1171
1172 if (glops->go_lock) {
1173 gh->gh_error = glops->go_lock(gh);
1174 if (gh->gh_error) {
1175 spin_lock(&gl->gl_spin);
1176 list_del_init(&gh->gh_list);
1177 if (test_and_clear_bit(HIF_RECURSE,
1178 &gh->gh_iflags))
1179 do_unrecurse(gh);
1180 spin_unlock(&gl->gl_spin);
1181 }
1182 }
1183
1184 spin_lock(&gl->gl_spin);
1185 gl->gl_req_gh = NULL;
1186 gl->gl_req_bh = NULL;
1187 clear_bit(GLF_LOCK, &gl->gl_flags);
1188 if (test_bit(HIF_RECURSE, &gh->gh_iflags))
1189 handle_recurse(gh);
1190 run_queue(gl);
1191 spin_unlock(&gl->gl_spin);
1192 }
1193
1194 return gh->gh_error;
1195}
1196
1197static inline struct gfs2_holder *
1198find_holder_by_owner(struct list_head *head, struct task_struct *owner)
1199{
1200 struct gfs2_holder *gh;
1201
1202 list_for_each_entry(gh, head, gh_list) {
1203 if (gh->gh_owner == owner)
1204 return gh;
1205 }
1206
1207 return NULL;
1208}
1209
1210/**
1211 * recurse_check -
1212 *
1213 * Make sure the new holder is compatible with the pre-existing one.
1214 *
1215 */
1216
1217static int recurse_check(struct gfs2_holder *existing, struct gfs2_holder *new,
1218 unsigned int state)
1219{
1220 struct gfs2_sbd *sdp = existing->gh_gl->gl_sbd;
1221
1222 if (gfs2_assert_warn(sdp, (new->gh_flags & LM_FLAG_ANY) ||
1223 !(existing->gh_flags & LM_FLAG_ANY)))
1224 goto fail;
1225
1226 if (gfs2_assert_warn(sdp, (existing->gh_flags & GL_LOCAL_EXCL) ||
1227 !(new->gh_flags & GL_LOCAL_EXCL)))
1228 goto fail;
1229
1230 if (gfs2_assert_warn(sdp, relaxed_state_ok(state, new->gh_state,
1231 new->gh_flags)))
1232 goto fail;
1233
1234 return 0;
1235
1236 fail:
1237 set_bit(HIF_ABORTED, &new->gh_iflags);
1238 return -EINVAL;
1239}
1240
1241/**
1242 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1243 * @gh: the holder structure to add
1244 *
1245 */
1246
1247static void add_to_queue(struct gfs2_holder *gh)
1248{
1249 struct gfs2_glock *gl = gh->gh_gl;
1250 struct gfs2_holder *existing;
1251
1252 if (!gh->gh_owner)
1253 goto out;
1254
1255 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1256 if (existing) {
1257 if (recurse_check(existing, gh, gl->gl_state))
1258 return;
1259
1260 list_add_tail(&gh->gh_list, &gl->gl_holders);
1261 set_bit(HIF_HOLDER, &gh->gh_iflags);
1262
1263 gh->gh_error = 0;
1264 complete(&gh->gh_wait);
1265
1266 return;
1267 }
1268
1269 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
1270 if (existing) {
1271 if (recurse_check(existing, gh, existing->gh_state))
1272 return;
1273
1274 set_bit(HIF_RECURSE, &gh->gh_iflags);
1275 set_bit(HIF_RECURSE, &existing->gh_iflags);
1276
1277 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1278
1279 return;
1280 }
1281
1282 out:
1283 if (gh->gh_flags & LM_FLAG_PRIORITY)
1284 list_add(&gh->gh_list, &gl->gl_waiters3);
1285 else
1286 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1287}
1288
1289/**
1290 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1291 * @gh: the holder structure
1292 *
1293 * if (gh->gh_flags & GL_ASYNC), this never returns an error
1294 *
1295 * Returns: 0, GLR_TRYFAILED, or errno on failure
1296 */
1297
1298int gfs2_glock_nq(struct gfs2_holder *gh)
1299{
1300 struct gfs2_glock *gl = gh->gh_gl;
1301 struct gfs2_sbd *sdp = gl->gl_sbd;
1302 int error = 0;
1303
1304 restart:
1305 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1306 set_bit(HIF_ABORTED, &gh->gh_iflags);
1307 return -EIO;
1308 }
1309
1310 set_bit(HIF_PROMOTE, &gh->gh_iflags);
1311
1312 spin_lock(&gl->gl_spin);
1313 add_to_queue(gh);
1314 run_queue(gl);
1315 spin_unlock(&gl->gl_spin);
1316
1317 if (!(gh->gh_flags & GL_ASYNC)) {
1318 error = glock_wait_internal(gh);
1319 if (error == GLR_CANCELED) {
1320 msleep(1000);
1321 goto restart;
1322 }
1323 }
1324
1325 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1326
1327 return error;
1328}
1329
1330/**
1331 * gfs2_glock_poll - poll to see if an async request has been completed
1332 * @gh: the holder
1333 *
1334 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1335 */
1336
1337int gfs2_glock_poll(struct gfs2_holder *gh)
1338{
1339 struct gfs2_glock *gl = gh->gh_gl;
1340 int ready = 0;
1341
1342 spin_lock(&gl->gl_spin);
1343
1344 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1345 ready = 1;
1346 else if (list_empty(&gh->gh_list)) {
1347 if (gh->gh_error == GLR_CANCELED) {
1348 spin_unlock(&gl->gl_spin);
1349 msleep(1000);
1350 if (gfs2_glock_nq(gh))
1351 return 1;
1352 return 0;
1353 } else
1354 ready = 1;
1355 }
1356
1357 spin_unlock(&gl->gl_spin);
1358
1359 return ready;
1360}
1361
1362/**
1363 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1364 * @gh: the holder structure
1365 *
1366 * Returns: 0, GLR_TRYFAILED, or errno on failure
1367 */
1368
1369int gfs2_glock_wait(struct gfs2_holder *gh)
1370{
1371 int error;
1372
1373 error = glock_wait_internal(gh);
1374 if (error == GLR_CANCELED) {
1375 msleep(1000);
1376 gh->gh_flags &= ~GL_ASYNC;
1377 error = gfs2_glock_nq(gh);
1378 }
1379
1380 return error;
1381}
1382
1383/**
1384 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1385 * @gh: the glock holder
1386 *
1387 */
1388
1389void gfs2_glock_dq(struct gfs2_holder *gh)
1390{
1391 struct gfs2_glock *gl = gh->gh_gl;
1392 struct gfs2_glock_operations *glops = gl->gl_ops;
1393
1394 if (gh->gh_flags & GL_SYNC)
1395 set_bit(GLF_SYNC, &gl->gl_flags);
1396
1397 if (gh->gh_flags & GL_NOCACHE)
1398 handle_callback(gl, LM_ST_UNLOCKED);
1399
1400 gfs2_glmutex_lock(gl);
1401
1402 spin_lock(&gl->gl_spin);
1403 list_del_init(&gh->gh_list);
1404
1405 if (list_empty(&gl->gl_holders)) {
1406 spin_unlock(&gl->gl_spin);
1407
1408 if (glops->go_unlock)
1409 glops->go_unlock(gh);
1410
1411 if (test_bit(GLF_SYNC, &gl->gl_flags)) {
1412 if (glops->go_sync)
1413 glops->go_sync(gl, DIO_METADATA | DIO_DATA);
1414 }
1415
1416 gl->gl_stamp = jiffies;
1417
1418 spin_lock(&gl->gl_spin);
1419 }
1420
1421 clear_bit(GLF_LOCK, &gl->gl_flags);
1422 run_queue(gl);
1423 spin_unlock(&gl->gl_spin);
1424}
1425
1426/**
1427 * gfs2_glock_prefetch - Try to prefetch a glock
1428 * @gl: the glock
1429 * @state: the state to prefetch in
1430 * @flags: flags passed to go_xmote_th()
1431 *
1432 */
1433
1434void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags)
1435{
1436 struct gfs2_glock_operations *glops = gl->gl_ops;
1437
1438 spin_lock(&gl->gl_spin);
1439
1440 if (test_bit(GLF_LOCK, &gl->gl_flags) ||
1441 !list_empty(&gl->gl_holders) ||
1442 !list_empty(&gl->gl_waiters1) ||
1443 !list_empty(&gl->gl_waiters2) ||
1444 !list_empty(&gl->gl_waiters3) ||
1445 relaxed_state_ok(gl->gl_state, state, flags)) {
1446 spin_unlock(&gl->gl_spin);
1447 return;
1448 }
1449
1450 set_bit(GLF_PREFETCH, &gl->gl_flags);
1451 set_bit(GLF_LOCK, &gl->gl_flags);
1452 spin_unlock(&gl->gl_spin);
1453
1454 glops->go_xmote_th(gl, state, flags);
1455}
1456
1457/**
1458 * gfs2_glock_force_drop - Force a glock to be uncached
1459 * @gl: the glock
1460 *
1461 */
1462
1463void gfs2_glock_force_drop(struct gfs2_glock *gl)
1464{
1465 struct gfs2_holder gh;
1466
1467 gfs2_holder_init(gl, LM_ST_UNLOCKED, GL_NEVER_RECURSE, &gh);
1468 set_bit(HIF_DEMOTE, &gh.gh_iflags);
1469
1470 spin_lock(&gl->gl_spin);
1471 list_add_tail(&gh.gh_list, &gl->gl_waiters2);
1472 run_queue(gl);
1473 spin_unlock(&gl->gl_spin);
1474
1475 wait_for_completion(&gh.gh_wait);
1476 gfs2_holder_uninit(&gh);
1477}
1478
1479static void greedy_work(void *data)
1480{
1481 struct greedy *gr = (struct greedy *)data;
1482 struct gfs2_holder *gh = &gr->gr_gh;
1483 struct gfs2_glock *gl = gh->gh_gl;
1484 struct gfs2_glock_operations *glops = gl->gl_ops;
1485
1486 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1487
1488 if (glops->go_greedy)
1489 glops->go_greedy(gl);
1490
1491 spin_lock(&gl->gl_spin);
1492
1493 if (list_empty(&gl->gl_waiters2)) {
1494 clear_bit(GLF_GREEDY, &gl->gl_flags);
1495 spin_unlock(&gl->gl_spin);
1496 gfs2_holder_uninit(gh);
1497 kfree(gr);
1498 } else {
1499 gfs2_glock_hold(gl);
1500 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1501 run_queue(gl);
1502 spin_unlock(&gl->gl_spin);
1503 gfs2_glock_put(gl);
1504 }
1505}
1506
1507/**
1508 * gfs2_glock_be_greedy -
1509 * @gl:
1510 * @time:
1511 *
1512 * Returns: 0 if go_greedy will be called, 1 otherwise
1513 */
1514
1515int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1516{
1517 struct greedy *gr;
1518 struct gfs2_holder *gh;
1519
1520 if (!time ||
1521 gl->gl_sbd->sd_args.ar_localcaching ||
1522 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1523 return 1;
1524
1525 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1526 if (!gr) {
1527 clear_bit(GLF_GREEDY, &gl->gl_flags);
1528 return 1;
1529 }
1530 gh = &gr->gr_gh;
1531
1532 gfs2_holder_init(gl, 0, GL_NEVER_RECURSE, gh);
1533 set_bit(HIF_GREEDY, &gh->gh_iflags);
1534 INIT_WORK(&gr->gr_work, greedy_work, gr);
1535
1536 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1537 schedule_delayed_work(&gr->gr_work, time);
1538
1539 return 0;
1540}
1541
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * On failure the holder is uninitialized again before returning.
 *
 * Returns: 0, GLR_*, or errno
 */

int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, int flags,
		       struct gfs2_holder *gh)
{
	int error;

	gfs2_holder_init(gl, state, flags, gh);
	error = gfs2_glock_nq(gh);

	if (!error)
		return 0;

	gfs2_holder_uninit(gh);
	return error;
}
1565
/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
 * @gh: the holder structure
 *
 * Shorthand for gfs2_glock_dq() followed by gfs2_holder_uninit().
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);

	gfs2_holder_uninit(gh);
}
1577
1578/**
1579 * gfs2_glock_nq_num - acquire a glock based on lock number
1580 * @sdp: the filesystem
1581 * @number: the lock number
1582 * @glops: the glock operations for the type of glock
1583 * @state: the state to acquire the glock in
1584 * @flags: modifier flags for the aquisition
1585 * @gh: the struct gfs2_holder
1586 *
1587 * Returns: errno
1588 */
1589
1590int gfs2_glock_nq_num(struct gfs2_sbd *sdp, uint64_t number,
1591 struct gfs2_glock_operations *glops, unsigned int state,
1592 int flags, struct gfs2_holder *gh)
1593{
1594 struct gfs2_glock *gl;
1595 int error;
1596
1597 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1598 if (!error) {
1599 error = gfs2_glock_nq_init(gl, state, flags, gh);
1600 gfs2_glock_put(gl);
1601 }
1602
1603 return error;
1604}
1605
1606/**
1607 * glock_compare - Compare two struct gfs2_glock structures for sorting
1608 * @arg_a: the first structure
1609 * @arg_b: the second structure
1610 *
1611 */
1612
1613static int glock_compare(const void *arg_a, const void *arg_b)
1614{
1615 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1616 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1617 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1618 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1619 int ret = 0;
1620
1621 if (a->ln_number > b->ln_number)
1622 ret = 1;
1623 else if (a->ln_number < b->ln_number)
1624 ret = -1;
1625 else {
1626 if (gh_a->gh_state == LM_ST_SHARED &&
1627 gh_b->gh_state == LM_ST_EXCLUSIVE)
1628 ret = 1;
1629 else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
1630 (gh_b->gh_flags & GL_LOCAL_EXCL))
1631 ret = 1;
1632 }
1633
1634 return ret;
1635}
1636
1637/**
1638 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
1639 * @num_gh: the number of structures
1640 * @ghs: an array of struct gfs2_holder structures
1641 *
1642 * Returns: 0 on success (all glocks acquired),
1643 * errno on failure (no glocks acquired)
1644 */
1645
1646static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1647 struct gfs2_holder **p)
1648{
1649 unsigned int x;
1650 int error = 0;
1651
1652 for (x = 0; x < num_gh; x++)
1653 p[x] = &ghs[x];
1654
1655 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1656
1657 for (x = 0; x < num_gh; x++) {
1658 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1659
1660 error = gfs2_glock_nq(p[x]);
1661 if (error) {
1662 while (x--)
1663 gfs2_glock_dq(p[x]);
1664 break;
1665 }
1666 }
1667
1668 return error;
1669}
1670
1671/**
1672 * gfs2_glock_nq_m - acquire multiple glocks
1673 * @num_gh: the number of structures
1674 * @ghs: an array of struct gfs2_holder structures
1675 *
1676 * Figure out how big an impact this function has. Either:
1677 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1678 * 2) Forget async stuff and just call nq_m_sync()
1679 * 3) Leave it like it is
1680 *
1681 * Returns: 0 on success (all glocks acquired),
1682 * errno on failure (no glocks acquired)
1683 */
1684
1685int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1686{
1687 int *e;
1688 unsigned int x;
1689 int borked = 0, serious = 0;
1690 int error = 0;
1691
1692 if (!num_gh)
1693 return 0;
1694
1695 if (num_gh == 1) {
1696 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1697 return gfs2_glock_nq(ghs);
1698 }
1699
1700 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1701 if (!e)
1702 return -ENOMEM;
1703
1704 for (x = 0; x < num_gh; x++) {
1705 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1706 error = gfs2_glock_nq(&ghs[x]);
1707 if (error) {
1708 borked = 1;
1709 serious = error;
1710 num_gh = x;
1711 break;
1712 }
1713 }
1714
1715 for (x = 0; x < num_gh; x++) {
1716 error = e[x] = glock_wait_internal(&ghs[x]);
1717 if (error) {
1718 borked = 1;
1719 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1720 serious = error;
1721 }
1722 }
1723
1724 if (!borked) {
1725 kfree(e);
1726 return 0;
1727 }
1728
1729 for (x = 0; x < num_gh; x++)
1730 if (!e[x])
1731 gfs2_glock_dq(&ghs[x]);
1732
1733 if (serious)
1734 error = serious;
1735 else {
1736 for (x = 0; x < num_gh; x++)
1737 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1738 &ghs[x]);
1739 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1740 }
1741
1742 kfree(e);
1743
1744 return error;
1745}
1746
1747/**
1748 * gfs2_glock_dq_m - release multiple glocks
1749 * @num_gh: the number of structures
1750 * @ghs: an array of struct gfs2_holder structures
1751 *
1752 */
1753
1754void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1755{
1756 unsigned int x;
1757
1758 for (x = 0; x < num_gh; x++)
1759 gfs2_glock_dq(&ghs[x]);
1760}
1761
1762/**
1763 * gfs2_glock_dq_uninit_m - release multiple glocks
1764 * @num_gh: the number of structures
1765 * @ghs: an array of struct gfs2_holder structures
1766 *
1767 */
1768
1769void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1770{
1771 unsigned int x;
1772
1773 for (x = 0; x < num_gh; x++)
1774 gfs2_glock_dq_uninit(&ghs[x]);
1775}
1776
1777/**
1778 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1779 * @sdp: the filesystem
1780 * @number: the lock number
1781 * @glops: the glock operations for the type of glock
1782 * @state: the state to acquire the glock in
1783 * @flags: modifier flags for the aquisition
1784 *
1785 * Returns: errno
1786 */
1787
1788void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
1789 struct gfs2_glock_operations *glops,
1790 unsigned int state, int flags)
1791{
1792 struct gfs2_glock *gl;
1793 int error;
1794
1795 if (atomic_read(&sdp->sd_reclaim_count) <
1796 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1797 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1798 if (!error) {
1799 gfs2_glock_prefetch(gl, state, flags);
1800 gfs2_glock_put(gl);
1801 }
1802 }
1803}
1804
1805/**
1806 * gfs2_lvb_hold - attach a LVB from a glock
1807 * @gl: The glock in question
1808 *
1809 */
1810
1811int gfs2_lvb_hold(struct gfs2_glock *gl)
1812{
1813 int error;
1814
1815 gfs2_glmutex_lock(gl);
1816
1817 if (!atomic_read(&gl->gl_lvb_count)) {
1818 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1819 if (error) {
1820 gfs2_glmutex_unlock(gl);
1821 return error;
1822 }
1823 gfs2_glock_hold(gl);
1824 }
1825 atomic_inc(&gl->gl_lvb_count);
1826
1827 gfs2_glmutex_unlock(gl);
1828
1829 return 0;
1830}
1831
1832/**
1833 * gfs2_lvb_unhold - detach a LVB from a glock
1834 * @gl: The glock in question
1835 *
1836 */
1837
1838void gfs2_lvb_unhold(struct gfs2_glock *gl)
1839{
1840 gfs2_glock_hold(gl);
1841 gfs2_glmutex_lock(gl);
1842
1843 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1844 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1845 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1846 gl->gl_lvb = NULL;
1847 gfs2_glock_put(gl);
1848 }
1849
1850 gfs2_glmutex_unlock(gl);
1851 gfs2_glock_put(gl);
1852}
1853
1854void gfs2_lvb_sync(struct gfs2_glock *gl)
1855{
1856 gfs2_glmutex_lock(gl);
1857
1858 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count));
1859 if (!gfs2_assert_warn(gl->gl_sbd, gfs2_glock_is_held_excl(gl)))
1860 gfs2_lm_sync_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1861
1862 gfs2_glmutex_unlock(gl);
1863}
1864
1865static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1866 unsigned int state)
1867{
1868 struct gfs2_glock *gl;
1869
1870 gl = gfs2_glock_find(sdp, name);
1871 if (!gl)
1872 return;
1873
1874 if (gl->gl_ops->go_callback)
1875 gl->gl_ops->go_callback(gl, state);
1876 handle_callback(gl, state);
1877
1878 spin_lock(&gl->gl_spin);
1879 run_queue(gl);
1880 spin_unlock(&gl->gl_spin);
1881
1882 gfs2_glock_put(gl);
1883}
1884
1885/**
1886 * gfs2_glock_cb - Callback used by locking module
1887 * @fsdata: Pointer to the superblock
1888 * @type: Type of callback
1889 * @data: Type dependent data pointer
1890 *
1891 * Called by the locking module when it wants to tell us something.
1892 * Either we need to drop a lock, one of our ASYNC requests completed, or
1893 * a journal from another client needs to be recovered.
1894 */
1895
1896void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data)
1897{
1898 struct gfs2_sbd *sdp = (struct gfs2_sbd *)fsdata;
1899
1900 switch (type) {
1901 case LM_CB_NEED_E:
1902 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_UNLOCKED);
1903 return;
1904
1905 case LM_CB_NEED_D:
1906 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_DEFERRED);
1907 return;
1908
1909 case LM_CB_NEED_S:
1910 blocking_cb(sdp, (struct lm_lockname *)data, LM_ST_SHARED);
1911 return;
1912
1913 case LM_CB_ASYNC: {
1914 struct lm_async_cb *async = (struct lm_async_cb *)data;
1915 struct gfs2_glock *gl;
1916
1917 gl = gfs2_glock_find(sdp, &async->lc_name);
1918 if (gfs2_assert_warn(sdp, gl))
1919 return;
1920 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1921 gl->gl_req_bh(gl, async->lc_ret);
1922 gfs2_glock_put(gl);
1923
1924 return;
1925 }
1926
1927 case LM_CB_NEED_RECOVERY:
1928 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1929 if (sdp->sd_recoverd_process)
1930 wake_up_process(sdp->sd_recoverd_process);
1931 return;
1932
1933 case LM_CB_DROPLOCKS:
1934 gfs2_gl_hash_clear(sdp, NO_WAIT);
1935 gfs2_quota_scan(sdp);
1936 return;
1937
1938 default:
1939 gfs2_assert_warn(sdp, 0);
1940 return;
1941 }
1942}
1943
1944/**
1945 * gfs2_try_toss_inode - try to remove a particular inode struct from cache
1946 * sdp: the filesystem
1947 * inum: the inode number
1948 *
1949 */
1950
1951void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum)
1952{
1953 struct gfs2_glock *gl;
1954 struct gfs2_inode *ip;
1955 int error;
1956
1957 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops,
1958 NO_CREATE, &gl);
1959 if (error || !gl)
1960 return;
1961
1962 if (!gfs2_glmutex_trylock(gl))
1963 goto out;
1964
1965 ip = get_gl2ip(gl);
1966 if (!ip)
1967 goto out_unlock;
1968
1969 if (atomic_read(&ip->i_count))
1970 goto out_unlock;
1971
1972 gfs2_inode_destroy(ip);
1973
1974 out_unlock:
1975 gfs2_glmutex_unlock(gl);
1976
1977 out:
1978 gfs2_glock_put(gl);
1979}
1980
1981/**
1982 * gfs2_iopen_go_callback - Try to kick the inode/vnode associated with an
1983 * iopen glock from memory
1984 * @io_gl: the iopen glock
1985 * @state: the state into which the glock should be put
1986 *
1987 */
1988
1989void gfs2_iopen_go_callback(struct gfs2_glock *io_gl, unsigned int state)
1990{
1991 struct gfs2_glock *i_gl;
1992
1993 if (state != LM_ST_UNLOCKED)
1994 return;
1995
1996 spin_lock(&io_gl->gl_spin);
1997 i_gl = get_gl2gl(io_gl);
1998 if (i_gl) {
1999 gfs2_glock_hold(i_gl);
2000 spin_unlock(&io_gl->gl_spin);
2001 } else {
2002 spin_unlock(&io_gl->gl_spin);
2003 return;
2004 }
2005
2006 if (gfs2_glmutex_trylock(i_gl)) {
2007 struct gfs2_inode *ip = get_gl2ip(i_gl);
2008 if (ip) {
2009 gfs2_try_toss_vnode(ip);
2010 gfs2_glmutex_unlock(i_gl);
2011 gfs2_glock_schedule_for_reclaim(i_gl);
2012 goto out;
2013 }
2014 gfs2_glmutex_unlock(i_gl);
2015 }
2016
2017 out:
2018 gfs2_glock_put(i_gl);
2019}
2020
2021/**
2022 * demote_ok - Check to see if it's ok to unlock a glock
2023 * @gl: the glock
2024 *
2025 * Returns: 1 if it's ok
2026 */
2027
2028static int demote_ok(struct gfs2_glock *gl)
2029{
2030 struct gfs2_sbd *sdp = gl->gl_sbd;
2031 struct gfs2_glock_operations *glops = gl->gl_ops;
2032 int demote = 1;
2033
2034 if (test_bit(GLF_STICKY, &gl->gl_flags))
2035 demote = 0;
2036 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
2037 demote = time_after_eq(jiffies,
2038 gl->gl_stamp +
2039 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
2040 else if (glops->go_demote_ok)
2041 demote = glops->go_demote_ok(gl);
2042
2043 return demote;
2044}
2045
2046/**
2047 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
2048 * @gl: the glock
2049 *
2050 */
2051
2052void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
2053{
2054 struct gfs2_sbd *sdp = gl->gl_sbd;
2055
2056 spin_lock(&sdp->sd_reclaim_lock);
2057 if (list_empty(&gl->gl_reclaim)) {
2058 gfs2_glock_hold(gl);
2059 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
2060 atomic_inc(&sdp->sd_reclaim_count);
2061 }
2062 spin_unlock(&sdp->sd_reclaim_lock);
2063
2064 wake_up(&sdp->sd_reclaim_wq);
2065}
2066
2067/**
2068 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
2069 * @sdp: the filesystem
2070 *
2071 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
2072 * different glock and we notice that there are a lot of glocks in the
2073 * reclaim list.
2074 *
2075 */
2076
2077void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
2078{
2079 struct gfs2_glock *gl;
2080
2081 spin_lock(&sdp->sd_reclaim_lock);
2082 if (list_empty(&sdp->sd_reclaim_list)) {
2083 spin_unlock(&sdp->sd_reclaim_lock);
2084 return;
2085 }
2086 gl = list_entry(sdp->sd_reclaim_list.next,
2087 struct gfs2_glock, gl_reclaim);
2088 list_del_init(&gl->gl_reclaim);
2089 spin_unlock(&sdp->sd_reclaim_lock);
2090
2091 atomic_dec(&sdp->sd_reclaim_count);
2092 atomic_inc(&sdp->sd_reclaimed);
2093
2094 if (gfs2_glmutex_trylock(gl)) {
2095 if (gl->gl_ops == &gfs2_inode_glops) {
2096 struct gfs2_inode *ip = get_gl2ip(gl);
2097 if (ip && !atomic_read(&ip->i_count))
2098 gfs2_inode_destroy(ip);
2099 }
2100 if (queue_empty(gl, &gl->gl_holders) &&
2101 gl->gl_state != LM_ST_UNLOCKED &&
2102 demote_ok(gl))
2103 handle_callback(gl, LM_ST_UNLOCKED);
2104 gfs2_glmutex_unlock(gl);
2105 }
2106
2107 gfs2_glock_put(gl);
2108}
2109
2110/**
2111 * examine_bucket - Call a function for glock in a hash bucket
2112 * @examiner: the function
2113 * @sdp: the filesystem
2114 * @bucket: the bucket
2115 *
2116 * Returns: 1 if the bucket has entries
2117 */
2118
2119static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
2120 struct gfs2_gl_hash_bucket *bucket)
2121{
2122 struct glock_plug plug;
2123 struct list_head *tmp;
2124 struct gfs2_glock *gl;
2125 int entries;
2126
2127 /* Add "plug" to end of bucket list, work back up list from there */
2128 memset(&plug.gl_flags, 0, sizeof(unsigned long));
2129 set_bit(GLF_PLUG, &plug.gl_flags);
2130
2131 write_lock(&bucket->hb_lock);
2132 list_add(&plug.gl_list, &bucket->hb_list);
2133 write_unlock(&bucket->hb_lock);
2134
2135 for (;;) {
2136 write_lock(&bucket->hb_lock);
2137
2138 for (;;) {
2139 tmp = plug.gl_list.next;
2140
2141 if (tmp == &bucket->hb_list) {
2142 list_del(&plug.gl_list);
2143 entries = !list_empty(&bucket->hb_list);
2144 write_unlock(&bucket->hb_lock);
2145 return entries;
2146 }
2147 gl = list_entry(tmp, struct gfs2_glock, gl_list);
2148
2149 /* Move plug up list */
2150 list_move(&plug.gl_list, &gl->gl_list);
2151
2152 if (test_bit(GLF_PLUG, &gl->gl_flags))
2153 continue;
2154
2155 /* examiner() must glock_put() */
2156 gfs2_glock_hold(gl);
2157
2158 break;
2159 }
2160
2161 write_unlock(&bucket->hb_lock);
2162
2163 examiner(gl);
2164 }
2165}
2166
2167/**
2168 * scan_glock - look at a glock and see if we can reclaim it
2169 * @gl: the glock to look at
2170 *
2171 */
2172
2173static void scan_glock(struct gfs2_glock *gl)
2174{
2175 if (gfs2_glmutex_trylock(gl)) {
2176 if (gl->gl_ops == &gfs2_inode_glops) {
2177 struct gfs2_inode *ip = get_gl2ip(gl);
2178 if (ip && !atomic_read(&ip->i_count))
2179 goto out_schedule;
2180 }
2181 if (queue_empty(gl, &gl->gl_holders) &&
2182 gl->gl_state != LM_ST_UNLOCKED &&
2183 demote_ok(gl))
2184 goto out_schedule;
2185
2186 gfs2_glmutex_unlock(gl);
2187 }
2188
2189 gfs2_glock_put(gl);
2190
2191 return;
2192
2193 out_schedule:
2194 gfs2_glmutex_unlock(gl);
2195 gfs2_glock_schedule_for_reclaim(gl);
2196 gfs2_glock_put(gl);
2197}
2198
2199/**
2200 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
2201 * @sdp: the filesystem
2202 *
2203 */
2204
2205void gfs2_scand_internal(struct gfs2_sbd *sdp)
2206{
2207 unsigned int x;
2208
2209 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2210 examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
2211 cond_resched();
2212 }
2213}
2214
2215/**
2216 * clear_glock - look at a glock and see if we can free it from glock cache
2217 * @gl: the glock to look at
2218 *
2219 */
2220
2221static void clear_glock(struct gfs2_glock *gl)
2222{
2223 struct gfs2_sbd *sdp = gl->gl_sbd;
2224 int released;
2225
2226 spin_lock(&sdp->sd_reclaim_lock);
2227 if (!list_empty(&gl->gl_reclaim)) {
2228 list_del_init(&gl->gl_reclaim);
2229 atomic_dec(&sdp->sd_reclaim_count);
2230 released = gfs2_glock_put(gl);
2231 gfs2_assert(sdp, !released);
2232 }
2233 spin_unlock(&sdp->sd_reclaim_lock);
2234
2235 if (gfs2_glmutex_trylock(gl)) {
2236 if (gl->gl_ops == &gfs2_inode_glops) {
2237 struct gfs2_inode *ip = get_gl2ip(gl);
2238 if (ip && !atomic_read(&ip->i_count))
2239 gfs2_inode_destroy(ip);
2240 }
2241 if (queue_empty(gl, &gl->gl_holders) &&
2242 gl->gl_state != LM_ST_UNLOCKED)
2243 handle_callback(gl, LM_ST_UNLOCKED);
2244
2245 gfs2_glmutex_unlock(gl);
2246 }
2247
2248 gfs2_glock_put(gl);
2249}
2250
2251/**
2252 * gfs2_gl_hash_clear - Empty out the glock hash table
2253 * @sdp: the filesystem
2254 * @wait: wait until it's all gone
2255 *
2256 * Called when unmounting the filesystem, or when inter-node lock manager
2257 * requests DROPLOCKS because it is running out of capacity.
2258 */
2259
2260void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
2261{
2262 unsigned long t;
2263 unsigned int x;
2264 int cont;
2265
2266 t = jiffies;
2267
2268 for (;;) {
2269 cont = 0;
2270
2271 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
2272 if (examine_bucket(clear_glock, sdp,
2273 &sdp->sd_gl_hash[x]))
2274 cont = 1;
2275
2276 if (!wait || !cont)
2277 break;
2278
2279 if (time_after_eq(jiffies,
2280 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
2281 fs_warn(sdp, "Unmount seems to be stalled. "
2282 "Dumping lock state...\n");
2283 gfs2_dump_lockstate(sdp);
2284 t = jiffies;
2285 }
2286
2287 /* invalidate_inodes() requires that the sb inodes list
2288 not change, but an async completion callback for an
2289 unlock can occur which does glock_put() which
2290 can call iput() which will change the sb inodes list.
2291 invalidate_inodes_mutex prevents glock_put()'s during
2292 an invalidate_inodes() */
2293
2294 mutex_lock(&sdp->sd_invalidate_inodes_mutex);
2295 invalidate_inodes(sdp->sd_vfs);
2296 mutex_unlock(&sdp->sd_invalidate_inodes_mutex);
2297 yield();
2298 }
2299}
2300
2301/*
2302 * Diagnostic routines to help debug distributed deadlock
2303 */
2304
2305/**
2306 * dump_holder - print information about a glock holder
2307 * @str: a string naming the type of holder
2308 * @gh: the glock holder
2309 *
2310 * Returns: 0 on success, -ENOBUFS when we run out of space
2311 */
2312
2313static int dump_holder(char *str, struct gfs2_holder *gh)
2314{
2315 unsigned int x;
2316 int error = -ENOBUFS;
2317
2318 printk(KERN_INFO " %s\n", str);
2319 printk(KERN_INFO " owner = %ld\n",
2320 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
2321 printk(KERN_INFO " gh_state = %u\n", gh->gh_state);
2322 printk(KERN_INFO " gh_flags =");
2323 for (x = 0; x < 32; x++)
2324 if (gh->gh_flags & (1 << x))
2325 printk(" %u", x);
2326 printk(" \n");
2327 printk(KERN_INFO " error = %d\n", gh->gh_error);
2328 printk(KERN_INFO " gh_iflags =");
2329 for (x = 0; x < 32; x++)
2330 if (test_bit(x, &gh->gh_iflags))
2331 printk(" %u", x);
2332 printk(" \n");
2333
2334 error = 0;
2335
2336 return error;
2337}
2338
2339/**
2340 * dump_inode - print information about an inode
2341 * @ip: the inode
2342 *
2343 * Returns: 0 on success, -ENOBUFS when we run out of space
2344 */
2345
2346static int dump_inode(struct gfs2_inode *ip)
2347{
2348 unsigned int x;
2349 int error = -ENOBUFS;
2350
2351 printk(KERN_INFO " Inode:\n");
2352 printk(KERN_INFO " num = %llu %llu\n",
2353 ip->i_num.no_formal_ino, ip->i_num.no_addr);
2354 printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode));
2355 printk(KERN_INFO " i_count = %d\n", atomic_read(&ip->i_count));
2356 printk(KERN_INFO " i_flags =");
2357 for (x = 0; x < 32; x++)
2358 if (test_bit(x, &ip->i_flags))
2359 printk(" %u", x);
2360 printk(" \n");
2361 printk(KERN_INFO " vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
2362
2363 error = 0;
2364
2365 return error;
2366}
2367
2368/**
2369 * dump_glock - print information about a glock
2370 * @gl: the glock
2371 * @count: where we are in the buffer
2372 *
2373 * Returns: 0 on success, -ENOBUFS when we run out of space
2374 */
2375
2376static int dump_glock(struct gfs2_glock *gl)
2377{
2378 struct gfs2_holder *gh;
2379 unsigned int x;
2380 int error = -ENOBUFS;
2381
2382 spin_lock(&gl->gl_spin);
2383
2384 printk(KERN_INFO "Glock (%u, %llu)\n",
2385 gl->gl_name.ln_type,
2386 gl->gl_name.ln_number);
2387 printk(KERN_INFO " gl_flags =");
2388 for (x = 0; x < 32; x++)
2389 if (test_bit(x, &gl->gl_flags))
2390 printk(" %u", x);
2391 printk(" \n");
2392 printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref.refcount));
2393 printk(KERN_INFO " gl_state = %u\n", gl->gl_state);
2394 printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
2395 printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
2396 printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
2397 printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no");
2398 printk(KERN_INFO " le = %s\n",
2399 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
2400 printk(KERN_INFO " reclaim = %s\n",
2401 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
2402 if (gl->gl_aspace)
2403 printk(KERN_INFO " aspace = %lu\n",
2404 gl->gl_aspace->i_mapping->nrpages);
2405 else
2406 printk(KERN_INFO " aspace = no\n");
2407 printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count));
2408 if (gl->gl_req_gh) {
2409 error = dump_holder("Request", gl->gl_req_gh);
2410 if (error)
2411 goto out;
2412 }
2413 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
2414 error = dump_holder("Holder", gh);
2415 if (error)
2416 goto out;
2417 }
2418 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
2419 error = dump_holder("Waiter1", gh);
2420 if (error)
2421 goto out;
2422 }
2423 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
2424 error = dump_holder("Waiter2", gh);
2425 if (error)
2426 goto out;
2427 }
2428 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
2429 error = dump_holder("Waiter3", gh);
2430 if (error)
2431 goto out;
2432 }
2433 if (gl->gl_ops == &gfs2_inode_glops && get_gl2ip(gl)) {
2434 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
2435 list_empty(&gl->gl_holders)) {
2436 error = dump_inode(get_gl2ip(gl));
2437 if (error)
2438 goto out;
2439 } else {
2440 error = -ENOBUFS;
2441 printk(KERN_INFO " Inode: busy\n");
2442 }
2443 }
2444
2445 error = 0;
2446
2447 out:
2448 spin_unlock(&gl->gl_spin);
2449
2450 return error;
2451}
2452
2453/**
2454 * gfs2_dump_lockstate - print out the current lockstate
2455 * @sdp: the filesystem
2456 * @ub: the buffer to copy the information into
2457 *
2458 * If @ub is NULL, dump the lockstate to the console.
2459 *
2460 */
2461
2462int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2463{
2464 struct gfs2_gl_hash_bucket *bucket;
2465 struct gfs2_glock *gl;
2466 unsigned int x;
2467 int error = 0;
2468
2469 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2470 bucket = &sdp->sd_gl_hash[x];
2471
2472 read_lock(&bucket->hb_lock);
2473
2474 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
2475 if (test_bit(GLF_PLUG, &gl->gl_flags))
2476 continue;
2477
2478 error = dump_glock(gl);
2479 if (error)
2480 break;
2481 }
2482
2483 read_unlock(&bucket->hb_lock);
2484
2485 if (error)
2486 break;
2487 }
2488
2489
2490 return error;
2491}
2492
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
new file mode 100644
index 000000000000..06847ebebdee
--- /dev/null
+++ b/fs/gfs2/glock.h
@@ -0,0 +1,143 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__
12
13/* Flags for lock requests; used in gfs2_holder gh_flag field.
14 From lm_interface.h:
15#define LM_FLAG_TRY 0x00000001
16#define LM_FLAG_TRY_1CB 0x00000002
17#define LM_FLAG_NOEXP 0x00000004
18#define LM_FLAG_ANY 0x00000008
19#define LM_FLAG_PRIORITY 0x00000010 */
20
21#define GL_LOCAL_EXCL 0x00000020
22#define GL_ASYNC 0x00000040
23#define GL_EXACT 0x00000080
24#define GL_SKIP 0x00000100
25#define GL_ATIME 0x00000200
26#define GL_NOCACHE 0x00000400
27#define GL_SYNC 0x00000800
28#define GL_NOCANCEL 0x00001000
29#define GL_NEVER_RECURSE 0x00002000
30
31#define GLR_TRYFAILED 13
32#define GLR_CANCELED 14
33
34static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
35{
36 struct gfs2_holder *gh;
37 int locked = 0;
38
39 /* Look in glock's list of holders for one with current task as owner */
40 spin_lock(&gl->gl_spin);
41 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
42 if (gh->gh_owner == current) {
43 locked = 1;
44 break;
45 }
46 }
47 spin_unlock(&gl->gl_spin);
48
49 return locked;
50}
51
52static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
53{
54 return (gl->gl_state == LM_ST_EXCLUSIVE);
55}
56
57static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
58{
59 return (gl->gl_state == LM_ST_DEFERRED);
60}
61
62static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
63{
64 return (gl->gl_state == LM_ST_SHARED);
65}
66
67static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
68{
69 int ret;
70 spin_lock(&gl->gl_spin);
71 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
72 spin_unlock(&gl->gl_spin);
73 return ret;
74}
75
76struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
77 struct lm_lockname *name);
78int gfs2_glock_get(struct gfs2_sbd *sdp,
79 uint64_t number, struct gfs2_glock_operations *glops,
80 int create, struct gfs2_glock **glp);
81void gfs2_glock_hold(struct gfs2_glock *gl);
82int gfs2_glock_put(struct gfs2_glock *gl);
83
84void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, int flags,
85 struct gfs2_holder *gh);
86void gfs2_holder_reinit(unsigned int state, int flags, struct gfs2_holder *gh);
87void gfs2_holder_uninit(struct gfs2_holder *gh);
88struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, unsigned int state,
89 int flags, gfp_t gfp_flags);
90void gfs2_holder_put(struct gfs2_holder *gh);
91
92void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
93void gfs2_glock_drop_th(struct gfs2_glock *gl);
94
95void gfs2_glmutex_lock(struct gfs2_glock *gl);
96int gfs2_glmutex_trylock(struct gfs2_glock *gl);
97void gfs2_glmutex_unlock(struct gfs2_glock *gl);
98
99int gfs2_glock_nq(struct gfs2_holder *gh);
100int gfs2_glock_poll(struct gfs2_holder *gh);
101int gfs2_glock_wait(struct gfs2_holder *gh);
102void gfs2_glock_dq(struct gfs2_holder *gh);
103
104void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state, int flags);
105void gfs2_glock_force_drop(struct gfs2_glock *gl);
106
107int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
108
109int gfs2_glock_nq_init(struct gfs2_glock *gl, unsigned int state, int flags,
110 struct gfs2_holder *gh);
111void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
112int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
113 uint64_t number, struct gfs2_glock_operations *glops,
114 unsigned int state, int flags, struct gfs2_holder *gh);
115
116int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
117void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
118void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
119
120void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
121 struct gfs2_glock_operations *glops,
122 unsigned int state, int flags);
123
124/* Lock Value Block functions */
125
126int gfs2_lvb_hold(struct gfs2_glock *gl);
127void gfs2_lvb_unhold(struct gfs2_glock *gl);
128void gfs2_lvb_sync(struct gfs2_glock *gl);
129
130void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data);
131
132void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum);
133void gfs2_iopen_go_callback(struct gfs2_glock *gl, unsigned int state);
134
135void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
136void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
137
138void gfs2_scand_internal(struct gfs2_sbd *sdp);
139void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
140
141int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
142
143#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
new file mode 100644
index 000000000000..27374306ecde
--- /dev/null
+++ b/fs/gfs2/glops.c
@@ -0,0 +1,487 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "glops.h"
21#include "inode.h"
22#include "log.h"
23#include "meta_io.h"
24#include "page.h"
25#include "recovery.h"
26#include "rgrp.h"
27
28/**
29 * meta_go_sync - sync out the metadata for this glock
30 * @gl: the glock
31 * @flags: DIO_*
32 *
33 * Called when demoting or unlocking an EX glock. We must flush
34 * to disk all dirty buffers/pages relating to this glock, and must not
35 * not return to caller to demote/unlock the glock until I/O is complete.
36 */
37
38static void meta_go_sync(struct gfs2_glock *gl, int flags)
39{
40 if (!(flags & DIO_METADATA))
41 return;
42
43 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
44 gfs2_log_flush_glock(gl);
45 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
46 if (flags & DIO_RELEASE)
47 gfs2_ail_empty_gl(gl);
48 }
49
50 clear_bit(GLF_SYNC, &gl->gl_flags);
51}
52
53/**
54 * meta_go_inval - invalidate the metadata for this glock
55 * @gl: the glock
56 * @flags:
57 *
58 */
59
60static void meta_go_inval(struct gfs2_glock *gl, int flags)
61{
62 if (!(flags & DIO_METADATA))
63 return;
64
65 gfs2_meta_inval(gl);
66 gl->gl_vn++;
67}
68
69/**
70 * meta_go_demote_ok - Check to see if it's ok to unlock a glock
71 * @gl: the glock
72 *
73 * Returns: 1 if we have no cached data; ok to demote meta glock
74 */
75
76static int meta_go_demote_ok(struct gfs2_glock *gl)
77{
78 return !gl->gl_aspace->i_mapping->nrpages;
79}
80
81/**
82 * inode_go_xmote_th - promote/demote a glock
83 * @gl: the glock
84 * @state: the requested state
85 * @flags:
86 *
87 */
88
89static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
90 int flags)
91{
92 if (gl->gl_state != LM_ST_UNLOCKED)
93 gfs2_pte_inval(gl);
94 gfs2_glock_xmote_th(gl, state, flags);
95}
96
97/**
98 * inode_go_xmote_bh - After promoting/demoting a glock
99 * @gl: the glock
100 *
101 */
102
103static void inode_go_xmote_bh(struct gfs2_glock *gl)
104{
105 struct gfs2_holder *gh = gl->gl_req_gh;
106 struct buffer_head *bh;
107 int error;
108
109 if (gl->gl_state != LM_ST_UNLOCKED &&
110 (!gh || !(gh->gh_flags & GL_SKIP))) {
111 error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START,
112 &bh);
113 if (!error)
114 brelse(bh);
115 }
116}
117
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Calls gfs2_pte_inval() on the glock and then drops the lock.
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
 * is being purged from our node's glock cache; we're dropping lock.
 */

static void inode_go_drop_th(struct gfs2_glock *gl)
{
	gfs2_pte_inval(gl);

	gfs2_glock_drop_th(gl);
}
132
133/**
134 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
135 * @gl: the glock protecting the inode
136 * @flags:
137 *
138 */
139
140static void inode_go_sync(struct gfs2_glock *gl, int flags)
141{
142 int meta = (flags & DIO_METADATA);
143 int data = (flags & DIO_DATA);
144
145 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
146 if (meta && data) {
147 gfs2_page_sync(gl, flags | DIO_START);
148 gfs2_log_flush_glock(gl);
149 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
150 gfs2_page_sync(gl, flags | DIO_WAIT);
151 clear_bit(GLF_DIRTY, &gl->gl_flags);
152 } else if (meta) {
153 gfs2_log_flush_glock(gl);
154 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
155 } else if (data)
156 gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT);
157 if (flags & DIO_RELEASE)
158 gfs2_ail_empty_gl(gl);
159 }
160
161 clear_bit(GLF_SYNC, &gl->gl_flags);
162}
163
164/**
165 * inode_go_inval - prepare a inode glock to be released
166 * @gl: the glock
167 * @flags:
168 *
169 */
170
171static void inode_go_inval(struct gfs2_glock *gl, int flags)
172{
173 int meta = (flags & DIO_METADATA);
174 int data = (flags & DIO_DATA);
175
176 if (meta) {
177 gfs2_meta_inval(gl);
178 gl->gl_vn++;
179 }
180 if (data)
181 gfs2_page_inval(gl);
182}
183
184/**
185 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
186 * @gl: the glock
187 *
188 * Returns: 1 if it's ok
189 */
190
191static int inode_go_demote_ok(struct gfs2_glock *gl)
192{
193 struct gfs2_sbd *sdp = gl->gl_sbd;
194 int demote = 0;
195
196 if (!get_gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages)
197 demote = 1;
198 else if (!sdp->sd_args.ar_localcaching &&
199 time_after_eq(jiffies, gl->gl_stamp +
200 gfs2_tune_get(sdp, gt_demote_secs) * HZ))
201 demote = 1;
202
203 return demote;
204}
205
206/**
207 * inode_go_lock - operation done after an inode lock is locked by a process
208 * @gl: the glock
209 * @flags:
210 *
211 * Returns: errno
212 */
213
214static int inode_go_lock(struct gfs2_holder *gh)
215{
216 struct gfs2_glock *gl = gh->gh_gl;
217 struct gfs2_inode *ip = get_gl2ip(gl);
218 int error = 0;
219
220 if (!ip)
221 return 0;
222
223 if (ip->i_vn != gl->gl_vn) {
224 error = gfs2_inode_refresh(ip);
225 if (error)
226 return error;
227 gfs2_inode_attr_in(ip);
228 }
229
230 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
231 (gl->gl_state == LM_ST_EXCLUSIVE) &&
232 (gh->gh_flags & GL_LOCAL_EXCL))
233 error = gfs2_truncatei_resume(ip);
234
235 return error;
236}
237
238/**
239 * inode_go_unlock - operation done before an inode lock is unlocked by a
240 * process
241 * @gl: the glock
242 * @flags:
243 *
244 */
245
246static void inode_go_unlock(struct gfs2_holder *gh)
247{
248 struct gfs2_glock *gl = gh->gh_gl;
249 struct gfs2_inode *ip = get_gl2ip(gl);
250
251 if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
252 gfs2_inode_attr_in(ip);
253
254 if (ip)
255 gfs2_meta_cache_flush(ip);
256}
257
258/**
259 * inode_greedy -
260 * @gl: the glock
261 *
262 */
263
264static void inode_greedy(struct gfs2_glock *gl)
265{
266 struct gfs2_sbd *sdp = gl->gl_sbd;
267 struct gfs2_inode *ip = get_gl2ip(gl);
268 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
269 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
270 unsigned int new_time;
271
272 spin_lock(&ip->i_spin);
273
274 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
275 new_time = ip->i_greedy + quantum;
276 if (new_time > max)
277 new_time = max;
278 } else {
279 new_time = ip->i_greedy - quantum;
280 if (!new_time || new_time > max)
281 new_time = 1;
282 }
283
284 ip->i_greedy = new_time;
285
286 spin_unlock(&ip->i_spin);
287
288 gfs2_inode_put(ip);
289}
290
291/**
292 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
293 * @gl: the glock
294 *
295 * Returns: 1 if it's ok
296 */
297
298static int rgrp_go_demote_ok(struct gfs2_glock *gl)
299{
300 return !gl->gl_aspace->i_mapping->nrpages;
301}
302
303/**
304 * rgrp_go_lock - operation done after an rgrp lock is locked by
305 * a first holder on this node.
306 * @gl: the glock
307 * @flags:
308 *
309 * Returns: errno
310 */
311
312static int rgrp_go_lock(struct gfs2_holder *gh)
313{
314 return gfs2_rgrp_bh_get(get_gl2rgd(gh->gh_gl));
315}
316
317/**
318 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
319 * a last holder on this node.
320 * @gl: the glock
321 * @flags:
322 *
323 */
324
325static void rgrp_go_unlock(struct gfs2_holder *gh)
326{
327 gfs2_rgrp_bh_put(get_gl2rgd(gh->gh_gl));
328}
329
330/**
331 * trans_go_xmote_th - promote/demote the transaction glock
332 * @gl: the glock
333 * @state: the requested state
334 * @flags:
335 *
336 */
337
338static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
339 int flags)
340{
341 struct gfs2_sbd *sdp = gl->gl_sbd;
342
343 if (gl->gl_state != LM_ST_UNLOCKED &&
344 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
345 gfs2_meta_syncfs(sdp);
346 gfs2_log_shutdown(sdp);
347 }
348
349 gfs2_glock_xmote_th(gl, state, flags);
350}
351
352/**
353 * trans_go_xmote_bh - After promoting/demoting the transaction glock
354 * @gl: the glock
355 *
356 */
357
358static void trans_go_xmote_bh(struct gfs2_glock *gl)
359{
360 struct gfs2_sbd *sdp = gl->gl_sbd;
361 struct gfs2_glock *j_gl = get_v2ip(sdp->sd_jdesc->jd_inode)->i_gl;
362 struct gfs2_log_header head;
363 int error;
364
365 if (gl->gl_state != LM_ST_UNLOCKED &&
366 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
367 gfs2_meta_cache_flush(get_v2ip(sdp->sd_jdesc->jd_inode));
368 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
369
370 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
371 if (error)
372 gfs2_consist(sdp);
373 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
374 gfs2_consist(sdp);
375
376 /* Initialize some head of the log stuff */
377 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
378 sdp->sd_log_sequence = head.lh_sequence + 1;
379 gfs2_log_pointers_init(sdp, head.lh_blkno);
380 }
381 }
382}
383
384/**
385 * trans_go_drop_th - unlock the transaction glock
386 * @gl: the glock
387 *
388 * We want to sync the device even with localcaching. Remember
389 * that localcaching journal replay only marks buffers dirty.
390 */
391
392static void trans_go_drop_th(struct gfs2_glock *gl)
393{
394 struct gfs2_sbd *sdp = gl->gl_sbd;
395
396 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
397 gfs2_meta_syncfs(sdp);
398 gfs2_log_shutdown(sdp);
399 }
400
401 gfs2_glock_drop_th(gl);
402}
403
404/**
405 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
406 * @gl: the glock
407 *
408 * Returns: 1 if it's ok
409 */
410
411static int quota_go_demote_ok(struct gfs2_glock *gl)
412{
413 return !atomic_read(&gl->gl_lvb_count);
414}
415
416struct gfs2_glock_operations gfs2_meta_glops = {
417 .go_xmote_th = gfs2_glock_xmote_th,
418 .go_drop_th = gfs2_glock_drop_th,
419 .go_sync = meta_go_sync,
420 .go_inval = meta_go_inval,
421 .go_demote_ok = meta_go_demote_ok,
422 .go_type = LM_TYPE_META
423};
424
425struct gfs2_glock_operations gfs2_inode_glops = {
426 .go_xmote_th = inode_go_xmote_th,
427 .go_xmote_bh = inode_go_xmote_bh,
428 .go_drop_th = inode_go_drop_th,
429 .go_sync = inode_go_sync,
430 .go_inval = inode_go_inval,
431 .go_demote_ok = inode_go_demote_ok,
432 .go_lock = inode_go_lock,
433 .go_unlock = inode_go_unlock,
434 .go_greedy = inode_greedy,
435 .go_type = LM_TYPE_INODE
436};
437
438struct gfs2_glock_operations gfs2_rgrp_glops = {
439 .go_xmote_th = gfs2_glock_xmote_th,
440 .go_drop_th = gfs2_glock_drop_th,
441 .go_sync = meta_go_sync,
442 .go_inval = meta_go_inval,
443 .go_demote_ok = rgrp_go_demote_ok,
444 .go_lock = rgrp_go_lock,
445 .go_unlock = rgrp_go_unlock,
446 .go_type = LM_TYPE_RGRP
447};
448
449struct gfs2_glock_operations gfs2_trans_glops = {
450 .go_xmote_th = trans_go_xmote_th,
451 .go_xmote_bh = trans_go_xmote_bh,
452 .go_drop_th = trans_go_drop_th,
453 .go_type = LM_TYPE_NONDISK
454};
455
456struct gfs2_glock_operations gfs2_iopen_glops = {
457 .go_xmote_th = gfs2_glock_xmote_th,
458 .go_drop_th = gfs2_glock_drop_th,
459 .go_callback = gfs2_iopen_go_callback,
460 .go_type = LM_TYPE_IOPEN
461};
462
463struct gfs2_glock_operations gfs2_flock_glops = {
464 .go_xmote_th = gfs2_glock_xmote_th,
465 .go_drop_th = gfs2_glock_drop_th,
466 .go_type = LM_TYPE_FLOCK
467};
468
469struct gfs2_glock_operations gfs2_nondisk_glops = {
470 .go_xmote_th = gfs2_glock_xmote_th,
471 .go_drop_th = gfs2_glock_drop_th,
472 .go_type = LM_TYPE_NONDISK
473};
474
475struct gfs2_glock_operations gfs2_quota_glops = {
476 .go_xmote_th = gfs2_glock_xmote_th,
477 .go_drop_th = gfs2_glock_drop_th,
478 .go_demote_ok = quota_go_demote_ok,
479 .go_type = LM_TYPE_QUOTA
480};
481
482struct gfs2_glock_operations gfs2_journal_glops = {
483 .go_xmote_th = gfs2_glock_xmote_th,
484 .go_drop_th = gfs2_glock_drop_th,
485 .go_type = LM_TYPE_JOURNAL
486};
487
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
new file mode 100644
index 000000000000..94f2d264aa64
--- /dev/null
+++ b/fs/gfs2/glops.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOPS_DOT_H__
11#define __GLOPS_DOT_H__
12
13extern struct gfs2_glock_operations gfs2_meta_glops;
14extern struct gfs2_glock_operations gfs2_inode_glops;
15extern struct gfs2_glock_operations gfs2_rgrp_glops;
16extern struct gfs2_glock_operations gfs2_trans_glops;
17extern struct gfs2_glock_operations gfs2_iopen_glops;
18extern struct gfs2_glock_operations gfs2_flock_glops;
19extern struct gfs2_glock_operations gfs2_nondisk_glops;
20extern struct gfs2_glock_operations gfs2_quota_glops;
21extern struct gfs2_glock_operations gfs2_journal_glops;
22
23#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
new file mode 100644
index 000000000000..0e550e8e5be3
--- /dev/null
+++ b/fs/gfs2/incore.h
@@ -0,0 +1,682 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
/*
 * DIO_* flag bits.  Every value is a distinct single bit, so the flags
 * can be combined with bitwise OR.
 * NOTE(review): presumably these are the "flags" values handed to the
 * go_sync()/go_inval() glock hooks -- confirm against the callers.
 */
13#define DIO_FORCE 0x00000001
14#define DIO_CLEAN 0x00000002
15#define DIO_DIRTY 0x00000004
16#define DIO_START 0x00000008
17#define DIO_WAIT 0x00000010
18#define DIO_METADATA 0x00000020
19#define DIO_DATA 0x00000040
20#define DIO_RELEASE 0x00000080
21#define DIO_ALL 0x00000100
22
/*
 * Forward declarations.  The structures defined below point at each
 * other, so every tag is declared up front and definition order does
 * not matter for pointer members.
 */
23struct gfs2_log_operations;
24struct gfs2_log_element;
25struct gfs2_bitmap;
26struct gfs2_rgrpd;
27struct gfs2_bufdata;
28struct gfs2_glock_operations;
29struct gfs2_holder;
30struct gfs2_glock;
31struct gfs2_alloc;
32struct gfs2_inode;
33struct gfs2_file;
34struct gfs2_revoke;
35struct gfs2_revoke_replay;
36struct gfs2_unlinked;
37struct gfs2_quota_data;
38struct gfs2_log_buf;
39struct gfs2_trans;
40struct gfs2_ail;
41struct gfs2_jdesc;
42struct gfs2_args;
43struct gfs2_tune;
44struct gfs2_gl_hash_bucket;
45struct gfs2_sbd;
46
/*
 * Callback type held in gfs2_glock.gl_req_bh.  It receives the glock
 * and an unsigned "ret" value.
 */
47typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
48
49/*
50 * Structure of operations that are associated with each
51 * type of element in the log.
52 */
53
/*
 * Callback table for one type of log element (see le_ops in
 * struct gfs2_log_element).  The hooks fall into three groups by
 * signature: lo_add takes a single element; the *_commit hooks take the
 * filesystem (plus a transaction or AIL entry); the *_scan hooks take a
 * journal descriptor and a pass number.  lo_name is a name string for
 * the element type.
 */
54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
56 void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
57 void (*lo_before_commit) (struct gfs2_sbd *sdp);
58 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
59 void (*lo_before_scan) (struct gfs2_jdesc *jd,
60 struct gfs2_log_header *head, int pass);
61 int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
62 struct gfs2_log_descriptor *ld, __be64 *ptr,
63 int pass);
64 void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
65 char *lo_name;
66};
67
/*
 * Common header embedded in every object that can be attached to the
 * log (rd_le, bd_le, gl_le and rv_le in the structures below): a list
 * link plus the operations table for that element's type.
 */
68struct gfs2_log_element {
69 struct list_head le_list;
70 struct gfs2_log_operations *le_ops;
71};
72
/*
 * One bitmap buffer of a resource group; struct gfs2_rgrpd refers to
 * these through rd_bits.
 * NOTE(review): bi_clone looks like a private copy of the bitmap data
 * and bi_offset/bi_start/bi_len like positions within the buffer --
 * confirm against the resource group code.
 */
73struct gfs2_bitmap {
74 struct buffer_head *bi_bh;
75 char *bi_clone;
76 uint32_t bi_offset;
77 uint32_t bi_start;
78 uint32_t bi_len;
79};
80
/*
 * In-core resource group descriptor.  Carries three list links, the
 * glock for the group, embedded struct gfs2_rindex / struct gfs2_rgrp
 * copies, a pointer to the group's bitmaps, a log element, and a back
 * pointer to the owning filesystem (rd_sbd).
 */
81struct gfs2_rgrpd {
82 struct list_head rd_list; /* Link with superblock */
83 struct list_head rd_list_mru;
84 struct list_head rd_recent; /* Recently used rgrps */
85 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
86 struct gfs2_rindex rd_ri;
87 struct gfs2_rgrp rd_rg;
88 uint64_t rd_rg_vn;
89 struct gfs2_bitmap *rd_bits;
90 unsigned int rd_bh_count;
91 struct mutex rd_mutex;
92 uint32_t rd_free_clone;
93 struct gfs2_log_element rd_le;
94 uint32_t rd_last_alloc_data;
95 uint32_t rd_last_alloc_meta;
96 struct gfs2_sbd *rd_sbd;
97};
98
/*
 * GFS2-private buffer_head state bits, numbered upwards from
 * BH_PrivateStart.  The BUFFER_FNS / TAS_BUFFER_FNS invocations below
 * expand to the accessor helpers for the "pinned" and "escaped" bits.
 */
99enum gfs2_state_bits {
100 BH_Pinned = BH_PrivateStart,
101 BH_Escaped = BH_PrivateStart + 1,
102};
103
104BUFFER_FNS(Pinned, pinned)
105TAS_BUFFER_FNS(Pinned, pinned)
106BUFFER_FNS(Escaped, escaped)
107TAS_BUFFER_FNS(Escaped, escaped)
108
/*
 * Per-buffer bookkeeping: the buffer head and glock it belongs to, a
 * link on a transaction's list, a log element, and its AIL entry with
 * two AIL list links.
 */
109struct gfs2_bufdata {
110 struct buffer_head *bd_bh;
111 struct gfs2_glock *bd_gl;
112
113 struct list_head bd_list_tr;
114 struct gfs2_log_element bd_le;
115
116 struct gfs2_ail *bd_ail;
117 struct list_head bd_ail_st_list;
118 struct list_head bd_ail_gl_list;
119};
120
/*
 * Per-glock-kind callback table; a glock reaches its table through
 * gl_ops.  Hooks a given kind does not need are simply left unset in
 * its table.  go_type holds the LM_TYPE_* value for the kind.
 */
121struct gfs2_glock_operations {
122 void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state,
123 int flags);
124 void (*go_xmote_bh) (struct gfs2_glock * gl);
125 void (*go_drop_th) (struct gfs2_glock * gl);
126 void (*go_drop_bh) (struct gfs2_glock * gl);
127 void (*go_sync) (struct gfs2_glock * gl, int flags);
128 void (*go_inval) (struct gfs2_glock * gl, int flags);
129 int (*go_demote_ok) (struct gfs2_glock * gl);
130 int (*go_lock) (struct gfs2_holder * gh);
131 void (*go_unlock) (struct gfs2_holder * gh);
132 void (*go_callback) (struct gfs2_glock * gl, unsigned int state);
133 void (*go_greedy) (struct gfs2_glock * gl);
134 int go_type;
135};
136
/*
 * Holder flag numbers, split into "actions" and "states".  The comments
 * on gl_waiters1/2/3 in struct gfs2_glock tie HIF_MUTEX, HIF_DEMOTE /
 * HIF_GREEDY and HIF_PROMOTE to the three waiter lists.
 * NOTE(review): presumably these are bit numbers for
 * gfs2_holder.gh_iflags -- confirm in the glock code.
 */
137enum {
138 /* Actions */
139 HIF_MUTEX = 0,
140 HIF_PROMOTE = 1,
141 HIF_DEMOTE = 2,
142 HIF_GREEDY = 3,
143
144 /* States */
145 HIF_ALLOCED = 4,
146 HIF_DEALLOC = 5,
147 HIF_HOLDER = 6,
148 HIF_FIRST = 7,
149 HIF_RECURSE = 8,
150 HIF_ABORTED = 9,
151};
152
/*
 * A holder: one request for (or hold on) a glock.  It records the
 * target glock, the owning task, the requested state and flags, an
 * error slot, internal flags and a completion.
 */
153struct gfs2_holder {
154 struct list_head gh_list;
155
156 struct gfs2_glock *gh_gl;
157 struct task_struct *gh_owner;
158 unsigned int gh_state;
159 int gh_flags;
160
161 int gh_error;
162 unsigned long gh_iflags;
163 struct completion gh_wait;
164};
165
/*
 * Bit numbers for gfs2_glock.gl_flags, manipulated with the kernel
 * bitops (e.g. clear_bit(GLF_STICKY, &gl->gl_flags)).
 */
166enum {
167 GLF_PLUG = 0,
168 GLF_LOCK = 1,
169 GLF_STICKY = 2,
170 GLF_PREFETCH = 3,
171 GLF_SYNC = 4,
172 GLF_DIRTY = 5,
173 GLF_SKIP_WAITERS2 = 6,
174 GLF_GREEDY = 7,
175};
176
/*
 * In-core glock.  Reference counted through gl_ref and protected by the
 * gl_spin spinlock for its list/state fields (NOTE(review): exact
 * locking coverage is not visible here -- confirm in the glock code).
 * gl_ops selects the per-kind callback table, gl_object is an opaque
 * per-kind back pointer, and gl_vn is a version number that inodes
 * compare against their own i_vn.
 */
177struct gfs2_glock {
178 struct list_head gl_list;
179 unsigned long gl_flags; /* GLF_... */
180 struct lm_lockname gl_name;
181 struct kref gl_ref;
182
183 spinlock_t gl_spin;
184
185 unsigned int gl_state;
186 struct list_head gl_holders;
187 struct list_head gl_waiters1; /* HIF_MUTEX */
188 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
189 struct list_head gl_waiters3; /* HIF_PROMOTE */
190
191 struct gfs2_glock_operations *gl_ops;
192
193 struct gfs2_holder *gl_req_gh;
194 gfs2_glop_bh_t gl_req_bh;
195
196 lm_lock_t *gl_lock;
197 char *gl_lvb;
198 atomic_t gl_lvb_count;
199
200 uint64_t gl_vn;
201 unsigned long gl_stamp;
202 void *gl_object;
203
204 struct gfs2_gl_hash_bucket *gl_bucket;
205 struct list_head gl_reclaim;
206
207 struct gfs2_sbd *gl_sbd;
208
209 struct inode *gl_aspace;
210 struct gfs2_log_element gl_le;
211 struct list_head gl_ail_list;
212 atomic_t gl_ail_count;
213};
214
/*
 * Allocation context, embedded in every gfs2_inode as i_alloc.  It is
 * laid out in the order the fields are filled in: quota state, the
 * caller's request, the reservation made by gfs2_inplace_reserve(), and
 * finally the amount actually allocated.
 */
215struct gfs2_alloc {
216 /* Quota stuff */
217
218 unsigned int al_qd_num;
219 struct gfs2_quota_data *al_qd[4];
220 struct gfs2_holder al_qd_ghs[4];
221
222 /* Filled in by the caller to gfs2_inplace_reserve() */
223
224 uint32_t al_requested;
225
226 /* Filled in by gfs2_inplace_reserve() */
227
228 char *al_file;
229 unsigned int al_line;
230 struct gfs2_holder al_ri_gh;
231 struct gfs2_holder al_rgd_gh;
232 struct gfs2_rgrpd *al_rgd;
233
234 /* Filled in by gfs2_alloc_*() */
235
236 uint32_t al_alloced;
237};
238
/*
 * Bit numbers for gfs2_inode.i_flags, used with the kernel bitops
 * (e.g. test_bit(GIF_MIN_INIT, &ip->i_flags)).
 */
239enum {
240 GIF_MIN_INIT = 0,
241 GIF_QD_LOCKED = 1,
242 GIF_PAGED = 2,
243 GIF_SW_PAGED = 3,
244};
245
/*
 * In-core GFS2 inode.  Holds the inode number, a use count (i_count),
 * an in-core copy of the dinode (i_di) with the glock version it was
 * read under (i_vn), pointers to its glock, filesystem and VFS inode,
 * the iopen holder, an embedded allocation context, and a per-height
 * array of cached buffer heads (i_cache).  i_spin is taken around
 * accesses to i_vnode and updates of i_di in inode.c.
 */
246struct gfs2_inode {
247 struct gfs2_inum i_num;
248
249 atomic_t i_count;
250 unsigned long i_flags; /* GIF_... */
251
252 uint64_t i_vn;
253 struct gfs2_dinode i_di;
254
255 struct gfs2_glock *i_gl;
256 struct gfs2_sbd *i_sbd;
257 struct inode *i_vnode;
258
259 struct gfs2_holder i_iopen_gh;
260 struct gfs2_holder i_gh; /* for prepare/commit_write only */
261 struct gfs2_alloc i_alloc;
262 uint64_t i_last_rg_alloc;
263
264 spinlock_t i_spin;
265 struct rw_semaphore i_rw_mutex;
266
267 unsigned int i_greedy;
268 unsigned long i_last_pfault;
269
270 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
271};
272
/* Bit numbers for gfs2_file.f_flags. */
273enum {
274 GFF_DID_DIRECT_ALLOC = 0,
275};
276
/*
 * Per-open-file state: flags, a mutex and holder for the file's flock
 * glock (f_fl_mutex / f_fl_gh), and back pointers to the GFS2 inode and
 * the VFS file.
 */
277struct gfs2_file {
278 unsigned long f_flags; /* GFF_... */
279
280 struct mutex f_fl_mutex;
281 struct gfs2_holder f_fl_gh;
282
283 struct gfs2_inode *f_inode;
284 struct file *f_vfile;
285};
286
/* A revoke queued in the log: a log element plus the block number. */
287struct gfs2_revoke {
288 struct gfs2_log_element rv_le;
289 uint64_t rv_blkno;
290};
291
/*
 * A revoke remembered during journal replay: list link, block number
 * and rr_where.
 * NOTE(review): rr_where presumably records the journal position at
 * which the revoke was seen -- confirm in the recovery code.
 */
292struct gfs2_revoke_replay {
293 struct list_head rr_list;
294 uint64_t rr_blkno;
295 unsigned int rr_where;
296};
297
/* Bit numbers for gfs2_unlinked.ul_flags. */
298enum {
299 ULF_LOCKED = 0,
300};
301
/*
 * In-core entry for an unlinked inode: list link, use count, the
 * unlinked tag (ul_ut, whose ut_inum identifies the inode), flags and
 * a slot number.
 */
302struct gfs2_unlinked {
303 struct list_head ul_list;
304 unsigned int ul_count;
305 struct gfs2_unlinked_tag ul_ut;
306 unsigned long ul_flags; /* ULF_... */
307 unsigned int ul_slot;
308};
309
/* Bit numbers for gfs2_quota_data.qd_flags. */
310enum {
311 QDF_USER = 0,
312 QDF_CHANGE = 1,
313 QDF_LOCKED = 2,
314};
315
/*
 * In-core quota record for one id (qd_id).  Groups, in order: list link
 * and use count; id and flags; pending change counters; slot
 * bookkeeping; the buffer holding its quota-change record; its glock
 * and lock value block copy; and sync/warning timestamps.
 */
316struct gfs2_quota_data {
317 struct list_head qd_list;
318 unsigned int qd_count;
319
320 uint32_t qd_id;
321 unsigned long qd_flags; /* QDF_... */
322
323 int64_t qd_change;
324 int64_t qd_change_sync;
325
326 unsigned int qd_slot;
327 unsigned int qd_slot_count;
328
329 struct buffer_head *qd_bh;
330 struct gfs2_quota_change *qd_bh_qc;
331 unsigned int qd_bh_count;
332
333 struct gfs2_glock *qd_gl;
334 struct gfs2_quota_lvb qd_qb;
335
336 uint64_t qd_sync_gen;
337 unsigned long qd_last_warn;
338 unsigned long qd_last_touched;
339};
340
/*
 * A buffer queued for log I/O: list link plus two buffer heads.
 * NOTE(review): lb_bh is presumably the buffer written to the journal
 * and lb_real the original it was cloned from -- confirm in log.c.
 */
341struct gfs2_log_buf {
342 struct list_head lb_list;
343 struct buffer_head *lb_bh;
344 struct buffer_head *lb_real;
345};
346
/*
 * State of one transaction: the file/line recorded for it, the block
 * and revoke counts requested plus the resulting reservation, the
 * holder used for the transaction glock, a "touched" flag, and running
 * counts of buffers and revokes added/removed.
 */
347struct gfs2_trans {
348 char *tr_file;
349 unsigned int tr_line;
350
351 unsigned int tr_blocks;
352 unsigned int tr_revokes;
353 unsigned int tr_reserved;
354
355 struct gfs2_holder *tr_t_gh;
356
357 int tr_touched;
358
359 unsigned int tr_num_buf;
360 unsigned int tr_num_buf_new;
361 unsigned int tr_num_buf_rm;
362 struct list_head tr_list_buf;
363
364 unsigned int tr_num_revoke;
365 unsigned int tr_num_revoke_rm;
366};
367
/*
 * One AIL entry: list link, ai_first, two buffer lists (ail1/ail2) and
 * a sync generation number.
 * NOTE(review): ai_first presumably records the first log block of the
 * commit this entry describes -- confirm in log.c.
 */
368struct gfs2_ail {
369 struct list_head ai_list;
370
371 unsigned int ai_first;
372 struct list_head ai_ail1_list;
373 struct list_head ai_ail2_list;
374
375 uint64_t ai_sync_gen;
376};
377
/*
 * Journal descriptor: list link, the journal's inode, its journal id,
 * a dirty flag and its size in blocks.
 */
378struct gfs2_jdesc {
379 struct list_head jd_list;
380
381 struct inode *jd_inode;
382 unsigned int jd_jid;
383 int jd_dirty;
384
385 unsigned int jd_blocks;
386};
387
/*
 * Mount-argument constants.
 *  - GFS2_GLOCKD_*: default and maximum number of glockd threads
 *    (GFS2_GLOCKD_MAX also sizes sd_glockd_process[]).
 *  - GFS2_QUOTA_*: values for gfs2_args.ar_quota (off/account/on).
 *  - GFS2_DATA_*: values for gfs2_args.ar_data (writeback/ordered).
 * The *_DEFAULT macros name other macros defined after them; that is
 * fine because expansion happens at the point of use.
 */
388#define GFS2_GLOCKD_DEFAULT 1
389#define GFS2_GLOCKD_MAX 16
390
391#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF
392#define GFS2_QUOTA_OFF 0
393#define GFS2_QUOTA_ACCOUNT 1
394#define GFS2_QUOTA_ON 2
395
396#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
397#define GFS2_DATA_WRITEBACK 1
398#define GFS2_DATA_ORDERED 2
399
/*
 * Mount arguments; embedded in struct gfs2_sbd as sd_args.  The three
 * string fields are fixed-size buffers of GFS2_LOCKNAME_LEN bytes.
 */
400struct gfs2_args {
401 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
402 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
403 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
404 int ar_spectator; /* Don't get a journal because we're always RO */
405 int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
406 int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
407 int ar_localcaching; /* Local-style caching (dangerous on multihost) */
408 int ar_debug; /* Oops on errors instead of trying to be graceful */
409 int ar_upgrade; /* Upgrade ondisk/multihost format */
410 unsigned int ar_num_glockd; /* Number of glockd threads */
411 int ar_posix_acl; /* Enable posix acls */
412 int ar_quota; /* off/account/on */
413 int ar_suiddir; /* suiddir support */
414 int ar_data; /* ordered/writeback */
415};
416
/*
 * Run-time tunables; embedded in struct gfs2_sbd as sd_tune and read
 * through gfs2_tune_get(sdp, field).
 * NOTE(review): gt_spin presumably serialises reads/updates of the
 * values -- confirm where gfs2_tune_get() is defined.
 */
417struct gfs2_tune {
418 spinlock_t gt_spin;
419
420 unsigned int gt_ilimit;
421 unsigned int gt_ilimit_tries;
422 unsigned int gt_ilimit_min;
423 unsigned int gt_demote_secs; /* Cache retention for unheld glock */
424 unsigned int gt_incore_log_blocks;
425 unsigned int gt_log_flush_secs;
426 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
427
428 unsigned int gt_scand_secs;
429 unsigned int gt_recoverd_secs;
430 unsigned int gt_logd_secs;
431 unsigned int gt_quotad_secs;
432 unsigned int gt_inoded_secs;
433
434 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
435 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
436 unsigned int gt_quota_scale_num; /* Numerator */
437 unsigned int gt_quota_scale_den; /* Denominator */
438 unsigned int gt_quota_cache_secs;
439 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
440 unsigned int gt_atime_quantum; /* Min secs between atime updates */
441 unsigned int gt_new_files_jdata;
442 unsigned int gt_new_files_directio;
443 unsigned int gt_max_atomic_write; /* Split big writes into this size */
444 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
445 unsigned int gt_lockdump_size;
446 unsigned int gt_stall_secs; /* Detects trouble! */
447 unsigned int gt_complain_secs;
448 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
449 unsigned int gt_entries_per_readdir;
450 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
451 unsigned int gt_greedy_default;
452 unsigned int gt_greedy_quantum;
453 unsigned int gt_greedy_max;
454 unsigned int gt_statfs_quantum;
455 unsigned int gt_statfs_slow;
456};
457
/*
 * One bucket of the glock hash table (sd_gl_hash[] in struct gfs2_sbd):
 * a reader/writer lock and the list head for the bucket's chain.
 */
458struct gfs2_gl_hash_bucket {
459 rwlock_t hb_lock;
460 struct list_head hb_list;
461};
462
/* Bit numbers for gfs2_sbd.sd_flags. */
463enum {
464 SDF_JOURNAL_CHECKED = 0,
465 SDF_JOURNAL_LIVE = 1,
466 SDF_SHUTDOWN = 2,
467 SDF_NOATIME = 3,
468};
469
/*
 * Glock hash table geometry: 2^GFS2_GL_HASH_SHIFT buckets (the size of
 * sd_gl_hash[]), with GFS2_GL_HASH_MASK being size - 1.
 * GFS2_FSNAME_LEN sizes the sd_fsname, sd_table_name and sd_proto_name
 * buffers in struct gfs2_sbd.
 */
470#define GFS2_GL_HASH_SHIFT 13
471#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
472#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
473#define GFS2_FSNAME_LEN 256
474
/*
 * In-core state for one GFS2 filesystem.  sd_vfs points at the VFS
 * super_block; the remaining members are grouped by subsystem, each
 * group introduced by its own comment below (mount-time constants,
 * locking, special inodes, statfs, resource groups, journals, daemons,
 * unlinked inodes, quota, log, replay, freeze, counters, names).
 *
 * Note that sd_gl_hash[] is embedded by value, so this structure
 * contains GFS2_GL_HASH_SIZE hash buckets and is correspondingly large.
 */
475struct gfs2_sbd {
476 struct super_block *sd_vfs;
477 struct kobject sd_kobj;
478 unsigned long sd_flags; /* SDF_... */
479 struct gfs2_sb sd_sb;
480
481 /* Constants computed on mount */
482
483 uint32_t sd_fsb2bb;
484 uint32_t sd_fsb2bb_shift;
485 uint32_t sd_diptrs; /* Number of pointers in a dinode */
486 uint32_t sd_inptrs; /* Number of pointers in a indirect block */
487 uint32_t sd_jbsize; /* Size of a journaled data block */
488 uint32_t sd_hash_bsize; /* sizeof(exhash block) */
489 uint32_t sd_hash_bsize_shift;
490 uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */
491 uint32_t sd_ut_per_block;
492 uint32_t sd_qc_per_block;
493 uint32_t sd_max_dirres; /* Max blocks needed to add a directory entry */
494 uint32_t sd_max_height; /* Max height of a file's metadata tree */
495 uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT];
496 uint32_t sd_max_jheight; /* Max height of journaled file's meta tree */
497 uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT];
498
499 struct gfs2_args sd_args; /* Mount arguments */
500 struct gfs2_tune sd_tune; /* Filesystem tuning structure */
501
502 /* Lock Stuff */
503
504 struct lm_lockstruct sd_lockstruct;
505 struct gfs2_gl_hash_bucket sd_gl_hash[GFS2_GL_HASH_SIZE];
506 struct list_head sd_reclaim_list;
507 spinlock_t sd_reclaim_lock;
508 wait_queue_head_t sd_reclaim_wq;
509 atomic_t sd_reclaim_count;
510 struct gfs2_holder sd_live_gh;
511 struct gfs2_glock *sd_rename_gl;
512 struct gfs2_glock *sd_trans_gl;
513 struct mutex sd_invalidate_inodes_mutex;
514
515 /* Inode Stuff */
516
517 struct inode *sd_master_dir;
518 struct inode *sd_jindex;
519 struct inode *sd_inum_inode;
520 struct inode *sd_statfs_inode;
521 struct inode *sd_ir_inode;
522 struct inode *sd_sc_inode;
523 struct inode *sd_ut_inode;
524 struct inode *sd_qc_inode;
525 struct inode *sd_rindex;
526 struct inode *sd_quota_inode;
527 struct inode *sd_root_dir;
528
529 /* Inum stuff */
530
531 struct mutex sd_inum_mutex;
532
533 /* StatFS stuff */
534
535 spinlock_t sd_statfs_spin;
536 struct mutex sd_statfs_mutex;
537 struct gfs2_statfs_change sd_statfs_master;
538 struct gfs2_statfs_change sd_statfs_local;
539 unsigned long sd_statfs_sync_time;
540
541 /* Resource group stuff */
542
543 uint64_t sd_rindex_vn;
544 spinlock_t sd_rindex_spin;
545 struct mutex sd_rindex_mutex;
546 struct list_head sd_rindex_list;
547 struct list_head sd_rindex_mru_list;
548 struct list_head sd_rindex_recent_list;
549 struct gfs2_rgrpd *sd_rindex_forward;
550 unsigned int sd_rgrps;
551
552 /* Journal index stuff */
553
554 struct list_head sd_jindex_list;
555 spinlock_t sd_jindex_spin;
556 struct mutex sd_jindex_mutex;
557 unsigned int sd_journals;
558 unsigned long sd_jindex_refresh_time;
559
560 struct gfs2_jdesc *sd_jdesc;
561 struct gfs2_holder sd_journal_gh;
562 struct gfs2_holder sd_jinode_gh;
563
564 struct gfs2_holder sd_ir_gh;
565 struct gfs2_holder sd_sc_gh;
566 struct gfs2_holder sd_ut_gh;
567 struct gfs2_holder sd_qc_gh;
568
569 /* Daemon stuff */
570
571 struct task_struct *sd_scand_process;
572 struct task_struct *sd_recoverd_process;
573 struct task_struct *sd_logd_process;
574 struct task_struct *sd_quotad_process;
575 struct task_struct *sd_inoded_process;
576 struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
577 unsigned int sd_glockd_num;
578
579 /* Unlinked inode stuff */
580
581 struct list_head sd_unlinked_list;
582 atomic_t sd_unlinked_count;
583 spinlock_t sd_unlinked_spin;
584 struct mutex sd_unlinked_mutex;
585
586 unsigned int sd_unlinked_slots;
587 unsigned int sd_unlinked_chunks;
588 unsigned char **sd_unlinked_bitmap;
589
590 /* Quota stuff */
591
592 struct list_head sd_quota_list;
593 atomic_t sd_quota_count;
594 spinlock_t sd_quota_spin;
595 struct mutex sd_quota_mutex;
596
597 unsigned int sd_quota_slots;
598 unsigned int sd_quota_chunks;
599 unsigned char **sd_quota_bitmap;
600
601 uint64_t sd_quota_sync_gen;
602 unsigned long sd_quota_sync_time;
603
604 /* Log stuff */
605
606 spinlock_t sd_log_lock;
607 atomic_t sd_log_trans_count;
608 wait_queue_head_t sd_log_trans_wq;
609 atomic_t sd_log_flush_count;
610 wait_queue_head_t sd_log_flush_wq;
611
612 unsigned int sd_log_blks_reserved;
613 unsigned int sd_log_commited_buf;
614 unsigned int sd_log_commited_revoke;
615
616 unsigned int sd_log_num_gl;
617 unsigned int sd_log_num_buf;
618 unsigned int sd_log_num_revoke;
619 unsigned int sd_log_num_rg;
620 unsigned int sd_log_num_databuf;
621 unsigned int sd_log_num_jdata;
622
623 struct list_head sd_log_le_gl;
624 struct list_head sd_log_le_buf;
625 struct list_head sd_log_le_revoke;
626 struct list_head sd_log_le_rg;
627 struct list_head sd_log_le_databuf;
628
629 unsigned int sd_log_blks_free;
630 struct list_head sd_log_blks_list;
631 wait_queue_head_t sd_log_blks_wait;
632
633 uint64_t sd_log_sequence;
634 unsigned int sd_log_head;
635 unsigned int sd_log_tail;
636 uint64_t sd_log_wraps;
637 int sd_log_idle;
638
639 unsigned long sd_log_flush_time;
640 struct mutex sd_log_flush_lock;
641 struct list_head sd_log_flush_list;
642
643 unsigned int sd_log_flush_head;
644 uint64_t sd_log_flush_wrapped;
645
646 struct list_head sd_ail1_list;
647 struct list_head sd_ail2_list;
648 uint64_t sd_ail_sync_gen;
649
650 /* Replay stuff */
651
652 struct list_head sd_revoke_list;
653 unsigned int sd_replay_tail;
654
655 unsigned int sd_found_blocks;
656 unsigned int sd_found_revokes;
657 unsigned int sd_replayed_blocks;
658
659 /* For quiescing the filesystem */
660
661 struct gfs2_holder sd_freeze_gh;
662 struct mutex sd_freeze_lock;
663 unsigned int sd_freeze_count;
664
665 /* Counters */
666
667 atomic_t sd_glock_count;
668 atomic_t sd_glock_held_count;
669 atomic_t sd_inode_count;
670 atomic_t sd_reclaimed;
671
672 char sd_fsname[GFS2_FSNAME_LEN];
673 char sd_table_name[GFS2_FSNAME_LEN];
674 char sd_proto_name[GFS2_FSNAME_LEN];
675
676 /* Debugging crud */
677
678 unsigned long sd_last_warning;
679};
680
681#endif /* __INCORE_DOT_H__ */
682
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
new file mode 100644
index 000000000000..51ecdb8503b0
--- /dev/null
+++ b/fs/gfs2/inode.c
@@ -0,0 +1,1838 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "acl.h"
21#include "bmap.h"
22#include "dir.h"
23#include "eattr.h"
24#include "glock.h"
25#include "glops.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "ops_address.h"
30#include "ops_file.h"
31#include "ops_inode.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "unlinked.h"
36
37/**
38 * inode_attr_in - Copy attributes from the dinode into the VFS inode
39 * @ip: The GFS2 inode (with embedded disk inode data)
40 * @inode: The Linux VFS inode
41 *
42 */
43
44static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode)
45{
46 inode->i_ino = ip->i_num.no_formal_ino;
47
48 switch (ip->i_di.di_mode & S_IFMT) {
49 case S_IFBLK:
50 case S_IFCHR:
51 inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
52 break;
53 default:
54 inode->i_rdev = 0;
55 break;
56 };
57
58 inode->i_mode = ip->i_di.di_mode;
59 inode->i_nlink = ip->i_di.di_nlink;
60 inode->i_uid = ip->i_di.di_uid;
61 inode->i_gid = ip->i_di.di_gid;
62 i_size_write(inode, ip->i_di.di_size);
63 inode->i_atime.tv_sec = ip->i_di.di_atime;
64 inode->i_mtime.tv_sec = ip->i_di.di_mtime;
65 inode->i_ctime.tv_sec = ip->i_di.di_ctime;
66 inode->i_atime.tv_nsec = 0;
67 inode->i_mtime.tv_nsec = 0;
68 inode->i_ctime.tv_nsec = 0;
69 inode->i_blksize = PAGE_SIZE;
70 inode->i_blocks = ip->i_di.di_blocks <<
71 (ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
72
73 if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE)
74 inode->i_flags |= S_IMMUTABLE;
75 else
76 inode->i_flags &= ~S_IMMUTABLE;
77
78 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY)
79 inode->i_flags |= S_APPEND;
80 else
81 inode->i_flags &= ~S_APPEND;
82}
83
/**
 * gfs2_inode_attr_in - Refresh the attached VFS inode from the dinode
 * @ip: The GFS2 inode (with embedded disk inode data)
 *
 * Does nothing when no VFS inode could be looked up for @ip.
 */

void gfs2_inode_attr_in(struct gfs2_inode *ip)
{
	struct inode *vnode = gfs2_ip2v_lookup(ip);

	if (!vnode)
		return;

	inode_attr_in(ip, vnode);
	/* Drop the reference the lookup took */
	iput(vnode);
}
100
101/**
102 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
103 * @ip: The GFS2 inode
104 *
105 * Only copy out the attributes that we want the VFS layer
106 * to be able to modify.
107 */
108
109void gfs2_inode_attr_out(struct gfs2_inode *ip)
110{
111 struct inode *inode = ip->i_vnode;
112
113 gfs2_assert_withdraw(ip->i_sbd,
114 (ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
115 ip->i_di.di_mode = inode->i_mode;
116 ip->i_di.di_uid = inode->i_uid;
117 ip->i_di.di_gid = inode->i_gid;
118 ip->i_di.di_atime = inode->i_atime.tv_sec;
119 ip->i_di.di_mtime = inode->i_mtime.tv_sec;
120 ip->i_di.di_ctime = inode->i_ctime.tv_sec;
121}
122
123/**
124 * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode
125 * @ip: the struct gfs2_inode to get the struct inode for
126 *
127 * Returns: A VFS inode, or NULL if none
128 */
129
130struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip)
131{
132 struct inode *inode = NULL;
133
134 gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags));
135
136 spin_lock(&ip->i_spin);
137 if (ip->i_vnode)
138 inode = igrab(ip->i_vnode);
139 spin_unlock(&ip->i_spin);
140
141 return inode;
142}
143
144/**
145 * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode
146 * @ip: the struct gfs2_inode to get the struct inode for
147 *
148 * Returns: A VFS inode, or NULL if no mem
149 */
150
151struct inode *gfs2_ip2v(struct gfs2_inode *ip)
152{
153 struct inode *inode, *tmp;
154
155 inode = gfs2_ip2v_lookup(ip);
156 if (inode)
157 return inode;
158
159 tmp = new_inode(ip->i_sbd->sd_vfs);
160 if (!tmp)
161 return NULL;
162
163 inode_attr_in(ip, tmp);
164
165 if (S_ISREG(ip->i_di.di_mode)) {
166 tmp->i_op = &gfs2_file_iops;
167 tmp->i_fop = &gfs2_file_fops;
168 tmp->i_mapping->a_ops = &gfs2_file_aops;
169 } else if (S_ISDIR(ip->i_di.di_mode)) {
170 tmp->i_op = &gfs2_dir_iops;
171 tmp->i_fop = &gfs2_dir_fops;
172 } else if (S_ISLNK(ip->i_di.di_mode)) {
173 tmp->i_op = &gfs2_symlink_iops;
174 } else {
175 tmp->i_op = &gfs2_dev_iops;
176 init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
177 }
178
179 set_v2ip(tmp, NULL);
180
181 for (;;) {
182 spin_lock(&ip->i_spin);
183 if (!ip->i_vnode)
184 break;
185 inode = igrab(ip->i_vnode);
186 spin_unlock(&ip->i_spin);
187
188 if (inode) {
189 iput(tmp);
190 return inode;
191 }
192 yield();
193 }
194
195 inode = tmp;
196
197 gfs2_inode_hold(ip);
198 ip->i_vnode = inode;
199 set_v2ip(inode, ip);
200
201 spin_unlock(&ip->i_spin);
202
203 insert_inode_hash(inode);
204
205 return inode;
206}
207
208static int iget_test(struct inode *inode, void *opaque)
209{
210 struct gfs2_inode *ip = get_v2ip(inode);
211 struct gfs2_inum *inum = (struct gfs2_inum *)opaque;
212
213 if (ip && ip->i_num.no_addr == inum->no_addr)
214 return 1;
215
216 return 0;
217}
218
219struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
220{
221 return ilookup5(sb, (unsigned long)inum->no_formal_ino,
222 iget_test, inum);
223}
224
225void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type)
226{
227 spin_lock(&ip->i_spin);
228 if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) {
229 ip->i_di.di_nlink = 1;
230 ip->i_di.di_mode = DT2IF(type);
231 }
232 spin_unlock(&ip->i_spin);
233}
234
235/**
236 * gfs2_inode_refresh - Refresh the incore copy of the dinode
237 * @ip: The GFS2 inode
238 *
239 * Returns: errno
240 */
241
242int gfs2_inode_refresh(struct gfs2_inode *ip)
243{
244 struct buffer_head *dibh;
245 int error;
246
247 error = gfs2_meta_inode_buffer(ip, &dibh);
248 if (error)
249 return error;
250
251 if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) {
252 brelse(dibh);
253 return -EIO;
254 }
255
256 spin_lock(&ip->i_spin);
257 gfs2_dinode_in(&ip->i_di, dibh->b_data);
258 set_bit(GIF_MIN_INIT, &ip->i_flags);
259 spin_unlock(&ip->i_spin);
260
261 brelse(dibh);
262
263 if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
264 if (gfs2_consist_inode(ip))
265 gfs2_dinode_print(&ip->i_di);
266 return -EIO;
267 }
268 if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
269 return -ESTALE;
270
271 ip->i_vn = ip->i_gl->gl_vn;
272
273 return 0;
274}
275
276/**
277 * inode_create - create a struct gfs2_inode
278 * @i_gl: The glock covering the inode
279 * @inum: The inode number
280 * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode)
281 * @io_state: the state the iopen glock should be acquired in
282 * @ipp: pointer to put the returned inode in
283 *
284 * Returns: errno
285 */
286
287static int inode_create(struct gfs2_glock *i_gl, struct gfs2_inum *inum,
288 struct gfs2_glock *io_gl, unsigned int io_state,
289 struct gfs2_inode **ipp)
290{
291 struct gfs2_sbd *sdp = i_gl->gl_sbd;
292 struct gfs2_inode *ip;
293 int error = 0;
294
295 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
296 if (!ip)
297 return -ENOMEM;
298 memset(ip, 0, sizeof(struct gfs2_inode));
299
300 ip->i_num = *inum;
301
302 atomic_set(&ip->i_count, 1);
303
304 ip->i_vn = i_gl->gl_vn - 1;
305
306 ip->i_gl = i_gl;
307 ip->i_sbd = sdp;
308
309 spin_lock_init(&ip->i_spin);
310 init_rwsem(&ip->i_rw_mutex);
311
312 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
313
314 error = gfs2_glock_nq_init(io_gl,
315 io_state, GL_LOCAL_EXCL | GL_EXACT,
316 &ip->i_iopen_gh);
317 if (error)
318 goto fail;
319 ip->i_iopen_gh.gh_owner = NULL;
320
321 spin_lock(&io_gl->gl_spin);
322 gfs2_glock_hold(i_gl);
323 set_gl2gl(io_gl, i_gl);
324 spin_unlock(&io_gl->gl_spin);
325
326 gfs2_glock_hold(i_gl);
327 set_gl2ip(i_gl, ip);
328
329 atomic_inc(&sdp->sd_inode_count);
330
331 *ipp = ip;
332
333 return 0;
334
335 fail:
336 gfs2_meta_cache_flush(ip);
337 kmem_cache_free(gfs2_inode_cachep, ip);
338 *ipp = NULL;
339
340 return error;
341}
342
343/**
344 * gfs2_inode_get - Create or get a reference on an inode
345 * @i_gl: The glock covering the inode
346 * @inum: The inode number
347 * @create:
348 * @ipp: pointer to put the returned inode in
349 *
350 * Returns: errno
351 */
352
353int gfs2_inode_get(struct gfs2_glock *i_gl, struct gfs2_inum *inum, int create,
354 struct gfs2_inode **ipp)
355{
356 struct gfs2_sbd *sdp = i_gl->gl_sbd;
357 struct gfs2_glock *io_gl;
358 int error = 0;
359
360 gfs2_glmutex_lock(i_gl);
361
362 *ipp = get_gl2ip(i_gl);
363 if (*ipp) {
364 error = -ESTALE;
365 if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino)
366 goto out;
367 atomic_inc(&(*ipp)->i_count);
368 error = 0;
369 goto out;
370 }
371
372 if (!create)
373 goto out;
374
375 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops,
376 CREATE, &io_gl);
377 if (!error) {
378 error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp);
379 gfs2_glock_put(io_gl);
380 }
381
382 out:
383 gfs2_glmutex_unlock(i_gl);
384
385 return error;
386}
387
388void gfs2_inode_hold(struct gfs2_inode *ip)
389{
390 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
391 atomic_inc(&ip->i_count);
392}
393
394void gfs2_inode_put(struct gfs2_inode *ip)
395{
396 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
397 atomic_dec(&ip->i_count);
398}
399
400void gfs2_inode_destroy(struct gfs2_inode *ip)
401{
402 struct gfs2_sbd *sdp = ip->i_sbd;
403 struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
404 struct gfs2_glock *i_gl = ip->i_gl;
405
406 gfs2_assert_warn(sdp, !atomic_read(&ip->i_count));
407 gfs2_assert(sdp, get_gl2gl(io_gl) == i_gl);
408
409 spin_lock(&io_gl->gl_spin);
410 set_gl2gl(io_gl, NULL);
411 gfs2_glock_put(i_gl);
412 spin_unlock(&io_gl->gl_spin);
413
414 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
415
416 gfs2_meta_cache_flush(ip);
417 kmem_cache_free(gfs2_inode_cachep, ip);
418
419 set_gl2ip(i_gl, NULL);
420 gfs2_glock_put(i_gl);
421
422 atomic_dec(&sdp->sd_inode_count);
423}
424
425static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul)
426{
427 struct gfs2_sbd *sdp = ip->i_sbd;
428 struct gfs2_alloc *al;
429 struct gfs2_rgrpd *rgd;
430 int error;
431
432 if (ip->i_di.di_blocks != 1) {
433 if (gfs2_consist_inode(ip))
434 gfs2_dinode_print(&ip->i_di);
435 return -EIO;
436 }
437
438 al = gfs2_alloc_get(ip);
439
440 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
441 if (error)
442 goto out;
443
444 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
445 if (error)
446 goto out_qs;
447
448 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
449 if (!rgd) {
450 gfs2_consist_inode(ip);
451 error = -EIO;
452 goto out_rindex_relse;
453 }
454
455 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
456 &al->al_rgd_gh);
457 if (error)
458 goto out_rindex_relse;
459
460 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
461 RES_STATFS + RES_QUOTA, 1);
462 if (error)
463 goto out_rg_gunlock;
464
465 gfs2_trans_add_gl(ip->i_gl);
466
467 gfs2_free_di(rgd, ip);
468
469 error = gfs2_unlinked_ondisk_rm(sdp, ul);
470
471 gfs2_trans_end(sdp);
472 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
473
474 out_rg_gunlock:
475 gfs2_glock_dq_uninit(&al->al_rgd_gh);
476
477 out_rindex_relse:
478 gfs2_glock_dq_uninit(&al->al_ri_gh);
479
480 out_qs:
481 gfs2_quota_unhold(ip);
482
483 out:
484 gfs2_alloc_put(ip);
485
486 return error;
487}
488
489/**
490 * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode)
491 * @sdp: the filesystem
492 * @inum: the inode number to deallocate
493 * @io_gh: a holder for the iopen glock for this inode
494 *
495 * Returns: errno
496 */
497
498static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul,
499 struct gfs2_holder *io_gh)
500{
501 struct gfs2_inode *ip;
502 struct gfs2_holder i_gh;
503 int error;
504
505 error = gfs2_glock_nq_num(sdp,
506 ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops,
507 LM_ST_EXCLUSIVE, 0, &i_gh);
508 if (error)
509 return error;
510
511 /* We reacquire the iopen lock here to avoid a race with the NFS server
512 calling gfs2_read_inode() with the inode number of a inode we're in
513 the process of deallocating. And we can't keep our hold on the lock
514 from inode_dealloc_init() for deadlock reasons. */
515
516 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
517 error = gfs2_glock_nq(io_gh);
518 switch (error) {
519 case 0:
520 break;
521 case GLR_TRYFAILED:
522 error = 1;
523 default:
524 goto out;
525 }
526
527 gfs2_assert_warn(sdp, !get_gl2ip(i_gh.gh_gl));
528 error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl,
529 LM_ST_EXCLUSIVE, &ip);
530
531 gfs2_glock_dq(io_gh);
532
533 if (error)
534 goto out;
535
536 error = gfs2_inode_refresh(ip);
537 if (error)
538 goto out_iput;
539
540 if (ip->i_di.di_nlink) {
541 if (gfs2_consist_inode(ip))
542 gfs2_dinode_print(&ip->i_di);
543 error = -EIO;
544 goto out_iput;
545 }
546
547 if (S_ISDIR(ip->i_di.di_mode) &&
548 (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
549 error = gfs2_dir_exhash_dealloc(ip);
550 if (error)
551 goto out_iput;
552 }
553
554 if (ip->i_di.di_eattr) {
555 error = gfs2_ea_dealloc(ip);
556 if (error)
557 goto out_iput;
558 }
559
560 if (!gfs2_is_stuffed(ip)) {
561 error = gfs2_file_dealloc(ip);
562 if (error)
563 goto out_iput;
564 }
565
566 error = dinode_dealloc(ip, ul);
567 if (error)
568 goto out_iput;
569
570 out_iput:
571 gfs2_glmutex_lock(i_gh.gh_gl);
572 gfs2_inode_put(ip);
573 gfs2_inode_destroy(ip);
574 gfs2_glmutex_unlock(i_gh.gh_gl);
575
576 out:
577 gfs2_glock_dq_uninit(&i_gh);
578
579 return error;
580}
581
582/**
583 * try_inode_dealloc - Try to deallocate an inode and all its blocks
584 * @sdp: the filesystem
585 *
586 * Returns: 0 on success, -errno on error, 1 on busy (inode open)
587 */
588
589static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
590{
591 struct gfs2_holder io_gh;
592 int error = 0;
593
594 gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum);
595
596 error = gfs2_glock_nq_num(sdp,
597 ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops,
598 LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
599 switch (error) {
600 case 0:
601 break;
602 case GLR_TRYFAILED:
603 return 1;
604 default:
605 return error;
606 }
607
608 gfs2_glock_dq(&io_gh);
609 error = inode_dealloc(sdp, ul, &io_gh);
610 gfs2_holder_uninit(&io_gh);
611
612 return error;
613}
614
615static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
616{
617 struct gfs2_rgrpd *rgd;
618 struct gfs2_holder ri_gh, rgd_gh;
619 int error;
620
621 error = gfs2_rindex_hold(sdp, &ri_gh);
622 if (error)
623 return error;
624
625 rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr);
626 if (!rgd) {
627 gfs2_consist(sdp);
628 error = -EIO;
629 goto out;
630 }
631
632 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
633 if (error)
634 goto out;
635
636 error = gfs2_trans_begin(sdp,
637 RES_RG_BIT + RES_UNLINKED + RES_STATFS,
638 0);
639 if (error)
640 goto out_gunlock;
641
642 gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr);
643 gfs2_unlinked_ondisk_rm(sdp, ul);
644
645 gfs2_trans_end(sdp);
646
647 out_gunlock:
648 gfs2_glock_dq_uninit(&rgd_gh);
649 out:
650 gfs2_glock_dq_uninit(&ri_gh);
651
652 return error;
653}
654
655int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
656{
657 if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT)
658 return inode_dealloc_uninit(sdp, ul);
659 else
660 return try_inode_dealloc(sdp, ul);
661}
662
663/**
664 * gfs2_change_nlink - Change nlink count on inode
665 * @ip: The GFS2 inode
666 * @diff: The change in the nlink count required
667 *
668 * Returns: errno
669 */
670
671int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
672{
673 struct buffer_head *dibh;
674 uint32_t nlink;
675 int error;
676
677 nlink = ip->i_di.di_nlink + diff;
678
679 /* If we are reducing the nlink count, but the new value ends up being
680 bigger than the old one, we must have underflowed. */
681 if (diff < 0 && nlink > ip->i_di.di_nlink) {
682 if (gfs2_consist_inode(ip))
683 gfs2_dinode_print(&ip->i_di);
684 return -EIO;
685 }
686
687 error = gfs2_meta_inode_buffer(ip, &dibh);
688 if (error)
689 return error;
690
691 ip->i_di.di_nlink = nlink;
692 ip->i_di.di_ctime = get_seconds();
693
694 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
695 gfs2_dinode_out(&ip->i_di, dibh->b_data);
696 brelse(dibh);
697
698 return 0;
699}
700
701/**
702 * gfs2_lookupi - Look up a filename in a directory and return its inode
703 * @d_gh: An initialized holder for the directory glock
704 * @name: The name of the inode to look for
705 * @is_root: If 1, ignore the caller's permissions
706 * @i_gh: An uninitialized holder for the new inode glock
707 *
708 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
709 * @is_root is true.
710 *
711 * Returns: errno
712 */
713
714int gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
715 struct inode **inodep)
716{
717 struct gfs2_inode *ipp;
718 struct gfs2_inode *dip = get_v2ip(dir);
719 struct gfs2_sbd *sdp = dip->i_sbd;
720 struct gfs2_holder d_gh;
721 struct gfs2_inum inum;
722 unsigned int type;
723 struct gfs2_glock *gl;
724 int error = 0;
725
726 *inodep = NULL;
727
728 if (!name->len || name->len > GFS2_FNAMESIZE)
729 return -ENAMETOOLONG;
730
731 if (gfs2_filecmp(name, ".", 1) ||
732 (gfs2_filecmp(name, "..", 2) && dir == sdp->sd_root_dir)) {
733 gfs2_inode_hold(dip);
734 ipp = dip;
735 goto done;
736 }
737
738 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
739 if (error)
740 return error;
741
742 if (!is_root) {
743 error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL);
744 if (error)
745 goto out;
746 }
747
748 error = gfs2_dir_search(dip, name, &inum, &type);
749 if (error)
750 goto out;
751
752 error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops,
753 CREATE, &gl);
754 if (error)
755 goto out;
756
757 error = gfs2_inode_get(gl, &inum, CREATE, &ipp);
758 if (!error)
759 gfs2_inode_min_init(ipp, type);
760
761 gfs2_glock_put(gl);
762
763out:
764 gfs2_glock_dq_uninit(&d_gh);
765done:
766 if (error == 0) {
767 *inodep = gfs2_ip2v(ipp);
768 if (!*inodep)
769 error = -ENOMEM;
770 gfs2_inode_put(ipp);
771 }
772 return error;
773}
774
775static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino)
776{
777 struct gfs2_inode *ip = get_v2ip(sdp->sd_ir_inode);
778 struct buffer_head *bh;
779 struct gfs2_inum_range ir;
780 int error;
781
782 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
783 if (error)
784 return error;
785 mutex_lock(&sdp->sd_inum_mutex);
786
787 error = gfs2_meta_inode_buffer(ip, &bh);
788 if (error) {
789 mutex_unlock(&sdp->sd_inum_mutex);
790 gfs2_trans_end(sdp);
791 return error;
792 }
793
794 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
795
796 if (ir.ir_length) {
797 *formal_ino = ir.ir_start++;
798 ir.ir_length--;
799 gfs2_trans_add_bh(ip->i_gl, bh, 1);
800 gfs2_inum_range_out(&ir,
801 bh->b_data + sizeof(struct gfs2_dinode));
802 brelse(bh);
803 mutex_unlock(&sdp->sd_inum_mutex);
804 gfs2_trans_end(sdp);
805 return 0;
806 }
807
808 brelse(bh);
809
810 mutex_unlock(&sdp->sd_inum_mutex);
811 gfs2_trans_end(sdp);
812
813 return 1;
814}
815
816static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino)
817{
818 struct gfs2_inode *ip = get_v2ip(sdp->sd_ir_inode);
819 struct gfs2_inode *m_ip = get_v2ip(sdp->sd_inum_inode);
820 struct gfs2_holder gh;
821 struct buffer_head *bh;
822 struct gfs2_inum_range ir;
823 int error;
824
825 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
826 if (error)
827 return error;
828
829 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
830 if (error)
831 goto out;
832 mutex_lock(&sdp->sd_inum_mutex);
833
834 error = gfs2_meta_inode_buffer(ip, &bh);
835 if (error)
836 goto out_end_trans;
837
838 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
839
840 if (!ir.ir_length) {
841 struct buffer_head *m_bh;
842 uint64_t x, y;
843
844 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
845 if (error)
846 goto out_brelse;
847
848 x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode));
849 x = y = be64_to_cpu(x);
850 ir.ir_start = x;
851 ir.ir_length = GFS2_INUM_QUANTUM;
852 x += GFS2_INUM_QUANTUM;
853 if (x < y)
854 gfs2_consist_inode(m_ip);
855 x = cpu_to_be64(x);
856 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
857 *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
858
859 brelse(m_bh);
860 }
861
862 *formal_ino = ir.ir_start++;
863 ir.ir_length--;
864
865 gfs2_trans_add_bh(ip->i_gl, bh, 1);
866 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
867
868 out_brelse:
869 brelse(bh);
870
871 out_end_trans:
872 mutex_unlock(&sdp->sd_inum_mutex);
873 gfs2_trans_end(sdp);
874
875 out:
876 gfs2_glock_dq_uninit(&gh);
877
878 return error;
879}
880
881static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum)
882{
883 int error;
884
885 error = pick_formal_ino_1(sdp, inum);
886 if (error <= 0)
887 return error;
888
889 error = pick_formal_ino_2(sdp, inum);
890
891 return error;
892}
893
894/**
895 * create_ok - OK to create a new on-disk inode here?
896 * @dip: Directory in which dinode is to be created
897 * @name: Name of new dinode
898 * @mode:
899 *
900 * Returns: errno
901 */
902
903static int create_ok(struct gfs2_inode *dip, struct qstr *name,
904 unsigned int mode)
905{
906 int error;
907
908 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
909 if (error)
910 return error;
911
912 /* Don't create entries in an unlinked directory */
913 if (!dip->i_di.di_nlink)
914 return -EPERM;
915
916 error = gfs2_dir_search(dip, name, NULL, NULL);
917 switch (error) {
918 case -ENOENT:
919 error = 0;
920 break;
921 case 0:
922 return -EEXIST;
923 default:
924 return error;
925 }
926
927 if (dip->i_di.di_entries == (uint32_t)-1)
928 return -EFBIG;
929 if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1)
930 return -EMLINK;
931
932 return 0;
933}
934
935static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
936 unsigned int *uid, unsigned int *gid)
937{
938 if (dip->i_sbd->sd_args.ar_suiddir &&
939 (dip->i_di.di_mode & S_ISUID) &&
940 dip->i_di.di_uid) {
941 if (S_ISDIR(*mode))
942 *mode |= S_ISUID;
943 else if (dip->i_di.di_uid != current->fsuid)
944 *mode &= ~07111;
945 *uid = dip->i_di.di_uid;
946 } else
947 *uid = current->fsuid;
948
949 if (dip->i_di.di_mode & S_ISGID) {
950 if (S_ISDIR(*mode))
951 *mode |= S_ISGID;
952 *gid = dip->i_di.di_gid;
953 } else
954 *gid = current->fsgid;
955}
956
957static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul)
958{
959 struct gfs2_sbd *sdp = dip->i_sbd;
960 int error;
961
962 gfs2_alloc_get(dip);
963
964 dip->i_alloc.al_requested = RES_DINODE;
965 error = gfs2_inplace_reserve(dip);
966 if (error)
967 goto out;
968
969 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
970 RES_STATFS, 0);
971 if (error)
972 goto out_ipreserv;
973
974 ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip);
975
976 ul->ul_ut.ut_flags = GFS2_UTF_UNINIT;
977 error = gfs2_unlinked_ondisk_add(sdp, ul);
978
979 gfs2_trans_end(sdp);
980
981 out_ipreserv:
982 gfs2_inplace_release(dip);
983
984 out:
985 gfs2_alloc_put(dip);
986
987 return error;
988}
989
990/**
991 * init_dinode - Fill in a new dinode structure
992 * @dip: the directory this inode is being created in
993 * @gl: The glock covering the new inode
994 * @inum: the inode number
995 * @mode: the file permissions
996 * @uid:
997 * @gid:
998 *
999 */
1000
1001static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
1002 struct gfs2_inum *inum, unsigned int mode,
1003 unsigned int uid, unsigned int gid)
1004{
1005 struct gfs2_sbd *sdp = dip->i_sbd;
1006 struct gfs2_dinode *di;
1007 struct buffer_head *dibh;
1008
1009 dibh = gfs2_meta_new(gl, inum->no_addr);
1010 gfs2_trans_add_bh(gl, dibh, 1);
1011 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
1012 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1013 di = (struct gfs2_dinode *)dibh->b_data;
1014
1015 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
1016 di->di_num.no_addr = cpu_to_be64(inum->no_addr);
1017 di->di_mode = cpu_to_be32(mode);
1018 di->di_uid = cpu_to_be32(uid);
1019 di->di_gid = cpu_to_be32(gid);
1020 di->di_nlink = cpu_to_be32(0);
1021 di->di_size = cpu_to_be64(0);
1022 di->di_blocks = cpu_to_be64(1);
1023 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
1024 di->di_major = di->di_minor = cpu_to_be32(0);
1025 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
1026 di->__pad[0] = di->__pad[1] = 0;
1027 di->di_flags = cpu_to_be32(0);
1028
1029 if (S_ISREG(mode)) {
1030 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
1031 gfs2_tune_get(sdp, gt_new_files_jdata))
1032 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
1033 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
1034 gfs2_tune_get(sdp, gt_new_files_directio))
1035 di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
1036 } else if (S_ISDIR(mode)) {
1037 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
1038 GFS2_DIF_INHERIT_DIRECTIO);
1039 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
1040 GFS2_DIF_INHERIT_JDATA);
1041 }
1042
1043 di->__pad1 = 0;
1044 di->di_height = cpu_to_be32(0);
1045 di->__pad2 = 0;
1046 di->__pad3 = 0;
1047 di->di_depth = cpu_to_be16(0);
1048 di->di_entries = cpu_to_be32(0);
1049 memset(&di->__pad4, 0, sizeof(di->__pad4));
1050 di->di_eattr = cpu_to_be64(0);
1051 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
1052
1053 brelse(dibh);
1054}
1055
1056static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
1057 unsigned int mode, struct gfs2_unlinked *ul)
1058{
1059 struct gfs2_sbd *sdp = dip->i_sbd;
1060 unsigned int uid, gid;
1061 int error;
1062
1063 munge_mode_uid_gid(dip, &mode, &uid, &gid);
1064
1065 gfs2_alloc_get(dip);
1066
1067 error = gfs2_quota_lock(dip, uid, gid);
1068 if (error)
1069 goto out;
1070
1071 error = gfs2_quota_check(dip, uid, gid);
1072 if (error)
1073 goto out_quota;
1074
1075 error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED +
1076 RES_QUOTA, 0);
1077 if (error)
1078 goto out_quota;
1079
1080 ul->ul_ut.ut_flags = 0;
1081 error = gfs2_unlinked_ondisk_munge(sdp, ul);
1082
1083 init_dinode(dip, gl, &ul->ul_ut.ut_inum,
1084 mode, uid, gid);
1085
1086 gfs2_quota_change(dip, +1, uid, gid);
1087
1088 gfs2_trans_end(sdp);
1089
1090 out_quota:
1091 gfs2_quota_unlock(dip);
1092
1093 out:
1094 gfs2_alloc_put(dip);
1095
1096 return error;
1097}
1098
1099static int link_dinode(struct gfs2_inode *dip, struct qstr *name,
1100 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1101{
1102 struct gfs2_sbd *sdp = dip->i_sbd;
1103 struct gfs2_alloc *al;
1104 int alloc_required;
1105 struct buffer_head *dibh;
1106 int error;
1107
1108 al = gfs2_alloc_get(dip);
1109
1110 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1111 if (error)
1112 goto fail;
1113
1114 error = gfs2_diradd_alloc_required(dip, name, &alloc_required);
1115 if (alloc_required) {
1116 error = gfs2_quota_check(dip, dip->i_di.di_uid,
1117 dip->i_di.di_gid);
1118 if (error)
1119 goto fail_quota_locks;
1120
1121 al->al_requested = sdp->sd_max_dirres;
1122
1123 error = gfs2_inplace_reserve(dip);
1124 if (error)
1125 goto fail_quota_locks;
1126
1127 error = gfs2_trans_begin(sdp,
1128 sdp->sd_max_dirres +
1129 al->al_rgd->rd_ri.ri_length +
1130 2 * RES_DINODE + RES_UNLINKED +
1131 RES_STATFS + RES_QUOTA, 0);
1132 if (error)
1133 goto fail_ipreserv;
1134 } else {
1135 error = gfs2_trans_begin(sdp,
1136 RES_LEAF +
1137 2 * RES_DINODE +
1138 RES_UNLINKED, 0);
1139 if (error)
1140 goto fail_quota_locks;
1141 }
1142
1143 error = gfs2_dir_add(dip, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
1144 if (error)
1145 goto fail_end_trans;
1146
1147 error = gfs2_meta_inode_buffer(ip, &dibh);
1148 if (error)
1149 goto fail_end_trans;
1150 ip->i_di.di_nlink = 1;
1151 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1152 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1153 brelse(dibh);
1154
1155 error = gfs2_unlinked_ondisk_rm(sdp, ul);
1156 if (error)
1157 goto fail_end_trans;
1158
1159 return 0;
1160
1161 fail_end_trans:
1162 gfs2_trans_end(sdp);
1163
1164 fail_ipreserv:
1165 if (dip->i_alloc.al_rgd)
1166 gfs2_inplace_release(dip);
1167
1168 fail_quota_locks:
1169 gfs2_quota_unlock(dip);
1170
1171 fail:
1172 gfs2_alloc_put(dip);
1173
1174 return error;
1175}
1176
1177/**
1178 * gfs2_createi - Create a new inode
1179 * @ghs: An array of two holders
1180 * @name: The name of the new file
1181 * @mode: the permissions on the new inode
1182 *
1183 * @ghs[0] is an initialized holder for the directory
1184 * @ghs[1] is the holder for the inode lock
1185 *
1186 * If the return value is not NULL, the glocks on both the directory and the new
1187 * file are held. A transaction has been started and an inplace reservation
1188 * is held, as well.
1189 *
1190 * Returns: An inode
1191 */
1192
1193struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
1194 unsigned int mode)
1195{
1196 struct inode *inode;
1197 struct gfs2_inode *dip = get_gl2ip(ghs->gh_gl);
1198 struct gfs2_sbd *sdp = dip->i_sbd;
1199 struct gfs2_unlinked *ul;
1200 struct gfs2_inode *ip;
1201 int error;
1202
1203 if (!name->len || name->len > GFS2_FNAMESIZE)
1204 return ERR_PTR(-ENAMETOOLONG);
1205
1206 error = gfs2_unlinked_get(sdp, &ul);
1207 if (error)
1208 return ERR_PTR(error);
1209
1210 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1211 error = gfs2_glock_nq(ghs);
1212 if (error)
1213 goto fail;
1214
1215 error = create_ok(dip, name, mode);
1216 if (error)
1217 goto fail_gunlock;
1218
1219 error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino);
1220 if (error)
1221 goto fail_gunlock;
1222
1223 error = alloc_dinode(dip, ul);
1224 if (error)
1225 goto fail_gunlock;
1226
1227 if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) {
1228 gfs2_glock_dq(ghs);
1229
1230 error = gfs2_glock_nq_num(sdp,
1231 ul->ul_ut.ut_inum.no_addr,
1232 &gfs2_inode_glops,
1233 LM_ST_EXCLUSIVE, GL_SKIP,
1234 ghs + 1);
1235 if (error) {
1236 gfs2_unlinked_put(sdp, ul);
1237 return ERR_PTR(error);
1238 }
1239
1240 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1241 error = gfs2_glock_nq(ghs);
1242 if (error) {
1243 gfs2_glock_dq_uninit(ghs + 1);
1244 gfs2_unlinked_put(sdp, ul);
1245 return ERR_PTR(error);
1246 }
1247
1248 error = create_ok(dip, name, mode);
1249 if (error)
1250 goto fail_gunlock2;
1251 } else {
1252 error = gfs2_glock_nq_num(sdp,
1253 ul->ul_ut.ut_inum.no_addr,
1254 &gfs2_inode_glops,
1255 LM_ST_EXCLUSIVE, GL_SKIP,
1256 ghs + 1);
1257 if (error)
1258 goto fail_gunlock;
1259 }
1260
1261 error = make_dinode(dip, ghs[1].gh_gl, mode, ul);
1262 if (error)
1263 goto fail_gunlock2;
1264
1265 error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip);
1266 if (error)
1267 goto fail_gunlock2;
1268
1269 error = gfs2_inode_refresh(ip);
1270 if (error)
1271 goto fail_iput;
1272
1273 error = gfs2_acl_create(dip, ip);
1274 if (error)
1275 goto fail_iput;
1276
1277 error = link_dinode(dip, name, ip, ul);
1278 if (error)
1279 goto fail_iput;
1280
1281 gfs2_unlinked_put(sdp, ul);
1282
1283 inode = gfs2_ip2v(ip);
1284 gfs2_inode_put(ip);
1285 if (!inode)
1286 return ERR_PTR(-ENOMEM);
1287 return inode;
1288
1289 fail_iput:
1290 gfs2_inode_put(ip);
1291
1292 fail_gunlock2:
1293 gfs2_glock_dq_uninit(ghs + 1);
1294
1295 fail_gunlock:
1296 gfs2_glock_dq(ghs);
1297
1298 fail:
1299 gfs2_unlinked_put(sdp, ul);
1300
1301 return ERR_PTR(error);
1302}
1303
1304/**
1305 * gfs2_unlinki - Unlink a file
1306 * @dip: The inode of the directory
1307 * @name: The name of the file to be unlinked
1308 * @ip: The inode of the file to be removed
1309 *
1310 * Assumes Glocks on both dip and ip are held.
1311 *
1312 * Returns: errno
1313 */
1314
1315int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
1316 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1317{
1318 struct gfs2_sbd *sdp = dip->i_sbd;
1319 int error;
1320
1321 error = gfs2_dir_del(dip, name);
1322 if (error)
1323 return error;
1324
1325 error = gfs2_change_nlink(ip, -1);
1326 if (error)
1327 return error;
1328
1329 /* If this inode is being unlinked from the directory structure,
1330 we need to mark that in the log so that it isn't lost during
1331 a crash. */
1332
1333 if (!ip->i_di.di_nlink) {
1334 ul->ul_ut.ut_inum = ip->i_num;
1335 error = gfs2_unlinked_ondisk_add(sdp, ul);
1336 if (!error)
1337 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1338 }
1339
1340 return error;
1341}
1342
1343/**
1344 * gfs2_rmdiri - Remove a directory
1345 * @dip: The parent directory of the directory to be removed
1346 * @name: The name of the directory to be removed
1347 * @ip: The GFS2 inode of the directory to be removed
1348 *
1349 * Assumes Glocks on dip and ip are held
1350 *
1351 * Returns: errno
1352 */
1353
1354int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
1355 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1356{
1357 struct gfs2_sbd *sdp = dip->i_sbd;
1358 struct qstr dotname;
1359 int error;
1360
1361 if (ip->i_di.di_entries != 2) {
1362 if (gfs2_consist_inode(ip))
1363 gfs2_dinode_print(&ip->i_di);
1364 return -EIO;
1365 }
1366
1367 error = gfs2_dir_del(dip, name);
1368 if (error)
1369 return error;
1370
1371 error = gfs2_change_nlink(dip, -1);
1372 if (error)
1373 return error;
1374
1375 dotname.len = 1;
1376 dotname.name = ".";
1377 error = gfs2_dir_del(ip, &dotname);
1378 if (error)
1379 return error;
1380
1381 dotname.len = 2;
1382 dotname.name = "..";
1383 error = gfs2_dir_del(ip, &dotname);
1384 if (error)
1385 return error;
1386
1387 error = gfs2_change_nlink(ip, -2);
1388 if (error)
1389 return error;
1390
1391 /* This inode is being unlinked from the directory structure and
1392 we need to mark that in the log so that it isn't lost during
1393 a crash. */
1394
1395 ul->ul_ut.ut_inum = ip->i_num;
1396 error = gfs2_unlinked_ondisk_add(sdp, ul);
1397 if (!error)
1398 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1399
1400 return error;
1401}
1402
1403/*
1404 * gfs2_unlink_ok - check to see that a inode is still in a directory
1405 * @dip: the directory
1406 * @name: the name of the file
1407 * @ip: the inode
1408 *
1409 * Assumes that the lock on (at least) @dip is held.
1410 *
1411 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
1412 */
1413
1414int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
1415 struct gfs2_inode *ip)
1416{
1417 struct gfs2_inum inum;
1418 unsigned int type;
1419 int error;
1420
1421 if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1422 return -EPERM;
1423
1424 if ((dip->i_di.di_mode & S_ISVTX) &&
1425 dip->i_di.di_uid != current->fsuid &&
1426 ip->i_di.di_uid != current->fsuid &&
1427 !capable(CAP_FOWNER))
1428 return -EPERM;
1429
1430 if (IS_APPEND(dip->i_vnode))
1431 return -EPERM;
1432
1433 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
1434 if (error)
1435 return error;
1436
1437 error = gfs2_dir_search(dip, name, &inum, &type);
1438 if (error)
1439 return error;
1440
1441 if (!gfs2_inum_equal(&inum, &ip->i_num))
1442 return -ENOENT;
1443
1444 if (IF2DT(ip->i_di.di_mode) != type) {
1445 gfs2_consist_inode(dip);
1446 return -EIO;
1447 }
1448
1449 return 0;
1450}
1451
1452/*
1453 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1454 * @this: move this
1455 * @to: to here
1456 *
1457 * Follow @to back to the root and make sure we don't encounter @this
1458 * Assumes we already hold the rename lock.
1459 *
1460 * Returns: errno
1461 */
1462
1463int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1464{
1465 struct gfs2_sbd *sdp = this->i_sbd;
1466 struct inode *dir = to->i_vnode;
1467 struct inode *tmp;
1468 struct qstr dotdot;
1469 int error = 0;
1470
1471 memset(&dotdot, 0, sizeof(struct qstr));
1472 dotdot.name = "..";
1473 dotdot.len = 2;
1474
1475 igrab(dir);
1476
1477 for (;;) {
1478 if (dir == this->i_vnode) {
1479 error = -EINVAL;
1480 break;
1481 }
1482 if (dir == sdp->sd_root_dir) {
1483 error = 0;
1484 break;
1485 }
1486
1487 error = gfs2_lookupi(dir, &dotdot, 1, &tmp);
1488 if (error)
1489 break;
1490
1491 iput(dir);
1492 dir = tmp;
1493 }
1494
1495 iput(dir);
1496
1497 return error;
1498}
1499
1500/**
1501 * gfs2_readlinki - return the contents of a symlink
1502 * @ip: the symlink's inode
1503 * @buf: a pointer to the buffer to be filled
1504 * @len: a pointer to the length of @buf
1505 *
1506 * If @buf is too small, a piece of memory is kmalloc()ed and needs
1507 * to be freed by the caller.
1508 *
1509 * Returns: errno
1510 */
1511
1512int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1513{
1514 struct gfs2_holder i_gh;
1515 struct buffer_head *dibh;
1516 unsigned int x;
1517 int error;
1518
1519 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1520 error = gfs2_glock_nq_atime(&i_gh);
1521 if (error) {
1522 gfs2_holder_uninit(&i_gh);
1523 return error;
1524 }
1525
1526 if (!ip->i_di.di_size) {
1527 gfs2_consist_inode(ip);
1528 error = -EIO;
1529 goto out;
1530 }
1531
1532 error = gfs2_meta_inode_buffer(ip, &dibh);
1533 if (error)
1534 goto out;
1535
1536 x = ip->i_di.di_size + 1;
1537 if (x > *len) {
1538 *buf = kmalloc(x, GFP_KERNEL);
1539 if (!*buf) {
1540 error = -ENOMEM;
1541 goto out_brelse;
1542 }
1543 }
1544
1545 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1546 *len = x;
1547
1548 out_brelse:
1549 brelse(dibh);
1550
1551 out:
1552 gfs2_glock_dq_uninit(&i_gh);
1553
1554 return error;
1555}
1556
1557/**
1558 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1559 * conditionally update the inode's atime
1560 * @gh: the holder to acquire
1561 *
1562 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1563 * Update if the difference between the current time and the inode's current
1564 * atime is greater than an interval specified at mount.
1565 *
1566 * Returns: errno
1567 */
1568
1569int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1570{
1571 struct gfs2_glock *gl = gh->gh_gl;
1572 struct gfs2_sbd *sdp = gl->gl_sbd;
1573 struct gfs2_inode *ip = get_gl2ip(gl);
1574 int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1575 unsigned int state;
1576 int flags;
1577 int error;
1578
1579 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1580 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1581 gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1582 return -EINVAL;
1583
1584 state = gh->gh_state;
1585 flags = gh->gh_flags;
1586
1587 error = gfs2_glock_nq(gh);
1588 if (error)
1589 return error;
1590
1591 if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1592 (sdp->sd_vfs->s_flags & MS_RDONLY))
1593 return 0;
1594
1595 curtime = get_seconds();
1596 if (curtime - ip->i_di.di_atime >= quantum) {
1597 gfs2_glock_dq(gh);
1598 gfs2_holder_reinit(LM_ST_EXCLUSIVE,
1599 gh->gh_flags & ~LM_FLAG_ANY,
1600 gh);
1601 error = gfs2_glock_nq(gh);
1602 if (error)
1603 return error;
1604
1605 /* Verify that atime hasn't been updated while we were
1606 trying to get exclusive lock. */
1607
1608 curtime = get_seconds();
1609 if (curtime - ip->i_di.di_atime >= quantum) {
1610 struct buffer_head *dibh;
1611
1612 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1613 if (error == -EROFS)
1614 return 0;
1615 if (error)
1616 goto fail;
1617
1618 error = gfs2_meta_inode_buffer(ip, &dibh);
1619 if (error)
1620 goto fail_end_trans;
1621
1622 ip->i_di.di_atime = curtime;
1623
1624 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1625 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1626 brelse(dibh);
1627
1628 gfs2_trans_end(sdp);
1629 }
1630
1631 /* If someone else has asked for the glock,
1632 unlock and let them have it. Then reacquire
1633 in the original state. */
1634 if (gfs2_glock_is_blocking(gl)) {
1635 gfs2_glock_dq(gh);
1636 gfs2_holder_reinit(state, flags, gh);
1637 return gfs2_glock_nq(gh);
1638 }
1639 }
1640
1641 return 0;
1642
1643 fail_end_trans:
1644 gfs2_trans_end(sdp);
1645
1646 fail:
1647 gfs2_glock_dq(gh);
1648
1649 return error;
1650}
1651
1652/**
1653 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1654 * @arg_a: the first structure
1655 * @arg_b: the second structure
1656 *
1657 * Returns: 1 if A > B
1658 * -1 if A < B
1659 * 0 if A = B
1660 */
1661
1662static int glock_compare_atime(const void *arg_a, const void *arg_b)
1663{
1664 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1665 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1666 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1667 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1668 int ret = 0;
1669
1670 if (a->ln_number > b->ln_number)
1671 ret = 1;
1672 else if (a->ln_number < b->ln_number)
1673 ret = -1;
1674 else {
1675 if (gh_a->gh_state == LM_ST_SHARED &&
1676 gh_b->gh_state == LM_ST_EXCLUSIVE)
1677 ret = 1;
1678 else if (gh_a->gh_state == LM_ST_SHARED &&
1679 (gh_b->gh_flags & GL_ATIME))
1680 ret = 1;
1681 }
1682
1683 return ret;
1684}
1685
1686/**
1687 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
1688 * atime update
1689 * @num_gh: the number of structures
1690 * @ghs: an array of struct gfs2_holder structures
1691 *
1692 * Returns: 0 on success (all glocks acquired),
1693 * errno on failure (no glocks acquired)
1694 */
1695
1696int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
1697{
1698 struct gfs2_holder **p;
1699 unsigned int x;
1700 int error = 0;
1701
1702 if (!num_gh)
1703 return 0;
1704
1705 if (num_gh == 1) {
1706 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1707 if (ghs->gh_flags & GL_ATIME)
1708 error = gfs2_glock_nq_atime(ghs);
1709 else
1710 error = gfs2_glock_nq(ghs);
1711 return error;
1712 }
1713
1714 p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1715 if (!p)
1716 return -ENOMEM;
1717
1718 for (x = 0; x < num_gh; x++)
1719 p[x] = &ghs[x];
1720
1721 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
1722
1723 for (x = 0; x < num_gh; x++) {
1724 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1725
1726 if (p[x]->gh_flags & GL_ATIME)
1727 error = gfs2_glock_nq_atime(p[x]);
1728 else
1729 error = gfs2_glock_nq(p[x]);
1730
1731 if (error) {
1732 while (x--)
1733 gfs2_glock_dq(p[x]);
1734 break;
1735 }
1736 }
1737
1738 kfree(p);
1739
1740 return error;
1741}
1742
1743/**
1744 * gfs2_try_toss_vnode - See if we can toss a vnode from memory
1745 * @ip: the inode
1746 *
1747 * Returns: 1 if the vnode was tossed
1748 */
1749
1750void gfs2_try_toss_vnode(struct gfs2_inode *ip)
1751{
1752 struct inode *inode;
1753
1754 inode = gfs2_ip2v_lookup(ip);
1755 if (!inode)
1756 return;
1757
1758 d_prune_aliases(inode);
1759
1760 if (S_ISDIR(ip->i_di.di_mode)) {
1761 struct list_head *head = &inode->i_dentry;
1762 struct dentry *d = NULL;
1763
1764 spin_lock(&dcache_lock);
1765 if (list_empty(head))
1766 spin_unlock(&dcache_lock);
1767 else {
1768 d = list_entry(head->next, struct dentry, d_alias);
1769 dget_locked(d);
1770 spin_unlock(&dcache_lock);
1771
1772 if (have_submounts(d))
1773 dput(d);
1774 else {
1775 shrink_dcache_parent(d);
1776 dput(d);
1777 d_prune_aliases(inode);
1778 }
1779 }
1780 }
1781
1782 inode->i_nlink = 0;
1783 iput(inode);
1784}
1785
1786
1787static int
1788__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1789{
1790 struct buffer_head *dibh;
1791 int error;
1792
1793 error = gfs2_meta_inode_buffer(ip, &dibh);
1794 if (!error) {
1795 error = inode_setattr(ip->i_vnode, attr);
1796 gfs2_assert_warn(ip->i_sbd, !error);
1797 gfs2_inode_attr_out(ip);
1798
1799 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1800 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1801 brelse(dibh);
1802 }
1803 return error;
1804}
1805
1806/**
1807 * gfs2_setattr_simple -
1808 * @ip:
1809 * @attr:
1810 *
1811 * Called with a reference on the vnode.
1812 *
1813 * Returns: errno
1814 */
1815
1816int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1817{
1818 int error;
1819
1820 if (get_transaction)
1821 return __gfs2_setattr_simple(ip, attr);
1822
1823 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
1824 if (error)
1825 return error;
1826
1827 error = __gfs2_setattr_simple(ip, attr);
1828
1829 gfs2_trans_end(ip->i_sbd);
1830
1831 return error;
1832}
1833
/**
 * gfs2_repermission - Check access rights on an inode
 * @inode: the inode to check
 * @mask: the access mask passed to permission()
 * @nd: nameidata passed to permission(); callers in this file pass NULL
 *
 * Thin wrapper that forwards to permission().
 *
 * Returns: the result of permission()
 */

int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd)
{
	int error = permission(inode, mask, nd);

	return error;
}
1838
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
new file mode 100644
index 000000000000..069f0e21db6d
--- /dev/null
+++ b/fs/gfs2/inode.h
@@ -0,0 +1,82 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__
12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
14{
15 return !ip->i_di.di_height;
16}
17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip)
19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21}
22
23static inline int gfs2_is_dir(struct gfs2_inode *ip)
24{
25 return S_ISDIR(ip->i_di.di_mode);
26}
27
28void gfs2_inode_attr_in(struct gfs2_inode *ip);
29void gfs2_inode_attr_out(struct gfs2_inode *ip);
30struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
31struct inode *gfs2_ip2v(struct gfs2_inode *ip);
32struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum);
33
34void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type);
35int gfs2_inode_refresh(struct gfs2_inode *ip);
36
37int gfs2_inode_get(struct gfs2_glock *i_gl,
38 struct gfs2_inum *inum, int create,
39 struct gfs2_inode **ipp);
40void gfs2_inode_hold(struct gfs2_inode *ip);
41void gfs2_inode_put(struct gfs2_inode *ip);
42void gfs2_inode_destroy(struct gfs2_inode *ip);
43
44int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
45
46int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
47int gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
48 struct inode **ipp);
49struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
50 unsigned int mode);
51int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
52 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
53int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
54 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
55int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
56 struct gfs2_inode *ip);
57int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
58int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
59
60int gfs2_glock_nq_atime(struct gfs2_holder *gh);
61int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
62
63void gfs2_try_toss_vnode(struct gfs2_inode *ip);
64
65int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
66
67int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd);
68
69static inline int gfs2_lookup_simple(struct inode *dip, char *name,
70 struct inode **ipp)
71{
72 struct qstr qstr;
73 int err;
74 memset(&qstr, 0, sizeof(struct qstr));
75 qstr.name = name;
76 qstr.len = strlen(name);
77 err = gfs2_lookupi(dip, &qstr, 1, ipp);
78 return err;
79}
80
81#endif /* __INODE_DOT_H__ */
82
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
new file mode 100644
index 000000000000..3df8fa00442d
--- /dev/null
+++ b/fs/gfs2/lm.c
@@ -0,0 +1,238 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "glock.h"
20#include "lm.h"
21#include "super.h"
22
23/**
24 * gfs2_lm_mount - mount a locking protocol
25 * @sdp: the filesystem
26 * @args: mount arguements
27 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
28 *
29 * Returns: errno
30 */
31
32int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
33{
34 char *proto = sdp->sd_proto_name;
35 char *table = sdp->sd_table_name;
36 int flags = 0;
37 int error;
38
39 if (sdp->sd_args.ar_spectator)
40 flags |= LM_MFLAG_SPECTATOR;
41
42 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
43
44 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
45 gfs2_glock_cb, sdp,
46 GFS2_MIN_LVB_SIZE, flags,
47 &sdp->sd_lockstruct, &sdp->sd_kobj);
48 if (error) {
49 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
50 proto, table, sdp->sd_args.ar_hostdata);
51 goto out;
52 }
53
54 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
55 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
56 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
57 GFS2_MIN_LVB_SIZE)) {
58 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
59 goto out;
60 }
61
62 if (sdp->sd_args.ar_spectator)
63 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
64 else
65 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
66 sdp->sd_lockstruct.ls_jid);
67
68 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
69
70 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
71 !sdp->sd_args.ar_ignore_local_fs) {
72 sdp->sd_args.ar_localflocks = 1;
73 sdp->sd_args.ar_localcaching = 1;
74 }
75
76 out:
77 return error;
78}
79
80void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
81{
82 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
83 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
84 sdp->sd_lockstruct.ls_lockspace);
85}
86
87void gfs2_lm_unmount(struct gfs2_sbd *sdp)
88{
89 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
90 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
91}
92
93int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
94{
95 va_list args;
96
97 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
98 return 0;
99
100 va_start(args, fmt);
101 vprintk(fmt, args);
102 va_end(args);
103
104 fs_err(sdp, "about to withdraw from the cluster\n");
105 BUG_ON(sdp->sd_args.ar_debug);
106
107
108 fs_err(sdp, "waiting for outstanding I/O\n");
109
110 /* FIXME: suspend dm device so oustanding bio's complete
111 and all further io requests fail */
112
113 fs_err(sdp, "telling LM to withdraw\n");
114 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
115 fs_err(sdp, "withdrawn\n");
116 dump_stack();
117
118 return -1;
119}
120
121int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
122 lm_lock_t **lockp)
123{
124 int error;
125 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
126 error = -EIO;
127 else
128 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
129 sdp->sd_lockstruct.ls_lockspace, name, lockp);
130 return error;
131}
132
133void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock)
134{
135 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
136 sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
137}
138
139unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
140 unsigned int cur_state, unsigned int req_state,
141 unsigned int flags)
142{
143 int ret;
144 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
145 ret = 0;
146 else
147 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
148 cur_state,
149 req_state, flags);
150 return ret;
151}
152
153unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
154 unsigned int cur_state)
155{
156 int ret;
157 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
158 ret = 0;
159 else
160 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
161 return ret;
162}
163
164void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock)
165{
166 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
167 sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
168}
169
170int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp)
171{
172 int error;
173 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
174 error = -EIO;
175 else
176 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
177 return error;
178}
179
180void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
181{
182 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
183 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
184}
185
186void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
187{
188 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
189 sdp->sd_lockstruct.ls_ops->lm_sync_lvb(lock, lvb);
190}
191
192int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
193 struct file *file, struct file_lock *fl)
194{
195 int error;
196 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
197 error = -EIO;
198 else
199 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
200 sdp->sd_lockstruct.ls_lockspace,
201 name, file, fl);
202 return error;
203}
204
205int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
206 struct file *file, int cmd, struct file_lock *fl)
207{
208 int error;
209 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
210 error = -EIO;
211 else
212 error = sdp->sd_lockstruct.ls_ops->lm_plock(
213 sdp->sd_lockstruct.ls_lockspace,
214 name, file, cmd, fl);
215 return error;
216}
217
218int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
219 struct file *file, struct file_lock *fl)
220{
221 int error;
222 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
223 error = -EIO;
224 else
225 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
226 sdp->sd_lockstruct.ls_lockspace,
227 name, file, fl);
228 return error;
229}
230
231void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
232 unsigned int message)
233{
234 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
235 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
236 sdp->sd_lockstruct.ls_lockspace, jid, message);
237}
238
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
new file mode 100644
index 000000000000..ec812424fdec
--- /dev/null
+++ b/fs/gfs2/lm.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
14void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
15void gfs2_lm_unmount(struct gfs2_sbd *sdp);
16int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
17__attribute__ ((format(printf, 2, 3)));
18int gfs2_lm_get_lock(struct gfs2_sbd *sdp,
19 struct lm_lockname *name, lm_lock_t **lockp);
20void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock);
21unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
22 unsigned int cur_state, unsigned int req_state,
23 unsigned int flags);
24unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
25 unsigned int cur_state);
26void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock);
27int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp);
28void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
29void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
30int gfs2_lm_plock_get(struct gfs2_sbd *sdp,
31 struct lm_lockname *name,
32 struct file *file, struct file_lock *fl);
33int gfs2_lm_plock(struct gfs2_sbd *sdp,
34 struct lm_lockname *name,
35 struct file *file, int cmd, struct file_lock *fl);
36int gfs2_lm_punlock(struct gfs2_sbd *sdp,
37 struct lm_lockname *name,
38 struct file *file, struct file_lock *fl);
39void gfs2_lm_recovery_done(struct gfs2_sbd *sdp,
40 unsigned int jid, unsigned int message);
41
42#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/lm_interface.h b/fs/gfs2/lm_interface.h
new file mode 100644
index 000000000000..378432f17f27
--- /dev/null
+++ b/fs/gfs2/lm_interface.h
@@ -0,0 +1,295 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_INTERFACE_DOT_H__
11#define __LM_INTERFACE_DOT_H__
12
13/*
14 * Opaque handles represent the lock module's lockspace structure, the lock
15 * module's lock structures, and GFS's file system (superblock) structure.
16 */
17
18typedef void lm_lockspace_t;
19typedef void lm_lock_t;
20typedef void lm_fsdata_t;
21
22typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
23 void *data);
24
25/*
26 * lm_mount() flags
27 *
28 * LM_MFLAG_SPECTATOR
29 * GFS is asking to join the filesystem's lockspace, but it doesn't want to
30 * modify the filesystem. The lock module shouldn't assign a journal to the FS
31 * mount. It shouldn't send recovery callbacks to the FS mount. If the node
32 * dies or withdraws, all locks can be wiped immediately.
33 */
34
35#define LM_MFLAG_SPECTATOR 0x00000001
36
37/*
38 * lm_lockstruct flags
39 *
40 * LM_LSFLAG_LOCAL
41 * The lock_nolock module returns LM_LSFLAG_LOCAL to GFS, indicating that GFS
42 * can make single-node optimizations.
43 */
44
45#define LM_LSFLAG_LOCAL 0x00000001
46
47/*
48 * lm_lockname types
49 */
50
51#define LM_TYPE_RESERVED 0x00
52#define LM_TYPE_NONDISK 0x01
53#define LM_TYPE_INODE 0x02
54#define LM_TYPE_RGRP 0x03
55#define LM_TYPE_META 0x04
56#define LM_TYPE_IOPEN 0x05
57#define LM_TYPE_FLOCK 0x06
58#define LM_TYPE_PLOCK 0x07
59#define LM_TYPE_QUOTA 0x08
60#define LM_TYPE_JOURNAL 0x09
61
62/*
63 * lm_lock() states
64 *
65 * SHARED is compatible with SHARED, not with DEFERRED or EX.
66 * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
67 */
68
69#define LM_ST_UNLOCKED 0
70#define LM_ST_EXCLUSIVE 1
71#define LM_ST_DEFERRED 2
72#define LM_ST_SHARED 3
73
74/*
75 * lm_lock() flags
76 *
77 * LM_FLAG_TRY
78 * Don't wait to acquire the lock if it can't be granted immediately.
79 *
80 * LM_FLAG_TRY_1CB
81 * Send one blocking callback if TRY is set and the lock is not granted.
82 *
83 * LM_FLAG_NOEXP
84 * GFS sets this flag on lock requests it makes while doing journal recovery.
85 * These special requests should not be blocked due to the recovery like
86 * ordinary locks would be.
87 *
88 * LM_FLAG_ANY
89 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
90 * also be granted in SHARED. The preferred state is whichever is compatible
91 * with other granted locks, or the specified state if no other locks exist.
92 *
93 * LM_FLAG_PRIORITY
94 * Override fairness considerations. Suppose a lock is held in a shared state
95 * and there is a pending request for the deferred state. A shared lock
96 * request with the priority flag would be allowed to bypass the deferred
97 * request and directly join the other shared lock. A shared lock request
98 * without the priority flag might be forced to wait until the deferred
99 * request had acquired and released the lock.
100 */
101
102#define LM_FLAG_TRY 0x00000001
103#define LM_FLAG_TRY_1CB 0x00000002
104#define LM_FLAG_NOEXP 0x00000004
105#define LM_FLAG_ANY 0x00000008
106#define LM_FLAG_PRIORITY 0x00000010
107
108/*
109 * lm_lock() and lm_async_cb return flags
110 *
111 * LM_OUT_ST_MASK
112 * Masks the lower two bits of lock state in the returned value.
113 *
114 * LM_OUT_CACHEABLE
115 * The lock hasn't been released so GFS can continue to cache data for it.
116 *
117 * LM_OUT_CANCELED
118 * The lock request was canceled.
119 *
120 * LM_OUT_ASYNC
121 * The result of the request will be returned in an LM_CB_ASYNC callback.
122 */
123
124#define LM_OUT_ST_MASK 0x00000003
125#define LM_OUT_CACHEABLE 0x00000004
126#define LM_OUT_CANCELED 0x00000008
127#define LM_OUT_ASYNC 0x00000080
128#define LM_OUT_ERROR 0x00000100
129
130/*
131 * lm_callback_t types
132 *
133 * LM_CB_NEED_E LM_CB_NEED_D LM_CB_NEED_S
134 * Blocking callback, a remote node is requesting the given lock in
135 * EXCLUSIVE, DEFERRED, or SHARED.
136 *
137 * LM_CB_NEED_RECOVERY
138 * The given journal needs to be recovered.
139 *
140 * LM_CB_DROPLOCKS
141 * Reduce the number of cached locks.
142 *
143 * LM_CB_ASYNC
144 * The given lock has been granted.
145 */
146
147#define LM_CB_NEED_E 257
148#define LM_CB_NEED_D 258
149#define LM_CB_NEED_S 259
150#define LM_CB_NEED_RECOVERY 260
151#define LM_CB_DROPLOCKS 261
152#define LM_CB_ASYNC 262
153
154/*
155 * lm_recovery_done() messages
156 */
157
158#define LM_RD_GAVEUP 308
159#define LM_RD_SUCCESS 309
160
161
/* Name of a lock: a number qualified by one of the LM_TYPE_* values. */
struct lm_lockname {
	uint64_t ln_number;
	unsigned int ln_type;
};

/*
 * lm_name_equal - compare two lock names
 * @name1, @name2: pointers to struct lm_lockname
 *
 * Evaluates to non-zero when both the number and the type match.  Each
 * argument is expanded twice, so don't pass expressions with side effects.
 *
 * The last line deliberately has no trailing backslash: a continuation
 * there would splice whatever follows the macro into its definition.
 */
#define lm_name_equal(name1, name2) \
	(((name1)->ln_number == (name2)->ln_number) && \
	 ((name1)->ln_type == (name2)->ln_type))
170
171struct lm_async_cb {
172 struct lm_lockname lc_name;
173 int lc_ret;
174};
175
176struct lm_lockstruct;
177
178struct lm_lockops {
179 char lm_proto_name[256];
180
181 /*
182 * Mount/Unmount
183 */
184
185 int (*lm_mount) (char *table_name, char *host_data,
186 lm_callback_t cb, lm_fsdata_t *fsdata,
187 unsigned int min_lvb_size, int flags,
188 struct lm_lockstruct *lockstruct,
189 struct kobject *fskobj);
190
191 void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
192
193 void (*lm_unmount) (lm_lockspace_t *lockspace);
194
195 void (*lm_withdraw) (lm_lockspace_t *lockspace);
196
197 /*
198 * Lock oriented operations
199 */
200
201 int (*lm_get_lock) (lm_lockspace_t *lockspace,
202 struct lm_lockname *name, lm_lock_t **lockp);
203
204 void (*lm_put_lock) (lm_lock_t *lock);
205
206 unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
207 unsigned int req_state, unsigned int flags);
208
209 unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
210
211 void (*lm_cancel) (lm_lock_t *lock);
212
213 int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
214 void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
215 void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
216
217 /*
218 * Posix Lock oriented operations
219 */
220
221 int (*lm_plock_get) (lm_lockspace_t *lockspace,
222 struct lm_lockname *name,
223 struct file *file, struct file_lock *fl);
224
225 int (*lm_plock) (lm_lockspace_t *lockspace,
226 struct lm_lockname *name,
227 struct file *file, int cmd, struct file_lock *fl);
228
229 int (*lm_punlock) (lm_lockspace_t *lockspace,
230 struct lm_lockname *name,
231 struct file *file, struct file_lock *fl);
232
233 /*
234 * Client oriented operations
235 */
236
237 void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
238 unsigned int message);
239
240 struct module *lm_owner;
241};
242
243/*
244 * lm_mount() return values
245 *
246 * ls_jid - the journal ID this node should use
247 * ls_first - this node is the first to mount the file system
248 * ls_lvb_size - size in bytes of lock value blocks
249 * ls_lockspace - lock module's context for this file system
250 * ls_ops - lock module's functions
251 * ls_flags - lock module features
252 */
253
254struct lm_lockstruct {
255 unsigned int ls_jid;
256 unsigned int ls_first;
257 unsigned int ls_lvb_size;
258 lm_lockspace_t *ls_lockspace;
259 struct lm_lockops *ls_ops;
260 int ls_flags;
261};
262
263void __init gfs2_init_lmh(void);
264
265/*
266 * Lock module bottom interface. A lock module makes itself available to GFS
267 * with these functions.
268 *
269 * For the time being, we copy the gfs1 lock module bottom interface so the
270 * same lock modules can be used with both gfs1 and gfs2 (it won't be possible
271 * to load both gfs1 and gfs2 at once.) Eventually the lock modules will fork
272 * for gfs1/gfs2 and this API can change to the gfs2_ prefix.
273 */
274
275int gfs_register_lockproto(struct lm_lockops *proto);
276
277void gfs_unregister_lockproto(struct lm_lockops *proto);
278
279/*
280 * Lock module top interface. GFS calls these functions when mounting or
281 * unmounting a file system.
282 */
283
284int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
285 lm_callback_t cb, lm_fsdata_t *fsdata,
286 unsigned int min_lvb_size, int flags,
287 struct lm_lockstruct *lockstruct,
288 struct kobject *fskobj);
289
290void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct);
291
292void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct);
293
294#endif /* __LM_INTERFACE_DOT_H__ */
295
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
new file mode 100644
index 000000000000..0f4c50ebcbad
--- /dev/null
+++ b/fs/gfs2/locking.c
@@ -0,0 +1,191 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19
20#include "lm_interface.h"
21
22struct lmh_wrapper {
23 struct list_head lw_list;
24 struct lm_lockops *lw_ops;
25};
26
27/* List of registered low-level locking protocols. A file system selects one
28 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
29
30static struct list_head lmh_list;
31static struct semaphore lmh_lock;
32
33/**
34 * gfs_register_lockproto - Register a low-level locking protocol
35 * @proto: the protocol definition
36 *
37 * Returns: 0 on success, -EXXX on failure
38 */
39
40int gfs_register_lockproto(struct lm_lockops *proto)
41{
42 struct lmh_wrapper *lw;
43
44 down(&lmh_lock);
45
46 list_for_each_entry(lw, &lmh_list, lw_list) {
47 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
48 up(&lmh_lock);
49 printk(KERN_INFO "GFS2: protocol %s already exists\n",
50 proto->lm_proto_name);
51 return -EEXIST;
52 }
53 }
54
55 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
56 if (!lw) {
57 up(&lmh_lock);
58 return -ENOMEM;
59 }
60
61 lw->lw_ops = proto;
62 list_add(&lw->lw_list, &lmh_list);
63
64 up(&lmh_lock);
65
66 return 0;
67}
68
69/**
70 * gfs_unregister_lockproto - Unregister a low-level locking protocol
71 * @proto: the protocol definition
72 *
73 */
74
75void gfs_unregister_lockproto(struct lm_lockops *proto)
76{
77 struct lmh_wrapper *lw;
78
79 down(&lmh_lock);
80
81 list_for_each_entry(lw, &lmh_list, lw_list) {
82 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
83 list_del(&lw->lw_list);
84 up(&lmh_lock);
85 kfree(lw);
86 return;
87 }
88 }
89
90 up(&lmh_lock);
91
92 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
93 proto->lm_proto_name);
94}
95
96/**
97 * gfs2_mount_lockproto - Mount a lock protocol
98 * @proto_name - the name of the protocol
99 * @table_name - the name of the lock space
100 * @host_data - data specific to this host
101 * @cb - the callback to the code using the lock module
102 * @fsdata - data to pass back with the callback
103 * @min_lvb_size - the mininum LVB size that the caller can deal with
104 * @flags - LM_MFLAG_*
105 * @lockstruct - a structure returned describing the mount
106 *
107 * Returns: 0 on success, -EXXX on failure
108 */
109
110int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
111 lm_callback_t cb, lm_fsdata_t *fsdata,
112 unsigned int min_lvb_size, int flags,
113 struct lm_lockstruct *lockstruct,
114 struct kobject *fskobj)
115{
116 struct lmh_wrapper *lw = NULL;
117 int try = 0;
118 int error, found;
119
120 retry:
121 down(&lmh_lock);
122
123 found = 0;
124 list_for_each_entry(lw, &lmh_list, lw_list) {
125 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
126 found = 1;
127 break;
128 }
129 }
130
131 if (!found) {
132 if (!try && capable(CAP_SYS_MODULE)) {
133 try = 1;
134 up(&lmh_lock);
135 request_module(proto_name);
136 goto retry;
137 }
138 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
139 error = -ENOENT;
140 goto out;
141 }
142
143 if (!try_module_get(lw->lw_ops->lm_owner)) {
144 try = 0;
145 up(&lmh_lock);
146 msleep(1000);
147 goto retry;
148 }
149
150 error = lw->lw_ops->lm_mount(table_name, host_data, cb, fsdata,
151 min_lvb_size, flags, lockstruct, fskobj);
152 if (error)
153 module_put(lw->lw_ops->lm_owner);
154 out:
155 up(&lmh_lock);
156 return error;
157}
158
159void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
160{
161 down(&lmh_lock);
162 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
163 if (lockstruct->ls_ops->lm_owner)
164 module_put(lockstruct->ls_ops->lm_owner);
165 up(&lmh_lock);
166}
167
168/**
169 * gfs2_withdraw_lockproto - abnormally unmount a lock module
170 * @lockstruct: the lockstruct passed into mount
171 *
172 */
173
174void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
175{
176 down(&lmh_lock);
177 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
178 if (lockstruct->ls_ops->lm_owner)
179 module_put(lockstruct->ls_ops->lm_owner);
180 up(&lmh_lock);
181}
182
183void __init gfs2_init_lmh(void)
184{
185 init_MUTEX(&lmh_lock);
186 INIT_LIST_HEAD(&lmh_list);
187}
188
189EXPORT_SYMBOL_GPL(gfs_register_lockproto);
190EXPORT_SYMBOL_GPL(gfs_unregister_lockproto);
191
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
new file mode 100644
index 000000000000..a9733ff80371
--- /dev/null
+++ b/fs/gfs2/locking/dlm/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS) += lock_dlm.o
2lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
new file mode 100644
index 000000000000..1799d2237e7e
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -0,0 +1,538 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
14static void queue_complete(struct gdlm_lock *lp)
15{
16 struct gdlm_ls *ls = lp->ls;
17
18 clear_bit(LFL_ACTIVE, &lp->flags);
19
20 spin_lock(&ls->async_lock);
21 list_add_tail(&lp->clist, &ls->complete);
22 spin_unlock(&ls->async_lock);
23 wake_up(&ls->thread_wait);
24}
25
/*
 * Completion callback handed to dlm_lock(); @astarg is the struct gdlm_lock
 * that was passed as the callback argument.
 */
static inline void gdlm_ast(void *astarg)
{
	struct gdlm_lock *lp = astarg;

	queue_complete(lp);
}
30
31static inline void gdlm_bast(void *astarg, int mode)
32{
33 struct gdlm_lock *lp = astarg;
34 struct gdlm_ls *ls = lp->ls;
35
36 if (!mode) {
37 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
38 lp->lockname.ln_type, lp->lockname.ln_number);
39 return;
40 }
41
42 spin_lock(&ls->async_lock);
43 if (!lp->bast_mode) {
44 list_add_tail(&lp->blist, &ls->blocking);
45 lp->bast_mode = mode;
46 } else if (lp->bast_mode < mode)
47 lp->bast_mode = mode;
48 spin_unlock(&ls->async_lock);
49 wake_up(&ls->thread_wait);
50}
51
52void gdlm_queue_delayed(struct gdlm_lock *lp)
53{
54 struct gdlm_ls *ls = lp->ls;
55
56 spin_lock(&ls->async_lock);
57 list_add_tail(&lp->delay_list, &ls->delayed);
58 spin_unlock(&ls->async_lock);
59}
60
61/* convert gfs lock-state to dlm lock-mode */
62
63static int16_t make_mode(int16_t lmstate)
64{
65 switch (lmstate) {
66 case LM_ST_UNLOCKED:
67 return DLM_LOCK_NL;
68 case LM_ST_EXCLUSIVE:
69 return DLM_LOCK_EX;
70 case LM_ST_DEFERRED:
71 return DLM_LOCK_CW;
72 case LM_ST_SHARED:
73 return DLM_LOCK_PR;
74 }
75 gdlm_assert(0, "unknown LM state %d", lmstate);
76 return -1;
77}
78
79/* convert dlm lock-mode to gfs lock-state */
80
81int16_t gdlm_make_lmstate(int16_t dlmmode)
82{
83 switch (dlmmode) {
84 case DLM_LOCK_IV:
85 case DLM_LOCK_NL:
86 return LM_ST_UNLOCKED;
87 case DLM_LOCK_EX:
88 return LM_ST_EXCLUSIVE;
89 case DLM_LOCK_CW:
90 return LM_ST_DEFERRED;
91 case DLM_LOCK_PR:
92 return LM_ST_SHARED;
93 }
94 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
95 return -1;
96}
97
98/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
99 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
100
101static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
102{
103 int16_t cur = make_mode(cur_state);
104 if (lp->cur != DLM_LOCK_IV)
105 gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
106}
107
108static inline unsigned int make_flags(struct gdlm_lock *lp,
109 unsigned int gfs_flags,
110 int16_t cur, int16_t req)
111{
112 unsigned int lkf = 0;
113
114 if (gfs_flags & LM_FLAG_TRY)
115 lkf |= DLM_LKF_NOQUEUE;
116
117 if (gfs_flags & LM_FLAG_TRY_1CB) {
118 lkf |= DLM_LKF_NOQUEUE;
119 lkf |= DLM_LKF_NOQUEUEBAST;
120 }
121
122 if (gfs_flags & LM_FLAG_PRIORITY) {
123 lkf |= DLM_LKF_NOORDER;
124 lkf |= DLM_LKF_HEADQUE;
125 }
126
127 if (gfs_flags & LM_FLAG_ANY) {
128 if (req == DLM_LOCK_PR)
129 lkf |= DLM_LKF_ALTCW;
130 else if (req == DLM_LOCK_CW)
131 lkf |= DLM_LKF_ALTPR;
132 }
133
134 if (lp->lksb.sb_lkid != 0) {
135 lkf |= DLM_LKF_CONVERT;
136
137 /* Conversion deadlock avoidance by DLM */
138
139 if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
140 !(lkf & DLM_LKF_NOQUEUE) &&
141 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
142 lkf |= DLM_LKF_CONVDEADLK;
143 }
144
145 if (lp->lvb)
146 lkf |= DLM_LKF_VALBLK;
147
148 return lkf;
149}
150
151/* make_strname - convert GFS lock numbers to a string */
152
153static inline void make_strname(struct lm_lockname *lockname,
154 struct gdlm_strname *str)
155{
156 sprintf(str->name, "%8x%16llx", lockname->ln_type,
157 lockname->ln_number);
158 str->namelen = GDLM_STRNAME_BYTES;
159}
160
161int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
162 struct gdlm_lock **lpp)
163{
164 struct gdlm_lock *lp;
165
166 lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
167 if (!lp)
168 return -ENOMEM;
169
170 lp->lockname = *name;
171 lp->ls = ls;
172 lp->cur = DLM_LOCK_IV;
173 lp->lvb = NULL;
174 lp->hold_null = NULL;
175 init_completion(&lp->ast_wait);
176 INIT_LIST_HEAD(&lp->clist);
177 INIT_LIST_HEAD(&lp->blist);
178 INIT_LIST_HEAD(&lp->delay_list);
179
180 spin_lock(&ls->async_lock);
181 list_add(&lp->all_list, &ls->all_locks);
182 ls->all_locks_count++;
183 spin_unlock(&ls->async_lock);
184
185 *lpp = lp;
186 return 0;
187}
188
189void gdlm_delete_lp(struct gdlm_lock *lp)
190{
191 struct gdlm_ls *ls = lp->ls;
192
193 spin_lock(&ls->async_lock);
194 if (!list_empty(&lp->clist))
195 list_del_init(&lp->clist);
196 if (!list_empty(&lp->blist))
197 list_del_init(&lp->blist);
198 if (!list_empty(&lp->delay_list))
199 list_del_init(&lp->delay_list);
200 gdlm_assert(!list_empty(&lp->all_list),
201 "%x,%llx", lp->lockname.ln_type, lp->lockname.ln_number);
202 list_del_init(&lp->all_list);
203 ls->all_locks_count--;
204 spin_unlock(&ls->async_lock);
205
206 kfree(lp);
207}
208
209int gdlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
210 lm_lock_t **lockp)
211{
212 struct gdlm_lock *lp;
213 int error;
214
215 error = gdlm_create_lp((struct gdlm_ls *) lockspace, name, &lp);
216
217 *lockp = (lm_lock_t *) lp;
218 return error;
219}
220
221void gdlm_put_lock(lm_lock_t *lock)
222{
223 gdlm_delete_lp((struct gdlm_lock *) lock);
224}
225
226unsigned int gdlm_do_lock(struct gdlm_lock *lp)
227{
228 struct gdlm_ls *ls = lp->ls;
229 struct gdlm_strname str;
230 int error, bast = 1;
231
232 /*
233 * When recovery is in progress, delay lock requests for submission
234 * once recovery is done. Requests for recovery (NOEXP) and unlocks
235 * can pass.
236 */
237
238 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
239 !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
240 gdlm_queue_delayed(lp);
241 return LM_OUT_ASYNC;
242 }
243
244 /*
245 * Submit the actual lock request.
246 */
247
248 if (test_bit(LFL_NOBAST, &lp->flags))
249 bast = 0;
250
251 make_strname(&lp->lockname, &str);
252
253 set_bit(LFL_ACTIVE, &lp->flags);
254
255 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
256 lp->lockname.ln_number, lp->lksb.sb_lkid,
257 lp->cur, lp->req, lp->lkf);
258
259 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
260 str.name, str.namelen, 0, gdlm_ast, (void *) lp,
261 bast ? gdlm_bast : NULL);
262
263 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
264 lp->lksb.sb_status = -EAGAIN;
265 queue_complete(lp);
266 error = 0;
267 }
268
269 if (error) {
270 log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
271 "flags=%lx", ls->fsname, lp->lockname.ln_type,
272 lp->lockname.ln_number, error, lp->cur, lp->req,
273 lp->lkf, lp->flags);
274 return LM_OUT_ERROR;
275 }
276 return LM_OUT_ASYNC;
277}
278
279unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
280{
281 struct gdlm_ls *ls = lp->ls;
282 unsigned int lkf = 0;
283 int error;
284
285 set_bit(LFL_DLM_UNLOCK, &lp->flags);
286 set_bit(LFL_ACTIVE, &lp->flags);
287
288 if (lp->lvb)
289 lkf = DLM_LKF_VALBLK;
290
291 log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
292 lp->lockname.ln_number, lp->lksb.sb_lkid, lp->cur, lkf);
293
294 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
295
296 if (error) {
297 log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
298 "flags=%lx", ls->fsname, lp->lockname.ln_type,
299 lp->lockname.ln_number, error, lp->cur, lp->req,
300 lp->lkf, lp->flags);
301 return LM_OUT_ERROR;
302 }
303 return LM_OUT_ASYNC;
304}
305
306unsigned int gdlm_lock(lm_lock_t *lock, unsigned int cur_state,
307 unsigned int req_state, unsigned int flags)
308{
309 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
310
311 clear_bit(LFL_DLM_CANCEL, &lp->flags);
312 if (flags & LM_FLAG_NOEXP)
313 set_bit(LFL_NOBLOCK, &lp->flags);
314
315 check_cur_state(lp, cur_state);
316 lp->req = make_mode(req_state);
317 lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
318
319 return gdlm_do_lock(lp);
320}
321
322unsigned int gdlm_unlock(lm_lock_t *lock, unsigned int cur_state)
323{
324 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
325
326 clear_bit(LFL_DLM_CANCEL, &lp->flags);
327 if (lp->cur == DLM_LOCK_IV)
328 return 0;
329 return gdlm_do_unlock(lp);
330}
331
332void gdlm_cancel(lm_lock_t *lock)
333{
334 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
335 struct gdlm_ls *ls = lp->ls;
336 int error, delay_list = 0;
337
338 if (test_bit(LFL_DLM_CANCEL, &lp->flags))
339 return;
340
341 log_info("gdlm_cancel %x,%llx flags %lx",
342 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
343
344 spin_lock(&ls->async_lock);
345 if (!list_empty(&lp->delay_list)) {
346 list_del_init(&lp->delay_list);
347 delay_list = 1;
348 }
349 spin_unlock(&ls->async_lock);
350
351 if (delay_list) {
352 set_bit(LFL_CANCEL, &lp->flags);
353 set_bit(LFL_ACTIVE, &lp->flags);
354 queue_complete(lp);
355 return;
356 }
357
358 if (!test_bit(LFL_ACTIVE, &lp->flags) ||
359 test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
360 log_info("gdlm_cancel skip %x,%llx flags %lx",
361 lp->lockname.ln_type, lp->lockname.ln_number,
362 lp->flags);
363 return;
364 }
365
366 /* the lock is blocked in the dlm */
367
368 set_bit(LFL_DLM_CANCEL, &lp->flags);
369 set_bit(LFL_ACTIVE, &lp->flags);
370
371 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
372 NULL, lp);
373
374 log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
375 lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
376
377 if (error == -EBUSY)
378 clear_bit(LFL_DLM_CANCEL, &lp->flags);
379}
380
381int gdlm_add_lvb(struct gdlm_lock *lp)
382{
383 char *lvb;
384
385 lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
386 if (!lvb)
387 return -ENOMEM;
388
389 lp->lksb.sb_lvbptr = lvb;
390 lp->lvb = lvb;
391 return 0;
392}
393
394void gdlm_del_lvb(struct gdlm_lock *lp)
395{
396 kfree(lp->lvb);
397 lp->lvb = NULL;
398 lp->lksb.sb_lvbptr = NULL;
399}
400
401/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
402 the completion) because gfs won't call hold_lvb() during a callback (from
403 the context of a lock_dlm thread). */
404
405static int hold_null_lock(struct gdlm_lock *lp)
406{
407 struct gdlm_lock *lpn = NULL;
408 int error;
409
410 if (lp->hold_null) {
411 printk(KERN_INFO "lock_dlm: lvb already held\n");
412 return 0;
413 }
414
415 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
416 if (error)
417 goto out;
418
419 lpn->lksb.sb_lvbptr = junk_lvb;
420 lpn->lvb = junk_lvb;
421
422 lpn->req = DLM_LOCK_NL;
423 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
424 set_bit(LFL_NOBAST, &lpn->flags);
425 set_bit(LFL_INLOCK, &lpn->flags);
426
427 init_completion(&lpn->ast_wait);
428 gdlm_do_lock(lpn);
429 wait_for_completion(&lpn->ast_wait);
430 error = lp->lksb.sb_status;
431 if (error) {
432 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
433 error);
434 gdlm_delete_lp(lpn);
435 lpn = NULL;
436 }
437 out:
438 lp->hold_null = lpn;
439 return error;
440}
441
442/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
443 the completion) because gfs may call unhold_lvb() during a callback (from
444 the context of a lock_dlm thread) which could cause a deadlock since the
445 other lock_dlm thread could be engaged in recovery. */
446
447static void unhold_null_lock(struct gdlm_lock *lp)
448{
449 struct gdlm_lock *lpn = lp->hold_null;
450
451 gdlm_assert(lpn, "%x,%llx",
452 lp->lockname.ln_type, lp->lockname.ln_number);
453 lpn->lksb.sb_lvbptr = NULL;
454 lpn->lvb = NULL;
455 set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
456 gdlm_do_unlock(lpn);
457 lp->hold_null = NULL;
458}
459
460/* Acquire a NL lock because gfs requires the value block to remain
461 intact on the resource while the lvb is "held" even if it's holding no locks
462 on the resource. */
463
464int gdlm_hold_lvb(lm_lock_t *lock, char **lvbp)
465{
466 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
467 int error;
468
469 error = gdlm_add_lvb(lp);
470 if (error)
471 return error;
472
473 *lvbp = lp->lvb;
474
475 error = hold_null_lock(lp);
476 if (error)
477 gdlm_del_lvb(lp);
478
479 return error;
480}
481
482void gdlm_unhold_lvb(lm_lock_t *lock, char *lvb)
483{
484 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
485
486 unhold_null_lock(lp);
487 gdlm_del_lvb(lp);
488}
489
490void gdlm_sync_lvb(lm_lock_t *lock, char *lvb)
491{
492 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
493
494 if (lp->cur != DLM_LOCK_EX)
495 return;
496
497 init_completion(&lp->ast_wait);
498 set_bit(LFL_SYNC_LVB, &lp->flags);
499
500 lp->req = DLM_LOCK_EX;
501 lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
502
503 gdlm_do_lock(lp);
504 wait_for_completion(&lp->ast_wait);
505}
506
507void gdlm_submit_delayed(struct gdlm_ls *ls)
508{
509 struct gdlm_lock *lp, *safe;
510
511 spin_lock(&ls->async_lock);
512 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
513 list_del_init(&lp->delay_list);
514 list_add_tail(&lp->delay_list, &ls->submit);
515 }
516 spin_unlock(&ls->async_lock);
517 wake_up(&ls->thread_wait);
518}
519
520int gdlm_release_all_locks(struct gdlm_ls *ls)
521{
522 struct gdlm_lock *lp, *safe;
523 int count = 0;
524
525 spin_lock(&ls->async_lock);
526 list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
527 list_del_init(&lp->all_list);
528
529 if (lp->lvb && lp->lvb != junk_lvb)
530 kfree(lp->lvb);
531 kfree(lp);
532 count++;
533 }
534 spin_unlock(&ls->async_lock);
535
536 return count;
537}
538
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
new file mode 100644
index 000000000000..6d76146953ce
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -0,0 +1,191 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/string.h>
19#include <linux/list.h>
20#include <linux/socket.h>
21#include <linux/delay.h>
22#include <linux/kthread.h>
23#include <linux/kobject.h>
24#include <linux/fcntl.h>
25#include <linux/wait.h>
26#include <net/sock.h>
27
28#include <linux/dlm.h>
29#include "../../lm_interface.h"
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000
40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128
42
43/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
44 We sprintf these numbers into a 24 byte string of hex values to make them
45 human-readable (to make debugging simpler.) */
46
47struct gdlm_strname {
48 unsigned char name[GDLM_STRNAME_BYTES];
49 unsigned short namelen;
50};
51
/* Bit numbers for gdlm_ls.flags (used with set_bit/test_bit). */
enum {
	DFL_BLOCK_LOCKS		= 0,	/* new requests are parked on the
					   delayed list */
	DFL_SPECTATOR		= 1,
	DFL_WITHDRAW		= 2,	/* set through the "withdraw" sysfs
					   file */
};
57
58struct gdlm_ls {
59 uint32_t id;
60 int jid;
61 int first;
62 int first_done;
63 unsigned long flags;
64 struct kobject kobj;
65 char clustername[GDLM_NAME_LEN];
66 char fsname[GDLM_NAME_LEN];
67 int fsflags;
68 dlm_lockspace_t *dlm_lockspace;
69 lm_callback_t fscb;
70 lm_fsdata_t *fsdata;
71 int recover_jid;
72 int recover_jid_done;
73 spinlock_t async_lock;
74 struct list_head complete;
75 struct list_head blocking;
76 struct list_head delayed;
77 struct list_head submit;
78 struct list_head all_locks;
79 uint32_t all_locks_count;
80 wait_queue_head_t wait_control;
81 struct task_struct *thread1;
82 struct task_struct *thread2;
83 wait_queue_head_t thread_wait;
84 unsigned long drop_time;
85 int drop_locks_count;
86 int drop_locks_period;
87};
88
/* Bit numbers for gdlm_lock.flags (used with set_bit/test_bit). */
enum {
	LFL_NOBLOCK		= 0,	/* not delayed by DFL_BLOCK_LOCKS */
	LFL_NOCACHE		= 1,
	LFL_DLM_UNLOCK		= 2,	/* dlm_unlock() has been issued */
	LFL_DLM_CANCEL		= 3,	/* a DLM cancel has been issued */
	LFL_SYNC_LVB		= 4,	/* request made by gdlm_sync_lvb() */
	LFL_FORCE_PROMOTE	= 5,
	LFL_REREQUEST		= 6,
	LFL_ACTIVE		= 7,	/* a request is outstanding */
	LFL_INLOCK		= 8,	/* internal lock (hold_null_lock) */
	LFL_CANCEL		= 9,	/* cancelled while still delayed */
	LFL_NOBAST		= 10,	/* no blocking callback requested */
	LFL_HEADQUE		= 11,
	LFL_UNLOCK_DELETE	= 12,	/* set before unlocking the NL lock */
};
104
105struct gdlm_lock {
106 struct gdlm_ls *ls;
107 struct lm_lockname lockname;
108 char *lvb;
109 struct dlm_lksb lksb;
110
111 int16_t cur;
112 int16_t req;
113 int16_t prev_req;
114 uint32_t lkf; /* dlm flags DLM_LKF_ */
115 unsigned long flags; /* lock_dlm flags LFL_ */
116
117 int bast_mode; /* protected by async_lock */
118 struct completion ast_wait;
119
120 struct list_head clist; /* complete */
121 struct list_head blist; /* blocking */
122 struct list_head delay_list; /* delayed */
123 struct list_head all_list; /* all locks for the fs */
124 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
125};
126
/*
 * Fatal assertion: if @assertion is false, print the stringified expression
 * and the caller's formatted message at KERN_EMERG, then BUG().
 */
#define gdlm_assert(assertion, fmt, args...)				      \
do {									      \
	if (unlikely(!(assertion))) {					      \
		printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
				  "lock_dlm: " fmt "\n",		      \
				  #assertion, ##args);			      \
		BUG();							      \
	}								      \
} while (0)
136
/*
 * Logging helpers.  Every message is prefixed with "lock_dlm: " and gets a
 * trailing newline.  log_debug() compiles to nothing unless
 * LOCK_DLM_LOG_DEBUG is defined.
 */
#define log_print(lev, fmt, arg...) \
	printk(lev "lock_dlm: " fmt "\n" , ## arg)
#define log_info(fmt, arg...)	log_print(KERN_INFO , fmt , ## arg)
#define log_error(fmt, arg...)	log_print(KERN_ERR , fmt , ## arg)
#ifdef LOCK_DLM_LOG_DEBUG
#define log_debug(fmt, arg...)	log_print(KERN_DEBUG , fmt , ## arg)
#else
#define log_debug(fmt, arg...)
#endif
145
146/* sysfs.c */
147
148int gdlm_sysfs_init(void);
149void gdlm_sysfs_exit(void);
150int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
151void gdlm_kobject_release(struct gdlm_ls *);
152
153/* thread.c */
154
155int gdlm_init_threads(struct gdlm_ls *);
156void gdlm_release_threads(struct gdlm_ls *);
157
158/* lock.c */
159
160int16_t gdlm_make_lmstate(int16_t);
161void gdlm_queue_delayed(struct gdlm_lock *);
162void gdlm_submit_delayed(struct gdlm_ls *);
163int gdlm_release_all_locks(struct gdlm_ls *);
164int gdlm_create_lp(struct gdlm_ls *, struct lm_lockname *, struct gdlm_lock **);
165void gdlm_delete_lp(struct gdlm_lock *);
166int gdlm_add_lvb(struct gdlm_lock *);
167void gdlm_del_lvb(struct gdlm_lock *);
168unsigned int gdlm_do_lock(struct gdlm_lock *);
169unsigned int gdlm_do_unlock(struct gdlm_lock *);
170
171int gdlm_get_lock(lm_lockspace_t *, struct lm_lockname *, lm_lock_t **);
172void gdlm_put_lock(lm_lock_t *);
173unsigned int gdlm_lock(lm_lock_t *, unsigned int, unsigned int, unsigned int);
174unsigned int gdlm_unlock(lm_lock_t *, unsigned int);
175void gdlm_cancel(lm_lock_t *);
176int gdlm_hold_lvb(lm_lock_t *, char **);
177void gdlm_unhold_lvb(lm_lock_t *, char *);
178void gdlm_sync_lvb(lm_lock_t *, char *);
179
180/* plock.c */
181
182int gdlm_plock_init(void);
183void gdlm_plock_exit(void);
184int gdlm_plock(lm_lockspace_t *, struct lm_lockname *, struct file *, int,
185 struct file_lock *);
186int gdlm_plock_get(lm_lockspace_t *, struct lm_lockname *, struct file *,
187 struct file_lock *);
188int gdlm_punlock(lm_lockspace_t *, struct lm_lockname *, struct file *,
189 struct file_lock *);
190#endif
191
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
new file mode 100644
index 000000000000..3c9adf18fd9c
--- /dev/null
+++ b/fs/gfs2/locking/dlm/main.c
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops;
18
19int __init init_lock_dlm(void)
20{
21 int error;
22
23 error = gfs_register_lockproto(&gdlm_ops);
24 if (error) {
25 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
26 error);
27 return error;
28 }
29
30 error = gdlm_sysfs_init();
31 if (error) {
32 gfs_unregister_lockproto(&gdlm_ops);
33 return error;
34 }
35
36 error = gdlm_plock_init();
37 if (error) {
38 gdlm_sysfs_exit();
39 gfs_unregister_lockproto(&gdlm_ops);
40 return error;
41 }
42
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0;
49}
50
51void __exit exit_lock_dlm(void)
52{
53 gdlm_plock_exit();
54 gdlm_sysfs_exit();
55 gfs_unregister_lockproto(&gdlm_ops);
56}
57
58module_init(init_lock_dlm);
59module_exit(exit_lock_dlm);
60
61MODULE_DESCRIPTION("GFS DLM Locking Module");
62MODULE_AUTHOR("Red Hat, Inc.");
63MODULE_LICENSE("GPL");
64
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
new file mode 100644
index 000000000000..042f3a75c441
--- /dev/null
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -0,0 +1,247 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12int gdlm_drop_count;
13int gdlm_drop_period;
14struct lm_lockops gdlm_ops;
15
16
17static struct gdlm_ls *init_gdlm(lm_callback_t cb, lm_fsdata_t *fsdata,
18 int flags, char *table_name)
19{
20 struct gdlm_ls *ls;
21 char buf[256], *p;
22
23 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
24 if (!ls)
25 return NULL;
26
27 ls->drop_locks_count = gdlm_drop_count;
28 ls->drop_locks_period = gdlm_drop_period;
29 ls->fscb = cb;
30 ls->fsdata = fsdata;
31 ls->fsflags = flags;
32 spin_lock_init(&ls->async_lock);
33 INIT_LIST_HEAD(&ls->complete);
34 INIT_LIST_HEAD(&ls->blocking);
35 INIT_LIST_HEAD(&ls->delayed);
36 INIT_LIST_HEAD(&ls->submit);
37 INIT_LIST_HEAD(&ls->all_locks);
38 init_waitqueue_head(&ls->thread_wait);
39 init_waitqueue_head(&ls->wait_control);
40 ls->thread1 = NULL;
41 ls->thread2 = NULL;
42 ls->drop_time = jiffies;
43 ls->jid = -1;
44
45 strncpy(buf, table_name, 256);
46 buf[255] = '\0';
47
48 p = strstr(buf, ":");
49 if (!p) {
50 log_info("invalid table_name \"%s\"", table_name);
51 kfree(ls);
52 return NULL;
53 }
54 *p = '\0';
55 p++;
56
57 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
58 strncpy(ls->fsname, p, GDLM_NAME_LEN);
59
60 return ls;
61}
62
63static int make_args(struct gdlm_ls *ls, char *data_arg)
64{
65 char data[256];
66 char *options, *x, *y;
67 int error = 0;
68
69 memset(data, 0, 256);
70 strncpy(data, data_arg, 255);
71
72 for (options = data; (x = strsep(&options, ":")); ) {
73 if (!*x)
74 continue;
75
76 y = strchr(x, '=');
77 if (y)
78 *y++ = 0;
79
80 if (!strcmp(x, "jid")) {
81 if (!y) {
82 log_error("need argument to jid");
83 error = -EINVAL;
84 break;
85 }
86 sscanf(y, "%u", &ls->jid);
87
88 } else if (!strcmp(x, "first")) {
89 if (!y) {
90 log_error("need argument to first");
91 error = -EINVAL;
92 break;
93 }
94 sscanf(y, "%u", &ls->first);
95
96 } else if (!strcmp(x, "id")) {
97 if (!y) {
98 log_error("need argument to id");
99 error = -EINVAL;
100 break;
101 }
102 sscanf(y, "%u", &ls->id);
103
104 } else {
105 log_error("unkonwn option: %s", x);
106 error = -EINVAL;
107 break;
108 }
109 }
110
111 return error;
112}
113
114static int gdlm_mount(char *table_name, char *host_data,
115 lm_callback_t cb, lm_fsdata_t *fsdata,
116 unsigned int min_lvb_size, int flags,
117 struct lm_lockstruct *lockstruct,
118 struct kobject *fskobj)
119{
120 struct gdlm_ls *ls;
121 int error = -ENOMEM;
122
123 if (min_lvb_size > GDLM_LVB_SIZE)
124 goto out;
125
126 ls = init_gdlm(cb, fsdata, flags, table_name);
127 if (!ls)
128 goto out;
129
130 error = gdlm_init_threads(ls);
131 if (error)
132 goto out_free;
133
134 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
135 &ls->dlm_lockspace, 0, GDLM_LVB_SIZE);
136 if (error) {
137 log_error("dlm_new_lockspace error %d", error);
138 goto out_thread;
139 }
140
141 error = gdlm_kobject_setup(ls, fskobj);
142 if (error)
143 goto out_dlm;
144
145 error = make_args(ls, host_data);
146 if (error)
147 goto out_sysfs;
148
149 lockstruct->ls_jid = ls->jid;
150 lockstruct->ls_first = ls->first;
151 lockstruct->ls_lockspace = ls;
152 lockstruct->ls_ops = &gdlm_ops;
153 lockstruct->ls_flags = 0;
154 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
155 return 0;
156
157 out_sysfs:
158 gdlm_kobject_release(ls);
159 out_dlm:
160 dlm_release_lockspace(ls->dlm_lockspace, 2);
161 out_thread:
162 gdlm_release_threads(ls);
163 out_free:
164 kfree(ls);
165 out:
166 return error;
167}
168
169static void gdlm_unmount(lm_lockspace_t *lockspace)
170{
171 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
172 int rv;
173
174 log_debug("unmount flags %lx", ls->flags);
175
176 /* FIXME: serialize unmount and withdraw in case they
177 happen at once. Also, if unmount follows withdraw,
178 wait for withdraw to finish. */
179
180 if (test_bit(DFL_WITHDRAW, &ls->flags))
181 goto out;
182
183 gdlm_kobject_release(ls);
184 dlm_release_lockspace(ls->dlm_lockspace, 2);
185 gdlm_release_threads(ls);
186 rv = gdlm_release_all_locks(ls);
187 if (rv)
188 log_info("gdlm_unmount: %d stray locks freed", rv);
189 out:
190 kfree(ls);
191}
192
193static void gdlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
194 unsigned int message)
195{
196 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
197 ls->recover_jid_done = jid;
198 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
199}
200
201static void gdlm_others_may_mount(lm_lockspace_t *lockspace)
202{
203 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
204 ls->first_done = 1;
205 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
206}
207
208/* Userspace gets the offline uevent, blocks new gfs locks on
209 other mounters, and lets us know (sets WITHDRAW flag). Then,
210 userspace leaves the mount group while we leave the lockspace. */
211
212static void gdlm_withdraw(lm_lockspace_t *lockspace)
213{
214 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
215
216 kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
217
218 wait_event_interruptible(ls->wait_control,
219 test_bit(DFL_WITHDRAW, &ls->flags));
220
221 dlm_release_lockspace(ls->dlm_lockspace, 2);
222 gdlm_release_threads(ls);
223 gdlm_release_all_locks(ls);
224 gdlm_kobject_release(ls);
225}
226
227struct lm_lockops gdlm_ops = {
228 .lm_proto_name = "lock_dlm",
229 .lm_mount = gdlm_mount,
230 .lm_others_may_mount = gdlm_others_may_mount,
231 .lm_unmount = gdlm_unmount,
232 .lm_withdraw = gdlm_withdraw,
233 .lm_get_lock = gdlm_get_lock,
234 .lm_put_lock = gdlm_put_lock,
235 .lm_lock = gdlm_lock,
236 .lm_unlock = gdlm_unlock,
237 .lm_plock = gdlm_plock,
238 .lm_punlock = gdlm_punlock,
239 .lm_plock_get = gdlm_plock_get,
240 .lm_cancel = gdlm_cancel,
241 .lm_hold_lvb = gdlm_hold_lvb,
242 .lm_unhold_lvb = gdlm_unhold_lvb,
243 .lm_sync_lvb = gdlm_sync_lvb,
244 .lm_recovery_done = gdlm_recovery_done,
245 .lm_owner = THIS_MODULE,
246};
247
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
new file mode 100644
index 000000000000..f7ac5821def9
--- /dev/null
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -0,0 +1,298 @@
1/*
2 * Copyright (C) 2005 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <linux/miscdevice.h>
10#include <linux/lock_dlm_plock.h>
11
12#include "lock_dlm.h"
13
14
15static spinlock_t ops_lock;
16static struct list_head send_list;
17static struct list_head recv_list;
18static wait_queue_head_t send_wq;
19static wait_queue_head_t recv_wq;
20
21struct plock_op {
22 struct list_head list;
23 int done;
24 struct gdlm_plock_info info;
25};
26
27static inline void set_version(struct gdlm_plock_info *info)
28{
29 info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
30 info->version[1] = GDLM_PLOCK_VERSION_MINOR;
31 info->version[2] = GDLM_PLOCK_VERSION_PATCH;
32}
33
34static int check_version(struct gdlm_plock_info *info)
35{
36 if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
37 (GDLM_PLOCK_VERSION_MINOR < info->version[1])) {
38 log_error("plock device version mismatch: "
39 "kernel (%u.%u.%u), user (%u.%u.%u)",
40 GDLM_PLOCK_VERSION_MAJOR,
41 GDLM_PLOCK_VERSION_MINOR,
42 GDLM_PLOCK_VERSION_PATCH,
43 info->version[0],
44 info->version[1],
45 info->version[2]);
46 return -EINVAL;
47 }
48 return 0;
49}
50
51static void send_op(struct plock_op *op)
52{
53 set_version(&op->info);
54 INIT_LIST_HEAD(&op->list);
55 spin_lock(&ops_lock);
56 list_add_tail(&op->list, &send_list);
57 spin_unlock(&ops_lock);
58 wake_up(&send_wq);
59}
60
61int gdlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
62 struct file *file, int cmd, struct file_lock *fl)
63{
64 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
65 struct plock_op *op;
66 int rv;
67
68 op = kzalloc(sizeof(*op), GFP_KERNEL);
69 if (!op)
70 return -ENOMEM;
71
72 op->info.optype = GDLM_PLOCK_OP_LOCK;
73 op->info.pid = (uint32_t) fl->fl_owner;
74 op->info.ex = (fl->fl_type == F_WRLCK);
75 op->info.wait = IS_SETLKW(cmd);
76 op->info.fsid = ls->id;
77 op->info.number = name->ln_number;
78 op->info.start = fl->fl_start;
79 op->info.end = fl->fl_end;
80
81 send_op(op);
82 wait_event(recv_wq, (op->done != 0));
83
84 spin_lock(&ops_lock);
85 if (!list_empty(&op->list)) {
86 printk(KERN_INFO "plock op on list\n");
87 list_del(&op->list);
88 }
89 spin_unlock(&ops_lock);
90
91 rv = op->info.rv;
92
93 if (!rv) {
94 if (posix_lock_file_wait(file, fl) < 0)
95 log_error("gdlm_plock: vfs lock error %x,%llx",
96 name->ln_type, name->ln_number);
97 }
98
99 kfree(op);
100 return rv;
101}
102
103int gdlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
104 struct file *file, struct file_lock *fl)
105{
106 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
107 struct plock_op *op;
108 int rv;
109
110 op = kzalloc(sizeof(*op), GFP_KERNEL);
111 if (!op)
112 return -ENOMEM;
113
114 if (posix_lock_file_wait(file, fl) < 0)
115 log_error("gdlm_punlock: vfs unlock error %x,%llx",
116 name->ln_type, name->ln_number);
117
118 op->info.optype = GDLM_PLOCK_OP_UNLOCK;
119 op->info.pid = (uint32_t) fl->fl_owner;
120 op->info.fsid = ls->id;
121 op->info.number = name->ln_number;
122 op->info.start = fl->fl_start;
123 op->info.end = fl->fl_end;
124
125 send_op(op);
126 wait_event(recv_wq, (op->done != 0));
127
128 spin_lock(&ops_lock);
129 if (!list_empty(&op->list)) {
130 printk(KERN_INFO "punlock op on list\n");
131 list_del(&op->list);
132 }
133 spin_unlock(&ops_lock);
134
135 rv = op->info.rv;
136
137 kfree(op);
138 return rv;
139}
140
141int gdlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
142 struct file *file, struct file_lock *fl)
143{
144 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
145 struct plock_op *op;
146 int rv;
147
148 op = kzalloc(sizeof(*op), GFP_KERNEL);
149 if (!op)
150 return -ENOMEM;
151
152 op->info.optype = GDLM_PLOCK_OP_GET;
153 op->info.pid = (uint32_t) fl->fl_owner;
154 op->info.ex = (fl->fl_type == F_WRLCK);
155 op->info.fsid = ls->id;
156 op->info.number = name->ln_number;
157 op->info.start = fl->fl_start;
158 op->info.end = fl->fl_end;
159
160 send_op(op);
161 wait_event(recv_wq, (op->done != 0));
162
163 spin_lock(&ops_lock);
164 if (!list_empty(&op->list)) {
165 printk(KERN_INFO "plock_get op on list\n");
166 list_del(&op->list);
167 }
168 spin_unlock(&ops_lock);
169
170 rv = op->info.rv;
171
172 if (rv == 0)
173 fl->fl_type = F_UNLCK;
174 else if (rv > 0) {
175 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
176 fl->fl_pid = op->info.pid;
177 fl->fl_start = op->info.start;
178 fl->fl_end = op->info.end;
179 }
180
181 kfree(op);
182 return rv;
183}
184
185/* a read copies out one plock request from the send list */
186static ssize_t dev_read(struct file *file, char __user *u, size_t count,
187 loff_t *ppos)
188{
189 struct gdlm_plock_info info;
190 struct plock_op *op = NULL;
191
192 if (count < sizeof(info))
193 return -EINVAL;
194
195 spin_lock(&ops_lock);
196 if (!list_empty(&send_list)) {
197 op = list_entry(send_list.next, struct plock_op, list);
198 list_move(&op->list, &recv_list);
199 memcpy(&info, &op->info, sizeof(info));
200 }
201 spin_unlock(&ops_lock);
202
203 if (!op)
204 return -EAGAIN;
205
206 if (copy_to_user(u, &info, sizeof(info)))
207 return -EFAULT;
208 return sizeof(info);
209}
210
211/* a write copies in one plock result that should match a plock_op
212 on the recv list */
213static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
214 loff_t *ppos)
215{
216 struct gdlm_plock_info info;
217 struct plock_op *op;
218 int found = 0;
219
220 if (count != sizeof(info))
221 return -EINVAL;
222
223 if (copy_from_user(&info, u, sizeof(info)))
224 return -EFAULT;
225
226 if (check_version(&info))
227 return -EINVAL;
228
229 spin_lock(&ops_lock);
230 list_for_each_entry(op, &recv_list, list) {
231 if (op->info.fsid == info.fsid &&
232 op->info.number == info.number) {
233 list_del_init(&op->list);
234 found = 1;
235 op->done = 1;
236 memcpy(&op->info, &info, sizeof(info));
237 break;
238 }
239 }
240 spin_unlock(&ops_lock);
241
242 if (found)
243 wake_up(&recv_wq);
244 else
245 printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
246 info.number);
247 return count;
248}
249
250static unsigned int dev_poll(struct file *file, poll_table *wait)
251{
252 poll_wait(file, &send_wq, wait);
253
254 spin_lock(&ops_lock);
255 if (!list_empty(&send_list)) {
256 spin_unlock(&ops_lock);
257 return POLLIN | POLLRDNORM;
258 }
259 spin_unlock(&ops_lock);
260 return 0;
261}
262
263static struct file_operations dev_fops = {
264 .read = dev_read,
265 .write = dev_write,
266 .poll = dev_poll,
267 .owner = THIS_MODULE
268};
269
270static struct miscdevice plock_dev_misc = {
271 .minor = MISC_DYNAMIC_MINOR,
272 .name = GDLM_PLOCK_MISC_NAME,
273 .fops = &dev_fops
274};
275
276int gdlm_plock_init(void)
277{
278 int rv;
279
280 spin_lock_init(&ops_lock);
281 INIT_LIST_HEAD(&send_list);
282 INIT_LIST_HEAD(&recv_list);
283 init_waitqueue_head(&send_wq);
284 init_waitqueue_head(&recv_wq);
285
286 rv = misc_register(&plock_dev_misc);
287 if (rv)
288 printk(KERN_INFO "gdlm_plock_init: misc_register failed %d",
289 rv);
290 return rv;
291}
292
293void gdlm_plock_exit(void)
294{
295 if (misc_deregister(&plock_dev_misc) < 0)
296 printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed");
297}
298
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
new file mode 100644
index 000000000000..e1e5186c97c9
--- /dev/null
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -0,0 +1,218 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15extern struct lm_lockops gdlm_ops;
16
17static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
18{
19 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
20}
21
22static ssize_t block_show(struct gdlm_ls *ls, char *buf)
23{
24 ssize_t ret;
25 int val = 0;
26
27 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
28 val = 1;
29 ret = sprintf(buf, "%d\n", val);
30 return ret;
31}
32
33static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
34{
35 ssize_t ret = len;
36 int val;
37
38 val = simple_strtol(buf, NULL, 0);
39
40 if (val == 1)
41 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 else if (val == 0) {
43 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
44 gdlm_submit_delayed(ls);
45 } else
46 ret = -EINVAL;
47 return ret;
48}
49
50static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
51{
52 ssize_t ret;
53 int val = 0;
54
55 if (test_bit(DFL_WITHDRAW, &ls->flags))
56 val = 1;
57 ret = sprintf(buf, "%d\n", val);
58 return ret;
59}
60
61static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
62{
63 ssize_t ret = len;
64 int val;
65
66 val = simple_strtol(buf, NULL, 0);
67
68 if (val == 1)
69 set_bit(DFL_WITHDRAW, &ls->flags);
70 else
71 ret = -EINVAL;
72 wake_up(&ls->wait_control);
73 return ret;
74}
75
76static ssize_t id_show(struct gdlm_ls *ls, char *buf)
77{
78 return sprintf(buf, "%u\n", ls->id);
79}
80
81static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
82{
83 return sprintf(buf, "%d\n", ls->jid);
84}
85
86static ssize_t first_show(struct gdlm_ls *ls, char *buf)
87{
88 return sprintf(buf, "%d\n", ls->first);
89}
90
91static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
92{
93 return sprintf(buf, "%d\n", ls->first_done);
94}
95
96static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
97{
98 return sprintf(buf, "%d\n", ls->recover_jid);
99}
100
101static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
102{
103 ls->recover_jid = simple_strtol(buf, NULL, 0);
104 ls->fscb(ls->fsdata, LM_CB_NEED_RECOVERY, &ls->recover_jid);
105 return len;
106}
107
108static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
109{
110 return sprintf(buf, "%d\n", ls->recover_jid_done);
111}
112
113struct gdlm_attr {
114 struct attribute attr;
115 ssize_t (*show)(struct gdlm_ls *, char *);
116 ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
117};
118
119#define GDLM_ATTR(_name,_mode,_show,_store) \
120static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
121
122GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
123GDLM_ATTR(block, 0644, block_show, block_store);
124GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
125GDLM_ATTR(id, 0444, id_show, NULL);
126GDLM_ATTR(jid, 0444, jid_show, NULL);
127GDLM_ATTR(first, 0444, first_show, NULL);
128GDLM_ATTR(first_done, 0444, first_done_show, NULL);
129GDLM_ATTR(recover, 0644, recover_show, recover_store);
130GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
131
132static struct attribute *gdlm_attrs[] = {
133 &gdlm_attr_proto_name.attr,
134 &gdlm_attr_block.attr,
135 &gdlm_attr_withdraw.attr,
136 &gdlm_attr_id.attr,
137 &gdlm_attr_jid.attr,
138 &gdlm_attr_first.attr,
139 &gdlm_attr_first_done.attr,
140 &gdlm_attr_recover.attr,
141 &gdlm_attr_recover_done.attr,
142 NULL,
143};
144
145static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
146 char *buf)
147{
148 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
149 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
150 return a->show ? a->show(ls, buf) : 0;
151}
152
153static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
154 const char *buf, size_t len)
155{
156 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
157 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
158 return a->store ? a->store(ls, buf, len) : len;
159}
160
161static struct sysfs_ops gdlm_attr_ops = {
162 .show = gdlm_attr_show,
163 .store = gdlm_attr_store,
164};
165
166static struct kobj_type gdlm_ktype = {
167 .default_attrs = gdlm_attrs,
168 .sysfs_ops = &gdlm_attr_ops,
169};
170
171static struct kset gdlm_kset = {
172 .subsys = &kernel_subsys,
173 .kobj = {.name = "lock_dlm",},
174 .ktype = &gdlm_ktype,
175};
176
177int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
178{
179 int error;
180
181 error = kobject_set_name(&ls->kobj, "%s", "lock_module");
182 if (error) {
183 log_error("can't set kobj name %d", error);
184 return error;
185 }
186
187 ls->kobj.kset = &gdlm_kset;
188 ls->kobj.ktype = &gdlm_ktype;
189 ls->kobj.parent = fskobj;
190
191 error = kobject_register(&ls->kobj);
192 if (error)
193 log_error("can't register kobj %d", error);
194
195 return error;
196}
197
198void gdlm_kobject_release(struct gdlm_ls *ls)
199{
200 kobject_unregister(&ls->kobj);
201}
202
203int gdlm_sysfs_init(void)
204{
205 int error;
206
207 error = kset_register(&gdlm_kset);
208 if (error)
209 printk("lock_dlm: cannot register kset %d\n", error);
210
211 return error;
212}
213
214void gdlm_sysfs_exit(void)
215{
216 kset_unregister(&gdlm_kset);
217}
218
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
new file mode 100644
index 000000000000..3e2edcc2dbf6
--- /dev/null
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -0,0 +1,352 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
13 thread gets to it. */
14
15static void queue_submit(struct gdlm_lock *lp)
16{
17 struct gdlm_ls *ls = lp->ls;
18
19 spin_lock(&ls->async_lock);
20 list_add_tail(&lp->delay_list, &ls->submit);
21 spin_unlock(&ls->async_lock);
22 wake_up(&ls->thread_wait);
23}
24
25static void process_blocking(struct gdlm_lock *lp, int bast_mode)
26{
27 struct gdlm_ls *ls = lp->ls;
28 unsigned int cb = 0;
29
30 switch (gdlm_make_lmstate(bast_mode)) {
31 case LM_ST_EXCLUSIVE:
32 cb = LM_CB_NEED_E;
33 break;
34 case LM_ST_DEFERRED:
35 cb = LM_CB_NEED_D;
36 break;
37 case LM_ST_SHARED:
38 cb = LM_CB_NEED_S;
39 break;
40 default:
41 gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
42 }
43
44 ls->fscb(ls->fsdata, cb, &lp->lockname);
45}
46
47static void process_complete(struct gdlm_lock *lp)
48{
49 struct gdlm_ls *ls = lp->ls;
50 struct lm_async_cb acb;
51 int16_t prev_mode = lp->cur;
52
53 memset(&acb, 0, sizeof(acb));
54
55 if (lp->lksb.sb_status == -DLM_ECANCEL) {
56 log_info("complete dlm cancel %x,%llx flags %lx",
57 lp->lockname.ln_type, lp->lockname.ln_number,
58 lp->flags);
59
60 lp->req = lp->cur;
61 acb.lc_ret |= LM_OUT_CANCELED;
62 if (lp->cur == DLM_LOCK_IV)
63 lp->lksb.sb_lkid = 0;
64 goto out;
65 }
66
67 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
68 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
69 log_info("unlock sb_status %d %x,%llx flags %lx",
70 lp->lksb.sb_status, lp->lockname.ln_type,
71 lp->lockname.ln_number, lp->flags);
72 return;
73 }
74
75 lp->cur = DLM_LOCK_IV;
76 lp->req = DLM_LOCK_IV;
77 lp->lksb.sb_lkid = 0;
78
79 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
80 gdlm_delete_lp(lp);
81 return;
82 }
83 goto out;
84 }
85
86 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
87 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
88
89 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
90 if (lp->req == DLM_LOCK_PR)
91 lp->req = DLM_LOCK_CW;
92 else if (lp->req == DLM_LOCK_CW)
93 lp->req = DLM_LOCK_PR;
94 }
95
96 /*
97 * A canceled lock request. The lock was just taken off the delayed
98 * list and was never even submitted to dlm.
99 */
100
101 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
102 log_info("complete internal cancel %x,%llx",
103 lp->lockname.ln_type, lp->lockname.ln_number);
104 lp->req = lp->cur;
105 acb.lc_ret |= LM_OUT_CANCELED;
106 goto out;
107 }
108
109 /*
110 * An error occured.
111 */
112
113 if (lp->lksb.sb_status) {
114 /* a "normal" error */
115 if ((lp->lksb.sb_status == -EAGAIN) &&
116 (lp->lkf & DLM_LKF_NOQUEUE)) {
117 lp->req = lp->cur;
118 if (lp->cur == DLM_LOCK_IV)
119 lp->lksb.sb_lkid = 0;
120 goto out;
121 }
122
123 /* this could only happen with cancels I think */
124 log_info("ast sb_status %d %x,%llx flags %lx",
125 lp->lksb.sb_status, lp->lockname.ln_type,
126 lp->lockname.ln_number, lp->flags);
127 return;
128 }
129
130 /*
131 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
132 */
133
134 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
135 complete(&lp->ast_wait);
136 return;
137 }
138
139 /*
140 * A lock has been demoted to NL because it initially completed during
141 * BLOCK_LOCKS. Now it must be requested in the originally requested
142 * mode.
143 */
144
145 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
146 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
147 lp->lockname.ln_type, lp->lockname.ln_number);
148 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
149 lp->lockname.ln_type, lp->lockname.ln_number);
150
151 lp->cur = DLM_LOCK_NL;
152 lp->req = lp->prev_req;
153 lp->prev_req = DLM_LOCK_IV;
154 lp->lkf &= ~DLM_LKF_CONVDEADLK;
155
156 set_bit(LFL_NOCACHE, &lp->flags);
157
158 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
159 !test_bit(LFL_NOBLOCK, &lp->flags))
160 gdlm_queue_delayed(lp);
161 else
162 queue_submit(lp);
163 return;
164 }
165
166 /*
167 * A request is granted during dlm recovery. It may be granted
168 * because the locks of a failed node were cleared. In that case,
169 * there may be inconsistent data beneath this lock and we must wait
170 * for recovery to complete to use it. When gfs recovery is done this
171 * granted lock will be converted to NL and then reacquired in this
172 * granted state.
173 */
174
175 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
176 !test_bit(LFL_NOBLOCK, &lp->flags) &&
177 lp->req != DLM_LOCK_NL) {
178
179 lp->cur = lp->req;
180 lp->prev_req = lp->req;
181 lp->req = DLM_LOCK_NL;
182 lp->lkf |= DLM_LKF_CONVERT;
183 lp->lkf &= ~DLM_LKF_CONVDEADLK;
184
185 log_debug("rereq %x,%llx id %x %d,%d",
186 lp->lockname.ln_type, lp->lockname.ln_number,
187 lp->lksb.sb_lkid, lp->cur, lp->req);
188
189 set_bit(LFL_REREQUEST, &lp->flags);
190 queue_submit(lp);
191 return;
192 }
193
194 /*
195 * DLM demoted the lock to NL before it was granted so GFS must be
196 * told it cannot cache data for this lock.
197 */
198
199 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
200 set_bit(LFL_NOCACHE, &lp->flags);
201
202 out:
203 /*
204 * This is an internal lock_dlm lock
205 */
206
207 if (test_bit(LFL_INLOCK, &lp->flags)) {
208 clear_bit(LFL_NOBLOCK, &lp->flags);
209 lp->cur = lp->req;
210 complete(&lp->ast_wait);
211 return;
212 }
213
214 /*
215 * Normal completion of a lock request. Tell GFS it now has the lock.
216 */
217
218 clear_bit(LFL_NOBLOCK, &lp->flags);
219 lp->cur = lp->req;
220
221 acb.lc_name = lp->lockname;
222 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
223
224 if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
225 (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
226 acb.lc_ret |= LM_OUT_CACHEABLE;
227
228 ls->fscb(ls->fsdata, LM_CB_ASYNC, &acb);
229}
230
231static inline int no_work(struct gdlm_ls *ls, int blocking)
232{
233 int ret;
234
235 spin_lock(&ls->async_lock);
236 ret = list_empty(&ls->complete) && list_empty(&ls->submit);
237 if (ret && blocking)
238 ret = list_empty(&ls->blocking);
239 spin_unlock(&ls->async_lock);
240
241 return ret;
242}
243
244static inline int check_drop(struct gdlm_ls *ls)
245{
246 if (!ls->drop_locks_count)
247 return 0;
248
249 if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
250 ls->drop_time = jiffies;
251 if (ls->all_locks_count >= ls->drop_locks_count)
252 return 1;
253 }
254 return 0;
255}
256
257static int gdlm_thread(void *data)
258{
259 struct gdlm_ls *ls = (struct gdlm_ls *) data;
260 struct gdlm_lock *lp = NULL;
261 int blist = 0;
262 uint8_t complete, blocking, submit, drop;
263 DECLARE_WAITQUEUE(wait, current);
264
265 /* Only thread1 is allowed to do blocking callbacks since gfs
266 may wait for a completion callback within a blocking cb. */
267
268 if (current == ls->thread1)
269 blist = 1;
270
271 while (!kthread_should_stop()) {
272 set_current_state(TASK_INTERRUPTIBLE);
273 add_wait_queue(&ls->thread_wait, &wait);
274 if (no_work(ls, blist))
275 schedule();
276 remove_wait_queue(&ls->thread_wait, &wait);
277 set_current_state(TASK_RUNNING);
278
279 complete = blocking = submit = drop = 0;
280
281 spin_lock(&ls->async_lock);
282
283 if (blist && !list_empty(&ls->blocking)) {
284 lp = list_entry(ls->blocking.next, struct gdlm_lock,
285 blist);
286 list_del_init(&lp->blist);
287 blocking = lp->bast_mode;
288 lp->bast_mode = 0;
289 } else if (!list_empty(&ls->complete)) {
290 lp = list_entry(ls->complete.next, struct gdlm_lock,
291 clist);
292 list_del_init(&lp->clist);
293 complete = 1;
294 } else if (!list_empty(&ls->submit)) {
295 lp = list_entry(ls->submit.next, struct gdlm_lock,
296 delay_list);
297 list_del_init(&lp->delay_list);
298 submit = 1;
299 }
300
301 drop = check_drop(ls);
302 spin_unlock(&ls->async_lock);
303
304 if (complete)
305 process_complete(lp);
306
307 else if (blocking)
308 process_blocking(lp, blocking);
309
310 else if (submit)
311 gdlm_do_lock(lp);
312
313 if (drop)
314 ls->fscb(ls->fsdata, LM_CB_DROPLOCKS, NULL);
315
316 schedule();
317 }
318
319 return 0;
320}
321
322int gdlm_init_threads(struct gdlm_ls *ls)
323{
324 struct task_struct *p;
325 int error;
326
327 p = kthread_run(gdlm_thread, ls, "lock_dlm1");
328 error = IS_ERR(p);
329 if (error) {
330 log_error("can't start lock_dlm1 thread %d", error);
331 return error;
332 }
333 ls->thread1 = p;
334
335 p = kthread_run(gdlm_thread, ls, "lock_dlm2");
336 error = IS_ERR(p);
337 if (error) {
338 log_error("can't start lock_dlm2 thread %d", error);
339 kthread_stop(ls->thread1);
340 return error;
341 }
342 ls->thread2 = p;
343
344 return 0;
345}
346
347void gdlm_release_threads(struct gdlm_ls *ls)
348{
349 kthread_stop(ls->thread1);
350 kthread_stop(ls->thread2);
351}
352
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
new file mode 100644
index 000000000000..cdadf956c831
--- /dev/null
+++ b/fs/gfs2/locking/nolock/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS) += lock_nolock.o
2lock_nolock-y := main.o
3
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
new file mode 100644
index 000000000000..9398309f2810
--- /dev/null
+++ b/fs/gfs2/locking/nolock/main.c
@@ -0,0 +1,264 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/types.h>
15#include <linux/fs.h>
16#include <linux/smp_lock.h>
17
18#include "../../lm_interface.h"
19
/* Per-mount state of the nolock protocol. */
struct nolock_lockspace {
	/* set from min_lvb_size at mount; size of each lvb allocation */
	unsigned int nl_lvb_size;
};
23
24struct lm_lockops nolock_ops;
25
26static int nolock_mount(char *table_name, char *host_data,
27 lm_callback_t cb, lm_fsdata_t *fsdata,
28 unsigned int min_lvb_size, int flags,
29 struct lm_lockstruct *lockstruct,
30 struct kobject *fskobj)
31{
32 char *c;
33 unsigned int jid;
34 struct nolock_lockspace *nl;
35
36 c = strstr(host_data, "jid=");
37 if (!c)
38 jid = 0;
39 else {
40 c += 4;
41 sscanf(c, "%u", &jid);
42 }
43
44 nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
45 if (!nl)
46 return -ENOMEM;
47
48 nl->nl_lvb_size = min_lvb_size;
49
50 lockstruct->ls_jid = jid;
51 lockstruct->ls_first = 1;
52 lockstruct->ls_lvb_size = min_lvb_size;
53 lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
54 lockstruct->ls_ops = &nolock_ops;
55 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
56
57 return 0;
58}
59
60static void nolock_others_may_mount(lm_lockspace_t *lockspace)
61{
62}
63
64static void nolock_unmount(lm_lockspace_t *lockspace)
65{
66 struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
67 kfree(nl);
68}
69
70static void nolock_withdraw(lm_lockspace_t *lockspace)
71{
72}
73
74/**
75 * nolock_get_lock - get a lm_lock_t given a descripton of the lock
76 * @lockspace: the lockspace the lock lives in
77 * @name: the name of the lock
78 * @lockp: return the lm_lock_t here
79 *
80 * Returns: 0 on success, -EXXX on failure
81 */
82
83static int nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
84 lm_lock_t **lockp)
85{
86 *lockp = (lm_lock_t *)lockspace;
87 return 0;
88}
89
90/**
91 * nolock_put_lock - get rid of a lock structure
92 * @lock: the lock to throw away
93 *
94 */
95
96static void nolock_put_lock(lm_lock_t *lock)
97{
98}
99
100/**
101 * nolock_lock - acquire a lock
102 * @lock: the lock to manipulate
103 * @cur_state: the current state
104 * @req_state: the requested state
105 * @flags: modifier flags
106 *
107 * Returns: A bitmap of LM_OUT_*
108 */
109
110static unsigned int nolock_lock(lm_lock_t *lock, unsigned int cur_state,
111 unsigned int req_state, unsigned int flags)
112{
113 return req_state | LM_OUT_CACHEABLE;
114}
115
116/**
117 * nolock_unlock - unlock a lock
118 * @lock: the lock to manipulate
119 * @cur_state: the current state
120 *
121 * Returns: 0
122 */
123
124static unsigned int nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
125{
126 return 0;
127}
128
129static void nolock_cancel(lm_lock_t *lock)
130{
131}
132
133/**
134 * nolock_hold_lvb - hold on to a lock value block
135 * @lock: the lock the LVB is associated with
136 * @lvbp: return the lm_lvb_t here
137 *
138 * Returns: 0 on success, -EXXX on failure
139 */
140
141static int nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
142{
143 struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
144 int error = 0;
145
146 *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL);
147 if (!*lvbp)
148 error = -ENOMEM;
149
150 return error;
151}
152
153/**
154 * nolock_unhold_lvb - release a LVB
155 * @lock: the lock the LVB is associated with
156 * @lvb: the lock value block
157 *
158 */
159
160static void nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
161{
162 kfree(lvb);
163}
164
165/**
166 * nolock_sync_lvb - sync out the value of a lvb
167 * @lock: the lock the LVB is associated with
168 * @lvb: the lock value block
169 *
170 */
171
172static void nolock_sync_lvb(lm_lock_t *lock, char *lvb)
173{
174}
175
176static int nolock_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
177 struct file *file, struct file_lock *fl)
178{
179 struct file_lock *tmp;
180
181 lock_kernel();
182 tmp = posix_test_lock(file, fl);
183 fl->fl_type = F_UNLCK;
184 if (tmp)
185 memcpy(fl, tmp, sizeof(struct file_lock));
186 unlock_kernel();
187
188 return 0;
189}
190
191static int nolock_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
192 struct file *file, int cmd, struct file_lock *fl)
193{
194 int error;
195 lock_kernel();
196 error = posix_lock_file_wait(file, fl);
197 unlock_kernel();
198 return error;
199}
200
201static int nolock_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
202 struct file *file, struct file_lock *fl)
203{
204 int error;
205 lock_kernel();
206 error = posix_lock_file_wait(file, fl);
207 unlock_kernel();
208 return error;
209}
210
211static void nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
212 unsigned int message)
213{
214}
215
216struct lm_lockops nolock_ops = {
217 .lm_proto_name = "lock_nolock",
218 .lm_mount = nolock_mount,
219 .lm_others_may_mount = nolock_others_may_mount,
220 .lm_unmount = nolock_unmount,
221 .lm_withdraw = nolock_withdraw,
222 .lm_get_lock = nolock_get_lock,
223 .lm_put_lock = nolock_put_lock,
224 .lm_lock = nolock_lock,
225 .lm_unlock = nolock_unlock,
226 .lm_cancel = nolock_cancel,
227 .lm_hold_lvb = nolock_hold_lvb,
228 .lm_unhold_lvb = nolock_unhold_lvb,
229 .lm_sync_lvb = nolock_sync_lvb,
230 .lm_plock_get = nolock_plock_get,
231 .lm_plock = nolock_plock,
232 .lm_punlock = nolock_punlock,
233 .lm_recovery_done = nolock_recovery_done,
234 .lm_owner = THIS_MODULE,
235};
236
237int __init init_nolock(void)
238{
239 int error;
240
241 error = gfs_register_lockproto(&nolock_ops);
242 if (error) {
243 printk(KERN_WARNING
244 "lock_nolock: can't register protocol: %d\n", error);
245 return error;
246 }
247
248 printk(KERN_INFO
249 "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
250 return 0;
251}
252
253void __exit exit_nolock(void)
254{
255 gfs_unregister_lockproto(&nolock_ops);
256}
257
258module_init(init_nolock);
259module_exit(exit_nolock);
260
261MODULE_DESCRIPTION("GFS Nolock Locking Module");
262MODULE_AUTHOR("Red Hat, Inc.");
263MODULE_LICENSE("GPL");
264
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
new file mode 100644
index 000000000000..0e31d46edd4d
--- /dev/null
+++ b/fs/gfs2/log.c
@@ -0,0 +1,643 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "log.h"
21#include "lops.h"
22#include "meta_io.h"
23
24#define PULL 1
25
26static void do_lock_wait(struct gfs2_sbd *sdp, wait_queue_head_t *wq,
27 atomic_t *a)
28{
29 wait_event(*wq, atomic_read(a) ? 0 : 1);
30}
31
32static void lock_for_trans(struct gfs2_sbd *sdp)
33{
34 do_lock_wait(sdp, &sdp->sd_log_trans_wq, &sdp->sd_log_flush_count);
35 atomic_inc(&sdp->sd_log_trans_count);
36}
37
38static void unlock_from_trans(struct gfs2_sbd *sdp)
39{
40 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_trans_count));
41 if (atomic_dec_and_test(&sdp->sd_log_trans_count))
42 wake_up(&sdp->sd_log_flush_wq);
43}
44
45static void gfs2_lock_for_flush(struct gfs2_sbd *sdp)
46{
47 atomic_inc(&sdp->sd_log_flush_count);
48 do_lock_wait(sdp, &sdp->sd_log_flush_wq, &sdp->sd_log_trans_count);
49}
50
51static void gfs2_unlock_from_flush(struct gfs2_sbd *sdp)
52{
53 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_flush_count));
54 if (atomic_dec_and_test(&sdp->sd_log_flush_count))
55 wake_up(&sdp->sd_log_trans_wq);
56}
57
58/**
59 * gfs2_struct2blk - compute stuff
60 * @sdp: the filesystem
61 * @nstruct: the number of structures
62 * @ssize: the size of the structures
63 *
64 * Compute the number of log descriptor blocks needed to hold a certain number
65 * of structures of a certain size.
66 *
67 * Returns: the number of blocks needed (minimum is always 1)
68 */
69
70unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
71 unsigned int ssize)
72{
73 unsigned int blks;
74 unsigned int first, second;
75
76 blks = 1;
77 first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) /
78 ssize;
79
80 if (nstruct > first) {
81 second = (sdp->sd_sb.sb_bsize -
82 sizeof(struct gfs2_meta_header)) / ssize;
83 blks += DIV_RU(nstruct - first, second);
84 }
85
86 return blks;
87}
88
89void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
90{
91 struct list_head *head = &sdp->sd_ail1_list;
92 uint64_t sync_gen;
93 struct list_head *first, *tmp;
94 struct gfs2_ail *first_ai, *ai;
95
96 gfs2_log_lock(sdp);
97 if (list_empty(head)) {
98 gfs2_log_unlock(sdp);
99 return;
100 }
101 sync_gen = sdp->sd_ail_sync_gen++;
102
103 first = head->prev;
104 first_ai = list_entry(first, struct gfs2_ail, ai_list);
105 first_ai->ai_sync_gen = sync_gen;
106 gfs2_ail1_start_one(sdp, first_ai);
107
108 if (flags & DIO_ALL)
109 first = NULL;
110
111 for (;;) {
112 if (first &&
113 (head->prev != first ||
114 gfs2_ail1_empty_one(sdp, first_ai, 0)))
115 break;
116
117 for (tmp = head->prev; tmp != head; tmp = tmp->prev) {
118 ai = list_entry(tmp, struct gfs2_ail, ai_list);
119 if (ai->ai_sync_gen >= sync_gen)
120 continue;
121 ai->ai_sync_gen = sync_gen;
122 gfs2_ail1_start_one(sdp, ai);
123 break;
124 }
125
126 if (tmp == head)
127 break;
128 }
129
130 gfs2_log_unlock(sdp);
131}
132
133int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
134{
135 struct gfs2_ail *ai, *s;
136 int ret;
137
138 gfs2_log_lock(sdp);
139
140 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
141 if (gfs2_ail1_empty_one(sdp, ai, flags))
142 list_move(&ai->ai_list, &sdp->sd_ail2_list);
143 else if (!(flags & DIO_ALL))
144 break;
145 }
146
147 ret = list_empty(&sdp->sd_ail1_list);
148
149 gfs2_log_unlock(sdp);
150
151 return ret;
152}
153
154static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
155{
156 struct gfs2_ail *ai, *safe;
157 unsigned int old_tail = sdp->sd_log_tail;
158 int wrap = (new_tail < old_tail);
159 int a, b, rm;
160
161 gfs2_log_lock(sdp);
162
163 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
164 a = (old_tail <= ai->ai_first);
165 b = (ai->ai_first < new_tail);
166 rm = (wrap) ? (a || b) : (a && b);
167 if (!rm)
168 continue;
169
170 gfs2_ail2_empty_one(sdp, ai);
171 list_del(&ai->ai_list);
172 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
173 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
174 kfree(ai);
175 }
176
177 gfs2_log_unlock(sdp);
178}
179
180/**
181 * gfs2_log_reserve - Make a log reservation
182 * @sdp: The GFS2 superblock
183 * @blks: The number of blocks to reserve
184 *
185 * Returns: errno
186 */
187
188int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
189{
190 LIST_HEAD(list);
191 unsigned int try = 0;
192
193 if (gfs2_assert_warn(sdp, blks) ||
194 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
195 return -EINVAL;
196
197 for (;;) {
198 gfs2_log_lock(sdp);
199 if (list_empty(&list)) {
200 list_add_tail(&list, &sdp->sd_log_blks_list);
201 while (sdp->sd_log_blks_list.next != &list) {
202 DECLARE_WAITQUEUE(__wait_chan, current);
203 set_current_state(TASK_UNINTERRUPTIBLE);
204 add_wait_queue(&sdp->sd_log_blks_wait,
205 &__wait_chan);
206 gfs2_log_unlock(sdp);
207 schedule();
208 gfs2_log_lock(sdp);
209 remove_wait_queue(&sdp->sd_log_blks_wait,
210 &__wait_chan);
211 set_current_state(TASK_RUNNING);
212 }
213 }
214 /* Never give away the last block so we can
215 always pull the tail if we need to. */
216 if (sdp->sd_log_blks_free > blks) {
217 sdp->sd_log_blks_free -= blks;
218 list_del(&list);
219 gfs2_log_unlock(sdp);
220 wake_up(&sdp->sd_log_blks_wait);
221 break;
222 }
223
224 gfs2_log_unlock(sdp);
225 gfs2_ail1_empty(sdp, 0);
226 gfs2_log_flush(sdp);
227
228 if (try++)
229 gfs2_ail1_start(sdp, 0);
230 }
231 lock_for_trans(sdp);
232
233 return 0;
234}
235
236/**
237 * gfs2_log_release - Release a given number of log blocks
238 * @sdp: The GFS2 superblock
239 * @blks: The number of blocks
240 *
241 */
242
243void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
244{
245 unlock_from_trans(sdp);
246
247 gfs2_log_lock(sdp);
248 sdp->sd_log_blks_free += blks;
249 gfs2_assert_withdraw(sdp,
250 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
251 gfs2_log_unlock(sdp);
252}
253
254static uint64_t log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
255{
256 int new = 0;
257 uint64_t dbn;
258 int error;
259
260 error = gfs2_block_map(get_v2ip(sdp->sd_jdesc->jd_inode),
261 lbn, &new, &dbn, NULL);
262 gfs2_assert_withdraw(sdp, !error && dbn);
263
264 return dbn;
265}
266
267/**
268 * log_distance - Compute distance between two journal blocks
269 * @sdp: The GFS2 superblock
270 * @newer: The most recent journal block of the pair
271 * @older: The older journal block of the pair
272 *
273 * Compute the distance (in the journal direction) between two
274 * blocks in the journal
275 *
276 * Returns: the distance in blocks
277 */
278
279static inline unsigned int log_distance(struct gfs2_sbd *sdp,
280 unsigned int newer,
281 unsigned int older)
282{
283 int dist;
284
285 dist = newer - older;
286 if (dist < 0)
287 dist += sdp->sd_jdesc->jd_blocks;
288
289 return dist;
290}
291
292static unsigned int current_tail(struct gfs2_sbd *sdp)
293{
294 struct gfs2_ail *ai;
295 unsigned int tail;
296
297 gfs2_log_lock(sdp);
298
299 if (list_empty(&sdp->sd_ail1_list))
300 tail = sdp->sd_log_head;
301 else {
302 ai = list_entry(sdp->sd_ail1_list.prev,
303 struct gfs2_ail, ai_list);
304 tail = ai->ai_first;
305 }
306
307 gfs2_log_unlock(sdp);
308
309 return tail;
310}
311
312static inline void log_incr_head(struct gfs2_sbd *sdp)
313{
314 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
315 gfs2_assert_withdraw(sdp,
316 sdp->sd_log_flush_head == sdp->sd_log_head);
317
318 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
319 sdp->sd_log_flush_head = 0;
320 sdp->sd_log_flush_wrapped = 1;
321 }
322}
323
324/**
325 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
326 * @sdp: The GFS2 superblock
327 *
328 * Returns: the buffer_head
329 */
330
331struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
332{
333 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
334 struct gfs2_log_buf *lb;
335 struct buffer_head *bh;
336
337 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
338 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
339
340 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
341 lock_buffer(bh);
342 memset(bh->b_data, 0, bh->b_size);
343 set_buffer_uptodate(bh);
344 clear_buffer_dirty(bh);
345 unlock_buffer(bh);
346
347 log_incr_head(sdp);
348
349 return bh;
350}
351
352/**
353 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
354 * @sdp: the filesystem
355 * @data: the data the buffer_head should point to
356 *
357 * Returns: the log buffer descriptor
358 */
359
360struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
361 struct buffer_head *real)
362{
363 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
364 struct gfs2_log_buf *lb;
365 struct buffer_head *bh;
366
367 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
368 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
369 lb->lb_real = real;
370
371 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
372 atomic_set(&bh->b_count, 1);
373 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
374 set_bh_page(bh, real->b_page, bh_offset(real));
375 bh->b_blocknr = blkno;
376 bh->b_size = sdp->sd_sb.sb_bsize;
377 bh->b_bdev = sdp->sd_vfs->s_bdev;
378
379 log_incr_head(sdp);
380
381 return bh;
382}
383
384static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
385{
386 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
387
388 ail2_empty(sdp, new_tail);
389
390 gfs2_log_lock(sdp);
391 sdp->sd_log_blks_free += dist - ((pull) ? 1 : 0);
392 gfs2_assert_withdraw(sdp,
393 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
394 gfs2_log_unlock(sdp);
395
396 sdp->sd_log_tail = new_tail;
397}
398
399/**
400 * log_write_header - Get and initialize a journal header buffer
401 * @sdp: The GFS2 superblock
402 *
403 * Returns: the initialized log buffer descriptor
404 */
405
406static void log_write_header(struct gfs2_sbd *sdp, uint32_t flags, int pull)
407{
408 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
409 struct buffer_head *bh;
410 struct gfs2_log_header *lh;
411 unsigned int tail;
412 uint32_t hash;
413
414 bh = sb_getblk(sdp->sd_vfs, blkno);
415 lock_buffer(bh);
416 memset(bh->b_data, 0, bh->b_size);
417 set_buffer_uptodate(bh);
418 clear_buffer_dirty(bh);
419 unlock_buffer(bh);
420
421 gfs2_ail1_empty(sdp, 0);
422 tail = current_tail(sdp);
423
424 lh = (struct gfs2_log_header *)bh->b_data;
425 memset(lh, 0, sizeof(struct gfs2_log_header));
426 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
427 lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
428 lh->lh_header.mh_format = cpu_to_be16(GFS2_FORMAT_LH);
429 lh->lh_sequence = be64_to_cpu(sdp->sd_log_sequence++);
430 lh->lh_flags = be32_to_cpu(flags);
431 lh->lh_tail = be32_to_cpu(tail);
432 lh->lh_blkno = be32_to_cpu(sdp->sd_log_flush_head);
433 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
434 lh->lh_hash = cpu_to_be32(hash);
435
436 set_buffer_dirty(bh);
437 if (sync_dirty_buffer(bh))
438 gfs2_io_error_bh(sdp, bh);
439 brelse(bh);
440
441 if (sdp->sd_log_tail != tail)
442 log_pull_tail(sdp, tail, pull);
443 else
444 gfs2_assert_withdraw(sdp, !pull);
445
446 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
447 log_incr_head(sdp);
448}
449
450static void log_flush_commit(struct gfs2_sbd *sdp)
451{
452 struct list_head *head = &sdp->sd_log_flush_list;
453 struct gfs2_log_buf *lb;
454 struct buffer_head *bh;
455 unsigned int d;
456
457 d = log_distance(sdp, sdp->sd_log_flush_head, sdp->sd_log_head);
458
459 gfs2_assert_withdraw(sdp, d + 1 == sdp->sd_log_blks_reserved);
460
461 while (!list_empty(head)) {
462 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
463 list_del(&lb->lb_list);
464 bh = lb->lb_bh;
465
466 wait_on_buffer(bh);
467 if (!buffer_uptodate(bh))
468 gfs2_io_error_bh(sdp, bh);
469 if (lb->lb_real) {
470 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
471 schedule();
472 free_buffer_head(bh);
473 } else
474 brelse(bh);
475 kfree(lb);
476 }
477
478 log_write_header(sdp, 0, 0);
479}
480
481/**
482 * gfs2_log_flush_i - flush incore transaction(s)
483 * @sdp: the filesystem
484 * @gl: The glock structure to flush. If NULL, flush the whole incore log
485 *
486 */
487
488void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
489{
490 struct gfs2_ail *ai;
491
492 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
493 INIT_LIST_HEAD(&ai->ai_ail1_list);
494 INIT_LIST_HEAD(&ai->ai_ail2_list);
495 gfs2_lock_for_flush(sdp);
496
497 if (gl) {
498 gfs2_log_lock(sdp);
499 if (list_empty(&gl->gl_le.le_list)) {
500 gfs2_log_unlock(sdp);
501 gfs2_unlock_from_flush(sdp);
502 kfree(ai);
503 return;
504 }
505 gfs2_log_unlock(sdp);
506 }
507
508 mutex_lock(&sdp->sd_log_flush_lock);
509
510 gfs2_assert_withdraw(sdp,
511 sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
512 gfs2_assert_withdraw(sdp,
513 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
514
515 sdp->sd_log_flush_head = sdp->sd_log_head;
516 sdp->sd_log_flush_wrapped = 0;
517 ai->ai_first = sdp->sd_log_flush_head;
518
519 lops_before_commit(sdp);
520 if (!list_empty(&sdp->sd_log_flush_list))
521 log_flush_commit(sdp);
522 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
523 log_write_header(sdp, 0, PULL);
524 lops_after_commit(sdp, ai);
525 sdp->sd_log_head = sdp->sd_log_flush_head;
526 if (sdp->sd_log_flush_wrapped)
527 sdp->sd_log_wraps++;
528
529 sdp->sd_log_blks_reserved =
530 sdp->sd_log_commited_buf =
531 sdp->sd_log_commited_revoke = 0;
532
533 gfs2_log_lock(sdp);
534 if (!list_empty(&ai->ai_ail1_list)) {
535 list_add(&ai->ai_list, &sdp->sd_ail1_list);
536 ai = NULL;
537 }
538 gfs2_log_unlock(sdp);
539
540 mutex_unlock(&sdp->sd_log_flush_lock);
541 sdp->sd_vfs->s_dirt = 0;
542 gfs2_unlock_from_flush(sdp);
543
544 kfree(ai);
545}
546
547static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
548{
549 unsigned int reserved = 1;
550 unsigned int old;
551
552 gfs2_log_lock(sdp);
553
554 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
555 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
556 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
557 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
558
559 if (sdp->sd_log_commited_buf)
560 reserved += 1 + sdp->sd_log_commited_buf +
561 sdp->sd_log_commited_buf/503;
562 if (sdp->sd_log_commited_revoke)
563 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
564 sizeof(uint64_t));
565
566 old = sdp->sd_log_blks_free;
567 sdp->sd_log_blks_free += tr->tr_reserved -
568 (reserved - sdp->sd_log_blks_reserved);
569
570 gfs2_assert_withdraw(sdp,
571 sdp->sd_log_blks_free >= old);
572 gfs2_assert_withdraw(sdp,
573 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
574
575 sdp->sd_log_blks_reserved = reserved;
576
577 gfs2_log_unlock(sdp);
578}
579
580/**
581 * gfs2_log_commit - Commit a transaction to the log
582 * @sdp: the filesystem
583 * @tr: the transaction
584 *
585 * Returns: errno
586 */
587
588void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
589{
590 log_refund(sdp, tr);
591 lops_incore_commit(sdp, tr);
592
593 sdp->sd_vfs->s_dirt = 1;
594 unlock_from_trans(sdp);
595
596 kfree(tr);
597
598 gfs2_log_lock(sdp);
599 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
600 gfs2_log_unlock(sdp);
601 gfs2_log_flush(sdp);
602 } else
603 gfs2_log_unlock(sdp);
604}
605
606/**
607 * gfs2_log_shutdown - write a shutdown header into a journal
608 * @sdp: the filesystem
609 *
610 */
611
612void gfs2_log_shutdown(struct gfs2_sbd *sdp)
613{
614 mutex_lock(&sdp->sd_log_flush_lock);
615
616 gfs2_assert_withdraw(sdp, !atomic_read(&sdp->sd_log_trans_count));
617 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
618 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
619 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
620 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
621 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
622 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
623 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
624 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
625
626 sdp->sd_log_flush_head = sdp->sd_log_head;
627 sdp->sd_log_flush_wrapped = 0;
628
629 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
630
631 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free ==
632 sdp->sd_jdesc->jd_blocks);
633 gfs2_assert_withdraw(sdp, sdp->sd_log_head == sdp->sd_log_tail);
634 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail2_list));
635
636 sdp->sd_log_head = sdp->sd_log_flush_head;
637 if (sdp->sd_log_flush_wrapped)
638 sdp->sd_log_wraps++;
639 sdp->sd_log_tail = sdp->sd_log_head;
640
641 mutex_unlock(&sdp->sd_log_flush_lock);
642}
643
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
new file mode 100644
index 000000000000..e7a6a65c530f
--- /dev/null
+++ b/fs/gfs2/log.h
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOG_DOT_H__
11#define __LOG_DOT_H__
12
13/**
14 * gfs2_log_lock - acquire the right to mess with the log manager
15 * @sdp: the filesystem
16 *
17 */
18
19static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
20{
21 spin_lock(&sdp->sd_log_lock);
22}
23
24/**
25 * gfs2_log_unlock - release the right to mess with the log manager
26 * @sdp: the filesystem
27 *
28 */
29
30static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
31{
32 spin_unlock(&sdp->sd_log_lock);
33}
34
35static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
36 unsigned int value)
37{
38 if (++value == sdp->sd_jdesc->jd_blocks) {
39 value = 0;
40 sdp->sd_log_wraps++;
41 }
42 sdp->sd_log_head = sdp->sd_log_tail = value;
43}
44
45unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
46 unsigned int ssize);
47
48void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
49int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
50
51int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
52void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
53
54struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
55struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
56 struct buffer_head *real);
57
/* Flush the log of @sdp; no glock is passed (NULL) to gfs2_log_flush_i(). */
58#define gfs2_log_flush(sdp) gfs2_log_flush_i((sdp), NULL)
/* Flush the log of the filesystem owning @gl, passing @gl to gfs2_log_flush_i(). */
59#define gfs2_log_flush_glock(gl) gfs2_log_flush_i((gl)->gl_sbd, (gl))
60void gfs2_log_flush_i(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
61void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
62
63void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64
65#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
new file mode 100644
index 000000000000..4bd89c0781e7
--- /dev/null
+++ b/fs/gfs2/lops.c
@@ -0,0 +1,788 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "glock.h"
19#include "log.h"
20#include "lops.h"
21#include "meta_io.h"
22#include "recovery.h"
23#include "rgrp.h"
24#include "trans.h"
25
26static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
27{
28 struct gfs2_glock *gl;
29
30 get_transaction->tr_touched = 1;
31
32 if (!list_empty(&le->le_list))
33 return;
34
35 gl = container_of(le, struct gfs2_glock, gl_le);
36 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
37 return;
38 gfs2_glock_hold(gl);
39 set_bit(GLF_DIRTY, &gl->gl_flags);
40
41 gfs2_log_lock(sdp);
42 sdp->sd_log_num_gl++;
43 list_add(&le->le_list, &sdp->sd_log_le_gl);
44 gfs2_log_unlock(sdp);
45}
46
47static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
48{
49 struct list_head *head = &sdp->sd_log_le_gl;
50 struct gfs2_glock *gl;
51
52 while (!list_empty(head)) {
53 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
54 list_del_init(&gl->gl_le.le_list);
55 sdp->sd_log_num_gl--;
56
57 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
58 gfs2_glock_put(gl);
59 }
60 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
61}
62
63static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
64{
65 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
66 struct gfs2_trans *tr;
67
68 if (!list_empty(&bd->bd_list_tr))
69 return;
70
71 tr = get_transaction;
72 tr->tr_touched = 1;
73 tr->tr_num_buf++;
74 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
75
76 if (!list_empty(&le->le_list))
77 return;
78
79 gfs2_trans_add_gl(bd->bd_gl);
80
81 gfs2_meta_check(sdp, bd->bd_bh);
82 gfs2_pin(sdp, bd->bd_bh);
83
84 gfs2_log_lock(sdp);
85 sdp->sd_log_num_buf++;
86 list_add(&le->le_list, &sdp->sd_log_le_buf);
87 gfs2_log_unlock(sdp);
88
89 tr->tr_num_buf_new++;
90}
91
92static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
93{
94 struct list_head *head = &tr->tr_list_buf;
95 struct gfs2_bufdata *bd;
96
97 while (!list_empty(head)) {
98 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
99 list_del_init(&bd->bd_list_tr);
100 tr->tr_num_buf--;
101 }
102 gfs2_assert_warn(sdp, !tr->tr_num_buf);
103}
104
105static void buf_lo_before_commit(struct gfs2_sbd *sdp)
106{
107 struct buffer_head *bh;
108 struct gfs2_log_descriptor *ld;
109 struct gfs2_bufdata *bd1 = NULL, *bd2;
110 unsigned int total = sdp->sd_log_num_buf;
111 unsigned int offset = sizeof(struct gfs2_log_descriptor);
112 unsigned int limit;
113 unsigned int num;
114 unsigned n;
115 __be64 *ptr;
116
117 offset += (sizeof(__be64) - 1);
118 offset &= ~(sizeof(__be64) - 1);
119 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
120 /* for 4k blocks, limit = 503 */
121
122 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
123 while(total) {
124 num = total;
125 if (total > limit)
126 num = limit;
127 bh = gfs2_log_get_buf(sdp);
128 ld = (struct gfs2_log_descriptor *)bh->b_data;
129 ptr = (__be64 *)(bh->b_data + offset);
130 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
131 ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
132 ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
133 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
134 ld->ld_length = cpu_to_be32(num + 1);
135 ld->ld_data1 = cpu_to_be32(num);
136 ld->ld_data2 = cpu_to_be32(0);
137 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
138
139 n = 0;
140 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
141 bd_le.le_list) {
142 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
143 if (++n >= num)
144 break;
145 }
146
147 set_buffer_dirty(bh);
148 ll_rw_block(WRITE, 1, &bh);
149
150 n = 0;
151 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
152 bd_le.le_list) {
153 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
154 set_buffer_dirty(bh);
155 ll_rw_block(WRITE, 1, &bh);
156 if (++n >= num)
157 break;
158 }
159
160 total -= num;
161 }
162}
163
164static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
165{
166 struct list_head *head = &sdp->sd_log_le_buf;
167 struct gfs2_bufdata *bd;
168
169 while (!list_empty(head)) {
170 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
171 list_del_init(&bd->bd_le.le_list);
172 sdp->sd_log_num_buf--;
173
174 gfs2_unpin(sdp, bd->bd_bh, ai);
175 }
176 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
177}
178
179static void buf_lo_before_scan(struct gfs2_jdesc *jd,
180 struct gfs2_log_header *head, int pass)
181{
182 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
183
184 if (pass != 0)
185 return;
186
187 sdp->sd_found_blocks = 0;
188 sdp->sd_replayed_blocks = 0;
189}
190
191static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
192 struct gfs2_log_descriptor *ld, __be64 *ptr,
193 int pass)
194{
195 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
196 struct gfs2_glock *gl = get_v2ip(jd->jd_inode)->i_gl;
197 unsigned int blks = be32_to_cpu(ld->ld_data1);
198 struct buffer_head *bh_log, *bh_ip;
199 uint64_t blkno;
200 int error = 0;
201
202 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
203 return 0;
204
205 gfs2_replay_incr_blk(sdp, &start);
206
207 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
208 blkno = be64_to_cpu(*ptr++);
209
210 sdp->sd_found_blocks++;
211
212 if (gfs2_revoke_check(sdp, blkno, start))
213 continue;
214
215 error = gfs2_replay_read_block(jd, start, &bh_log);
216 if (error)
217 return error;
218
219 bh_ip = gfs2_meta_new(gl, blkno);
220 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
221
222 if (gfs2_meta_check(sdp, bh_ip))
223 error = -EIO;
224 else
225 mark_buffer_dirty(bh_ip);
226
227 brelse(bh_log);
228 brelse(bh_ip);
229
230 if (error)
231 break;
232
233 sdp->sd_replayed_blocks++;
234 }
235
236 return error;
237}
238
239static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
240{
241 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
242
243 if (error) {
244 gfs2_meta_sync(get_v2ip(jd->jd_inode)->i_gl,
245 DIO_START | DIO_WAIT);
246 return;
247 }
248 if (pass != 1)
249 return;
250
251 gfs2_meta_sync(get_v2ip(jd->jd_inode)->i_gl, DIO_START | DIO_WAIT);
252
253 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
254 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
255}
256
257static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
258{
259 struct gfs2_trans *tr;
260
261 tr = get_transaction;
262 tr->tr_touched = 1;
263 tr->tr_num_revoke++;
264
265 gfs2_log_lock(sdp);
266 sdp->sd_log_num_revoke++;
267 list_add(&le->le_list, &sdp->sd_log_le_revoke);
268 gfs2_log_unlock(sdp);
269}
270
271static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
272{
273 struct gfs2_log_descriptor *ld;
274 struct gfs2_meta_header *mh;
275 struct buffer_head *bh;
276 unsigned int offset;
277 struct list_head *head = &sdp->sd_log_le_revoke;
278 struct gfs2_revoke *rv;
279
280 if (!sdp->sd_log_num_revoke)
281 return;
282
283 bh = gfs2_log_get_buf(sdp);
284 ld = (struct gfs2_log_descriptor *)bh->b_data;
285 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
286 ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
287 ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
288 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
289 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
290 sizeof(uint64_t)));
291 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
292 ld->ld_data2 = cpu_to_be32(0);
293 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
294 offset = sizeof(struct gfs2_log_descriptor);
295
296 while (!list_empty(head)) {
297 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
298 list_del_init(&rv->rv_le.le_list);
299 sdp->sd_log_num_revoke--;
300
301 if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) {
302 set_buffer_dirty(bh);
303 ll_rw_block(WRITE, 1, &bh);
304
305 bh = gfs2_log_get_buf(sdp);
306 mh = (struct gfs2_meta_header *)bh->b_data;
307 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
308 mh->mh_type = cpu_to_be16(GFS2_METATYPE_LB);
309 mh->mh_format = cpu_to_be16(GFS2_FORMAT_LB);
310 offset = sizeof(struct gfs2_meta_header);
311 }
312
313 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
314 kfree(rv);
315
316 offset += sizeof(uint64_t);
317 }
318 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
319
320 set_buffer_dirty(bh);
321 ll_rw_block(WRITE, 1, &bh);
322}
323
324static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
325 struct gfs2_log_header *head, int pass)
326{
327 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
328
329 if (pass != 0)
330 return;
331
332 sdp->sd_found_revokes = 0;
333 sdp->sd_replay_tail = head->lh_tail;
334}
335
336static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
337 struct gfs2_log_descriptor *ld, __be64 *ptr,
338 int pass)
339{
340 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
341 unsigned int blks = be32_to_cpu(ld->ld_length);
342 unsigned int revokes = be32_to_cpu(ld->ld_data1);
343 struct buffer_head *bh;
344 unsigned int offset;
345 uint64_t blkno;
346 int first = 1;
347 int error;
348
349 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
350 return 0;
351
352 offset = sizeof(struct gfs2_log_descriptor);
353
354 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
355 error = gfs2_replay_read_block(jd, start, &bh);
356 if (error)
357 return error;
358
359 if (!first)
360 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
361
362 while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
363 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
364
365 error = gfs2_revoke_add(sdp, blkno, start);
366 if (error < 0)
367 return error;
368 else if (error)
369 sdp->sd_found_revokes++;
370
371 if (!--revokes)
372 break;
373 offset += sizeof(uint64_t);
374 }
375
376 brelse(bh);
377 offset = sizeof(struct gfs2_meta_header);
378 first = 0;
379 }
380
381 return 0;
382}
383
384static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
385{
386 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
387
388 if (error) {
389 gfs2_revoke_clean(sdp);
390 return;
391 }
392 if (pass != 1)
393 return;
394
395 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
396 jd->jd_jid, sdp->sd_found_revokes);
397
398 gfs2_revoke_clean(sdp);
399}
400
401static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
402{
403 struct gfs2_rgrpd *rgd;
404
405 get_transaction->tr_touched = 1;
406
407 if (!list_empty(&le->le_list))
408 return;
409
410 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
411 gfs2_rgrp_bh_hold(rgd);
412
413 gfs2_log_lock(sdp);
414 sdp->sd_log_num_rg++;
415 list_add(&le->le_list, &sdp->sd_log_le_rg);
416 gfs2_log_unlock(sdp);
417}
418
419static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
420{
421 struct list_head *head = &sdp->sd_log_le_rg;
422 struct gfs2_rgrpd *rgd;
423
424 while (!list_empty(head)) {
425 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
426 list_del_init(&rgd->rd_le.le_list);
427 sdp->sd_log_num_rg--;
428
429 gfs2_rgrp_repolish_clones(rgd);
430 gfs2_rgrp_bh_put(rgd);
431 }
432 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
433}
434
435/**
436 * databuf_lo_add - Add a databuf to the transaction.
437 *
438 * This is used in two distinct cases:
439 * i) In ordered write mode
440 * We put the data buffer on a list so that we can ensure that its
441 * synced to disk at the right time
442 * ii) In journaled data mode
443 * We need to journal the data block in the same way as metadata in
444 * the functions above. The difference is that here we have a tag
445 * which is two __be64's being the block number (as per meta data)
446 * and a flag which says whether the data block needs escaping or
447 * not. This means we need a new log entry for each 251 or so data
448 * blocks, which isn't an enormous overhead but twice as much as
449 * for normal metadata blocks.
450 */
451static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
452{
453 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
454 struct gfs2_trans *tr = get_transaction;
455 struct address_space *mapping = bd->bd_bh->b_page->mapping;
456 struct gfs2_inode *ip = get_v2ip(mapping->host);
457
458 tr->tr_touched = 1;
459 if (!list_empty(&bd->bd_list_tr) &&
460 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
461 tr->tr_num_buf++;
462 gfs2_trans_add_gl(bd->bd_gl);
463 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
464 gfs2_pin(sdp, bd->bd_bh);
465 }
466 gfs2_log_lock(sdp);
467 if (!list_empty(&le->le_list)) {
468 if (ip->i_di.di_flags & GFS2_DIF_JDATA)
469 sdp->sd_log_num_jdata++;
470 sdp->sd_log_num_databuf++;
471 list_add(&le->le_list, &sdp->sd_log_le_databuf);
472 }
473 gfs2_log_unlock(sdp);
474}
475
476static int gfs2_check_magic(struct buffer_head *bh)
477{
478 struct page *page = bh->b_page;
479 void *kaddr;
480 __be32 *ptr;
481 int rv = 0;
482
483 kaddr = kmap_atomic(page, KM_USER0);
484 ptr = kaddr + bh_offset(bh);
485 if (*ptr == cpu_to_be32(GFS2_MAGIC))
486 rv = 1;
487 kunmap_atomic(page, KM_USER0);
488
489 return rv;
490}
491
492/**
493 * databuf_lo_before_commit - Scan the data buffers, writing as we go
494 *
495 * Here we scan through the lists of buffers and make the assumption
496 * that any buffer thats been pinned is being journaled, and that
497 * any unpinned buffer is an ordered write data buffer and therefore
498 * will be written back rather than journaled.
499 */
500static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
501{
502 LIST_HEAD(started);
503 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
504 struct buffer_head *bh = NULL;
505 unsigned int offset = sizeof(struct gfs2_log_descriptor);
506 struct gfs2_log_descriptor *ld;
507 unsigned int limit;
508 unsigned int total_dbuf = sdp->sd_log_num_databuf;
509 unsigned int total_jdata = sdp->sd_log_num_jdata;
510 unsigned int num, n;
511 __be64 *ptr = NULL;
512
513 offset += (2*sizeof(__be64) - 1);
514 offset &= ~(2*sizeof(__be64) - 1);
515 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
516
517 /*
518 * Start writing ordered buffers, write journaled buffers
519 * into the log along with a header
520 */
521 gfs2_log_lock(sdp);
522 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
523 bd_le.le_list);
524 while(total_dbuf) {
525 num = total_jdata;
526 if (num > limit)
527 num = limit;
528 n = 0;
529 list_for_each_entry_safe_continue(bd1, bdt,
530 &sdp->sd_log_le_databuf,
531 bd_le.le_list) {
532 /* An ordered write buffer */
533 if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
534 list_move(&bd1->bd_le.le_list, &started);
535 if (bd1 == bd2) {
536 bd2 = NULL;
537 bd2 = list_prepare_entry(bd2,
538 &sdp->sd_log_le_databuf,
539 bd_le.le_list);
540 }
541 total_dbuf--;
542 if (bd1->bd_bh) {
543 get_bh(bd1->bd_bh);
544 if (buffer_dirty(bd1->bd_bh)) {
545 gfs2_log_unlock(sdp);
546 wait_on_buffer(bd1->bd_bh);
547 ll_rw_block(WRITE, 1,
548 &bd1->bd_bh);
549 gfs2_log_lock(sdp);
550 }
551 brelse(bd1->bd_bh);
552 continue;
553 }
554 continue;
555 } else if (bd1->bd_bh) { /* A journaled buffer */
556 int magic;
557 gfs2_log_unlock(sdp);
558 if (!bh) {
559 bh = gfs2_log_get_buf(sdp);
560 ld = (struct gfs2_log_descriptor *)
561 bh->b_data;
562 ptr = (__be64 *)(bh->b_data + offset);
563 ld->ld_header.mh_magic =
564 cpu_to_be32(GFS2_MAGIC);
565 ld->ld_header.mh_type =
566 cpu_to_be16(GFS2_METATYPE_LD);
567 ld->ld_header.mh_format =
568 cpu_to_be16(GFS2_FORMAT_LD);
569 ld->ld_type =
570 cpu_to_be32(GFS2_LOG_DESC_JDATA);
571 ld->ld_length = cpu_to_be32(num + 1);
572 ld->ld_data1 = cpu_to_be32(num);
573 ld->ld_data2 = cpu_to_be32(0);
574 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
575 }
576 magic = gfs2_check_magic(bd1->bd_bh);
577 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
578 *ptr++ = cpu_to_be64((__u64)magic);
579 clear_buffer_escaped(bd1->bd_bh);
580 if (unlikely(magic != 0))
581 set_buffer_escaped(bd1->bd_bh);
582 gfs2_log_lock(sdp);
583 if (n++ > num)
584 break;
585 }
586 }
587 gfs2_log_unlock(sdp);
588 if (bh) {
589 set_buffer_dirty(bh);
590 ll_rw_block(WRITE, 1, &bh);
591 bh = NULL;
592 }
593 n = 0;
594 gfs2_log_lock(sdp);
595 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
596 bd_le.le_list) {
597 if (!bd2->bd_bh)
598 continue;
599 /* copy buffer if it needs escaping */
600 gfs2_log_unlock(sdp);
601 if (unlikely(buffer_escaped(bd2->bd_bh))) {
602 void *kaddr;
603 struct page *page = bd2->bd_bh->b_page;
604 bh = gfs2_log_get_buf(sdp);
605 kaddr = kmap_atomic(page, KM_USER0);
606 memcpy(bh->b_data,
607 kaddr + bh_offset(bd2->bd_bh),
608 sdp->sd_sb.sb_bsize);
609 kunmap_atomic(page, KM_USER0);
610 *(__be32 *)bh->b_data = 0;
611 } else {
612 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
613 }
614 set_buffer_dirty(bh);
615 ll_rw_block(WRITE, 1, &bh);
616 gfs2_log_lock(sdp);
617 if (++n >= num)
618 break;
619 }
620 bh = NULL;
621 total_dbuf -= num;
622 total_jdata -= num;
623 }
624 gfs2_log_unlock(sdp);
625
626 /* Wait on all ordered buffers */
627 while (!list_empty(&started)) {
628 gfs2_log_lock(sdp);
629 bd1 = list_entry(started.next, struct gfs2_bufdata,
630 bd_le.le_list);
631 list_del(&bd1->bd_le.le_list);
632 sdp->sd_log_num_databuf--;
633
634 bh = bd1->bd_bh;
635 if (bh) {
636 set_v2bd(bh, NULL);
637 gfs2_log_unlock(sdp);
638 wait_on_buffer(bh);
639 brelse(bh);
640 } else
641 gfs2_log_unlock(sdp);
642
643 kfree(bd1);
644 }
645
646 /* We've removed all the ordered write bufs here, so only jdata left */
647 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
648}
649
650static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
651 struct gfs2_log_descriptor *ld,
652 __be64 *ptr, int pass)
653{
654 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
655 struct gfs2_glock *gl = get_v2ip(jd->jd_inode)->i_gl;
656 unsigned int blks = be32_to_cpu(ld->ld_data1);
657 struct buffer_head *bh_log, *bh_ip;
658 uint64_t blkno;
659 uint64_t esc;
660 int error = 0;
661
662 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
663 return 0;
664
665 gfs2_replay_incr_blk(sdp, &start);
666 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
667 blkno = be64_to_cpu(*ptr++);
668 esc = be64_to_cpu(*ptr++);
669
670 sdp->sd_found_blocks++;
671
672 if (gfs2_revoke_check(sdp, blkno, start))
673 continue;
674
675 error = gfs2_replay_read_block(jd, start, &bh_log);
676 if (error)
677 return error;
678
679 bh_ip = gfs2_meta_new(gl, blkno);
680 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
681
682 /* Unescape */
683 if (esc) {
684 __be32 *eptr = (__be32 *)bh_ip->b_data;
685 *eptr = cpu_to_be32(GFS2_MAGIC);
686 }
687 mark_buffer_dirty(bh_ip);
688
689 brelse(bh_log);
690 brelse(bh_ip);
691 if (error)
692 break;
693
694 sdp->sd_replayed_blocks++;
695 }
696
697 return error;
698}
699
700/* FIXME: sort out accounting for log blocks etc. */
701
702static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
703{
704 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
705
706 if (error) {
707 gfs2_meta_sync(get_v2ip(jd->jd_inode)->i_gl,
708 DIO_START | DIO_WAIT);
709 return;
710 }
711 if (pass != 1)
712 return;
713
714 /* data sync? */
715 gfs2_meta_sync(get_v2ip(jd->jd_inode)->i_gl, DIO_START | DIO_WAIT);
716
717 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
718 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
719}
720
721static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
722{
723 struct list_head *head = &sdp->sd_log_le_databuf;
724 struct gfs2_bufdata *bd;
725
726 while (!list_empty(head)) {
727 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
728 list_del(&bd->bd_le.le_list);
729 sdp->sd_log_num_databuf--;
730 sdp->sd_log_num_jdata--;
731 gfs2_unpin(sdp, bd->bd_bh, ai);
732 }
733 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
734 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
735}
736
737
738struct gfs2_log_operations gfs2_glock_lops = {
739 .lo_add = glock_lo_add,
740 .lo_after_commit = glock_lo_after_commit,
741 .lo_name = "glock"
742};
743
744struct gfs2_log_operations gfs2_buf_lops = {
745 .lo_add = buf_lo_add,
746 .lo_incore_commit = buf_lo_incore_commit,
747 .lo_before_commit = buf_lo_before_commit,
748 .lo_after_commit = buf_lo_after_commit,
749 .lo_before_scan = buf_lo_before_scan,
750 .lo_scan_elements = buf_lo_scan_elements,
751 .lo_after_scan = buf_lo_after_scan,
752 .lo_name = "buf"
753};
754
755struct gfs2_log_operations gfs2_revoke_lops = {
756 .lo_add = revoke_lo_add,
757 .lo_before_commit = revoke_lo_before_commit,
758 .lo_before_scan = revoke_lo_before_scan,
759 .lo_scan_elements = revoke_lo_scan_elements,
760 .lo_after_scan = revoke_lo_after_scan,
761 .lo_name = "revoke"
762};
763
764struct gfs2_log_operations gfs2_rg_lops = {
765 .lo_add = rg_lo_add,
766 .lo_after_commit = rg_lo_after_commit,
767 .lo_name = "rg"
768};
769
770struct gfs2_log_operations gfs2_databuf_lops = {
771 .lo_add = databuf_lo_add,
772 .lo_incore_commit = buf_lo_incore_commit,
773 .lo_before_commit = databuf_lo_before_commit,
774 .lo_after_commit = databuf_lo_after_commit,
775 .lo_scan_elements = databuf_lo_scan_elements,
776 .lo_after_scan = databuf_lo_after_scan,
777 .lo_name = "databuf"
778};
779
780struct gfs2_log_operations *gfs2_log_ops[] = {
781 &gfs2_glock_lops,
782 &gfs2_buf_lops,
783 &gfs2_revoke_lops,
784 &gfs2_rg_lops,
785 &gfs2_databuf_lops,
786 NULL
787};
788
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
new file mode 100644
index 000000000000..417f5aade4b1
--- /dev/null
+++ b/fs/gfs2/lops.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOPS_DOT_H__
11#define __LOPS_DOT_H__
12
13extern struct gfs2_log_operations gfs2_glock_lops;
14extern struct gfs2_log_operations gfs2_buf_lops;
15extern struct gfs2_log_operations gfs2_revoke_lops;
16extern struct gfs2_log_operations gfs2_rg_lops;
17extern struct gfs2_log_operations gfs2_databuf_lops;
18
19extern struct gfs2_log_operations *gfs2_log_ops[];
20
21static inline void lops_init_le(struct gfs2_log_element *le,
22 struct gfs2_log_operations *lops)
23{
24 INIT_LIST_HEAD(&le->le_list);
25 le->le_ops = lops;
26}
27
28static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
29{
30 if (le->le_ops->lo_add)
31 le->le_ops->lo_add(sdp, le);
32}
33
34static inline void lops_incore_commit(struct gfs2_sbd *sdp,
35 struct gfs2_trans *tr)
36{
37 int x;
38 for (x = 0; gfs2_log_ops[x]; x++)
39 if (gfs2_log_ops[x]->lo_incore_commit)
40 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
41}
42
43static inline void lops_before_commit(struct gfs2_sbd *sdp)
44{
45 int x;
46 for (x = 0; gfs2_log_ops[x]; x++)
47 if (gfs2_log_ops[x]->lo_before_commit)
48 gfs2_log_ops[x]->lo_before_commit(sdp);
49}
50
51static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 int x;
54 for (x = 0; gfs2_log_ops[x]; x++)
55 if (gfs2_log_ops[x]->lo_after_commit)
56 gfs2_log_ops[x]->lo_after_commit(sdp, ai);
57}
58
59static inline void lops_before_scan(struct gfs2_jdesc *jd,
60 struct gfs2_log_header *head,
61 unsigned int pass)
62{
63 int x;
64 for (x = 0; gfs2_log_ops[x]; x++)
65 if (gfs2_log_ops[x]->lo_before_scan)
66 gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
67}
68
69static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
70 struct gfs2_log_descriptor *ld,
71 __be64 *ptr,
72 unsigned int pass)
73{
74 int x, error;
75 for (x = 0; gfs2_log_ops[x]; x++)
76 if (gfs2_log_ops[x]->lo_scan_elements) {
77 error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
78 ld, ptr, pass);
79 if (error)
80 return error;
81 }
82
83 return 0;
84}
85
86static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
87 unsigned int pass)
88{
89 int x;
90 for (x = 0; gfs2_log_ops[x]; x++)
91 if (gfs2_log_ops[x]->lo_before_scan)
92 gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
93}
94
95#endif /* __LOPS_DOT_H__ */
96
diff --git a/fs/gfs2/lvb.c b/fs/gfs2/lvb.c
new file mode 100644
index 000000000000..ca959ebb80c1
--- /dev/null
+++ b/fs/gfs2/lvb.c
@@ -0,0 +1,49 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18
/*
 * pv - printk one member of a structure at KERN_INFO level
 *
 * The first argument is a pointer to the structure, @member the field
 * name (also printed as the label) and @fmt the printf conversion for it.
 * Note that the expansion already ends in a semicolon.
 */
19#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \
20 struct->member);
21
22void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb)
23{
24 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
25
26 qb->qb_magic = be32_to_cpu(str->qb_magic);
27 qb->qb_limit = be64_to_cpu(str->qb_limit);
28 qb->qb_warn = be64_to_cpu(str->qb_warn);
29 qb->qb_value = be64_to_cpu(str->qb_value);
30}
31
32void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb)
33{
34 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
35
36 str->qb_magic = cpu_to_be32(qb->qb_magic);
37 str->qb_limit = cpu_to_be64(qb->qb_limit);
38 str->qb_warn = cpu_to_be64(qb->qb_warn);
39 str->qb_value = cpu_to_be64(qb->qb_value);
40}
41
/**
 * gfs2_quota_lvb_print - print the fields of a quota lock value block
 * @qb: the (CPU byte order) structure to print
 *
 * Emits one line per field through the pv() macro; __pad is not printed.
 */
42void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb)
43{
44 pv(qb, qb_magic, "%u");
45 pv(qb, qb_limit, "%llu");
46 pv(qb, qb_warn, "%llu");
47 pv(qb, qb_value, "%lld");
48}
49
diff --git a/fs/gfs2/lvb.h b/fs/gfs2/lvb.h
new file mode 100644
index 000000000000..ca9732b2d9f4
--- /dev/null
+++ b/fs/gfs2/lvb.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LVB_DOT_H__
11#define __LVB_DOT_H__
12
13#define GFS2_MIN_LVB_SIZE 32
14
/*
 * Quota information carried in a lock value block.  The same layout is
 * used for the raw (big-endian) block and for the CPU byte order copy;
 * gfs2_quota_lvb_in() and gfs2_quota_lvb_out() convert between the two.
 */
15struct gfs2_quota_lvb {
16 uint32_t qb_magic;
17 uint32_t __pad; /* Not converted by gfs2_quota_lvb_in()/_out() */
18 uint64_t qb_limit; /* Hard limit of # blocks to alloc */
19 uint64_t qb_warn; /* Warn user when alloc is above this # */
20 int64_t qb_value; /* Current # blocks allocated */
21};
22
23void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb);
24void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb);
25void gfs2_quota_lvb_print(struct gfs2_quota_lvb *qb);
26
27#endif /* __LVB_DOT_H__ */
28
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
new file mode 100644
index 000000000000..0c60f2b10fdd
--- /dev/null
+++ b/fs/gfs2/main.c
@@ -0,0 +1,103 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "ops_fstype.h"
21#include "sys.h"
22
23/**
24 * init_gfs2_fs - Register GFS2 as a filesystem
25 *
26 * Returns: 0 on success, error code on failure
27 */
28
29static int __init init_gfs2_fs(void)
30{
31 int error;
32
33 gfs2_init_lmh();
34
35 error = gfs2_sys_init();
36 if (error)
37 return error;
38
39 error = -ENOMEM;
40
41 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
42 sizeof(struct gfs2_glock),
43 0, 0, NULL, NULL);
44 if (!gfs2_glock_cachep)
45 goto fail;
46
47 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
48 sizeof(struct gfs2_inode),
49 0, 0, NULL, NULL);
50 if (!gfs2_inode_cachep)
51 goto fail;
52
53 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
54 sizeof(struct gfs2_bufdata),
55 0, 0, NULL, NULL);
56 if (!gfs2_bufdata_cachep)
57 goto fail;
58
59 error = register_filesystem(&gfs2_fs_type);
60 if (error)
61 goto fail;
62
63 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
64
65 return 0;
66
67 fail:
68 if (gfs2_bufdata_cachep)
69 kmem_cache_destroy(gfs2_bufdata_cachep);
70
71 if (gfs2_inode_cachep)
72 kmem_cache_destroy(gfs2_inode_cachep);
73
74 if (gfs2_glock_cachep)
75 kmem_cache_destroy(gfs2_glock_cachep);
76
77 gfs2_sys_uninit();
78 return error;
79}
80
81/**
82 * exit_gfs2_fs - Unregister the file system
83 *
84 */
85
86static void __exit exit_gfs2_fs(void)
87{
88 unregister_filesystem(&gfs2_fs_type);
89
90 kmem_cache_destroy(gfs2_bufdata_cachep);
91 kmem_cache_destroy(gfs2_inode_cachep);
92 kmem_cache_destroy(gfs2_glock_cachep);
93
94 gfs2_sys_uninit();
95}
96
97MODULE_DESCRIPTION("Global File System");
98MODULE_AUTHOR("Red Hat, Inc.");
99MODULE_LICENSE("GPL");
100
101module_init(init_gfs2_fs);
102module_exit(exit_gfs2_fs);
103
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
new file mode 100644
index 000000000000..53f33fa899f9
--- /dev/null
+++ b/fs/gfs2/meta_io.c
@@ -0,0 +1,882 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/writeback.h>
18#include <linux/swap.h>
19#include <linux/delay.h>
20#include <asm/semaphore.h>
21
22#include "gfs2.h"
23#include "glock.h"
24#include "glops.h"
25#include "inode.h"
26#include "log.h"
27#include "lops.h"
28#include "meta_io.h"
29#include "rgrp.h"
30#include "trans.h"
31
/*
 * buffer_busy(): non-zero if any of the dirty, locked or pinned state
 * bits is set in bh->b_state.
 * buffer_in_io(): non-zero if the dirty or locked bit is set (pinned is
 * not considered).
 */
32#define buffer_busy(bh) \
33((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
34#define buffer_in_io(bh) \
35((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
36
37static int aspace_get_block(struct inode *inode, sector_t lblock,
38 struct buffer_head *bh_result, int create)
39{
40 gfs2_assert_warn(get_v2sdp(inode->i_sb), 0);
41 return -EOPNOTSUPP;
42}
43
/*
 * ->writepage for the metadata address space: delegates to
 * block_write_full_page() with aspace_get_block() as the block mapper
 * and returns its result.
 */
44static int gfs2_aspace_writepage(struct page *page,
45 struct writeback_control *wbc)
46{
47 return block_write_full_page(page, aspace_get_block, wbc);
48}
49
50/**
51 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
52 * @bh: the buffer we're stuck on
53 *
54 */
55
56static void stuck_releasepage(struct buffer_head *bh)
57{
58 struct gfs2_sbd *sdp = get_v2sdp(bh->b_page->mapping->host->i_sb);
59 struct gfs2_bufdata *bd = get_v2bd(bh);
60 struct gfs2_glock *gl;
61
62 fs_warn(sdp, "stuck in gfs2_releasepage()\n");
63 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
64 (uint64_t)bh->b_blocknr, atomic_read(&bh->b_count));
65 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
66 fs_warn(sdp, "get_v2bd(bh) = %s\n", (bd) ? "!NULL" : "NULL");
67
68 if (!bd)
69 return;
70
71 gl = bd->bd_gl;
72
73 fs_warn(sdp, "gl = (%u, %llu)\n",
74 gl->gl_name.ln_type, gl->gl_name.ln_number);
75
76 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
77 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
78 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
79
80 if (gl->gl_ops == &gfs2_inode_glops) {
81 struct gfs2_inode *ip = get_gl2ip(gl);
82 unsigned int x;
83
84 if (!ip)
85 return;
86
87 fs_warn(sdp, "ip = %llu %llu\n",
88 ip->i_num.no_formal_ino, ip->i_num.no_addr);
89 fs_warn(sdp, "ip->i_count = %d, ip->i_vnode = %s\n",
90 atomic_read(&ip->i_count),
91 (ip->i_vnode) ? "!NULL" : "NULL");
92
93 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
94 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
95 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
96 }
97}
98
99/**
100 * gfs2_aspace_releasepage - free the metadata associated with a page
101 * @page: the page that's being released
102 * @gfp_mask: passed from Linux VFS, ignored by us
103 *
104 * Call try_to_free_buffers() if the buffers in this page can be
105 * released.
106 *
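107 * Returns: 0 if a buffer is still referenced while i_writecount is zero, otherwise the result of try_to_free_buffers()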
108 */
109
/*
 * Walks every buffer on the page.  A buffer that is still referenced is
 * waited for only while aspace->i_writecount is non-zero; otherwise the
 * function gives up and returns 0.  Once a buffer is unreferenced any
 * attached gfs2_bufdata is freed.  Finally the page is handed to
 * try_to_free_buffers() and its result is returned.
 */
110static int gfs2_aspace_releasepage(struct page *page, gfp_t gfp_mask)
111{
112 struct inode *aspace = page->mapping->host;
113 struct gfs2_sbd *sdp = get_v2sdp(aspace->i_sb);
114 struct buffer_head *bh, *head;
115 struct gfs2_bufdata *bd;
116 unsigned long t;
117
118 if (!page_has_buffers(page))
119 goto out;
120
121 head = bh = page_buffers(page);
122 do {
 /* Timestamp used to rate-limit the "stuck" diagnostics below. */
123 t = jiffies;
124
125 while (atomic_read(&bh->b_count)) {
 /* i_writecount is raised by gfs2_meta_inval() around truncation. */
126 if (atomic_read(&aspace->i_writecount)) {
 /* Report once every gt_stall_secs seconds while still waiting. */
127 if (time_after_eq(jiffies, t +
128 gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
129 stuck_releasepage(bh);
130 t = jiffies;
131 }
132
133 yield();
134 continue;
135 }
136
 /* Buffer is in use and nobody is invalidating: refuse to release. */
137 return 0;
138 }
139
140 gfs2_assert_warn(sdp, !buffer_pinned(bh));
141
142 bd = get_v2bd(bh);
143 if (bd) {
 /* The bufdata must be idle (on no list, not in the AIL) before it is freed. */
144 gfs2_assert_warn(sdp, bd->bd_bh == bh);
145 gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
146 gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list));
147 gfs2_assert_warn(sdp, !bd->bd_ail);
148 kmem_cache_free(gfs2_bufdata_cachep, bd);
149 set_v2bd(bh, NULL);
150 }
151
152 bh = bh->b_this_page;
153 }
154 while (bh != head);
155
156 out:
157 return try_to_free_buffers(page);
158}
159
/* Address space operations installed on every aspace by gfs2_aspace_get(). */
160static struct address_space_operations aspace_aops = {
161 .writepage = gfs2_aspace_writepage,
162 .releasepage = gfs2_aspace_releasepage,
163};
164
165/**
166 * gfs2_aspace_get - Create and initialize a struct inode structure
167 * @sdp: the filesystem the aspace is in
168 *
169 * Right now a struct inode is just a struct inode. Maybe Linux
170 * will supply a more lightweight address space construct (that works)
171 * in the future.
172 *
173 * Make sure pages/buffers in this aspace aren't in high memory.
174 *
175 * Returns: the aspace
176 */
177
178struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
179{
180 struct inode *aspace;
181
182 aspace = new_inode(sdp->sd_vfs);
183 if (aspace) {
184 mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
185 aspace->i_mapping->a_ops = &aspace_aops;
186 aspace->i_size = ~0ULL;
187 set_v2ip(aspace, NULL);
188 insert_inode_hash(aspace);
189 }
190
191 return aspace;
192}
193
/*
 * Counterpart of gfs2_aspace_get(): unhashes the aspace inode and drops
 * a reference to it with iput().
 */
194void gfs2_aspace_put(struct inode *aspace)
195{
196 remove_inode_hash(aspace);
197 iput(aspace);
198}
199
200/**
201 * gfs2_ail1_start_one - Start I/O on a part of the AIL
202 * @sdp: the filesystem
203 * @ai: the part of the AIL
204 *
205 */
206
/*
 * Scans ai->ai_ail1_list from the tail.  Buffers that are no longer busy
 * are moved to the ail2 list; the first dirty buffer found is submitted
 * for writing and the scan is restarted.  The function returns once a
 * full pass finds no dirty buffer to submit.
 *
 * NOTE(review): the log lock is dropped and re-taken here, so the caller
 * presumably holds it on entry -- confirm against callers.
 */
207void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
208{
209 struct gfs2_bufdata *bd, *s;
210 struct buffer_head *bh;
211 int retry;
212
213 do {
214 retry = 0;
215
216 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
217 bd_ail_st_list) {
218 bh = bd->bd_bh;
219
220 gfs2_assert(sdp, bd->bd_ail == ai);
221
 /* Not dirty, locked or pinned: done with ail1, report I/O errors. */
222 if (!buffer_busy(bh)) {
223 if (!buffer_uptodate(bh))
224 gfs2_io_error_bh(sdp, bh);
225 list_move(&bd->bd_ail_st_list,
226 &ai->ai_ail2_list);
227 continue;
228 }
229
 /* Busy but not dirty (locked or pinned): nothing to submit. */
230 if (!buffer_dirty(bh))
231 continue;
232
 /* Move the entry to the head of ail1 before starting the write. */
233 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
234
 /* The log lock is released around the blocking wait and submission. */
235 gfs2_log_unlock(sdp);
236 wait_on_buffer(bh);
237 ll_rw_block(WRITE, 1, &bh);
238 gfs2_log_lock(sdp);
239
 /* Rescan from the tail after the lock was dropped. */
240 retry = 1;
241 break;
242 }
243 } while (retry);
244}
245
246/**
247 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
248 * @sdp: the filesystem
249 * @ai: the AIL entry
250 *
251 */
252
253int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
254{
255 struct gfs2_bufdata *bd, *s;
256 struct buffer_head *bh;
257
258 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
259 bd_ail_st_list) {
260 bh = bd->bd_bh;
261
262 gfs2_assert(sdp, bd->bd_ail == ai);
263
264 if (buffer_busy(bh)) {
265 if (flags & DIO_ALL)
266 continue;
267 else
268 break;
269 }
270
271 if (!buffer_uptodate(bh))
272 gfs2_io_error_bh(sdp, bh);
273
274 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
275 }
276
277 return list_empty(&ai->ai_ail1_list);
278}
279
280/**
281 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
282 * @sdp: the filesystem
283 * @ai: the AIL entry
284 *
285 */
286
287void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
288{
289 struct list_head *head = &ai->ai_ail2_list;
290 struct gfs2_bufdata *bd;
291
292 while (!list_empty(head)) {
293 bd = list_entry(head->prev, struct gfs2_bufdata,
294 bd_ail_st_list);
295 gfs2_assert(sdp, bd->bd_ail == ai);
296 bd->bd_ail = NULL;
297 list_del(&bd->bd_ail_st_list);
298 list_del(&bd->bd_ail_gl_list);
299 atomic_dec(&bd->bd_gl->gl_ail_count);
300 brelse(bd->bd_bh);
301 }
302}
303
304/**
305 * gfs2_ail_empty_gl - remove all buffers for a given lock from the AIL
306 * @gl: the glock
307 *
308 * None of the buffers should be dirty, locked, or pinned.
309 */
310
/*
 * If the glock has buffers in the AIL, opens a transaction sized by the
 * current AIL count, detaches every buffer on gl->gl_ail_list and adds a
 * revoke for its block number, then ends the transaction and flushes the
 * log.  Does nothing when the AIL count is zero.
 */
311void gfs2_ail_empty_gl(struct gfs2_glock *gl)
312{
313 struct gfs2_sbd *sdp = gl->gl_sbd;
314 unsigned int blocks;
315 struct list_head *head = &gl->gl_ail_list;
316 struct gfs2_bufdata *bd;
317 struct buffer_head *bh;
318 uint64_t blkno;
319 int error;
320
321 blocks = atomic_read(&gl->gl_ail_count);
322 if (!blocks)
323 return;
324
 /* The AIL count is passed as the third argument of gfs2_trans_begin(). */
325 error = gfs2_trans_begin(sdp, 0, blocks);
326 if (gfs2_assert_withdraw(sdp, !error))
327 return;
328
329 gfs2_log_lock(sdp);
330 while (!list_empty(head)) {
331 bd = list_entry(head->next, struct gfs2_bufdata,
332 bd_ail_gl_list);
333 bh = bd->bd_bh;
 /* Save the block number before the buffer reference is dropped. */
334 blkno = bh->b_blocknr;
335 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
336
337 bd->bd_ail = NULL;
338 list_del(&bd->bd_ail_st_list);
339 list_del(&bd->bd_ail_gl_list);
340 atomic_dec(&gl->gl_ail_count);
341 brelse(bh);
 /* The revoke is added with the log lock released. */
342 gfs2_log_unlock(sdp);
343
344 gfs2_trans_add_revoke(sdp, blkno);
345
346 gfs2_log_lock(sdp);
347 }
348 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
349 gfs2_log_unlock(sdp);
350
351 gfs2_trans_end(sdp);
352 gfs2_log_flush(sdp);
353}
354
355/**
356 * gfs2_meta_inval - Invalidate all buffers associated with a glock
357 * @gl: the glock
358 *
359 */
360
361void gfs2_meta_inval(struct gfs2_glock *gl)
362{
363 struct gfs2_sbd *sdp = gl->gl_sbd;
364 struct inode *aspace = gl->gl_aspace;
365 struct address_space *mapping = gl->gl_aspace->i_mapping;
366
367 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
368
369 atomic_inc(&aspace->i_writecount);
370 truncate_inode_pages(mapping, 0);
371 atomic_dec(&aspace->i_writecount);
372
373 gfs2_assert_withdraw(sdp, !mapping->nrpages);
374}
375
376/**
377 * gfs2_meta_sync - Sync all buffers associated with a glock
378 * @gl: The glock
379 * @flags: DIO_START | DIO_WAIT
380 *
381 */
382
383void gfs2_meta_sync(struct gfs2_glock *gl, int flags)
384{
385 struct address_space *mapping = gl->gl_aspace->i_mapping;
386 int error = 0;
387
388 if (flags & DIO_START)
389 filemap_fdatawrite(mapping);
390 if (!error && (flags & DIO_WAIT))
391 error = filemap_fdatawait(mapping);
392
393 if (error)
394 gfs2_io_error(gl->gl_sbd);
395}
396
397/**
398 * getbuf - Get a buffer with a given address space
399 * @sdp: the filesystem
400 * @aspace: the address space
401 * @blkno: the block number (filesystem scope)
402 * @create: 1 if the buffer should be created
403 *
404 * Returns: the buffer
405 */
406
407static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
408 uint64_t blkno, int create)
409{
410 struct page *page;
411 struct buffer_head *bh;
412 unsigned int shift;
413 unsigned long index;
414 unsigned int bufnum;
415
416 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
417 index = blkno >> shift; /* convert block to page */
418 bufnum = blkno - (index << shift); /* block buf index within page */
419
420 if (create) {
421 for (;;) {
422 page = grab_cache_page(aspace->i_mapping, index);
423 if (page)
424 break;
425 yield();
426 }
427 } else {
428 page = find_lock_page(aspace->i_mapping, index);
429 if (!page)
430 return NULL;
431 }
432
433 if (!page_has_buffers(page))
434 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
435
436 /* Locate header for our buffer within our page */
437 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
438 /* Do nothing */;
439 get_bh(bh);
440
441 if (!buffer_mapped(bh))
442 map_bh(bh, sdp->sd_vfs, blkno);
443
444 unlock_page(page);
445 mark_page_accessed(page);
446 page_cache_release(page);
447
448 return bh;
449}
450
451static void meta_prep_new(struct buffer_head *bh)
452{
453 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
454
455 lock_buffer(bh);
456 clear_buffer_dirty(bh);
457 set_buffer_uptodate(bh);
458 unlock_buffer(bh);
459
460 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
461}
462
463/**
464 * gfs2_meta_new - Get a block
465 * @gl: The glock associated with this block
466 * @blkno: The block number
467 *
468 * Returns: The buffer
469 */
470
471struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno)
472{
473 struct buffer_head *bh;
474 bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
475 meta_prep_new(bh);
476 return bh;
477}
478
479/**
480 * gfs2_meta_read - Read a block from disk
481 * @gl: The glock covering the block
482 * @blkno: The block number
483 * @flags: flags to gfs2_dreread()
484 * @bhp: the place where the buffer is returned (NULL on failure)
485 *
486 * Returns: errno
487 */
488
489int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags,
490 struct buffer_head **bhp)
491{
492 int error;
493
494 *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
495 error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags);
496 if (error)
497 brelse(*bhp);
498
499 return error;
500}
501
502/**
503 * gfs2_meta_reread - Reread a block from disk
504 * @sdp: the filesystem
505 * @bh: The block to read
506 * @flags: Flags that control the read
507 *
508 * Returns: errno
509 */
510
511int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags)
512{
513 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
514 return -EIO;
515
516 if (flags & DIO_FORCE)
517 clear_buffer_uptodate(bh);
518
519 if ((flags & DIO_START) && !buffer_uptodate(bh))
520 ll_rw_block(READ, 1, &bh);
521
522 if (flags & DIO_WAIT) {
523 wait_on_buffer(bh);
524
525 if (!buffer_uptodate(bh)) {
526 struct gfs2_trans *tr = get_transaction;
527 if (tr && tr->tr_touched)
528 gfs2_io_error_bh(sdp, bh);
529 return -EIO;
530 }
531 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
532 return -EIO;
533 }
534
535 return 0;
536}
537
538/**
539 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
540 * @gl: the glock the buffer belongs to
541 * @bh: The buffer to be attached to
542 * @meta: Flag to indicate whether its metadata or not
543 */
544
545void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
546 int meta)
547{
548 struct gfs2_bufdata *bd;
549
550 if (meta)
551 lock_page(bh->b_page);
552
553 if (get_v2bd(bh)) {
554 if (meta)
555 unlock_page(bh->b_page);
556 return;
557 }
558
559 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
560 memset(bd, 0, sizeof(struct gfs2_bufdata));
561
562 bd->bd_bh = bh;
563 bd->bd_gl = gl;
564
565 INIT_LIST_HEAD(&bd->bd_list_tr);
566 if (meta) {
567 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
568 } else {
569 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
570 get_bh(bh);
571 }
572 set_v2bd(bh, bd);
573
574 if (meta)
575 unlock_page(bh->b_page);
576}
577
578/**
579 * gfs2_pin - Pin a buffer in memory
580 * @sdp: the filesystem the buffer belongs to
581 * @bh: The buffer to be pinned
582 *
583 */
584
/*
 * Marks the buffer pinned, waits for any I/O on it, clears its dirty bit
 * and takes an extra buffer reference.  Asserts (withdraw) that the
 * journal is live and that the buffer was not already pinned.
 *
 * NOTE(review): bd is dereferenced without a NULL check, so the buffer is
 * presumably required to have a gfs2_bufdata attached -- confirm.
 */
585void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
586{
587 struct gfs2_bufdata *bd = get_v2bd(bh);
588
589 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
590
 /* Pinning an already pinned buffer is a fatal inconsistency. */
591 if (test_set_buffer_pinned(bh))
592 gfs2_assert_withdraw(sdp, 0);
593
594 wait_on_buffer(bh);
595
596 /* If this buffer is in the AIL and it has already been written
597 to in-place disk block, remove it from the AIL. */
598
599 gfs2_log_lock(sdp);
600 if (bd->bd_ail && !buffer_in_io(bh))
601 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
602 gfs2_log_unlock(sdp);
603
604 clear_buffer_dirty(bh);
605 wait_on_buffer(bh);
606
607 if (!buffer_uptodate(bh))
608 gfs2_io_error_bh(sdp, bh);
609
 /* Extra reference taken for the pin. */
610 get_bh(bh);
611}
612
613/**
614 * gfs2_unpin - Unpin a buffer
615 * @sdp: the filesystem the buffer belongs to
616 * @bh: The buffer to unpin
617 * @ai: the AIL entry whose ail1 list the buffer is added to
618 *
619 */
620
/*
 * Marks the buffer dirty, clears its pinned bit and, under the log lock,
 * puts it on @ai's ail1 list.  Asserts (withdraw) that the buffer is up
 * to date and was pinned.
 */
621void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
622 struct gfs2_ail *ai)
623{
624 struct gfs2_bufdata *bd = get_v2bd(bh);
625
626 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
627
628 if (!buffer_pinned(bh))
629 gfs2_assert_withdraw(sdp, 0);
630
631 mark_buffer_dirty(bh);
632 clear_buffer_pinned(bh);
633
634 gfs2_log_lock(sdp);
635 if (bd->bd_ail) {
 /* Already in an AIL entry: unlink it there and drop one reference. */
636 list_del(&bd->bd_ail_st_list);
637 brelse(bh);
638 } else {
 /* First time in the AIL: link onto the glock's list and count it. */
639 struct gfs2_glock *gl = bd->bd_gl;
640 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
641 atomic_inc(&gl->gl_ail_count);
642 }
643 bd->bd_ail = ai;
644 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
645 gfs2_log_unlock(sdp);
646}
647
648/**
649 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
650 * @ip: the inode who owns the buffers
651 * @bstart: the first buffer in the run
652 * @blen: the number of buffers in the run
653 *
654 */
655
/*
 * For each of the @blen blocks starting at @bstart that currently has a
 * cached buffer in the inode glock's address space: unpin it and take it
 * off the log element list, remove it from the AIL (adding a revoke for
 * the block), and clear its dirty and up-to-date bits.  Blocks without a
 * cached buffer are skipped.
 */
656void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
657{
658 struct gfs2_sbd *sdp = ip->i_sbd;
659 struct inode *aspace = ip->i_gl->gl_aspace;
660 struct buffer_head *bh;
661
662 while (blen) {
 /* NO_CREATE: only buffers already in the page cache are looked at. */
663 bh = getbuf(sdp, aspace, bstart, NO_CREATE);
664 if (bh) {
665 struct gfs2_bufdata *bd = get_v2bd(bh);
666
 /* NOTE(review): bd is used here before the NULL check below; a pinned
    buffer presumably always has a bufdata attached -- confirm. */
667 if (test_clear_buffer_pinned(bh)) {
668 gfs2_log_lock(sdp);
669 list_del_init(&bd->bd_le.le_list);
670 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
671 sdp->sd_log_num_buf--;
672 gfs2_log_unlock(sdp);
673 get_transaction->tr_num_buf_rm++;
 /* Drops one reference; gfs2_pin() takes one when pinning. */
674 brelse(bh);
675 }
676 if (bd) {
677 gfs2_log_lock(sdp);
678 if (bd->bd_ail) {
679 uint64_t blkno = bh->b_blocknr;
680 bd->bd_ail = NULL;
681 list_del(&bd->bd_ail_st_list);
682 list_del(&bd->bd_ail_gl_list);
683 atomic_dec(&bd->bd_gl->gl_ail_count);
684 brelse(bh);
 /* The revoke is added after the log lock has been released. */
685 gfs2_log_unlock(sdp);
686 gfs2_trans_add_revoke(sdp, blkno);
687 } else
688 gfs2_log_unlock(sdp);
689 }
690
691 lock_buffer(bh);
692 clear_buffer_dirty(bh);
693 clear_buffer_uptodate(bh);
694 unlock_buffer(bh);
695
 /* Release the reference obtained from getbuf(). */
696 brelse(bh);
697 }
698
699 bstart++;
700 blen--;
701 }
702}
703
704/**
705 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
706 * @ip: The GFS2 inode
707 *
708 * This releases buffers that are in the most-recently-used array of
709 * blocks used for indirect block addressing for this inode.
710 */
711
712void gfs2_meta_cache_flush(struct gfs2_inode *ip)
713{
714 struct buffer_head **bh_slot;
715 unsigned int x;
716
717 spin_lock(&ip->i_spin);
718
719 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
720 bh_slot = &ip->i_cache[x];
721 if (!*bh_slot)
722 break;
723 brelse(*bh_slot);
724 *bh_slot = NULL;
725 }
726
727 spin_unlock(&ip->i_spin);
728}
729
730/**
731 * gfs2_meta_indirect_buffer - Get a metadata buffer
732 * @ip: The GFS2 inode
733 * @height: The level of this buf in the metadata (indir addr) tree (if any)
734 * @num: The block number (device relative) of the buffer
735 * @new: Non-zero if we may create a new buffer
736 * @bhp: the buffer is returned here
737 *
738 * Try to use the gfs2_inode's MRU metadata tree cache.
739 *
740 * Returns: errno
741 */
742
/*
 * Looks in ip->i_cache[height] for a buffer whose block number is @num.
 * On a hit the cached buffer is reused (re-prepared if @new, otherwise
 * re-read).  On a miss the buffer is obtained through gfs2_meta_new() or
 * gfs2_meta_read() and stored in the cache slot with its own reference.
 * New buffers are added to the transaction and formatted as indirect
 * blocks; existing ones have their metadata type checked.
 */
743int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
744 int new, struct buffer_head **bhp)
745{
746 struct buffer_head *bh, **bh_slot = ip->i_cache + height;
747 int error;
748
 /* Cache lookup: take a reference only if the slot holds block @num. */
749 spin_lock(&ip->i_spin);
750 bh = *bh_slot;
751 if (bh) {
752 if (bh->b_blocknr == num)
753 get_bh(bh);
754 else
755 bh = NULL;
756 }
757 spin_unlock(&ip->i_spin);
758
759 if (bh) {
760 if (new)
761 meta_prep_new(bh);
762 else {
763 error = gfs2_meta_reread(ip->i_sbd, bh,
764 DIO_START | DIO_WAIT);
765 if (error) {
766 brelse(bh);
767 return error;
768 }
769 }
770 } else {
771 if (new)
772 bh = gfs2_meta_new(ip->i_gl, num);
773 else {
774 error = gfs2_meta_read(ip->i_gl, num,
775 DIO_START | DIO_WAIT, &bh);
776 if (error)
777 return error;
778 }
779
 /* Install in the cache: drop the slot's old buffer, take a reference
    for the slot in addition to the one returned to the caller. */
780 spin_lock(&ip->i_spin);
781 if (*bh_slot != bh) {
782 brelse(*bh_slot);
783 *bh_slot = bh;
784 get_bh(bh);
785 }
786 spin_unlock(&ip->i_spin);
787 }
788
789 if (new) {
 /* A new buffer at height 0 trips the assertion and fails with -EIO. */
790 if (gfs2_assert_warn(ip->i_sbd, height)) {
791 brelse(bh);
792 return -EIO;
793 }
794 gfs2_trans_add_bh(ip->i_gl, bh, 1);
795 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
796 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
797
 /* Existing block: height 0 must be a dinode, others an indirect block. */
798 } else if (gfs2_metatype_check(ip->i_sbd, bh,
799 (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) {
800 brelse(bh);
801 return -EIO;
802 }
803
804 *bhp = bh;
805
806 return 0;
807}
808
809/**
810 * gfs2_meta_ra - start readahead on an extent of a file
811 * @gl: the glock the blocks belong to
812 * @dblock: the starting disk block
813 * @extlen: the number of blocks in the extent
814 *
815 */
816
817void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen)
818{
819 struct gfs2_sbd *sdp = gl->gl_sbd;
820 struct inode *aspace = gl->gl_aspace;
821 struct buffer_head *first_bh, *bh;
822 uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
823 sdp->sd_sb.sb_bsize_shift;
824 int error;
825
826 if (!extlen || !max_ra)
827 return;
828 if (extlen > max_ra)
829 extlen = max_ra;
830
831 first_bh = getbuf(sdp, aspace, dblock, CREATE);
832
833 if (buffer_uptodate(first_bh))
834 goto out;
835 if (!buffer_locked(first_bh)) {
836 error = gfs2_meta_reread(sdp, first_bh, DIO_START);
837 if (error)
838 goto out;
839 }
840
841 dblock++;
842 extlen--;
843
844 while (extlen) {
845 bh = getbuf(sdp, aspace, dblock, CREATE);
846
847 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
848 error = gfs2_meta_reread(sdp, bh, DIO_START);
849 brelse(bh);
850 if (error)
851 goto out;
852 } else
853 brelse(bh);
854
855 dblock++;
856 extlen--;
857
858 if (buffer_uptodate(first_bh))
859 break;
860 }
861
862 out:
863 brelse(first_bh);
864}
865
866/**
867 * gfs2_meta_syncfs - sync all the buffers in a filesystem
868 * @sdp: the filesystem
869 *
870 */
871
872void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
873{
874 gfs2_log_flush(sdp);
875 for (;;) {
876 gfs2_ail1_start(sdp, DIO_ALL);
877 if (gfs2_ail1_empty(sdp, DIO_ALL))
878 break;
879 msleep(100);
880 }
881}
882
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
new file mode 100644
index 000000000000..d72144d5d727
--- /dev/null
+++ b/fs/gfs2/meta_io.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIO_DOT_H__
11#define __DIO_DOT_H__
12
/* Zero the buffer's entire data area (b_size bytes). */
13static inline void gfs2_buffer_clear(struct buffer_head *bh)
14{
15 memset(bh->b_data, 0, bh->b_size);
16}
17
/* Zero the buffer's data from byte offset @head to the end, keeping the first @head bytes. */
18static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
19{
20 memset(bh->b_data + head, 0, bh->b_size - head);
21}
22
23static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset,
24 int amount, int journaled)
25{
26 int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0;
27 int z_len1 = offset - z_off1;
28 int z_off2 = offset + amount;
29 int z_len2 = (bh)->b_size - z_off2;
30
31 if (z_len1)
32 memset(bh->b_data + z_off1, 0, z_len1);
33
34 if (z_len2)
35 memset(bh->b_data + z_off2, 0, z_len2);
36}
37
/*
 * Copy the data of @from_bh that follows its first @from_head bytes into
 * @to_bh starting at offset @to_head, then zero the last
 * (@from_head - @to_head) bytes of @to_bh.
 * NOTE(review): the arithmetic assumes both buffers have the same b_size
 * and from_head >= to_head -- confirm against callers.
 */
38static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
39 int to_head,
40 struct buffer_head *from_bh,
41 int from_head)
42{
43 memcpy(to_bh->b_data + to_head,
44 from_bh->b_data + from_head,
45 from_bh->b_size - from_head);
46 memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
47 0,
48 from_head - to_head);
49}
50
/* Address space creation and teardown. */
51struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
52void gfs2_aspace_put(struct inode *aspace);
53
/* AIL list processing. */
54void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
55int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags);
56void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
57void gfs2_ail_empty_gl(struct gfs2_glock *gl);
58
/* Invalidate or sync every buffer in a glock's address space. */
59void gfs2_meta_inval(struct gfs2_glock *gl);
60void gfs2_meta_sync(struct gfs2_glock *gl, int flags);
61
/* Obtaining and (re)reading metadata buffers. */
62struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno);
63int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno,
64 int flags, struct buffer_head **bhp);
65int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags);
66
/* Per-buffer GFS2 data and pinning. */
67void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
68 int meta);
69void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
70void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
71 struct gfs2_ail *ai);
72
73void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
74
/* Per-inode cache of indirect-block buffers. */
75void gfs2_meta_cache_flush(struct gfs2_inode *ip);
76int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
77 int new, struct buffer_head **bhp);
78
/*
 * Fetch the inode's own block (ip->i_num.no_addr) as an existing buffer
 * at height 0 via gfs2_meta_indirect_buffer().  Returns its errno and
 * stores the buffer in *bhp.
 */
79static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
80 struct buffer_head **bhp)
81{
82 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
83}
84
/* Readahead on an extent, and whole-filesystem metadata sync. */
85void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen);
86void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
87
88#endif /* __DIO_DOT_H__ */
89
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
new file mode 100644
index 000000000000..3e42697aafc7
--- /dev/null
+++ b/fs/gfs2/mount.c
@@ -0,0 +1,211 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "mount.h"
19#include "sys.h"
20
21/**
22 * gfs2_mount_args - Parse mount options
23 * @sdp:
24 * @data:
25 *
26 * Return: errno
27 */
28
29int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
30{
31 struct gfs2_args *args = &sdp->sd_args;
32 char *data = data_arg;
33 char *options, *o, *v;
34 int error = 0;
35
36 if (!remount) {
37 /* If someone preloaded options, use those instead */
38 spin_lock(&gfs2_sys_margs_lock);
39 if (gfs2_sys_margs) {
40 data = gfs2_sys_margs;
41 gfs2_sys_margs = NULL;
42 }
43 spin_unlock(&gfs2_sys_margs_lock);
44
45 /* Set some defaults */
46 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
47 args->ar_quota = GFS2_QUOTA_DEFAULT;
48 args->ar_data = GFS2_DATA_DEFAULT;
49 }
50
51 /* Split the options into tokens with the "," character and
52 process them */
53
54 for (options = data; (o = strsep(&options, ",")); ) {
55 if (!*o)
56 continue;
57
58 v = strchr(o, '=');
59 if (v)
60 *v++ = 0;
61
62 if (!strcmp(o, "lockproto")) {
63 if (!v)
64 goto need_value;
65 if (remount && strcmp(v, args->ar_lockproto))
66 goto cant_remount;
67 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
68 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
69 }
70
71 else if (!strcmp(o, "locktable")) {
72 if (!v)
73 goto need_value;
74 if (remount && strcmp(v, args->ar_locktable))
75 goto cant_remount;
76 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
77 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
78 }
79
80 else if (!strcmp(o, "hostdata")) {
81 if (!v)
82 goto need_value;
83 if (remount && strcmp(v, args->ar_hostdata))
84 goto cant_remount;
85 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
86 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
87 }
88
89 else if (!strcmp(o, "spectator")) {
90 if (remount && !args->ar_spectator)
91 goto cant_remount;
92 args->ar_spectator = 1;
93 sdp->sd_vfs->s_flags |= MS_RDONLY;
94 }
95
96 else if (!strcmp(o, "ignore_local_fs")) {
97 if (remount && !args->ar_ignore_local_fs)
98 goto cant_remount;
99 args->ar_ignore_local_fs = 1;
100 }
101
102 else if (!strcmp(o, "localflocks")) {
103 if (remount && !args->ar_localflocks)
104 goto cant_remount;
105 args->ar_localflocks = 1;
106 }
107
108 else if (!strcmp(o, "localcaching")) {
109 if (remount && !args->ar_localcaching)
110 goto cant_remount;
111 args->ar_localcaching = 1;
112 }
113
114 else if (!strcmp(o, "debug"))
115 args->ar_debug = 1;
116
117 else if (!strcmp(o, "nodebug"))
118 args->ar_debug = 0;
119
120 else if (!strcmp(o, "upgrade")) {
121 if (remount && !args->ar_upgrade)
122 goto cant_remount;
123 args->ar_upgrade = 1;
124 }
125
126 else if (!strcmp(o, "num_glockd")) {
127 unsigned int x;
128 if (!v)
129 goto need_value;
130 sscanf(v, "%u", &x);
131 if (remount && x != args->ar_num_glockd)
132 goto cant_remount;
133 if (!x || x > GFS2_GLOCKD_MAX) {
134 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
135 GFS2_GLOCKD_MAX, x);
136 error = -EINVAL;
137 break;
138 }
139 args->ar_num_glockd = x;
140 }
141
142 else if (!strcmp(o, "acl")) {
143 args->ar_posix_acl = 1;
144 sdp->sd_vfs->s_flags |= MS_POSIXACL;
145 }
146
147 else if (!strcmp(o, "noacl")) {
148 args->ar_posix_acl = 0;
149 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
150 }
151
152 else if (!strcmp(o, "quota")) {
153 if (!v)
154 goto need_value;
155 if (!strcmp(v, "off"))
156 args->ar_quota = GFS2_QUOTA_OFF;
157 else if (!strcmp(v, "account"))
158 args->ar_quota = GFS2_QUOTA_ACCOUNT;
159 else if (!strcmp(v, "on"))
160 args->ar_quota = GFS2_QUOTA_ON;
161 else {
162 fs_info(sdp, "invalid value for quota\n");
163 error = -EINVAL;
164 break;
165 }
166 }
167
168 else if (!strcmp(o, "suiddir"))
169 args->ar_suiddir = 1;
170
171 else if (!strcmp(o, "nosuiddir"))
172 args->ar_suiddir = 0;
173
174 else if (!strcmp(o, "data")) {
175 if (!v)
176 goto need_value;
177 if (!strcmp(v, "writeback"))
178 args->ar_data = GFS2_DATA_WRITEBACK;
179 else if (!strcmp(v, "ordered"))
180 args->ar_data = GFS2_DATA_ORDERED;
181 else {
182 fs_info(sdp, "invalid value for data\n");
183 error = -EINVAL;
184 break;
185 }
186 }
187
188 else {
189 fs_info(sdp, "unknown option: %s\n", o);
190 error = -EINVAL;
191 break;
192 }
193 }
194
195 if (error)
196 fs_info(sdp, "invalid mount option(s)\n");
197
198 if (data != data_arg)
199 kfree(data);
200
201 return error;
202
203 need_value:
204 fs_info(sdp, "need value for option %s\n", o);
205 return -EINVAL;
206
207 cant_remount:
208 fs_info(sdp, "can't remount with option %s\n", o);
209 return -EINVAL;
210}
211
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
new file mode 100644
index 000000000000..bc8331cd7b2c
--- /dev/null
+++ b/fs/gfs2/mount.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
/* Parses the comma-separated mount option string into sdp->sd_args; returns errno. */
13int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
14
15#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
new file mode 100644
index 000000000000..5a0bdc22a1f4
--- /dev/null
+++ b/fs/gfs2/ondisk.c
@@ -0,0 +1,528 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include <linux/gfs2_ondisk.h>
19
20#define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \
21 struct->member);
22#define pa(struct, member, count) print_array(#member, struct->member, count);
23
24/**
25 * print_array - Print out an array of bytes
26 * @title: what to print before the array
27 * @buf: the array
28 * @count: the number of bytes
29 *
30 */
31
32static void print_array(char *title, char *buf, int count)
33{
34 int x;
35
36 printk(KERN_INFO " %s =\n" KERN_INFO, title);
37 for (x = 0; x < count; x++) {
38 printk("%.2X ", (unsigned char)buf[x]);
39 if (x % 16 == 15)
40 printk("\n" KERN_INFO);
41 }
42 if (x % 16)
43 printk("\n");
44}
45
46/*
47 * gfs2_xxx_in - read in an xxx struct
48 * first arg: the cpu-order structure
49 * buf: the disk-order buffer
50 *
51 * gfs2_xxx_out - write out an xxx struct
52 * first arg: the cpu-order structure
53 * buf: the disk-order buffer
54 *
55 * gfs2_xxx_print - print out an xxx struct
56 * first arg: the cpu-order structure
57 */
58
59void gfs2_inum_in(struct gfs2_inum *no, char *buf)
60{
61 struct gfs2_inum *str = (struct gfs2_inum *)buf;
62
63 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
64 no->no_addr = be64_to_cpu(str->no_addr);
65}
66
67void gfs2_inum_out(struct gfs2_inum *no, char *buf)
68{
69 struct gfs2_inum *str = (struct gfs2_inum *)buf;
70
71 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
72 str->no_addr = cpu_to_be64(no->no_addr);
73}
74
75void gfs2_inum_print(struct gfs2_inum *no)
76{
77 pv(no, no_formal_ino, "%llu");
78 pv(no, no_addr, "%llu");
79}
80
81static void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf)
82{
83 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
84
85 mh->mh_magic = be32_to_cpu(str->mh_magic);
86 mh->mh_type = be16_to_cpu(str->mh_type);
87 mh->mh_format = be16_to_cpu(str->mh_format);
88}
89
90static void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf)
91{
92 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
93
94 str->mh_magic = cpu_to_be32(mh->mh_magic);
95 str->mh_type = cpu_to_be16(mh->mh_type);
96 str->mh_format = cpu_to_be16(mh->mh_format);
97}
98
99void gfs2_meta_header_print(struct gfs2_meta_header *mh)
100{
101 pv(mh, mh_magic, "0x%.8X");
102 pv(mh, mh_type, "%u");
103 pv(mh, mh_format, "%u");
104}
105
106void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
107{
108 struct gfs2_sb *str = (struct gfs2_sb *)buf;
109
110 gfs2_meta_header_in(&sb->sb_header, buf);
111
112 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
113 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
114 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
115 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
116
117 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
118 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
119
120 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
121 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
122}
123
124void gfs2_sb_print(struct gfs2_sb *sb)
125{
126 gfs2_meta_header_print(&sb->sb_header);
127
128 pv(sb, sb_fs_format, "%u");
129 pv(sb, sb_multihost_format, "%u");
130
131 pv(sb, sb_bsize, "%u");
132 pv(sb, sb_bsize_shift, "%u");
133
134 gfs2_inum_print(&sb->sb_master_dir);
135
136 pv(sb, sb_lockproto, "%s");
137 pv(sb, sb_locktable, "%s");
138}
139
140void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf)
141{
142 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
143
144 ri->ri_addr = be64_to_cpu(str->ri_addr);
145 ri->ri_length = be32_to_cpu(str->ri_length);
146 ri->ri_data0 = be64_to_cpu(str->ri_data0);
147 ri->ri_data = be32_to_cpu(str->ri_data);
148 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
149
150}
151
152void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf)
153{
154 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
155
156 str->ri_addr = cpu_to_be64(ri->ri_addr);
157 str->ri_length = cpu_to_be32(ri->ri_length);
158 str->__pad = 0;
159
160 str->ri_data0 = cpu_to_be64(ri->ri_data0);
161 str->ri_data = cpu_to_be32(ri->ri_data);
162 str->ri_bitbytes = cpu_to_be32(ri->ri_bitbytes);
163 memset(str->ri_reserved, 0, sizeof(str->ri_reserved));
164}
165
166void gfs2_rindex_print(struct gfs2_rindex *ri)
167{
168 pv(ri, ri_addr, "%llu");
169 pv(ri, ri_length, "%u");
170
171 pv(ri, ri_data0, "%llu");
172 pv(ri, ri_data, "%u");
173
174 pv(ri, ri_bitbytes, "%u");
175}
176
177void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
178{
179 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
180
181 gfs2_meta_header_in(&rg->rg_header, buf);
182 rg->rg_flags = be32_to_cpu(str->rg_flags);
183 rg->rg_free = be32_to_cpu(str->rg_free);
184 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
185}
186
187void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf)
188{
189 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
190
191 gfs2_meta_header_out(&rg->rg_header, buf);
192 str->rg_flags = cpu_to_be32(rg->rg_flags);
193 str->rg_free = cpu_to_be32(rg->rg_free);
194 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
195
196 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
197}
198
199void gfs2_rgrp_print(struct gfs2_rgrp *rg)
200{
201 gfs2_meta_header_print(&rg->rg_header);
202 pv(rg, rg_flags, "%u");
203 pv(rg, rg_free, "%u");
204 pv(rg, rg_dinodes, "%u");
205
206 pa(rg, rg_reserved, 36);
207}
208
209void gfs2_quota_in(struct gfs2_quota *qu, char *buf)
210{
211 struct gfs2_quota *str = (struct gfs2_quota *)buf;
212
213 qu->qu_limit = be64_to_cpu(str->qu_limit);
214 qu->qu_warn = be64_to_cpu(str->qu_warn);
215 qu->qu_value = be64_to_cpu(str->qu_value);
216}
217
218void gfs2_quota_out(struct gfs2_quota *qu, char *buf)
219{
220 struct gfs2_quota *str = (struct gfs2_quota *)buf;
221
222 str->qu_limit = cpu_to_be64(qu->qu_limit);
223 str->qu_warn = cpu_to_be64(qu->qu_warn);
224 str->qu_value = cpu_to_be64(qu->qu_value);
225}
226
227void gfs2_quota_print(struct gfs2_quota *qu)
228{
229 pv(qu, qu_limit, "%llu");
230 pv(qu, qu_warn, "%llu");
231 pv(qu, qu_value, "%lld");
232}
233
234void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
235{
236 struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
237
238 gfs2_meta_header_in(&di->di_header, buf);
239 gfs2_inum_in(&di->di_num, (char *)&str->di_num);
240
241 di->di_mode = be32_to_cpu(str->di_mode);
242 di->di_uid = be32_to_cpu(str->di_uid);
243 di->di_gid = be32_to_cpu(str->di_gid);
244 di->di_nlink = be32_to_cpu(str->di_nlink);
245 di->di_size = be64_to_cpu(str->di_size);
246 di->di_blocks = be64_to_cpu(str->di_blocks);
247 di->di_atime = be64_to_cpu(str->di_atime);
248 di->di_mtime = be64_to_cpu(str->di_mtime);
249 di->di_ctime = be64_to_cpu(str->di_ctime);
250 di->di_major = be32_to_cpu(str->di_major);
251 di->di_minor = be32_to_cpu(str->di_minor);
252
253 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
254 di->di_goal_data = be64_to_cpu(str->di_goal_data);
255
256 di->di_flags = be32_to_cpu(str->di_flags);
257 di->di_payload_format = be32_to_cpu(str->di_payload_format);
258 di->di_height = be16_to_cpu(str->di_height);
259
260 di->di_depth = be16_to_cpu(str->di_depth);
261 di->di_entries = be32_to_cpu(str->di_entries);
262
263 di->di_eattr = be64_to_cpu(str->di_eattr);
264
265}
266
267void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
268{
269 struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
270
271 gfs2_meta_header_out(&di->di_header, buf);
272 gfs2_inum_out(&di->di_num, (char *)&str->di_num);
273
274 str->di_mode = cpu_to_be32(di->di_mode);
275 str->di_uid = cpu_to_be32(di->di_uid);
276 str->di_gid = cpu_to_be32(di->di_gid);
277 str->di_nlink = cpu_to_be32(di->di_nlink);
278 str->di_size = cpu_to_be64(di->di_size);
279 str->di_blocks = cpu_to_be64(di->di_blocks);
280 str->di_atime = cpu_to_be64(di->di_atime);
281 str->di_mtime = cpu_to_be64(di->di_mtime);
282 str->di_ctime = cpu_to_be64(di->di_ctime);
283 str->di_major = cpu_to_be32(di->di_major);
284 str->di_minor = cpu_to_be32(di->di_minor);
285
286 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
287 str->di_goal_data = cpu_to_be64(di->di_goal_data);
288
289 str->di_flags = cpu_to_be32(di->di_flags);
290 str->di_payload_format = cpu_to_be32(di->di_payload_format);
291 str->di_height = cpu_to_be16(di->di_height);
292
293 str->di_depth = cpu_to_be16(di->di_depth);
294 str->di_entries = cpu_to_be32(di->di_entries);
295
296 str->di_eattr = cpu_to_be64(di->di_eattr);
297
298}
299
300void gfs2_dinode_print(struct gfs2_dinode *di)
301{
302 gfs2_meta_header_print(&di->di_header);
303 gfs2_inum_print(&di->di_num);
304
305 pv(di, di_mode, "0%o");
306 pv(di, di_uid, "%u");
307 pv(di, di_gid, "%u");
308 pv(di, di_nlink, "%u");
309 pv(di, di_size, "%llu");
310 pv(di, di_blocks, "%llu");
311 pv(di, di_atime, "%lld");
312 pv(di, di_mtime, "%lld");
313 pv(di, di_ctime, "%lld");
314 pv(di, di_major, "%u");
315 pv(di, di_minor, "%u");
316
317 pv(di, di_goal_meta, "%llu");
318 pv(di, di_goal_data, "%llu");
319
320 pv(di, di_flags, "0x%.8X");
321 pv(di, di_payload_format, "%u");
322 pv(di, di_height, "%u");
323
324 pv(di, di_depth, "%u");
325 pv(di, di_entries, "%u");
326
327 pv(di, di_eattr, "%llu");
328}
329
330void gfs2_dirent_print(struct gfs2_dirent *de, char *name)
331{
332 char buf[GFS2_FNAMESIZE + 1];
333
334 gfs2_inum_print(&de->de_inum);
335 pv(de, de_hash, "0x%.8X");
336 pv(de, de_rec_len, "%u");
337 pv(de, de_name_len, "%u");
338 pv(de, de_type, "%u");
339
340 memset(buf, 0, GFS2_FNAMESIZE + 1);
341 memcpy(buf, name, de->de_name_len);
342 printk(KERN_INFO " name = %s\n", buf);
343}
344
345void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf)
346{
347 struct gfs2_leaf *str = (struct gfs2_leaf *)buf;
348
349 gfs2_meta_header_in(&lf->lf_header, buf);
350 lf->lf_depth = be16_to_cpu(str->lf_depth);
351 lf->lf_entries = be16_to_cpu(str->lf_entries);
352 lf->lf_dirent_format = be32_to_cpu(str->lf_dirent_format);
353 lf->lf_next = be64_to_cpu(str->lf_next);
354}
355
356void gfs2_leaf_print(struct gfs2_leaf *lf)
357{
358 gfs2_meta_header_print(&lf->lf_header);
359 pv(lf, lf_depth, "%u");
360 pv(lf, lf_entries, "%u");
361 pv(lf, lf_dirent_format, "%u");
362 pv(lf, lf_next, "%llu");
363
364 pa(lf, lf_reserved, 32);
365}
366
367void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf)
368{
369 struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
370
371 ea->ea_rec_len = be32_to_cpu(str->ea_rec_len);
372 ea->ea_data_len = be32_to_cpu(str->ea_data_len);
373 ea->ea_name_len = str->ea_name_len;
374 ea->ea_type = str->ea_type;
375 ea->ea_flags = str->ea_flags;
376 ea->ea_num_ptrs = str->ea_num_ptrs;
377}
378
379void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf)
380{
381 struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf;
382
383 str->ea_rec_len = cpu_to_be32(ea->ea_rec_len);
384 str->ea_data_len = cpu_to_be32(ea->ea_data_len);
385 str->ea_name_len = ea->ea_name_len;
386 str->ea_type = ea->ea_type;
387 str->ea_flags = ea->ea_flags;
388 str->ea_num_ptrs = ea->ea_num_ptrs;
389 str->__pad = 0;
390}
391
392void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name)
393{
394 char buf[GFS2_EA_MAX_NAME_LEN + 1];
395
396 pv(ea, ea_rec_len, "%u");
397 pv(ea, ea_data_len, "%u");
398 pv(ea, ea_name_len, "%u");
399 pv(ea, ea_type, "%u");
400 pv(ea, ea_flags, "%u");
401 pv(ea, ea_num_ptrs, "%u");
402
403 memset(buf, 0, GFS2_EA_MAX_NAME_LEN + 1);
404 memcpy(buf, name, ea->ea_name_len);
405 printk(KERN_INFO " name = %s\n", buf);
406}
407
408void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
409{
410 struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
411
412 gfs2_meta_header_in(&lh->lh_header, buf);
413 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
414 lh->lh_flags = be32_to_cpu(str->lh_flags);
415 lh->lh_tail = be32_to_cpu(str->lh_tail);
416 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
417 lh->lh_hash = be32_to_cpu(str->lh_hash);
418}
419
420void gfs2_log_header_print(struct gfs2_log_header *lh)
421{
422 gfs2_meta_header_print(&lh->lh_header);
423 pv(lh, lh_sequence, "%llu");
424 pv(lh, lh_flags, "0x%.8X");
425 pv(lh, lh_tail, "%u");
426 pv(lh, lh_blkno, "%u");
427 pv(lh, lh_hash, "0x%.8X");
428}
429
430void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld)
431{
432 gfs2_meta_header_print(&ld->ld_header);
433 pv(ld, ld_type, "%u");
434 pv(ld, ld_length, "%u");
435 pv(ld, ld_data1, "%u");
436 pv(ld, ld_data2, "%u");
437
438 pa(ld, ld_reserved, 32);
439}
440
441void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf)
442{
443 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
444
445 ir->ir_start = be64_to_cpu(str->ir_start);
446 ir->ir_length = be64_to_cpu(str->ir_length);
447}
448
449void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf)
450{
451 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
452
453 str->ir_start = cpu_to_be64(ir->ir_start);
454 str->ir_length = cpu_to_be64(ir->ir_length);
455}
456
457void gfs2_inum_range_print(struct gfs2_inum_range *ir)
458{
459 pv(ir, ir_start, "%llu");
460 pv(ir, ir_length, "%llu");
461}
462
463void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf)
464{
465 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
466
467 sc->sc_total = be64_to_cpu(str->sc_total);
468 sc->sc_free = be64_to_cpu(str->sc_free);
469 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
470}
471
472void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf)
473{
474 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
475
476 str->sc_total = cpu_to_be64(sc->sc_total);
477 str->sc_free = cpu_to_be64(sc->sc_free);
478 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
479}
480
481void gfs2_statfs_change_print(struct gfs2_statfs_change *sc)
482{
483 pv(sc, sc_total, "%lld");
484 pv(sc, sc_free, "%lld");
485 pv(sc, sc_dinodes, "%lld");
486}
487
488void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf)
489{
490 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
491
492 gfs2_inum_in(&ut->ut_inum, buf);
493 ut->ut_flags = be32_to_cpu(str->ut_flags);
494}
495
496void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf)
497{
498 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
499
500 gfs2_inum_out(&ut->ut_inum, buf);
501 str->ut_flags = cpu_to_be32(ut->ut_flags);
502 str->__pad = 0;
503}
504
505void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut)
506{
507 gfs2_inum_print(&ut->ut_inum);
508 pv(ut, ut_flags, "%u");
509}
510
511void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf)
512{
513 struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
514
515 qc->qc_change = be64_to_cpu(str->qc_change);
516 qc->qc_flags = be32_to_cpu(str->qc_flags);
517 qc->qc_id = be32_to_cpu(str->qc_id);
518}
519
520void gfs2_quota_change_print(struct gfs2_quota_change *qc)
521{
522 pv(qc, qc_change, "%lld");
523 pv(qc, qc_flags, "0x%.8X");
524 pv(qc, qc_id, "%u");
525}
526
527
528
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
new file mode 100644
index 000000000000..89a8b8fad2e7
--- /dev/null
+++ b/fs/gfs2/ops_address.c
@@ -0,0 +1,632 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/mpage.h>
17#include <linux/fs.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "bmap.h"
22#include "glock.h"
23#include "inode.h"
24#include "log.h"
25#include "meta_io.h"
26#include "ops_address.h"
27#include "page.h"
28#include "quota.h"
29#include "trans.h"
30#include "rgrp.h"
31#include "ops_file.h"
32
33/**
34 * gfs2_get_block - Fills in a buffer head with details about a block
35 * @inode: The inode
36 * @lblock: The block number to look up
37 * @bh_result: The buffer head to return the result in
38 * @create: Non-zero if we may add block to the file
39 *
40 * Returns: errno
41 */
42
43int gfs2_get_block(struct inode *inode, sector_t lblock,
44 struct buffer_head *bh_result, int create)
45{
46 struct gfs2_inode *ip = get_v2ip(inode);
47 int new = create;
48 uint64_t dblock;
49 int error;
50
51 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
52 if (error)
53 return error;
54
55 if (!dblock)
56 return 0;
57
58 map_bh(bh_result, inode->i_sb, dblock);
59 if (new)
60 set_buffer_new(bh_result);
61
62 return 0;
63}
64
65/**
66 * get_block_noalloc - Fills in a buffer head with details about a block
67 * @inode: The inode
68 * @lblock: The block number to look up
69 * @bh_result: The buffer head to return the result in
70 * @create: Non-zero if we may add block to the file
71 *
72 * Returns: errno
73 */
74
75static int get_block_noalloc(struct inode *inode, sector_t lblock,
76 struct buffer_head *bh_result, int create)
77{
78 struct gfs2_inode *ip = get_v2ip(inode);
79 int new = 0;
80 uint64_t dblock;
81 int error;
82
83 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
84 if (error)
85 return error;
86
87 if (dblock)
88 map_bh(bh_result, inode->i_sb, dblock);
89 else if (gfs2_assert_withdraw(ip->i_sbd, !create))
90 error = -EIO;
91
92 return error;
93}
94
95static int get_blocks(struct inode *inode, sector_t lblock,
96 unsigned long max_blocks, struct buffer_head *bh_result,
97 int create)
98{
99 struct gfs2_inode *ip = get_v2ip(inode);
100 int new = create;
101 uint64_t dblock;
102 uint32_t extlen;
103 int error;
104
105 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
106 if (error)
107 return error;
108
109 if (!dblock)
110 return 0;
111
112 map_bh(bh_result, inode->i_sb, dblock);
113 if (new)
114 set_buffer_new(bh_result);
115
116 if (extlen > max_blocks)
117 extlen = max_blocks;
118 bh_result->b_size = extlen << inode->i_blkbits;
119
120 return 0;
121}
122
123static int get_blocks_noalloc(struct inode *inode, sector_t lblock,
124 unsigned long max_blocks,
125 struct buffer_head *bh_result, int create)
126{
127 struct gfs2_inode *ip = get_v2ip(inode);
128 int new = 0;
129 uint64_t dblock;
130 uint32_t extlen;
131 int error;
132
133 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
134 if (error)
135 return error;
136
137 if (dblock) {
138 map_bh(bh_result, inode->i_sb, dblock);
139 if (extlen > max_blocks)
140 extlen = max_blocks;
141 bh_result->b_size = extlen << inode->i_blkbits;
142 } else if (gfs2_assert_withdraw(ip->i_sbd, !create))
143 error = -EIO;
144
145 return error;
146}
147
148/**
149 * gfs2_writepage - Write complete page
150 * @page: Page to write
151 *
152 * Returns: errno
153 *
154 * Some of this is copied from block_write_full_page() although we still
155 * call it to do most of the work.
156 */
157
158static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
159{
160 struct inode *inode = page->mapping->host;
161 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
162 struct gfs2_sbd *sdp = ip->i_sbd;
163 loff_t i_size = i_size_read(inode);
164 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
165 unsigned offset;
166 int error;
167 int done_trans = 0;
168
169 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
170 unlock_page(page);
171 return -EIO;
172 }
173 if (get_transaction)
174 goto out_ignore;
175
176 /* Is the page fully outside i_size? (truncate in progress) */
177 offset = i_size & (PAGE_CACHE_SIZE-1);
178 if (page->index >= end_index+1 || !offset) {
179 page->mapping->a_ops->invalidatepage(page, 0);
180 unlock_page(page);
181 return 0; /* don't care */
182 }
183
184 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
185 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
186 if (error)
187 goto out_ignore;
188 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
189 done_trans = 1;
190 }
191 error = block_write_full_page(page, get_block_noalloc, wbc);
192 if (done_trans)
193 gfs2_trans_end(sdp);
194 gfs2_meta_cache_flush(ip);
195 return error;
196
197out_ignore:
198 redirty_page_for_writepage(wbc, page);
199 unlock_page(page);
200 return 0;
201}
202
203/**
204 * stuffed_readpage - Fill in a Linux page with stuffed file data
205 * @ip: the inode
206 * @page: the page
207 *
208 * Returns: errno
209 */
210
211static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
212{
213 struct buffer_head *dibh;
214 void *kaddr;
215 int error;
216
217 error = gfs2_meta_inode_buffer(ip, &dibh);
218 if (error)
219 return error;
220
221 kaddr = kmap_atomic(page, KM_USER0);
222 memcpy((char *)kaddr,
223 dibh->b_data + sizeof(struct gfs2_dinode),
224 ip->i_di.di_size);
225 memset((char *)kaddr + ip->i_di.di_size,
226 0,
227 PAGE_CACHE_SIZE - ip->i_di.di_size);
228 kunmap_atomic(page, KM_USER0);
229
230 brelse(dibh);
231
232 SetPageUptodate(page);
233
234 return 0;
235}
236
237static int zero_readpage(struct page *page)
238{
239 void *kaddr;
240
241 kaddr = kmap_atomic(page, KM_USER0);
242 memset(kaddr, 0, PAGE_CACHE_SIZE);
243 kunmap_atomic(page, KM_USER0);
244
245 SetPageUptodate(page);
246 unlock_page(page);
247
248 return 0;
249}
250
251/**
252 * gfs2_readpage - readpage with locking
253 * @file: The file to read a page for. N.B. This may be NULL if we are
254 * reading an internal file.
255 * @page: The page to read
256 *
257 * Returns: errno
258 */
259
260static int gfs2_readpage(struct file *file, struct page *page)
261{
262 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
263 struct gfs2_sbd *sdp = ip->i_sbd;
264 struct gfs2_holder gh;
265 int error;
266
267 if (file != &gfs2_internal_file_sentinal) {
268 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
269 error = gfs2_glock_nq_m_atime(1, &gh);
270 if (error)
271 goto out_unlock;
272 }
273
274 if (gfs2_is_stuffed(ip)) {
275 if (!page->index) {
276 error = stuffed_readpage(ip, page);
277 unlock_page(page);
278 } else
279 error = zero_readpage(page);
280 } else
281 error = mpage_readpage(page, gfs2_get_block);
282
283 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
284 error = -EIO;
285
286 if (file != &gfs2_internal_file_sentinal) {
287 gfs2_glock_dq_m(1, &gh);
288 gfs2_holder_uninit(&gh);
289 }
290out:
291 return error;
292out_unlock:
293 unlock_page(page);
294 goto out;
295}
296
297/**
298 * gfs2_prepare_write - Prepare to write a page to a file
299 * @file: The file to write to
300 * @page: The page which is to be prepared for writing
301 * @from: From (byte range within page)
302 * @to: To (byte range within page)
303 *
304 * Returns: errno
305 */
306
307static int gfs2_prepare_write(struct file *file, struct page *page,
308 unsigned from, unsigned to)
309{
310 struct gfs2_inode *ip = get_v2ip(page->mapping->host);
311 struct gfs2_sbd *sdp = ip->i_sbd;
312 unsigned int data_blocks, ind_blocks, rblocks;
313 int alloc_required;
314 int error = 0;
315 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
316 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
317 struct gfs2_alloc *al;
318
319 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
320 error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
321 if (error)
322 goto out_uninit;
323
324 gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
325
326 error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
327 if (error)
328 goto out_unlock;
329
330
331 if (alloc_required) {
332 al = gfs2_alloc_get(ip);
333
334 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
335 if (error)
336 goto out_alloc_put;
337
338 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
339 if (error)
340 goto out_qunlock;
341
342 al->al_requested = data_blocks + ind_blocks;
343 error = gfs2_inplace_reserve(ip);
344 if (error)
345 goto out_qunlock;
346 }
347
348 rblocks = RES_DINODE + ind_blocks;
349 if (gfs2_is_jdata(ip))
350 rblocks += data_blocks ? data_blocks : 1;
351 if (ind_blocks || data_blocks)
352 rblocks += RES_STATFS + RES_QUOTA;
353
354 error = gfs2_trans_begin(sdp, rblocks, 0);
355 if (error)
356 goto out;
357
358 if (gfs2_is_stuffed(ip)) {
359 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
360 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
361 page);
362 if (error == 0)
363 goto prepare_write;
364 } else if (!PageUptodate(page))
365 error = stuffed_readpage(ip, page);
366 goto out;
367 }
368
369prepare_write:
370 error = block_prepare_write(page, from, to, gfs2_get_block);
371
372out:
373 if (error) {
374 gfs2_trans_end(sdp);
375 if (alloc_required) {
376 gfs2_inplace_release(ip);
377out_qunlock:
378 gfs2_quota_unlock(ip);
379out_alloc_put:
380 gfs2_alloc_put(ip);
381 }
382out_unlock:
383 gfs2_glock_dq_m(1, &ip->i_gh);
384out_uninit:
385 gfs2_holder_uninit(&ip->i_gh);
386 }
387
388 return error;
389}
390
391/**
392 * gfs2_commit_write - Commit write to a file
393 * @file: The file to write to
394 * @page: The page containing the data
395 * @from: From (byte range within page)
396 * @to: To (byte range within page)
397 *
398 * Returns: errno
399 */
400
401static int gfs2_commit_write(struct file *file, struct page *page,
402 unsigned from, unsigned to)
403{
404 struct inode *inode = page->mapping->host;
405 struct gfs2_inode *ip = get_v2ip(inode);
406 struct gfs2_sbd *sdp = ip->i_sbd;
407 int error = -EOPNOTSUPP;
408 struct buffer_head *dibh;
409 struct gfs2_alloc *al = &ip->i_alloc;;
410
411 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
412 goto fail_nounlock;
413
414 error = gfs2_meta_inode_buffer(ip, &dibh);
415 if (error)
416 goto fail_endtrans;
417
418 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
419
420 if (gfs2_is_stuffed(ip)) {
421 uint64_t file_size;
422 void *kaddr;
423
424 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
425
426 kaddr = kmap_atomic(page, KM_USER0);
427 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
428 (char *)kaddr + from, to - from);
429 kunmap_atomic(page, KM_USER0);
430
431 SetPageUptodate(page);
432
433 if (inode->i_size < file_size)
434 i_size_write(inode, file_size);
435 } else {
436 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
437 gfs2_is_jdata(ip))
438 gfs2_page_add_databufs(ip, page, from, to);
439 error = generic_commit_write(file, page, from, to);
440 if (error)
441 goto fail;
442 }
443
444 if (ip->i_di.di_size < inode->i_size)
445 ip->i_di.di_size = inode->i_size;
446
447 gfs2_dinode_out(&ip->i_di, dibh->b_data);
448 brelse(dibh);
449 gfs2_trans_end(sdp);
450 if (al->al_requested) {
451 gfs2_inplace_release(ip);
452 gfs2_quota_unlock(ip);
453 gfs2_alloc_put(ip);
454 }
455 gfs2_glock_dq_m(1, &ip->i_gh);
456 gfs2_holder_uninit(&ip->i_gh);
457 return 0;
458
459fail:
460 brelse(dibh);
461fail_endtrans:
462 gfs2_trans_end(sdp);
463 if (al->al_requested) {
464 gfs2_inplace_release(ip);
465 gfs2_quota_unlock(ip);
466 gfs2_alloc_put(ip);
467 }
468 gfs2_glock_dq_m(1, &ip->i_gh);
469 gfs2_holder_uninit(&ip->i_gh);
470fail_nounlock:
471 ClearPageUptodate(page);
472 return error;
473}
474
475/**
476 * gfs2_bmap - Block map function
477 * @mapping: Address space info
478 * @lblock: The block to map
479 *
480 * Returns: The disk address for the block or 0 on hole or error
481 */
482
483static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
484{
485 struct gfs2_inode *ip = get_v2ip(mapping->host);
486 struct gfs2_holder i_gh;
487 sector_t dblock = 0;
488 int error;
489
490 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
491 if (error)
492 return 0;
493
494 if (!gfs2_is_stuffed(ip))
495 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);
496
497 gfs2_glock_dq_uninit(&i_gh);
498
499 return dblock;
500}
501
502static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
503{
504 struct gfs2_bufdata *bd;
505
506 gfs2_log_lock(sdp);
507 bd = get_v2bd(bh);
508 if (bd) {
509 bd->bd_bh = NULL;
510 set_v2bd(bh, NULL);
511 gfs2_log_unlock(sdp);
512 brelse(bh);
513 } else
514 gfs2_log_unlock(sdp);
515
516 lock_buffer(bh);
517 clear_buffer_dirty(bh);
518 bh->b_bdev = NULL;
519 clear_buffer_mapped(bh);
520 clear_buffer_req(bh);
521 clear_buffer_new(bh);
522 clear_buffer_delay(bh);
523 unlock_buffer(bh);
524}
525
526static int gfs2_invalidatepage(struct page *page, unsigned long offset)
527{
528 struct gfs2_sbd *sdp = get_v2sdp(page->mapping->host->i_sb);
529 struct buffer_head *head, *bh, *next;
530 unsigned int curr_off = 0;
531 int ret = 1;
532
533 BUG_ON(!PageLocked(page));
534 if (!page_has_buffers(page))
535 return 1;
536
537 bh = head = page_buffers(page);
538 do {
539 unsigned int next_off = curr_off + bh->b_size;
540 next = bh->b_this_page;
541
542 if (offset <= curr_off)
543 discard_buffer(sdp, bh);
544
545 curr_off = next_off;
546 bh = next;
547 } while (bh != head);
548
549 if (!offset)
550 ret = try_to_release_page(page, 0);
551
552 return ret;
553}
554
555static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
556 loff_t offset, unsigned long nr_segs)
557{
558 struct file *file = iocb->ki_filp;
559 struct inode *inode = file->f_mapping->host;
560 struct gfs2_inode *ip = get_v2ip(inode);
561 struct gfs2_holder gh;
562 int rv;
563
564 /*
565 * Shared lock, even though its write, since we do no allocation
566 * on this path. All we need change is atime.
567 */
568 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
569 rv = gfs2_glock_nq_m_atime(1, &gh);
570 if (rv)
571 goto out;
572
573 /*
574 * Should we return an error here? I can't see that O_DIRECT for
575 * a journaled file makes any sense. For now we'll silently fall
576 * back to buffered I/O, likewise we do the same for stuffed
577 * files since they are (a) small and (b) unaligned.
578 */
579 if (gfs2_is_jdata(ip))
580 goto out;
581
582 if (gfs2_is_stuffed(ip))
583 goto out;
584
585 rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
586 iov, offset, nr_segs, get_blocks_noalloc,
587 NULL, DIO_OWN_LOCKING);
588out:
589 gfs2_glock_dq_m(1, &gh);
590 gfs2_holder_uninit(&gh);
591
592 return rv;
593}
594
595/**
596 * gfs2_direct_IO
597 *
598 * This is called with a shared lock already held for the read path.
599 * Currently, no locks are held when the write path is called.
600 */
601static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
602 const struct iovec *iov, loff_t offset,
603 unsigned long nr_segs)
604{
605 struct file *file = iocb->ki_filp;
606 struct inode *inode = file->f_mapping->host;
607 struct gfs2_inode *ip = get_v2ip(inode);
608 struct gfs2_sbd *sdp = ip->i_sbd;
609
610 if (rw == WRITE)
611 return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
612
613 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
614 gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
615 return -EINVAL;
616
617 return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
618 offset, nr_segs, get_blocks, NULL,
619 DIO_OWN_LOCKING);
620}
621
622struct address_space_operations gfs2_file_aops = {
623 .writepage = gfs2_writepage,
624 .readpage = gfs2_readpage,
625 .sync_page = block_sync_page,
626 .prepare_write = gfs2_prepare_write,
627 .commit_write = gfs2_commit_write,
628 .bmap = gfs2_bmap,
629 .invalidatepage = gfs2_invalidatepage,
630 .direct_IO = gfs2_direct_IO,
631};
632
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
new file mode 100644
index 000000000000..f201a059fd91
--- /dev/null
+++ b/fs/gfs2/ops_address.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_ADDRESS_DOT_H__
11#define __OPS_ADDRESS_DOT_H__
12
13extern struct address_space_operations gfs2_file_aops;
14extern int gfs2_get_block(struct inode *inode, sector_t lblock,
15 struct buffer_head *bh_result, int create);
16
17#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
new file mode 100644
index 000000000000..b020ad8f180b
--- /dev/null
+++ b/fs/gfs2/ops_dentry.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/smp_lock.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "dir.h"
20#include "glock.h"
21#include "ops_dentry.h"
22
23/**
24 * gfs2_drevalidate - Check directory lookup consistency
25 * @dentry: the mapping to check
26 * @nd:
27 *
28 * Check to make sure the lookup necessary to arrive at this inode from its
29 * parent is still good.
30 *
31 * Returns: 1 if the dentry is ok, 0 if it isn't
32 */
33
34static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
35{
36 struct dentry *parent = dget_parent(dentry);
37 struct gfs2_inode *dip = get_v2ip(parent->d_inode);
38 struct inode *inode;
39 struct gfs2_holder d_gh;
40 struct gfs2_inode *ip;
41 struct gfs2_inum inum;
42 unsigned int type;
43 int error;
44
45 lock_kernel();
46
47 inode = dentry->d_inode;
48 if (inode && is_bad_inode(inode))
49 goto invalid;
50
51 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
52 if (error)
53 goto fail;
54
55 error = gfs2_dir_search(dip, &dentry->d_name, &inum, &type);
56 switch (error) {
57 case 0:
58 if (!inode)
59 goto invalid_gunlock;
60 break;
61 case -ENOENT:
62 if (!inode)
63 goto valid_gunlock;
64 goto invalid_gunlock;
65 default:
66 goto fail_gunlock;
67 }
68
69 ip = get_v2ip(inode);
70
71 if (!gfs2_inum_equal(&ip->i_num, &inum))
72 goto invalid_gunlock;
73
74 if (IF2DT(ip->i_di.di_mode) != type) {
75 gfs2_consist_inode(dip);
76 goto fail_gunlock;
77 }
78
79 valid_gunlock:
80 gfs2_glock_dq_uninit(&d_gh);
81
82 valid:
83 unlock_kernel();
84 dput(parent);
85 return 1;
86
87 invalid_gunlock:
88 gfs2_glock_dq_uninit(&d_gh);
89
90 invalid:
91 if (inode && S_ISDIR(inode->i_mode)) {
92 if (have_submounts(dentry))
93 goto valid;
94 shrink_dcache_parent(dentry);
95 }
96 d_drop(dentry);
97
98 unlock_kernel();
99 dput(parent);
100 return 0;
101
102 fail_gunlock:
103 gfs2_glock_dq_uninit(&d_gh);
104
105 fail:
106 unlock_kernel();
107 dput(parent);
108 return 0;
109}
110
111struct dentry_operations gfs2_dops = {
112 .d_revalidate = gfs2_drevalidate,
113};
114
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
new file mode 100644
index 000000000000..94e3ee170165
--- /dev/null
+++ b/fs/gfs2/ops_dentry.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_DENTRY_DOT_H__
11#define __OPS_DENTRY_DOT_H__
12
13extern struct dentry_operations gfs2_dops;
14
15#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
new file mode 100644
index 000000000000..60d006402553
--- /dev/null
+++ b/fs/gfs2/ops_export.c
@@ -0,0 +1,290 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "dir.h"
19#include "glock.h"
20#include "glops.h"
21#include "inode.h"
22#include "ops_export.h"
23#include "rgrp.h"
24
25static struct dentry *gfs2_decode_fh(struct super_block *sb,
26 __u32 *fh,
27 int fh_len,
28 int fh_type,
29 int (*acceptable)(void *context,
30 struct dentry *dentry),
31 void *context)
32{
33 struct gfs2_inum this, parent;
34
35 if (fh_type != fh_len)
36 return NULL;
37
38 memset(&parent, 0, sizeof(struct gfs2_inum));
39
40 switch (fh_type) {
41 case 8:
42 parent.no_formal_ino = ((uint64_t)be32_to_cpu(fh[4])) << 32;
43 parent.no_formal_ino |= be32_to_cpu(fh[5]);
44 parent.no_addr = ((uint64_t)be32_to_cpu(fh[6])) << 32;
45 parent.no_addr |= be32_to_cpu(fh[7]);
46 case 4:
47 this.no_formal_ino = ((uint64_t)be32_to_cpu(fh[0])) << 32;
48 this.no_formal_ino |= be32_to_cpu(fh[1]);
49 this.no_addr = ((uint64_t)be32_to_cpu(fh[2])) << 32;
50 this.no_addr |= be32_to_cpu(fh[3]);
51 break;
52 default:
53 return NULL;
54 }
55
56 return gfs2_export_ops.find_exported_dentry(sb, &this, &parent,
57 acceptable, context);
58}
59
60static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
61 int connectable)
62{
63 struct inode *inode = dentry->d_inode;
64 struct gfs2_inode *ip = get_v2ip(inode);
65 struct gfs2_sbd *sdp = ip->i_sbd;
66
67 if (*len < 4 || (connectable && *len < 8))
68 return 255;
69
70 fh[0] = ip->i_num.no_formal_ino >> 32;
71 fh[0] = cpu_to_be32(fh[0]);
72 fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
73 fh[1] = cpu_to_be32(fh[1]);
74 fh[2] = ip->i_num.no_addr >> 32;
75 fh[2] = cpu_to_be32(fh[2]);
76 fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
77 fh[3] = cpu_to_be32(fh[3]);
78 *len = 4;
79
80 if (!connectable || ip == get_v2ip(sdp->sd_root_dir))
81 return *len;
82
83 spin_lock(&dentry->d_lock);
84 inode = dentry->d_parent->d_inode;
85 ip = get_v2ip(inode);
86 gfs2_inode_hold(ip);
87 spin_unlock(&dentry->d_lock);
88
89 fh[4] = ip->i_num.no_formal_ino >> 32;
90 fh[4] = cpu_to_be32(fh[4]);
91 fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
92 fh[5] = cpu_to_be32(fh[5]);
93 fh[6] = ip->i_num.no_addr >> 32;
94 fh[6] = cpu_to_be32(fh[6]);
95 fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
96 fh[7] = cpu_to_be32(fh[7]);
97 *len = 8;
98
99 gfs2_inode_put(ip);
100
101 return *len;
102}
103
/*
 * Search state handed to the get_name_filldir() callback as its opaque
 * argument: "inum" is the inode number being looked for, "name" is the
 * caller's buffer that receives the NUL-terminated name of the matching
 * directory entry.
 */
104struct get_name_filldir {
105 struct gfs2_inum inum;
106 char *name;
107};
108
109static int get_name_filldir(void *opaque, const char *name, unsigned int length,
110 uint64_t offset, struct gfs2_inum *inum,
111 unsigned int type)
112{
113 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
114
115 if (!gfs2_inum_equal(inum, &gnfd->inum))
116 return 0;
117
118 memcpy(gnfd->name, name, length);
119 gnfd->name[length] = 0;
120
121 return 1;
122}
123
124static int gfs2_get_name(struct dentry *parent, char *name,
125 struct dentry *child)
126{
127 struct inode *dir = parent->d_inode;
128 struct inode *inode = child->d_inode;
129 struct gfs2_inode *dip, *ip;
130 struct get_name_filldir gnfd;
131 struct gfs2_holder gh;
132 uint64_t offset = 0;
133 int error;
134
135 if (!dir)
136 return -EINVAL;
137
138 if (!S_ISDIR(dir->i_mode) || !inode)
139 return -EINVAL;
140
141 dip = get_v2ip(dir);
142 ip = get_v2ip(inode);
143
144 *name = 0;
145 gnfd.inum = ip->i_num;
146 gnfd.name = name;
147
148 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
149 if (error)
150 return error;
151
152 error = gfs2_dir_read(dip, &offset, &gnfd, get_name_filldir);
153
154 gfs2_glock_dq_uninit(&gh);
155
156 if (!error && !*name)
157 error = -ENOENT;
158
159 return error;
160}
161
162static struct dentry *gfs2_get_parent(struct dentry *child)
163{
164 struct qstr dotdot = { .name = "..", .len = 2 };
165 struct inode *inode;
166 struct dentry *dentry;
167 int error;
168
169 error = gfs2_lookupi(child->d_inode, &dotdot, 1, &inode);
170 if (error)
171 return ERR_PTR(error);
172
173 dentry = d_alloc_anon(inode);
174 if (!dentry) {
175 iput(inode);
176 return ERR_PTR(-ENOMEM);
177 }
178
179 return dentry;
180}
181
182static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p)
183{
184 struct gfs2_sbd *sdp = get_v2sdp(sb);
185 struct gfs2_inum *inum = (struct gfs2_inum *)inum_p;
186 struct gfs2_holder i_gh, ri_gh, rgd_gh;
187 struct gfs2_rgrpd *rgd;
188 struct gfs2_inode *ip;
189 struct inode *inode;
190 struct dentry *dentry;
191 int error;
192
193 /* System files? */
194
195 inode = gfs2_iget(sb, inum);
196 if (inode) {
197 ip = get_v2ip(inode);
198 if (ip->i_num.no_formal_ino != inum->no_formal_ino) {
199 iput(inode);
200 return ERR_PTR(-ESTALE);
201 }
202 goto out_inode;
203 }
204
205 error = gfs2_glock_nq_num(sdp,
206 inum->no_addr, &gfs2_inode_glops,
207 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
208 &i_gh);
209 if (error)
210 return ERR_PTR(error);
211
212 error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip);
213 if (error)
214 goto fail;
215 if (ip)
216 goto out_ip;
217
218 error = gfs2_rindex_hold(sdp, &ri_gh);
219 if (error)
220 goto fail;
221
222 error = -EINVAL;
223 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
224 if (!rgd)
225 goto fail_rindex;
226
227 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
228 if (error)
229 goto fail_rindex;
230
231 error = -ESTALE;
232 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
233 goto fail_rgd;
234
235 gfs2_glock_dq_uninit(&rgd_gh);
236 gfs2_glock_dq_uninit(&ri_gh);
237
238 error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip);
239 if (error)
240 goto fail;
241
242 error = gfs2_inode_refresh(ip);
243 if (error) {
244 gfs2_inode_put(ip);
245 goto fail;
246 }
247
248 out_ip:
249 error = -EIO;
250 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) {
251 gfs2_inode_put(ip);
252 goto fail;
253 }
254
255 gfs2_glock_dq_uninit(&i_gh);
256
257 inode = gfs2_ip2v(ip);
258 gfs2_inode_put(ip);
259
260 if (!inode)
261 return ERR_PTR(-ENOMEM);
262
263 out_inode:
264 dentry = d_alloc_anon(inode);
265 if (!dentry) {
266 iput(inode);
267 return ERR_PTR(-ENOMEM);
268 }
269
270 return dentry;
271
272 fail_rgd:
273 gfs2_glock_dq_uninit(&rgd_gh);
274
275 fail_rindex:
276 gfs2_glock_dq_uninit(&ri_gh);
277
278 fail:
279 gfs2_glock_dq_uninit(&i_gh);
280 return ERR_PTR(error);
281}
282
283struct export_operations gfs2_export_ops = {
284 .decode_fh = gfs2_decode_fh,
285 .encode_fh = gfs2_encode_fh,
286 .get_name = gfs2_get_name,
287 .get_parent = gfs2_get_parent,
288 .get_dentry = gfs2_get_dentry,
289};
290
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
new file mode 100644
index 000000000000..2f342f3d8755
--- /dev/null
+++ b/fs/gfs2/ops_export.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13extern struct export_operations gfs2_export_ops;
14
15#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
new file mode 100644
index 000000000000..e6ae2551b0cb
--- /dev/null
+++ b/fs/gfs2/ops_file.c
@@ -0,0 +1,943 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/uio.h>
17#include <linux/blkdev.h>
18#include <linux/mm.h>
19#include <linux/smp_lock.h>
20#include <linux/gfs2_ioctl.h>
21#include <linux/fs.h>
22#include <asm/semaphore.h>
23#include <asm/uaccess.h>
24
25#include "gfs2.h"
26#include "bmap.h"
27#include "dir.h"
28#include "glock.h"
29#include "glops.h"
30#include "inode.h"
31#include "lm.h"
32#include "log.h"
33#include "meta_io.h"
34#include "ops_file.h"
35#include "ops_vm.h"
36#include "quota.h"
37#include "rgrp.h"
38#include "trans.h"
39
40/* "bad" is for NFS support */
/*
 * One directory entry captured by filldir_bad_func(): the arguments that
 * gfs2_dir_read() passed to the callback, saved so that readdir_bad() can
 * replay them later. fbe_name points into the owning filldir_bad's name
 * area and is NOT NUL-terminated; its length is fbe_length.
 */
41struct filldir_bad_entry {
42 char *fbe_name;
43 unsigned int fbe_length;
44 uint64_t fbe_offset;
45 struct gfs2_inum fbe_inum;
46 unsigned int fbe_type;
47};
48
/*
 * Buffer used by readdir_bad() to collect directory entries. It is
 * allocated as one chunk: this header, then fdb_entry_num entry slots,
 * then fdb_name_size bytes of name storage.
 *
 * fdb_entry_off is the number of slots filled so far; fdb_name_off is the
 * number of name bytes used so far. filldir_bad_func() stops (returns 1)
 * when either area would overflow.
 */
49struct filldir_bad {
50 struct gfs2_sbd *fdb_sbd;
51
52 struct filldir_bad_entry *fdb_entry;
53 unsigned int fdb_entry_num;
54 unsigned int fdb_entry_off;
55
56 char *fdb_name;
57 unsigned int fdb_name_size;
58 unsigned int fdb_name_off;
59};
60
61/* For regular, non-NFS */
/*
 * Context handed to filldir_reg_func(): fdr_filldir/fdr_opaque are the
 * VFS filldir callback and its buffer, to which each entry is forwarded.
 * When fdr_prefetch is non-zero, glock prefetches are issued for every
 * entry except ".".
 */
62struct filldir_reg {
63 struct gfs2_sbd *fdr_sbd;
64 int fdr_prefetch;
65
66 filldir_t fdr_filldir;
67 void *fdr_opaque;
68};
69
70/*
71 * Most fields left uninitialised to catch anybody who tries to
72 * use them. f_flags set to prevent file_accessed() from touching
73 * any other part of this. Its use is purely as a flag so that we
74 * know (in readpage()) whether or not do to locking.
75 */
76struct file gfs2_internal_file_sentinal = {
77 .f_flags = O_NOATIME|O_RDONLY,
78};
79
80static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
81 unsigned long offset, unsigned long size)
82{
83 char *kaddr;
84 unsigned long count = desc->count;
85
86 if (size > count)
87 size = count;
88
89 kaddr = kmap(page);
90 memcpy(desc->arg.buf, kaddr + offset, size);
91 kunmap(page);
92
93 desc->count = count - size;
94 desc->written += size;
95 desc->arg.buf += size;
96 return size;
97}
98
99int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
100 char *buf, loff_t *pos, unsigned size)
101{
102 struct inode *inode = ip->i_vnode;
103 read_descriptor_t desc;
104 desc.written = 0;
105 desc.arg.buf = buf;
106 desc.count = size;
107 desc.error = 0;
108 do_generic_mapping_read(inode->i_mapping, ra_state,
109 &gfs2_internal_file_sentinal, pos, &desc,
110 gfs2_read_actor);
111 return desc.written ? desc.written : desc.error;
112}
113
114/**
115 * gfs2_llseek - seek to a location in a file
116 * @file: the file
117 * @offset: the offset
118 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
119 *
120 * SEEK_END requires the glock for the file because it references the
121 * file's size.
122 *
123 * Returns: The new offset, or errno
124 */
125
126static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
127{
128 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
129 struct gfs2_holder i_gh;
130 loff_t error;
131
132 if (origin == 2) {
133 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
134 &i_gh);
135 if (!error) {
136 error = remote_llseek(file, offset, origin);
137 gfs2_glock_dq_uninit(&i_gh);
138 }
139 } else
140 error = remote_llseek(file, offset, origin);
141
142 return error;
143}
144
145
146static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
147 loff_t offset, unsigned long nr_segs)
148{
149 struct file *file = iocb->ki_filp;
150 struct address_space *mapping = file->f_mapping;
151 ssize_t retval;
152
153 retval = filemap_write_and_wait(mapping);
154 if (retval == 0) {
155 retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
156 nr_segs);
157 }
158 return retval;
159}
160
161/**
162 * __gfs2_file_aio_read - The main GFS2 read function
163 *
164 * N.B. This is almost, but not quite the same as __generic_file_aio_read()
165 * the important subtle different being that inode->i_size isn't valid
166 * unless we are holding a lock, and we do this _only_ on the O_DIRECT
167 * path since otherwise locking is done entirely at the page cache
168 * layer.
169 */
170static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
171 const struct iovec *iov,
172 unsigned long nr_segs, loff_t *ppos)
173{
174 struct file *filp = iocb->ki_filp;
175 struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host);
176 struct gfs2_holder gh;
177 ssize_t retval;
178 unsigned long seg;
179 size_t count;
180
181 count = 0;
182 for (seg = 0; seg < nr_segs; seg++) {
183 const struct iovec *iv = &iov[seg];
184
185 /*
186 * If any segment has a negative length, or the cumulative
187 * length ever wraps negative then return -EINVAL.
188 */
189 count += iv->iov_len;
190 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
191 return -EINVAL;
192 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
193 continue;
194 if (seg == 0)
195 return -EFAULT;
196 nr_segs = seg;
197 count -= iv->iov_len; /* This segment is no good */
198 break;
199 }
200
201 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
202 if (filp->f_flags & O_DIRECT) {
203 loff_t pos = *ppos, size;
204 struct address_space *mapping;
205 struct inode *inode;
206
207 mapping = filp->f_mapping;
208 inode = mapping->host;
209 retval = 0;
210 if (!count)
211 goto out; /* skip atime */
212
213 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
214 retval = gfs2_glock_nq_m_atime(1, &gh);
215 if (retval)
216 goto out;
217 if (gfs2_is_stuffed(ip)) {
218 gfs2_glock_dq_m(1, &gh);
219 gfs2_holder_uninit(&gh);
220 goto fallback_to_normal;
221 }
222 size = i_size_read(inode);
223 if (pos < size) {
224 retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
225 if (retval > 0 && !is_sync_kiocb(iocb))
226 retval = -EIOCBQUEUED;
227 if (retval > 0)
228 *ppos = pos + retval;
229 }
230 file_accessed(filp);
231 gfs2_glock_dq_m(1, &gh);
232 gfs2_holder_uninit(&gh);
233 goto out;
234 }
235
236fallback_to_normal:
237 retval = 0;
238 if (count) {
239 for (seg = 0; seg < nr_segs; seg++) {
240 read_descriptor_t desc;
241
242 desc.written = 0;
243 desc.arg.buf = iov[seg].iov_base;
244 desc.count = iov[seg].iov_len;
245 if (desc.count == 0)
246 continue;
247 desc.error = 0;
248 do_generic_file_read(filp,ppos,&desc,file_read_actor);
249 retval += desc.written;
250 if (desc.error) {
251 retval = retval ?: desc.error;
252 break;
253 }
254 }
255 }
256out:
257 return retval;
258}
259
260/**
261 * gfs2_read - Read bytes from a file
262 * @file: The file to read from
263 * @buf: The buffer to copy into
264 * @size: The amount of data requested
265 * @offset: The current file offset
266 *
267 * Outputs: Offset - updated according to number of bytes read
268 *
269 * Returns: The number of bytes read, errno on failure
270 */
271
272static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
273 loff_t *offset)
274{
275 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
276 struct kiocb kiocb;
277 ssize_t ret;
278
279 init_sync_kiocb(&kiocb, filp);
280 ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
281 if (-EIOCBQUEUED == ret)
282 ret = wait_on_sync_kiocb(&kiocb);
283 return ret;
284}
285
286static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
287 unsigned long nr_segs, loff_t *ppos)
288{
289 struct kiocb kiocb;
290 ssize_t ret;
291
292 init_sync_kiocb(&kiocb, filp);
293 ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
294 if (-EIOCBQUEUED == ret)
295 ret = wait_on_sync_kiocb(&kiocb);
296 return ret;
297}
298
299static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
300 size_t count, loff_t pos)
301{
302 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
303
304 BUG_ON(iocb->ki_pos != pos);
305 return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
306}
307
308
309/**
310 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
311 * @opaque: opaque data used by the function
312 * @name: the name of the directory entry
313 * @length: the length of the name
314 * @offset: the entry's offset in the directory
315 * @inum: the inode number the entry points to
316 * @type: the type of inode the entry points to
317 *
318 * Returns: 0 on success, 1 if buffer full
319 */
320
321static int filldir_reg_func(void *opaque, const char *name, unsigned int length,
322 uint64_t offset, struct gfs2_inum *inum,
323 unsigned int type)
324{
325 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
326 struct gfs2_sbd *sdp = fdr->fdr_sbd;
327 int error;
328
329 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
330 inum->no_formal_ino, type);
331 if (error)
332 return 1;
333
334 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
335 gfs2_glock_prefetch_num(sdp,
336 inum->no_addr, &gfs2_inode_glops,
337 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
338 gfs2_glock_prefetch_num(sdp,
339 inum->no_addr, &gfs2_iopen_glops,
340 LM_ST_SHARED, LM_FLAG_TRY);
341 }
342
343 return 0;
344}
345
346/**
347 * readdir_reg - Read directory entries from a directory
348 * @file: The directory to read from
349 * @dirent: Buffer for dirents
350 * @filldir: Function used to do the copying
351 *
352 * Returns: errno
353 */
354
355static int readdir_reg(struct file *file, void *dirent, filldir_t filldir)
356{
357 struct gfs2_inode *dip = get_v2ip(file->f_mapping->host);
358 struct filldir_reg fdr;
359 struct gfs2_holder d_gh;
360 uint64_t offset = file->f_pos;
361 int error;
362
363 fdr.fdr_sbd = dip->i_sbd;
364 fdr.fdr_prefetch = 1;
365 fdr.fdr_filldir = filldir;
366 fdr.fdr_opaque = dirent;
367
368 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
369 error = gfs2_glock_nq_atime(&d_gh);
370 if (error) {
371 gfs2_holder_uninit(&d_gh);
372 return error;
373 }
374
375 error = gfs2_dir_read(dip, &offset, &fdr, filldir_reg_func);
376
377 gfs2_glock_dq_uninit(&d_gh);
378
379 file->f_pos = offset;
380
381 return error;
382}
383
384/**
385 * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read()
386 * @opaque: opaque data used by the function
387 * @name: the name of the directory entry
388 * @length: the length of the name
389 * @offset: the entry's offset in the directory
390 * @inum: the inode number the entry points to
391 * @type: the type of inode the entry points to
392 *
393 * For supporting NFS.
394 *
395 * Returns: 0 on success, 1 if buffer full
396 */
397
398static int filldir_bad_func(void *opaque, const char *name, unsigned int length,
399 uint64_t offset, struct gfs2_inum *inum,
400 unsigned int type)
401{
402 struct filldir_bad *fdb = (struct filldir_bad *)opaque;
403 struct gfs2_sbd *sdp = fdb->fdb_sbd;
404 struct filldir_bad_entry *fbe;
405
406 if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
407 fdb->fdb_name_off + length > fdb->fdb_name_size)
408 return 1;
409
410 fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
411 fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
412 memcpy(fbe->fbe_name, name, length);
413 fbe->fbe_length = length;
414 fbe->fbe_offset = offset;
415 fbe->fbe_inum = *inum;
416 fbe->fbe_type = type;
417
418 fdb->fdb_entry_off++;
419 fdb->fdb_name_off += length;
420
421 if (!(length == 1 && *name == '.')) {
422 gfs2_glock_prefetch_num(sdp,
423 inum->no_addr, &gfs2_inode_glops,
424 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
425 gfs2_glock_prefetch_num(sdp,
426 inum->no_addr, &gfs2_iopen_glops,
427 LM_ST_SHARED, LM_FLAG_TRY);
428 }
429
430 return 0;
431}
432
433/**
434 * readdir_bad - Read directory entries from a directory
435 * @file: The directory to read from
436 * @dirent: Buffer for dirents
437 * @filldir: Function used to do the copying
438 *
439 * For supporting NFS.
440 *
441 * Returns: errno
442 */
443
444static int readdir_bad(struct file *file, void *dirent, filldir_t filldir)
445{
446 struct gfs2_inode *dip = get_v2ip(file->f_mapping->host);
447 struct gfs2_sbd *sdp = dip->i_sbd;
448 struct filldir_reg fdr;
449 unsigned int entries, size;
450 struct filldir_bad *fdb;
451 struct gfs2_holder d_gh;
452 uint64_t offset = file->f_pos;
453 unsigned int x;
454 struct filldir_bad_entry *fbe;
455 int error;
456
457 entries = gfs2_tune_get(sdp, gt_entries_per_readdir);
458 size = sizeof(struct filldir_bad) +
459 entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE);
460
461 fdb = kzalloc(size, GFP_KERNEL);
462 if (!fdb)
463 return -ENOMEM;
464
465 fdb->fdb_sbd = sdp;
466 fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
467 fdb->fdb_entry_num = entries;
468 fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
469 entries * sizeof(struct filldir_bad_entry);
470 fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE;
471
472 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
473 error = gfs2_glock_nq_atime(&d_gh);
474 if (error) {
475 gfs2_holder_uninit(&d_gh);
476 goto out;
477 }
478
479 error = gfs2_dir_read(dip, &offset, fdb, filldir_bad_func);
480
481 gfs2_glock_dq_uninit(&d_gh);
482
483 fdr.fdr_sbd = sdp;
484 fdr.fdr_prefetch = 0;
485 fdr.fdr_filldir = filldir;
486 fdr.fdr_opaque = dirent;
487
488 for (x = 0; x < fdb->fdb_entry_off; x++) {
489 fbe = &fdb->fdb_entry[x];
490
491 error = filldir_reg_func(&fdr,
492 fbe->fbe_name, fbe->fbe_length,
493 fbe->fbe_offset,
494 &fbe->fbe_inum, fbe->fbe_type);
495 if (error) {
496 file->f_pos = fbe->fbe_offset;
497 error = 0;
498 goto out;
499 }
500 }
501
502 file->f_pos = offset;
503
504 out:
505 kfree(fdb);
506
507 return error;
508}
509
510/**
511 * gfs2_readdir - Read directory entries from a directory
512 * @file: The directory to read from
513 * @dirent: Buffer for dirents
514 * @filldir: Function used to do the copying
515 *
516 * Returns: errno
517 */
518
519static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
520{
521 int error;
522
523 if (strcmp(current->comm, "nfsd") != 0)
524 error = readdir_reg(file, dirent, filldir);
525 else
526 error = readdir_bad(file, dirent, filldir);
527
528 return error;
529}
530
531static int gfs2_ioctl_flags(struct gfs2_inode *ip, unsigned int cmd,
532 unsigned long arg)
533{
534 unsigned int lmode = (cmd == GFS2_IOCTL_SETFLAGS) ?
535 LM_ST_EXCLUSIVE : LM_ST_SHARED;
536 struct buffer_head *dibh;
537 struct gfs2_holder i_gh;
538 int error;
539 __u32 flags = 0, change;
540
541 if (cmd == GFS2_IOCTL_SETFLAGS) {
542 error = get_user(flags, (__u32 __user *)arg);
543 if (error)
544 return -EFAULT;
545 }
546
547 error = gfs2_glock_nq_init(ip->i_gl, lmode, 0, &i_gh);
548 if (error)
549 return error;
550
551 if (cmd == GFS2_IOCTL_SETFLAGS) {
552 change = flags ^ ip->i_di.di_flags;
553 error = -EPERM;
554 if (change & (GFS2_DIF_IMMUTABLE|GFS2_DIF_APPENDONLY)) {
555 if (!capable(CAP_LINUX_IMMUTABLE))
556 goto out;
557 }
558 error = -EINVAL;
559 if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
560 if (!S_ISREG(ip->i_di.di_mode))
561 goto out;
562 }
563 if (flags &
564 (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
565 if (!S_ISDIR(ip->i_di.di_mode))
566 goto out;
567 }
568
569 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
570 if (error)
571 goto out;
572
573 error = gfs2_meta_inode_buffer(ip, &dibh);
574 if (error)
575 goto out_trans_end;
576
577 ip->i_di.di_flags = flags;
578
579 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
580 gfs2_dinode_out(&ip->i_di, dibh->b_data);
581
582 brelse(dibh);
583
584out_trans_end:
585 gfs2_trans_end(ip->i_sbd);
586 } else {
587 flags = ip->i_di.di_flags;
588 }
589out:
590 gfs2_glock_dq_uninit(&i_gh);
591 if (cmd == GFS2_IOCTL_GETFLAGS) {
592 if (put_user(flags, (__u32 __user *)arg))
593 return -EFAULT;
594 }
595 return error;
596}
597
598/**
599 * gfs2_ioctl - do an ioctl on a file
600 * @inode: the inode
601 * @file: the file pointer
602 * @cmd: the ioctl command
603 * @arg: the argument
604 *
605 * Returns: errno
606 */
607
608static int gfs2_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
609 unsigned long arg)
610{
611 struct gfs2_inode *ip = get_v2ip(inode);
612
613 switch (cmd) {
614 case GFS2_IOCTL_SETFLAGS:
615 case GFS2_IOCTL_GETFLAGS:
616 return gfs2_ioctl_flags(ip, cmd, arg);
617
618 default:
619 return -ENOTTY;
620 }
621}
622
623/**
624 * gfs2_mmap -
625 * @file: The file to map
626 * @vma: The VMA which described the mapping
627 *
628 * Returns: 0 or error code
629 */
630
631static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
632{
633 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
634 struct gfs2_holder i_gh;
635 int error;
636
637 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
638 error = gfs2_glock_nq_atime(&i_gh);
639 if (error) {
640 gfs2_holder_uninit(&i_gh);
641 return error;
642 }
643
644 /* This is VM_MAYWRITE instead of VM_WRITE because a call
645 to mprotect() can turn on VM_WRITE later. */
646
647 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
648 (VM_MAYSHARE | VM_MAYWRITE))
649 vma->vm_ops = &gfs2_vm_ops_sharewrite;
650 else
651 vma->vm_ops = &gfs2_vm_ops_private;
652
653 gfs2_glock_dq_uninit(&i_gh);
654
655 return error;
656}
657
658/**
659 * gfs2_open - open a file
660 * @inode: the inode to open
661 * @file: the struct file for this opening
662 *
663 * Returns: errno
664 */
665
666static int gfs2_open(struct inode *inode, struct file *file)
667{
668 struct gfs2_inode *ip = get_v2ip(inode);
669 struct gfs2_holder i_gh;
670 struct gfs2_file *fp;
671 int error;
672
673 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
674 if (!fp)
675 return -ENOMEM;
676
677 mutex_init(&fp->f_fl_mutex);
678
679 fp->f_inode = ip;
680 fp->f_vfile = file;
681
682 gfs2_assert_warn(ip->i_sbd, !get_v2fp(file));
683 set_v2fp(file, fp);
684
685 if (S_ISREG(ip->i_di.di_mode)) {
686 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
687 &i_gh);
688 if (error)
689 goto fail;
690
691 if (!(file->f_flags & O_LARGEFILE) &&
692 ip->i_di.di_size > MAX_NON_LFS) {
693 error = -EFBIG;
694 goto fail_gunlock;
695 }
696
697 /* Listen to the Direct I/O flag */
698
699 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
700 file->f_flags |= O_DIRECT;
701
702 gfs2_glock_dq_uninit(&i_gh);
703 }
704
705 return 0;
706
707 fail_gunlock:
708 gfs2_glock_dq_uninit(&i_gh);
709
710 fail:
711 set_v2fp(file, NULL);
712 kfree(fp);
713
714 return error;
715}
716
717/**
718 * gfs2_close - called to close a struct file
719 * @inode: the inode the struct file belongs to
720 * @file: the struct file being closed
721 *
722 * Returns: errno
723 */
724
725static int gfs2_close(struct inode *inode, struct file *file)
726{
727 struct gfs2_sbd *sdp = get_v2sdp(inode->i_sb);
728 struct gfs2_file *fp;
729
730 fp = get_v2fp(file);
731 set_v2fp(file, NULL);
732
733 if (gfs2_assert_warn(sdp, fp))
734 return -EIO;
735
736 kfree(fp);
737
738 return 0;
739}
740
741/**
742 * gfs2_fsync - sync the dirty data for a file (across the cluster)
743 * @file: the file that points to the dentry (we ignore this)
744 * @dentry: the dentry that points to the inode to sync
745 *
746 * Returns: errno
747 */
748
749static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
750{
751 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
752
753 gfs2_log_flush_glock(ip->i_gl);
754
755 return 0;
756}
757
758/**
759 * gfs2_lock - acquire/release a posix lock on a file
760 * @file: the file pointer
761 * @cmd: either modify or retrieve lock state, possibly wait
762 * @fl: type and range of lock
763 *
764 * Returns: errno
765 */
766
767static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
768{
769 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
770 struct gfs2_sbd *sdp = ip->i_sbd;
771 struct lm_lockname name =
772 { .ln_number = ip->i_num.no_addr,
773 .ln_type = LM_TYPE_PLOCK };
774
775 if (!(fl->fl_flags & FL_POSIX))
776 return -ENOLCK;
777 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
778 return -ENOLCK;
779
780 if (sdp->sd_args.ar_localflocks) {
781 if (IS_GETLK(cmd)) {
782 struct file_lock *tmp;
783 lock_kernel();
784 tmp = posix_test_lock(file, fl);
785 fl->fl_type = F_UNLCK;
786 if (tmp)
787 memcpy(fl, tmp, sizeof(struct file_lock));
788 unlock_kernel();
789 return 0;
790 } else {
791 int error;
792 lock_kernel();
793 error = posix_lock_file_wait(file, fl);
794 unlock_kernel();
795 return error;
796 }
797 }
798
799 if (IS_GETLK(cmd))
800 return gfs2_lm_plock_get(sdp, &name, file, fl);
801 else if (fl->fl_type == F_UNLCK)
802 return gfs2_lm_punlock(sdp, &name, file, fl);
803 else
804 return gfs2_lm_plock(sdp, &name, file, cmd, fl);
805}
806
807/**
808 * gfs2_sendfile - Send bytes to a file or socket
809 * @in_file: The file to read from
810 * @out_file: The file to write to
811 * @count: The amount of data
812 * @offset: The beginning file offset
813 *
814 * Outputs: offset - updated according to number of bytes read
815 *
816 * Returns: The number of bytes sent, errno on failure
817 */
818
819static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
820 read_actor_t actor, void *target)
821{
822 return generic_file_sendfile(in_file, offset, count, actor, target);
823}
824
825static int do_flock(struct file *file, int cmd, struct file_lock *fl)
826{
827 struct gfs2_file *fp = get_v2fp(file);
828 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
829 struct gfs2_inode *ip = fp->f_inode;
830 struct gfs2_glock *gl;
831 unsigned int state;
832 int flags;
833 int error = 0;
834
835 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
836 flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
837
838 mutex_lock(&fp->f_fl_mutex);
839
840 gl = fl_gh->gh_gl;
841 if (gl) {
842 if (fl_gh->gh_state == state)
843 goto out;
844 gfs2_glock_hold(gl);
845 flock_lock_file_wait(file,
846 &(struct file_lock){.fl_type = F_UNLCK});
847 gfs2_glock_dq_uninit(fl_gh);
848 } else {
849 error = gfs2_glock_get(ip->i_sbd,
850 ip->i_num.no_addr, &gfs2_flock_glops,
851 CREATE, &gl);
852 if (error)
853 goto out;
854 }
855
856 gfs2_holder_init(gl, state, flags, fl_gh);
857 gfs2_glock_put(gl);
858
859 error = gfs2_glock_nq(fl_gh);
860 if (error) {
861 gfs2_holder_uninit(fl_gh);
862 if (error == GLR_TRYFAILED)
863 error = -EAGAIN;
864 } else {
865 error = flock_lock_file_wait(file, fl);
866 gfs2_assert_warn(ip->i_sbd, !error);
867 }
868
869 out:
870 mutex_unlock(&fp->f_fl_mutex);
871
872 return error;
873}
874
875static void do_unflock(struct file *file, struct file_lock *fl)
876{
877 struct gfs2_file *fp = get_v2fp(file);
878 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
879
880 mutex_lock(&fp->f_fl_mutex);
881 flock_lock_file_wait(file, fl);
882 if (fl_gh->gh_gl)
883 gfs2_glock_dq_uninit(fl_gh);
884 mutex_unlock(&fp->f_fl_mutex);
885}
886
887/**
888 * gfs2_flock - acquire/release a flock lock on a file
889 * @file: the file pointer
890 * @cmd: either modify or retrieve lock state, possibly wait
891 * @fl: type and range of lock
892 *
893 * Returns: errno
894 */
895
896static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
897{
898 struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
899 struct gfs2_sbd *sdp = ip->i_sbd;
900
901 if (!(fl->fl_flags & FL_FLOCK))
902 return -ENOLCK;
903 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
904 return -ENOLCK;
905
906 if (sdp->sd_args.ar_localflocks)
907 return flock_lock_file_wait(file, fl);
908
909 if (fl->fl_type == F_UNLCK) {
910 do_unflock(file, fl);
911 return 0;
912 } else
913 return do_flock(file, cmd, fl);
914}
915
916struct file_operations gfs2_file_fops = {
917 .llseek = gfs2_llseek,
918 .read = gfs2_read,
919 .readv = gfs2_file_readv,
920 .aio_read = gfs2_file_aio_read,
921 .write = generic_file_write,
922 .writev = generic_file_writev,
923 .aio_write = generic_file_aio_write,
924 .ioctl = gfs2_ioctl,
925 .mmap = gfs2_mmap,
926 .open = gfs2_open,
927 .release = gfs2_close,
928 .fsync = gfs2_fsync,
929 .lock = gfs2_lock,
930 .sendfile = gfs2_sendfile,
931 .flock = gfs2_flock,
932};
933
934struct file_operations gfs2_dir_fops = {
935 .readdir = gfs2_readdir,
936 .ioctl = gfs2_ioctl,
937 .open = gfs2_open,
938 .release = gfs2_close,
939 .fsync = gfs2_fsync,
940 .lock = gfs2_lock,
941 .flock = gfs2_flock,
942};
943
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
new file mode 100644
index 000000000000..192577b411f0
--- /dev/null
+++ b/fs/gfs2/ops_file.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12extern struct file gfs2_internal_file_sentinal;
13extern int gfs2_internal_read(struct gfs2_inode *ip,
14 struct file_ra_state *ra_state,
15 char *buf, loff_t *pos, unsigned size);
16
17extern struct file_operations gfs2_file_fops;
18extern struct file_operations gfs2_dir_fops;
19
20#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
new file mode 100644
index 000000000000..535f020f1e0c
--- /dev/null
+++ b/fs/gfs2/ops_fstype.c
@@ -0,0 +1,884 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/vmalloc.h>
16#include <linux/blkdev.h>
17#include <linux/kthread.h>
18#include <asm/semaphore.h>
19
20#include "gfs2.h"
21#include "daemon.h"
22#include "glock.h"
23#include "glops.h"
24#include "inode.h"
25#include "lm.h"
26#include "mount.h"
27#include "ops_export.h"
28#include "ops_fstype.h"
29#include "ops_super.h"
30#include "recovery.h"
31#include "rgrp.h"
32#include "super.h"
33#include "unlinked.h"
34#include "sys.h"
35
36#define DO 0
37#define UNDO 1
38
39static struct gfs2_sbd *init_sbd(struct super_block *sb)
40{
41 struct gfs2_sbd *sdp;
42 unsigned int x;
43
44 sdp = vmalloc(sizeof(struct gfs2_sbd));
45 if (!sdp)
46 return NULL;
47
48 memset(sdp, 0, sizeof(struct gfs2_sbd));
49
50 set_v2sdp(sb, sdp);
51 sdp->sd_vfs = sb;
52
53 gfs2_tune_init(&sdp->sd_tune);
54
55 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
56 sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
57 INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
58 }
59 INIT_LIST_HEAD(&sdp->sd_reclaim_list);
60 spin_lock_init(&sdp->sd_reclaim_lock);
61 init_waitqueue_head(&sdp->sd_reclaim_wq);
62 mutex_init(&sdp->sd_invalidate_inodes_mutex);
63
64 mutex_init(&sdp->sd_inum_mutex);
65 spin_lock_init(&sdp->sd_statfs_spin);
66 mutex_init(&sdp->sd_statfs_mutex);
67
68 spin_lock_init(&sdp->sd_rindex_spin);
69 mutex_init(&sdp->sd_rindex_mutex);
70 INIT_LIST_HEAD(&sdp->sd_rindex_list);
71 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
72 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
73
74 INIT_LIST_HEAD(&sdp->sd_jindex_list);
75 spin_lock_init(&sdp->sd_jindex_spin);
76 mutex_init(&sdp->sd_jindex_mutex);
77
78 INIT_LIST_HEAD(&sdp->sd_unlinked_list);
79 spin_lock_init(&sdp->sd_unlinked_spin);
80 mutex_init(&sdp->sd_unlinked_mutex);
81
82 INIT_LIST_HEAD(&sdp->sd_quota_list);
83 spin_lock_init(&sdp->sd_quota_spin);
84 mutex_init(&sdp->sd_quota_mutex);
85
86 spin_lock_init(&sdp->sd_log_lock);
87 init_waitqueue_head(&sdp->sd_log_trans_wq);
88 init_waitqueue_head(&sdp->sd_log_flush_wq);
89
90 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
91 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
92 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
93 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
94 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
95
96 INIT_LIST_HEAD(&sdp->sd_log_blks_list);
97 init_waitqueue_head(&sdp->sd_log_blks_wait);
98
99 INIT_LIST_HEAD(&sdp->sd_ail1_list);
100 INIT_LIST_HEAD(&sdp->sd_ail2_list);
101
102 mutex_init(&sdp->sd_log_flush_lock);
103 INIT_LIST_HEAD(&sdp->sd_log_flush_list);
104
105 INIT_LIST_HEAD(&sdp->sd_revoke_list);
106
107 mutex_init(&sdp->sd_freeze_lock);
108
109 return sdp;
110}
111
112static void init_vfs(struct gfs2_sbd *sdp)
113{
114 struct super_block *sb = sdp->sd_vfs;
115
116 sb->s_magic = GFS2_MAGIC;
117 sb->s_op = &gfs2_super_ops;
118 sb->s_export_op = &gfs2_export_ops;
119 sb->s_maxbytes = MAX_LFS_FILESIZE;
120
121 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
122 set_bit(SDF_NOATIME, &sdp->sd_flags);
123
124 /* Don't let the VFS update atimes. GFS2 handles this itself. */
125 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
126
127 /* Set up the buffer cache and fill in some fake block size values
128 to allow us to read-in the on-disk superblock. */
129 sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
130 sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
131 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
132 GFS2_BASIC_BLOCK_SHIFT;
133 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
134}
135
136static int init_names(struct gfs2_sbd *sdp, int silent)
137{
138 struct gfs2_sb *sb = NULL;
139 char *proto, *table;
140 int error = 0;
141
142 proto = sdp->sd_args.ar_lockproto;
143 table = sdp->sd_args.ar_locktable;
144
145 /* Try to autodetect */
146
147 if (!proto[0] || !table[0]) {
148 struct buffer_head *bh;
149 bh = sb_getblk(sdp->sd_vfs,
150 GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
151 lock_buffer(bh);
152 clear_buffer_uptodate(bh);
153 clear_buffer_dirty(bh);
154 unlock_buffer(bh);
155 ll_rw_block(READ, 1, &bh);
156 wait_on_buffer(bh);
157
158 if (!buffer_uptodate(bh)) {
159 brelse(bh);
160 return -EIO;
161 }
162
163 sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
164 if (!sb) {
165 brelse(bh);
166 return -ENOMEM;
167 }
168 gfs2_sb_in(sb, bh->b_data);
169 brelse(bh);
170
171 error = gfs2_check_sb(sdp, sb, silent);
172 if (error)
173 goto out;
174
175 if (!proto[0])
176 proto = sb->sb_lockproto;
177 if (!table[0])
178 table = sb->sb_locktable;
179 }
180
181 if (!table[0])
182 table = sdp->sd_vfs->s_id;
183
184 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
185 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
186
187 out:
188 kfree(sb);
189
190 return error;
191}
192
193static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
194 int undo)
195{
196 struct task_struct *p;
197 int error = 0;
198
199 if (undo)
200 goto fail_trans;
201
202 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
203 error = IS_ERR(p);
204 if (error) {
205 fs_err(sdp, "can't start scand thread: %d\n", error);
206 return error;
207 }
208 sdp->sd_scand_process = p;
209
210 for (sdp->sd_glockd_num = 0;
211 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
212 sdp->sd_glockd_num++) {
213 p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
214 error = IS_ERR(p);
215 if (error) {
216 fs_err(sdp, "can't start glockd thread: %d\n", error);
217 goto fail;
218 }
219 sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
220 }
221
222 error = gfs2_glock_nq_num(sdp,
223 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
224 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
225 mount_gh);
226 if (error) {
227 fs_err(sdp, "can't acquire mount glock: %d\n", error);
228 goto fail;
229 }
230
231 error = gfs2_glock_nq_num(sdp,
232 GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
233 LM_ST_SHARED,
234 LM_FLAG_NOEXP | GL_EXACT | GL_NEVER_RECURSE,
235 &sdp->sd_live_gh);
236 if (error) {
237 fs_err(sdp, "can't acquire live glock: %d\n", error);
238 goto fail_mount;
239 }
240
241 error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
242 CREATE, &sdp->sd_rename_gl);
243 if (error) {
244 fs_err(sdp, "can't create rename glock: %d\n", error);
245 goto fail_live;
246 }
247
248 error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
249 CREATE, &sdp->sd_trans_gl);
250 if (error) {
251 fs_err(sdp, "can't create transaction glock: %d\n", error);
252 goto fail_rename;
253 }
254 set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
255
256 return 0;
257
258 fail_trans:
259 gfs2_glock_put(sdp->sd_trans_gl);
260
261 fail_rename:
262 gfs2_glock_put(sdp->sd_rename_gl);
263
264 fail_live:
265 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
266
267 fail_mount:
268 gfs2_glock_dq_uninit(mount_gh);
269
270 fail:
271 while (sdp->sd_glockd_num--)
272 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
273
274 kthread_stop(sdp->sd_scand_process);
275
276 return error;
277}
278
279int gfs2_lookup_root(struct gfs2_sbd *sdp)
280{
281 int error;
282 struct gfs2_glock *gl;
283 struct gfs2_inode *ip;
284
285 error = gfs2_glock_get(sdp, sdp->sd_sb.sb_root_dir.no_addr,
286 &gfs2_inode_glops, CREATE, &gl);
287 if (!error) {
288 error = gfs2_inode_get(gl, &sdp->sd_sb.sb_root_dir,
289 CREATE, &ip);
290 if (!error) {
291 if (!error)
292 gfs2_inode_min_init(ip, DT_DIR);
293 sdp->sd_root_dir = gfs2_ip2v(ip);
294 gfs2_inode_put(ip);
295 }
296 gfs2_glock_put(gl);
297 }
298
299 return error;
300}
301
302static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
303{
304 struct super_block *sb = sdp->sd_vfs;
305 struct gfs2_holder sb_gh;
306 struct inode *inode;
307 int error = 0;
308
309 if (undo) {
310 iput(sdp->sd_master_dir);
311 return 0;
312 }
313
314 error = gfs2_glock_nq_num(sdp,
315 GFS2_SB_LOCK, &gfs2_meta_glops,
316 LM_ST_SHARED, 0, &sb_gh);
317 if (error) {
318 fs_err(sdp, "can't acquire superblock glock: %d\n", error);
319 return error;
320 }
321
322 error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
323 if (error) {
324 fs_err(sdp, "can't read superblock: %d\n", error);
325 goto out;
326 }
327
328 /* Set up the buffer cache and SB for real */
329 error = -EINVAL;
330 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
331 fs_err(sdp, "FS block size (%u) is too small for device "
332 "block size (%u)\n",
333 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
334 goto out;
335 }
336 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
337 fs_err(sdp, "FS block size (%u) is too big for machine "
338 "page size (%u)\n",
339 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
340 goto out;
341 }
342
343 /* Get rid of buffers from the original block size */
344 sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
345 sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;
346
347 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
348
349 /* Get the root inode */
350 error = gfs2_lookup_root(sdp);
351 if (error) {
352 fs_err(sdp, "can't read in root inode: %d\n", error);
353 goto out;
354 }
355
356 /* Get the root inode/dentry */
357 inode = sdp->sd_root_dir;
358 if (!inode) {
359 fs_err(sdp, "can't get root inode\n");
360 error = -ENOMEM;
361 goto out_rooti;
362 }
363
364 igrab(inode);
365 sb->s_root = d_alloc_root(inode);
366 if (!sb->s_root) {
367 fs_err(sdp, "can't get root dentry\n");
368 error = -ENOMEM;
369 goto out_rooti;
370 }
371
372out:
373 gfs2_glock_dq_uninit(&sb_gh);
374
375 return error;
376out_rooti:
377 iput(sdp->sd_root_dir);
378 goto out;
379}
380
381static int init_journal(struct gfs2_sbd *sdp, int undo)
382{
383 struct gfs2_holder ji_gh;
384 struct task_struct *p;
385 int jindex = 1;
386 int error = 0;
387
388 if (undo) {
389 jindex = 0;
390 goto fail_recoverd;
391 }
392
393 error = gfs2_lookup_simple(sdp->sd_master_dir, "jindex",
394 &sdp->sd_jindex);
395 if (error) {
396 fs_err(sdp, "can't lookup journal index: %d\n", error);
397 return error;
398 }
399 set_bit(GLF_STICKY, &get_v2ip(sdp->sd_jindex)->i_gl->gl_flags);
400
401 /* Load in the journal index special file */
402
403 error = gfs2_jindex_hold(sdp, &ji_gh);
404 if (error) {
405 fs_err(sdp, "can't read journal index: %d\n", error);
406 goto fail;
407 }
408
409 error = -EINVAL;
410 if (!gfs2_jindex_size(sdp)) {
411 fs_err(sdp, "no journals!\n");
412 goto fail_jindex;
413 }
414
415 if (sdp->sd_args.ar_spectator) {
416 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
417 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
418 } else {
419 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
420 fs_err(sdp, "can't mount journal #%u\n",
421 sdp->sd_lockstruct.ls_jid);
422 fs_err(sdp, "there are only %u journals (0 - %u)\n",
423 gfs2_jindex_size(sdp),
424 gfs2_jindex_size(sdp) - 1);
425 goto fail_jindex;
426 }
427 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
428
429 error = gfs2_glock_nq_num(sdp,
430 sdp->sd_lockstruct.ls_jid,
431 &gfs2_journal_glops,
432 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
433 &sdp->sd_journal_gh);
434 if (error) {
435 fs_err(sdp, "can't acquire journal glock: %d\n", error);
436 goto fail_jindex;
437 }
438
439 error = gfs2_glock_nq_init(
440 get_v2ip(sdp->sd_jdesc->jd_inode)->i_gl,
441 LM_ST_SHARED,
442 LM_FLAG_NOEXP | GL_EXACT,
443 &sdp->sd_jinode_gh);
444 if (error) {
445 fs_err(sdp, "can't acquire journal inode glock: %d\n",
446 error);
447 goto fail_journal_gh;
448 }
449
450 error = gfs2_jdesc_check(sdp->sd_jdesc);
451 if (error) {
452 fs_err(sdp, "my journal (%u) is bad: %d\n",
453 sdp->sd_jdesc->jd_jid, error);
454 goto fail_jinode_gh;
455 }
456 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
457 }
458
459 if (sdp->sd_lockstruct.ls_first) {
460 unsigned int x;
461 for (x = 0; x < sdp->sd_journals; x++) {
462 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
463 WAIT);
464 if (error) {
465 fs_err(sdp, "error recovering journal %u: %d\n",
466 x, error);
467 goto fail_jinode_gh;
468 }
469 }
470
471 gfs2_lm_others_may_mount(sdp);
472 } else if (!sdp->sd_args.ar_spectator) {
473 error = gfs2_recover_journal(sdp->sd_jdesc, WAIT);
474 if (error) {
475 fs_err(sdp, "error recovering my journal: %d\n", error);
476 goto fail_jinode_gh;
477 }
478 }
479
480 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
481 gfs2_glock_dq_uninit(&ji_gh);
482 jindex = 0;
483
484 /* Disown my Journal glock */
485
486 sdp->sd_journal_gh.gh_owner = NULL;
487 sdp->sd_jinode_gh.gh_owner = NULL;
488
489 p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
490 error = IS_ERR(p);
491 if (error) {
492 fs_err(sdp, "can't start recoverd thread: %d\n", error);
493 goto fail_jinode_gh;
494 }
495 sdp->sd_recoverd_process = p;
496
497 return 0;
498
499 fail_recoverd:
500 kthread_stop(sdp->sd_recoverd_process);
501
502 fail_jinode_gh:
503 if (!sdp->sd_args.ar_spectator)
504 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
505
506 fail_journal_gh:
507 if (!sdp->sd_args.ar_spectator)
508 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
509
510 fail_jindex:
511 gfs2_jindex_free(sdp);
512 if (jindex)
513 gfs2_glock_dq_uninit(&ji_gh);
514
515 fail:
516 iput(sdp->sd_jindex);
517
518 return error;
519}
520
521
522static int init_inodes(struct gfs2_sbd *sdp, int undo)
523{
524 int error = 0;
525
526 if (undo)
527 goto fail_qinode;
528
529 error = gfs2_lookup_master_dir(sdp);
530 if (error) {
531 fs_err(sdp, "can't read in master directory: %d\n", error);
532 goto fail;
533 }
534
535 error = init_journal(sdp, undo);
536 if (error)
537 goto fail_master;
538
539 /* Read in the master inode number inode */
540 error = gfs2_lookup_simple(sdp->sd_master_dir, "inum",
541 &sdp->sd_inum_inode);
542 if (error) {
543 fs_err(sdp, "can't read in inum inode: %d\n", error);
544 goto fail_journal;
545 }
546
547
548 /* Read in the master statfs inode */
549 error = gfs2_lookup_simple(sdp->sd_master_dir, "statfs",
550 &sdp->sd_statfs_inode);
551 if (error) {
552 fs_err(sdp, "can't read in statfs inode: %d\n", error);
553 goto fail_inum;
554 }
555
556 /* Read in the resource index inode */
557 error = gfs2_lookup_simple(sdp->sd_master_dir, "rindex",
558 &sdp->sd_rindex);
559 if (error) {
560 fs_err(sdp, "can't get resource index inode: %d\n", error);
561 goto fail_statfs;
562 }
563 set_bit(GLF_STICKY, &get_v2ip(sdp->sd_rindex)->i_gl->gl_flags);
564 sdp->sd_rindex_vn = get_v2ip(sdp->sd_rindex)->i_gl->gl_vn - 1;
565
566 /* Read in the quota inode */
567 error = gfs2_lookup_simple(sdp->sd_master_dir, "quota",
568 &sdp->sd_quota_inode);
569 if (error) {
570 fs_err(sdp, "can't get quota file inode: %d\n", error);
571 goto fail_rindex;
572 }
573 return 0;
574
575fail_qinode:
576 iput(sdp->sd_quota_inode);
577
578fail_rindex:
579 gfs2_clear_rgrpd(sdp);
580 iput(sdp->sd_rindex);
581
582fail_statfs:
583 iput(sdp->sd_statfs_inode);
584
585fail_inum:
586 iput(sdp->sd_inum_inode);
587fail_journal:
588 init_journal(sdp, UNDO);
589fail_master:
590 iput(sdp->sd_master_dir);
591fail:
592 return error;
593}
594
595static int init_per_node(struct gfs2_sbd *sdp, int undo)
596{
597 struct inode *pn = NULL;
598 char buf[30];
599 int error = 0;
600
601 if (sdp->sd_args.ar_spectator)
602 return 0;
603
604 if (undo)
605 goto fail_qc_gh;
606
607 error = gfs2_lookup_simple(sdp->sd_master_dir, "per_node", &pn);
608 if (error) {
609 fs_err(sdp, "can't find per_node directory: %d\n", error);
610 return error;
611 }
612
613 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
614 error = gfs2_lookup_simple(pn, buf, &sdp->sd_ir_inode);
615 if (error) {
616 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
617 goto fail;
618 }
619
620 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
621 error = gfs2_lookup_simple(pn, buf, &sdp->sd_sc_inode);
622 if (error) {
623 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
624 goto fail_ir_i;
625 }
626
627 sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid);
628 error = gfs2_lookup_simple(pn, buf, &sdp->sd_ut_inode);
629 if (error) {
630 fs_err(sdp, "can't find local \"ut\" file: %d\n", error);
631 goto fail_sc_i;
632 }
633
634 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
635 error = gfs2_lookup_simple(pn, buf, &sdp->sd_qc_inode);
636 if (error) {
637 fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
638 goto fail_ut_i;
639 }
640
641 iput(pn);
642 pn = NULL;
643
644 error = gfs2_glock_nq_init(get_v2ip(sdp->sd_ir_inode)->i_gl,
645 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
646 &sdp->sd_ir_gh);
647 if (error) {
648 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
649 goto fail_qc_i;
650 }
651
652 error = gfs2_glock_nq_init(get_v2ip(sdp->sd_sc_inode)->i_gl,
653 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
654 &sdp->sd_sc_gh);
655 if (error) {
656 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
657 goto fail_ir_gh;
658 }
659
660 error = gfs2_glock_nq_init(get_v2ip(sdp->sd_ut_inode)->i_gl,
661 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
662 &sdp->sd_ut_gh);
663 if (error) {
664 fs_err(sdp, "can't lock local \"ut\" file: %d\n", error);
665 goto fail_sc_gh;
666 }
667
668 error = gfs2_glock_nq_init(get_v2ip(sdp->sd_qc_inode)->i_gl,
669 LM_ST_EXCLUSIVE, GL_NEVER_RECURSE,
670 &sdp->sd_qc_gh);
671 if (error) {
672 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
673 goto fail_ut_gh;
674 }
675
676 return 0;
677
678 fail_qc_gh:
679 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
680
681 fail_ut_gh:
682 gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
683
684 fail_sc_gh:
685 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
686
687 fail_ir_gh:
688 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
689
690 fail_qc_i:
691 iput(sdp->sd_qc_inode);
692
693 fail_ut_i:
694 iput(sdp->sd_ut_inode);
695
696 fail_sc_i:
697 iput(sdp->sd_sc_inode);
698
699 fail_ir_i:
700 iput(sdp->sd_ir_inode);
701
702 fail:
703 if (pn)
704 iput(pn);
705 return error;
706}
707
708static int init_threads(struct gfs2_sbd *sdp, int undo)
709{
710 struct task_struct *p;
711 int error = 0;
712
713 if (undo)
714 goto fail_inoded;
715
716 sdp->sd_log_flush_time = jiffies;
717 sdp->sd_jindex_refresh_time = jiffies;
718
719 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
720 error = IS_ERR(p);
721 if (error) {
722 fs_err(sdp, "can't start logd thread: %d\n", error);
723 return error;
724 }
725 sdp->sd_logd_process = p;
726
727 sdp->sd_statfs_sync_time = jiffies;
728 sdp->sd_quota_sync_time = jiffies;
729
730 p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
731 error = IS_ERR(p);
732 if (error) {
733 fs_err(sdp, "can't start quotad thread: %d\n", error);
734 goto fail;
735 }
736 sdp->sd_quotad_process = p;
737
738 p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded");
739 error = IS_ERR(p);
740 if (error) {
741 fs_err(sdp, "can't start inoded thread: %d\n", error);
742 goto fail_quotad;
743 }
744 sdp->sd_inoded_process = p;
745
746 return 0;
747
748 fail_inoded:
749 kthread_stop(sdp->sd_inoded_process);
750
751 fail_quotad:
752 kthread_stop(sdp->sd_quotad_process);
753
754 fail:
755 kthread_stop(sdp->sd_logd_process);
756
757 return error;
758}
759
760/**
761 * fill_super - Read in superblock
762 * @sb: The VFS superblock
763 * @data: Mount options
764 * @silent: Don't complain if it's not a GFS2 filesystem
765 *
766 * Returns: errno
767 */
768
769static int fill_super(struct super_block *sb, void *data, int silent)
770{
771 struct gfs2_sbd *sdp;
772 struct gfs2_holder mount_gh;
773 int error;
774
775 sdp = init_sbd(sb);
776 if (!sdp) {
777 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
778 return -ENOMEM;
779 }
780
781 error = gfs2_mount_args(sdp, (char *)data, 0);
782 if (error) {
783 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
784 goto fail;
785 }
786
787 init_vfs(sdp);
788
789 error = init_names(sdp, silent);
790 if (error)
791 goto fail;
792
793 error = gfs2_sys_fs_add(sdp);
794 if (error)
795 goto fail;
796
797 error = gfs2_lm_mount(sdp, silent);
798 if (error)
799 goto fail_sys;
800
801 error = init_locking(sdp, &mount_gh, DO);
802 if (error)
803 goto fail_lm;
804
805 error = init_sb(sdp, silent, DO);
806 if (error)
807 goto fail_locking;
808
809 error = init_inodes(sdp, DO);
810 if (error)
811 goto fail_sb;
812
813 error = init_per_node(sdp, DO);
814 if (error)
815 goto fail_inodes;
816
817 error = gfs2_statfs_init(sdp);
818 if (error) {
819 fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
820 goto fail_per_node;
821 }
822
823 error = init_threads(sdp, DO);
824 if (error)
825 goto fail_per_node;
826
827 if (!(sb->s_flags & MS_RDONLY)) {
828 error = gfs2_make_fs_rw(sdp);
829 if (error) {
830 fs_err(sdp, "can't make FS RW: %d\n", error);
831 goto fail_threads;
832 }
833 }
834
835 gfs2_glock_dq_uninit(&mount_gh);
836
837 return 0;
838
839 fail_threads:
840 init_threads(sdp, UNDO);
841
842 fail_per_node:
843 init_per_node(sdp, UNDO);
844
845 fail_inodes:
846 init_inodes(sdp, UNDO);
847
848 fail_sb:
849 init_sb(sdp, 0, UNDO);
850
851 fail_locking:
852 init_locking(sdp, &mount_gh, UNDO);
853
854 fail_lm:
855 gfs2_gl_hash_clear(sdp, WAIT);
856 gfs2_lm_unmount(sdp);
857 while (invalidate_inodes(sb))
858 yield();
859
860 fail_sys:
861 gfs2_sys_fs_del(sdp);
862
863 fail:
864 vfree(sdp);
865 set_v2sdp(sb, NULL);
866
867 return error;
868}
869
870static struct super_block *gfs2_get_sb(struct file_system_type *fs_type,
871 int flags, const char *dev_name,
872 void *data)
873{
874 return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
875}
876
877struct file_system_type gfs2_fs_type = {
878 .name = "gfs2",
879 .fs_flags = FS_REQUIRES_DEV,
880 .get_sb = gfs2_get_sb,
881 .kill_sb = kill_block_super,
882 .owner = THIS_MODULE,
883};
884
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
new file mode 100644
index 000000000000..7008364e76ea
--- /dev/null
+++ b/fs/gfs2/ops_fstype.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FSTYPE_DOT_H__
11#define __OPS_FSTYPE_DOT_H__
12
13extern struct file_system_type gfs2_fs_type;
14
15#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
new file mode 100644
index 000000000000..9971a30eb78e
--- /dev/null
+++ b/fs/gfs2/ops_inode.c
@@ -0,0 +1,1198 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/namei.h>
16#include <linux/utsname.h>
17#include <linux/mm.h>
18#include <linux/xattr.h>
19#include <linux/posix_acl.h>
20#include <asm/semaphore.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "acl.h"
25#include "bmap.h"
26#include "dir.h"
27#include "eaops.h"
28#include "eattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "ops_dentry.h"
33#include "ops_inode.h"
34#include "page.h"
35#include "quota.h"
36#include "rgrp.h"
37#include "trans.h"
38#include "unlinked.h"
39
40/**
41 * gfs2_create - Create a file
42 * @dir: The directory in which to create the file
43 * @dentry: The dentry of the new file
44 * @mode: The mode of the new file
45 *
46 * Returns: errno
47 */
48
49static int gfs2_create(struct inode *dir, struct dentry *dentry,
50 int mode, struct nameidata *nd)
51{
52 struct gfs2_inode *dip = get_v2ip(dir);
53 struct gfs2_sbd *sdp = dip->i_sbd;
54 struct gfs2_holder ghs[2];
55 struct inode *inode;
56 int new = 1;
57 int error;
58
59 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
60
61 for (;;) {
62 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
63 if (!IS_ERR(inode)) {
64 gfs2_trans_end(sdp);
65 if (dip->i_alloc.al_rgd)
66 gfs2_inplace_release(dip);
67 gfs2_quota_unlock(dip);
68 gfs2_alloc_put(dip);
69 gfs2_glock_dq_uninit_m(2, ghs);
70 break;
71 } else if (PTR_ERR(inode) != -EEXIST ||
72 (nd->intent.open.flags & O_EXCL)) {
73 gfs2_holder_uninit(ghs);
74 return PTR_ERR(inode);
75 }
76
77 error = gfs2_lookupi(dir, &dentry->d_name, 0, &inode);
78 if (!error) {
79 new = 0;
80 gfs2_holder_uninit(ghs);
81 break;
82 } else if (error != -ENOENT) {
83 gfs2_holder_uninit(ghs);
84 return error;
85 }
86 }
87
88 d_instantiate(dentry, inode);
89 if (new)
90 mark_inode_dirty(inode);
91
92 return 0;
93}
94
95/**
96 * gfs2_lookup - Look up a filename in a directory and return its inode
97 * @dir: The directory inode
98 * @dentry: The dentry of the new inode
99 * @nd: passed from Linux VFS, ignored by us
100 *
101 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
102 *
103 * Returns: errno
104 */
105
106static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
107 struct nameidata *nd)
108{
109 struct gfs2_inode *dip = get_v2ip(dir);
110 struct gfs2_sbd *sdp = dip->i_sbd;
111 struct inode *inode = NULL;
112 int error;
113
114 if (!sdp->sd_args.ar_localcaching)
115 dentry->d_op = &gfs2_dops;
116
117 error = gfs2_lookupi(dir, &dentry->d_name, 0, &inode);
118 if (error && error != -ENOENT)
119 return ERR_PTR(error);
120
121 if (inode)
122 return d_splice_alias(inode, dentry);
123 d_add(dentry, inode);
124
125 return NULL;
126}
127
128/**
129 * gfs2_link - Link to a file
130 * @old_dentry: The inode to link
131 * @dir: Add link to this directory
132 * @dentry: The name of the link
133 *
134 * Link the inode in "old_dentry" into the directory "dir" with the
135 * name in "dentry".
136 *
137 * Returns: errno
138 */
139
140static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
141 struct dentry *dentry)
142{
143 struct gfs2_inode *dip = get_v2ip(dir);
144 struct gfs2_sbd *sdp = dip->i_sbd;
145 struct inode *inode = old_dentry->d_inode;
146 struct gfs2_inode *ip = get_v2ip(inode);
147 struct gfs2_holder ghs[2];
148 int alloc_required;
149 int error;
150
151 if (S_ISDIR(ip->i_di.di_mode))
152 return -EPERM;
153
154 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
155 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
156
157 error = gfs2_glock_nq_m(2, ghs);
158 if (error)
159 goto out;
160
161 error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL);
162 if (error)
163 goto out_gunlock;
164
165 error = gfs2_dir_search(dip, &dentry->d_name, NULL, NULL);
166 switch (error) {
167 case -ENOENT:
168 break;
169 case 0:
170 error = -EEXIST;
171 default:
172 goto out_gunlock;
173 }
174
175 error = -EINVAL;
176 if (!dip->i_di.di_nlink)
177 goto out_gunlock;
178 error = -EFBIG;
179 if (dip->i_di.di_entries == (uint32_t)-1)
180 goto out_gunlock;
181 error = -EPERM;
182 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
183 goto out_gunlock;
184 error = -EINVAL;
185 if (!ip->i_di.di_nlink)
186 goto out_gunlock;
187 error = -EMLINK;
188 if (ip->i_di.di_nlink == (uint32_t)-1)
189 goto out_gunlock;
190
191 error = gfs2_diradd_alloc_required(dip, &dentry->d_name,
192 &alloc_required);
193 if (error)
194 goto out_gunlock;
195
196 if (alloc_required) {
197 struct gfs2_alloc *al = gfs2_alloc_get(dip);
198
199 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
200 if (error)
201 goto out_alloc;
202
203 error = gfs2_quota_check(dip, dip->i_di.di_uid,
204 dip->i_di.di_gid);
205 if (error)
206 goto out_gunlock_q;
207
208 al->al_requested = sdp->sd_max_dirres;
209
210 error = gfs2_inplace_reserve(dip);
211 if (error)
212 goto out_gunlock_q;
213
214 error = gfs2_trans_begin(sdp,
215 sdp->sd_max_dirres +
216 al->al_rgd->rd_ri.ri_length +
217 2 * RES_DINODE + RES_STATFS +
218 RES_QUOTA, 0);
219 if (error)
220 goto out_ipres;
221 } else {
222 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
223 if (error)
224 goto out_ipres;
225 }
226
227 error = gfs2_dir_add(dip, &dentry->d_name, &ip->i_num,
228 IF2DT(ip->i_di.di_mode));
229 if (error)
230 goto out_end_trans;
231
232 error = gfs2_change_nlink(ip, +1);
233
234 out_end_trans:
235 gfs2_trans_end(sdp);
236
237 out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240
241 out_gunlock_q:
242 if (alloc_required)
243 gfs2_quota_unlock(dip);
244
245 out_alloc:
246 if (alloc_required)
247 gfs2_alloc_put(dip);
248
249 out_gunlock:
250 gfs2_glock_dq_m(2, ghs);
251
252 out:
253 gfs2_holder_uninit(ghs);
254 gfs2_holder_uninit(ghs + 1);
255
256 if (!error) {
257 atomic_inc(&inode->i_count);
258 d_instantiate(dentry, inode);
259 mark_inode_dirty(inode);
260 }
261
262 return error;
263}
264
265/**
266 * gfs2_unlink - Unlink a file
267 * @dir: The inode of the directory containing the file to unlink
268 * @dentry: The file itself
269 *
270 * Unlink a file. Call gfs2_unlinki()
271 *
272 * Returns: errno
273 */
274
275static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
276{
277 struct gfs2_inode *dip = get_v2ip(dir);
278 struct gfs2_sbd *sdp = dip->i_sbd;
279 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
280 struct gfs2_unlinked *ul;
281 struct gfs2_holder ghs[2];
282 int error;
283
284 error = gfs2_unlinked_get(sdp, &ul);
285 if (error)
286 return error;
287
288 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
289 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
290
291 error = gfs2_glock_nq_m(2, ghs);
292 if (error)
293 goto out;
294
295 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
296 if (error)
297 goto out_gunlock;
298
299 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF +
300 RES_UNLINKED, 0);
301 if (error)
302 goto out_gunlock;
303
304 error = gfs2_unlinki(dip, &dentry->d_name, ip,ul);
305
306 gfs2_trans_end(sdp);
307
308 out_gunlock:
309 gfs2_glock_dq_m(2, ghs);
310
311 out:
312 gfs2_holder_uninit(ghs);
313 gfs2_holder_uninit(ghs + 1);
314
315 gfs2_unlinked_put(sdp, ul);
316
317 return error;
318}
319
320/**
321 * gfs2_symlink - Create a symlink
322 * @dir: The directory to create the symlink in
323 * @dentry: The dentry to put the symlink in
324 * @symname: The thing which the link points to
325 *
326 * Returns: errno
327 */
328
329static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
330 const char *symname)
331{
332 struct gfs2_inode *dip = get_v2ip(dir), *ip;
333 struct gfs2_sbd *sdp = dip->i_sbd;
334 struct gfs2_holder ghs[2];
335 struct inode *inode;
336 struct buffer_head *dibh;
337 int size;
338 int error;
339
340 /* Must be stuffed with a null terminator for gfs2_follow_link() */
341 size = strlen(symname);
342 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
343 return -ENAMETOOLONG;
344
345 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
346
347 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
348 if (IS_ERR(inode)) {
349 gfs2_holder_uninit(ghs);
350 return PTR_ERR(inode);
351 }
352
353 ip = get_gl2ip(ghs[1].gh_gl);
354
355 ip->i_di.di_size = size;
356
357 error = gfs2_meta_inode_buffer(ip, &dibh);
358
359 if (!gfs2_assert_withdraw(sdp, !error)) {
360 gfs2_dinode_out(&ip->i_di, dibh->b_data);
361 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
362 size);
363 brelse(dibh);
364 }
365
366 gfs2_trans_end(sdp);
367 if (dip->i_alloc.al_rgd)
368 gfs2_inplace_release(dip);
369 gfs2_quota_unlock(dip);
370 gfs2_alloc_put(dip);
371
372 gfs2_glock_dq_uninit_m(2, ghs);
373
374 d_instantiate(dentry, inode);
375 mark_inode_dirty(inode);
376
377 return 0;
378}
379
380/**
381 * gfs2_mkdir - Make a directory
382 * @dir: The parent directory of the new one
383 * @dentry: The dentry of the new directory
384 * @mode: The mode of the new directory
385 *
386 * Returns: errno
387 */
388
389static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
390{
391 struct gfs2_inode *dip = get_v2ip(dir), *ip;
392 struct gfs2_sbd *sdp = dip->i_sbd;
393 struct gfs2_holder ghs[2];
394 struct inode *inode;
395 struct buffer_head *dibh;
396 int error;
397
398 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
399
400 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
401 if (IS_ERR(inode)) {
402 gfs2_holder_uninit(ghs);
403 return PTR_ERR(inode);
404 }
405
406 ip = get_gl2ip(ghs[1].gh_gl);
407
408 ip->i_di.di_nlink = 2;
409 ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
410 ip->i_di.di_flags |= GFS2_DIF_JDATA;
411 ip->i_di.di_payload_format = GFS2_FORMAT_DE;
412 ip->i_di.di_entries = 2;
413
414 error = gfs2_meta_inode_buffer(ip, &dibh);
415
416 if (!gfs2_assert_withdraw(sdp, !error)) {
417 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
418 struct gfs2_dirent *dent;
419
420 gfs2_dirent_alloc(ip, dibh, 1, &dent);
421
422 dent->de_inum = di->di_num; /* already GFS2 endian */
423 dent->de_hash = gfs2_disk_hash(".", 1);
424 dent->de_hash = cpu_to_be32(dent->de_hash);
425 dent->de_type = DT_DIR;
426 memcpy((char *) (dent + 1), ".", 1);
427 di->di_entries = cpu_to_be32(1);
428
429 gfs2_dirent_alloc(ip, dibh, 2, &dent);
430
431 gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum);
432 dent->de_hash = gfs2_disk_hash("..", 2);
433 dent->de_hash = cpu_to_be32(dent->de_hash);
434 dent->de_type = DT_DIR;
435 memcpy((char *) (dent + 1), "..", 2);
436
437 gfs2_dinode_out(&ip->i_di, (char *)di);
438
439 brelse(dibh);
440 }
441
442 error = gfs2_change_nlink(dip, +1);
443 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
444
445 gfs2_trans_end(sdp);
446 if (dip->i_alloc.al_rgd)
447 gfs2_inplace_release(dip);
448 gfs2_quota_unlock(dip);
449 gfs2_alloc_put(dip);
450
451 gfs2_glock_dq_uninit_m(2, ghs);
452
453 d_instantiate(dentry, inode);
454 mark_inode_dirty(inode);
455
456 return 0;
457}
458
459/**
460 * gfs2_rmdir - Remove a directory
461 * @dir: The parent directory of the directory to be removed
462 * @dentry: The dentry of the directory to remove
463 *
464 * Remove a directory. Call gfs2_rmdiri()
465 *
466 * Returns: errno
467 */
468
469static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
470{
471 struct gfs2_inode *dip = get_v2ip(dir);
472 struct gfs2_sbd *sdp = dip->i_sbd;
473 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
474 struct gfs2_unlinked *ul;
475 struct gfs2_holder ghs[2];
476 int error;
477
478 error = gfs2_unlinked_get(sdp, &ul);
479 if (error)
480 return error;
481
482 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
483 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
484
485 error = gfs2_glock_nq_m(2, ghs);
486 if (error)
487 goto out;
488
489 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
490 if (error)
491 goto out_gunlock;
492
493 if (ip->i_di.di_entries < 2) {
494 if (gfs2_consist_inode(ip))
495 gfs2_dinode_print(&ip->i_di);
496 error = -EIO;
497 goto out_gunlock;
498 }
499 if (ip->i_di.di_entries > 2) {
500 error = -ENOTEMPTY;
501 goto out_gunlock;
502 }
503
504 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF +
505 RES_UNLINKED, 0);
506 if (error)
507 goto out_gunlock;
508
509 error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul);
510
511 gfs2_trans_end(sdp);
512
513 out_gunlock:
514 gfs2_glock_dq_m(2, ghs);
515
516 out:
517 gfs2_holder_uninit(ghs);
518 gfs2_holder_uninit(ghs + 1);
519
520 gfs2_unlinked_put(sdp, ul);
521
522 return error;
523}
524
525/**
526 * gfs2_mknod - Make a special file
527 * @dir: The directory in which the special file will reside
528 * @dentry: The dentry of the special file
529 * @mode: The mode of the special file
530 * @rdev: The device specification of the special file
531 *
532 */
533
534static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
535 dev_t dev)
536{
537 struct gfs2_inode *dip = get_v2ip(dir), *ip;
538 struct gfs2_sbd *sdp = dip->i_sbd;
539 struct gfs2_holder ghs[2];
540 struct inode *inode;
541 struct buffer_head *dibh;
542 uint32_t major = 0, minor = 0;
543 int error;
544
545 switch (mode & S_IFMT) {
546 case S_IFBLK:
547 case S_IFCHR:
548 major = MAJOR(dev);
549 minor = MINOR(dev);
550 break;
551 case S_IFIFO:
552 case S_IFSOCK:
553 break;
554 default:
555 return -EOPNOTSUPP;
556 };
557
558 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
559
560 inode = gfs2_createi(ghs, &dentry->d_name, mode);
561 if (IS_ERR(inode)) {
562 gfs2_holder_uninit(ghs);
563 return PTR_ERR(inode);
564 }
565
566 ip = get_gl2ip(ghs[1].gh_gl);
567
568 ip->i_di.di_major = major;
569 ip->i_di.di_minor = minor;
570
571 error = gfs2_meta_inode_buffer(ip, &dibh);
572
573 if (!gfs2_assert_withdraw(sdp, !error)) {
574 gfs2_dinode_out(&ip->i_di, dibh->b_data);
575 brelse(dibh);
576 }
577
578 gfs2_trans_end(sdp);
579 if (dip->i_alloc.al_rgd)
580 gfs2_inplace_release(dip);
581 gfs2_quota_unlock(dip);
582 gfs2_alloc_put(dip);
583
584 gfs2_glock_dq_uninit_m(2, ghs);
585
586 d_instantiate(dentry, inode);
587 mark_inode_dirty(inode);
588
589 return 0;
590}
591
592/**
593 * gfs2_rename - Rename a file
594 * @odir: Parent directory of old file name
595 * @odentry: The old dentry of the file
596 * @ndir: Parent directory of new file name
597 * @ndentry: The new dentry of the file
598 *
599 * Returns: errno
600 */
601
602static int gfs2_rename(struct inode *odir, struct dentry *odentry,
603 struct inode *ndir, struct dentry *ndentry)
604{
605 struct gfs2_inode *odip = get_v2ip(odir);
606 struct gfs2_inode *ndip = get_v2ip(ndir);
607 struct gfs2_inode *ip = get_v2ip(odentry->d_inode);
608 struct gfs2_inode *nip = NULL;
609 struct gfs2_sbd *sdp = odip->i_sbd;
610 struct gfs2_unlinked *ul;
611 struct gfs2_holder ghs[4], r_gh;
612 unsigned int num_gh;
613 int dir_rename = 0;
614 int alloc_required;
615 unsigned int x;
616 int error;
617
618 if (ndentry->d_inode) {
619 nip = get_v2ip(ndentry->d_inode);
620 if (ip == nip)
621 return 0;
622 }
623
624 error = gfs2_unlinked_get(sdp, &ul);
625 if (error)
626 return error;
627
628 /* Make sure we aren't trying to move a dirctory into it's subdir */
629
630 if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
631 dir_rename = 1;
632
633 error = gfs2_glock_nq_init(sdp->sd_rename_gl,
634 LM_ST_EXCLUSIVE, 0,
635 &r_gh);
636 if (error)
637 goto out;
638
639 error = gfs2_ok_to_move(ip, ndip);
640 if (error)
641 goto out_gunlock_r;
642 }
643
644 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
645 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
646 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
647 num_gh = 3;
648
649 if (nip)
650 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
651
652 error = gfs2_glock_nq_m(num_gh, ghs);
653 if (error)
654 goto out_uninit;
655
656 /* Check out the old directory */
657
658 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
659 if (error)
660 goto out_gunlock;
661
662 /* Check out the new directory */
663
664 if (nip) {
665 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
666 if (error)
667 goto out_gunlock;
668
669 if (S_ISDIR(nip->i_di.di_mode)) {
670 if (nip->i_di.di_entries < 2) {
671 if (gfs2_consist_inode(nip))
672 gfs2_dinode_print(&nip->i_di);
673 error = -EIO;
674 goto out_gunlock;
675 }
676 if (nip->i_di.di_entries > 2) {
677 error = -ENOTEMPTY;
678 goto out_gunlock;
679 }
680 }
681 } else {
682 error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL);
683 if (error)
684 goto out_gunlock;
685
686 error = gfs2_dir_search(ndip, &ndentry->d_name, NULL, NULL);
687 switch (error) {
688 case -ENOENT:
689 error = 0;
690 break;
691 case 0:
692 error = -EEXIST;
693 default:
694 goto out_gunlock;
695 };
696
697 if (odip != ndip) {
698 if (!ndip->i_di.di_nlink) {
699 error = -EINVAL;
700 goto out_gunlock;
701 }
702 if (ndip->i_di.di_entries == (uint32_t)-1) {
703 error = -EFBIG;
704 goto out_gunlock;
705 }
706 if (S_ISDIR(ip->i_di.di_mode) &&
707 ndip->i_di.di_nlink == (uint32_t)-1) {
708 error = -EMLINK;
709 goto out_gunlock;
710 }
711 }
712 }
713
714 /* Check out the dir to be renamed */
715
716 if (dir_rename) {
717 error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL);
718 if (error)
719 goto out_gunlock;
720 }
721
722 error = gfs2_diradd_alloc_required(ndip, &ndentry->d_name,
723 &alloc_required);
724 if (error)
725 goto out_gunlock;
726
727 if (alloc_required) {
728 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
729
730 error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
731 if (error)
732 goto out_alloc;
733
734 error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
735 ndip->i_di.di_gid);
736 if (error)
737 goto out_gunlock_q;
738
739 al->al_requested = sdp->sd_max_dirres;
740
741 error = gfs2_inplace_reserve(ndip);
742 if (error)
743 goto out_gunlock_q;
744
745 error = gfs2_trans_begin(sdp,
746 sdp->sd_max_dirres +
747 al->al_rgd->rd_ri.ri_length +
748 4 * RES_DINODE + 4 * RES_LEAF +
749 RES_UNLINKED + RES_STATFS +
750 RES_QUOTA, 0);
751 if (error)
752 goto out_ipreserv;
753 } else {
754 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
755 5 * RES_LEAF +
756 RES_UNLINKED, 0);
757 if (error)
758 goto out_gunlock;
759 }
760
761 /* Remove the target file, if it exists */
762
763 if (nip) {
764 if (S_ISDIR(nip->i_di.di_mode))
765 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul);
766 else
767 error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul);
768 if (error)
769 goto out_end_trans;
770 }
771
772 if (dir_rename) {
773 struct qstr name;
774 name.len = 2;
775 name.name = "..";
776
777 error = gfs2_change_nlink(ndip, +1);
778 if (error)
779 goto out_end_trans;
780 error = gfs2_change_nlink(odip, -1);
781 if (error)
782 goto out_end_trans;
783
784 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
785 if (error)
786 goto out_end_trans;
787 } else {
788 struct buffer_head *dibh;
789 error = gfs2_meta_inode_buffer(ip, &dibh);
790 if (error)
791 goto out_end_trans;
792 ip->i_di.di_ctime = get_seconds();
793 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
794 gfs2_dinode_out(&ip->i_di, dibh->b_data);
795 brelse(dibh);
796 }
797
798 error = gfs2_dir_del(odip, &odentry->d_name);
799 if (error)
800 goto out_end_trans;
801
802 error = gfs2_dir_add(ndip, &ndentry->d_name, &ip->i_num,
803 IF2DT(ip->i_di.di_mode));
804 if (error)
805 goto out_end_trans;
806
807 out_end_trans:
808 gfs2_trans_end(sdp);
809
810 out_ipreserv:
811 if (alloc_required)
812 gfs2_inplace_release(ndip);
813
814 out_gunlock_q:
815 if (alloc_required)
816 gfs2_quota_unlock(ndip);
817
818 out_alloc:
819 if (alloc_required)
820 gfs2_alloc_put(ndip);
821
822 out_gunlock:
823 gfs2_glock_dq_m(num_gh, ghs);
824
825 out_uninit:
826 for (x = 0; x < num_gh; x++)
827 gfs2_holder_uninit(ghs + x);
828
829 out_gunlock_r:
830 if (dir_rename)
831 gfs2_glock_dq_uninit(&r_gh);
832
833 out:
834 gfs2_unlinked_put(sdp, ul);
835
836 return error;
837}
838
839/**
840 * gfs2_readlink - Read the value of a symlink
841 * @dentry: the symlink
842 * @buf: the buffer to read the symlink data into
843 * @size: the size of the buffer
844 *
845 * Returns: errno
846 */
847
848static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
849 int user_size)
850{
851 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
852 char array[GFS2_FAST_NAME_SIZE], *buf = array;
853 unsigned int len = GFS2_FAST_NAME_SIZE;
854 int error;
855
856 error = gfs2_readlinki(ip, &buf, &len);
857 if (error)
858 return error;
859
860 if (user_size > len - 1)
861 user_size = len - 1;
862
863 if (copy_to_user(user_buf, buf, user_size))
864 error = -EFAULT;
865 else
866 error = user_size;
867
868 if (buf != array)
869 kfree(buf);
870
871 return error;
872}
873
874/**
875 * gfs2_follow_link - Follow a symbolic link
876 * @dentry: The dentry of the link
877 * @nd: Data that we pass to vfs_follow_link()
878 *
879 * This can handle symlinks of any size. It is optimised for symlinks
880 * under GFS2_FAST_NAME_SIZE.
881 *
882 * Returns: 0 on success or error code
883 */
884
885static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
886{
887 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
888 char array[GFS2_FAST_NAME_SIZE], *buf = array;
889 unsigned int len = GFS2_FAST_NAME_SIZE;
890 int error;
891
892 error = gfs2_readlinki(ip, &buf, &len);
893 if (!error) {
894 error = vfs_follow_link(nd, buf);
895 if (buf != array)
896 kfree(buf);
897 }
898
899 return ERR_PTR(error);
900}
901
902/**
903 * gfs2_permission -
904 * @inode:
905 * @mask:
906 * @nd: passed from Linux VFS, ignored by us
907 *
908 * Returns: errno
909 */
910
911static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
912{
913 struct gfs2_inode *ip = get_v2ip(inode);
914 struct gfs2_holder i_gh;
915 int error;
916
917 if (ip->i_vn == ip->i_gl->gl_vn)
918 return generic_permission(inode, mask, gfs2_check_acl);
919
920 error = gfs2_glock_nq_init(ip->i_gl,
921 LM_ST_SHARED, LM_FLAG_ANY,
922 &i_gh);
923 if (!error) {
924 error = generic_permission(inode, mask, gfs2_check_acl_locked);
925 gfs2_glock_dq_uninit(&i_gh);
926 }
927
928 return error;
929}
930
931static int setattr_size(struct inode *inode, struct iattr *attr)
932{
933 struct gfs2_inode *ip = get_v2ip(inode);
934 int error;
935
936 if (attr->ia_size != ip->i_di.di_size) {
937 error = vmtruncate(inode, attr->ia_size);
938 if (error)
939 return error;
940 }
941
942 error = gfs2_truncatei(ip, attr->ia_size);
943 if (error)
944 return error;
945
946 return error;
947}
948
949static int setattr_chown(struct inode *inode, struct iattr *attr)
950{
951 struct gfs2_inode *ip = get_v2ip(inode);
952 struct gfs2_sbd *sdp = ip->i_sbd;
953 struct buffer_head *dibh;
954 uint32_t ouid, ogid, nuid, ngid;
955 int error;
956
957 ouid = ip->i_di.di_uid;
958 ogid = ip->i_di.di_gid;
959 nuid = attr->ia_uid;
960 ngid = attr->ia_gid;
961
962 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
963 ouid = nuid = NO_QUOTA_CHANGE;
964 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
965 ogid = ngid = NO_QUOTA_CHANGE;
966
967 gfs2_alloc_get(ip);
968
969 error = gfs2_quota_lock(ip, nuid, ngid);
970 if (error)
971 goto out_alloc;
972
973 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
974 error = gfs2_quota_check(ip, nuid, ngid);
975 if (error)
976 goto out_gunlock_q;
977 }
978
979 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
980 if (error)
981 goto out_gunlock_q;
982
983 error = gfs2_meta_inode_buffer(ip, &dibh);
984 if (error)
985 goto out_end_trans;
986
987 error = inode_setattr(inode, attr);
988 gfs2_assert_warn(sdp, !error);
989 gfs2_inode_attr_out(ip);
990
991 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
992 gfs2_dinode_out(&ip->i_di, dibh->b_data);
993 brelse(dibh);
994
995 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
996 gfs2_quota_change(ip, -ip->i_di.di_blocks,
997 ouid, ogid);
998 gfs2_quota_change(ip, ip->i_di.di_blocks,
999 nuid, ngid);
1000 }
1001
1002 out_end_trans:
1003 gfs2_trans_end(sdp);
1004
1005 out_gunlock_q:
1006 gfs2_quota_unlock(ip);
1007
1008 out_alloc:
1009 gfs2_alloc_put(ip);
1010
1011 return error;
1012}
1013
1014/**
1015 * gfs2_setattr - Change attributes on an inode
1016 * @dentry: The dentry which is changing
1017 * @attr: The structure describing the change
1018 *
1019 * The VFS layer wants to change one or more of an inodes attributes. Write
1020 * that change out to disk.
1021 *
1022 * Returns: errno
1023 */
1024
1025static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1026{
1027 struct inode *inode = dentry->d_inode;
1028 struct gfs2_inode *ip = get_v2ip(inode);
1029 struct gfs2_holder i_gh;
1030 int error;
1031
1032 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1033 if (error)
1034 return error;
1035
1036 error = -EPERM;
1037 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1038 goto out;
1039
1040 error = inode_change_ok(inode, attr);
1041 if (error)
1042 goto out;
1043
1044 if (attr->ia_valid & ATTR_SIZE)
1045 error = setattr_size(inode, attr);
1046 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1047 error = setattr_chown(inode, attr);
1048 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1049 error = gfs2_acl_chmod(ip, attr);
1050 else
1051 error = gfs2_setattr_simple(ip, attr);
1052
1053 out:
1054 gfs2_glock_dq_uninit(&i_gh);
1055
1056 if (!error)
1057 mark_inode_dirty(inode);
1058
1059 return error;
1060}
1061
1062/**
1063 * gfs2_getattr - Read out an inode's attributes
1064 * @mnt: ?
1065 * @dentry: The dentry to stat
1066 * @stat: The inode's stats
1067 *
1068 * Returns: errno
1069 */
1070
1071static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1072 struct kstat *stat)
1073{
1074 struct inode *inode = dentry->d_inode;
1075 struct gfs2_inode *ip = get_v2ip(inode);
1076 struct gfs2_holder gh;
1077 int error;
1078
1079 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1080 if (!error) {
1081 generic_fillattr(inode, stat);
1082 gfs2_glock_dq_uninit(&gh);
1083 }
1084
1085 return error;
1086}
1087
1088static int gfs2_setxattr(struct dentry *dentry, const char *name,
1089 const void *data, size_t size, int flags)
1090{
1091 struct gfs2_inode *ip = get_v2ip(dentry->d_inode);
1092 struct gfs2_ea_request er;
1093
1094 memset(&er, 0, sizeof(struct gfs2_ea_request));
1095 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1096 if (er.er_type == GFS2_EATYPE_UNUSED)
1097 return -EOPNOTSUPP;
1098 er.er_data = (char *)data;
1099 er.er_name_len = strlen(er.er_name);
1100 er.er_data_len = size;
1101 er.er_flags = flags;
1102
1103 gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE));
1104
1105 return gfs2_ea_set(ip, &er);
1106}
1107
1108static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1109 void *data, size_t size)
1110{
1111 struct gfs2_ea_request er;
1112
1113 memset(&er, 0, sizeof(struct gfs2_ea_request));
1114 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1115 if (er.er_type == GFS2_EATYPE_UNUSED)
1116 return -EOPNOTSUPP;
1117 er.er_data = data;
1118 er.er_name_len = strlen(er.er_name);
1119 er.er_data_len = size;
1120
1121 return gfs2_ea_get(get_v2ip(dentry->d_inode), &er);
1122}
1123
1124static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1125{
1126 struct gfs2_ea_request er;
1127
1128 memset(&er, 0, sizeof(struct gfs2_ea_request));
1129 er.er_data = (size) ? buffer : NULL;
1130 er.er_data_len = size;
1131
1132 return gfs2_ea_list(get_v2ip(dentry->d_inode), &er);
1133}
1134
1135static int gfs2_removexattr(struct dentry *dentry, const char *name)
1136{
1137 struct gfs2_ea_request er;
1138
1139 memset(&er, 0, sizeof(struct gfs2_ea_request));
1140 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1141 if (er.er_type == GFS2_EATYPE_UNUSED)
1142 return -EOPNOTSUPP;
1143 er.er_name_len = strlen(er.er_name);
1144
1145 return gfs2_ea_remove(get_v2ip(dentry->d_inode), &er);
1146}
1147
1148struct inode_operations gfs2_file_iops = {
1149 .permission = gfs2_permission,
1150 .setattr = gfs2_setattr,
1151 .getattr = gfs2_getattr,
1152 .setxattr = gfs2_setxattr,
1153 .getxattr = gfs2_getxattr,
1154 .listxattr = gfs2_listxattr,
1155 .removexattr = gfs2_removexattr,
1156};
1157
1158struct inode_operations gfs2_dev_iops = {
1159 .permission = gfs2_permission,
1160 .setattr = gfs2_setattr,
1161 .getattr = gfs2_getattr,
1162 .setxattr = gfs2_setxattr,
1163 .getxattr = gfs2_getxattr,
1164 .listxattr = gfs2_listxattr,
1165 .removexattr = gfs2_removexattr,
1166};
1167
1168struct inode_operations gfs2_dir_iops = {
1169 .create = gfs2_create,
1170 .lookup = gfs2_lookup,
1171 .link = gfs2_link,
1172 .unlink = gfs2_unlink,
1173 .symlink = gfs2_symlink,
1174 .mkdir = gfs2_mkdir,
1175 .rmdir = gfs2_rmdir,
1176 .mknod = gfs2_mknod,
1177 .rename = gfs2_rename,
1178 .permission = gfs2_permission,
1179 .setattr = gfs2_setattr,
1180 .getattr = gfs2_getattr,
1181 .setxattr = gfs2_setxattr,
1182 .getxattr = gfs2_getxattr,
1183 .listxattr = gfs2_listxattr,
1184 .removexattr = gfs2_removexattr,
1185};
1186
1187struct inode_operations gfs2_symlink_iops = {
1188 .readlink = gfs2_readlink,
1189 .follow_link = gfs2_follow_link,
1190 .permission = gfs2_permission,
1191 .setattr = gfs2_setattr,
1192 .getattr = gfs2_getattr,
1193 .setxattr = gfs2_setxattr,
1194 .getxattr = gfs2_getxattr,
1195 .listxattr = gfs2_listxattr,
1196 .removexattr = gfs2_removexattr,
1197};
1198
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
new file mode 100644
index 000000000000..5fafd87c8d7b
--- /dev/null
+++ b/fs/gfs2/ops_inode.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __OPS_INODE_DOT_H__
#define __OPS_INODE_DOT_H__

/* Inode operation tables exported by the GFS2 inode operations code */
extern struct inode_operations gfs2_dir_iops;
extern struct inode_operations gfs2_file_iops;
extern struct inode_operations gfs2_symlink_iops;
extern struct inode_operations gfs2_dev_iops;

#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
new file mode 100644
index 000000000000..48a94522406e
--- /dev/null
+++ b/fs/gfs2/ops_super.c
@@ -0,0 +1,384 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/statfs.h>
16#include <linux/vmalloc.h>
17#include <linux/seq_file.h>
18#include <linux/mount.h>
19#include <linux/kthread.h>
20#include <linux/delay.h>
21#include <asm/semaphore.h>
22
23#include "gfs2.h"
24#include "glock.h"
25#include "inode.h"
26#include "lm.h"
27#include "log.h"
28#include "mount.h"
29#include "ops_super.h"
30#include "page.h"
31#include "quota.h"
32#include "recovery.h"
33#include "rgrp.h"
34#include "super.h"
35#include "sys.h"
36
37/**
38 * gfs2_write_inode - Make sure the inode is stable on the disk
39 * @inode: The inode
40 * @sync: synchronous write flag
41 *
42 * Returns: errno
43 */
44
45static int gfs2_write_inode(struct inode *inode, int sync)
46{
47 struct gfs2_inode *ip = get_v2ip(inode);
48
49 if (current->flags & PF_MEMALLOC)
50 return 0;
51 if (ip && sync)
52 gfs2_log_flush_glock(ip->i_gl);
53
54 return 0;
55}
56
57/**
58 * gfs2_put_super - Unmount the filesystem
59 * @sb: The VFS superblock
60 *
61 */
62
63static void gfs2_put_super(struct super_block *sb)
64{
65 struct gfs2_sbd *sdp = get_v2sdp(sb);
66 int error;
67
68 if (!sdp)
69 return;
70
71 /* Unfreeze the filesystem, if we need to */
72
73 mutex_lock(&sdp->sd_freeze_lock);
74 if (sdp->sd_freeze_count)
75 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
76 mutex_unlock(&sdp->sd_freeze_lock);
77
78 kthread_stop(sdp->sd_inoded_process);
79 kthread_stop(sdp->sd_quotad_process);
80 kthread_stop(sdp->sd_logd_process);
81 kthread_stop(sdp->sd_recoverd_process);
82 while (sdp->sd_glockd_num--)
83 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
84 kthread_stop(sdp->sd_scand_process);
85
86 if (!(sb->s_flags & MS_RDONLY)) {
87 error = gfs2_make_fs_ro(sdp);
88 if (error)
89 gfs2_io_error(sdp);
90 }
91
92 /* At this point, we're through modifying the disk */
93
94 /* Release stuff */
95
96 iput(sdp->sd_master_dir);
97 iput(sdp->sd_jindex);
98 iput(sdp->sd_inum_inode);
99 iput(sdp->sd_statfs_inode);
100 iput(sdp->sd_rindex);
101 iput(sdp->sd_quota_inode);
102 iput(sdp->sd_root_dir);
103
104 gfs2_glock_put(sdp->sd_rename_gl);
105 gfs2_glock_put(sdp->sd_trans_gl);
106
107 if (!sdp->sd_args.ar_spectator) {
108 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
109 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
110 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
111 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
112 gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
113 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
114 iput(sdp->sd_ir_inode);
115 iput(sdp->sd_sc_inode);
116 iput(sdp->sd_ut_inode);
117 iput(sdp->sd_qc_inode);
118 }
119
120 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
121
122 gfs2_clear_rgrpd(sdp);
123 gfs2_jindex_free(sdp);
124
125 /* Take apart glock structures and buffer lists */
126 gfs2_gl_hash_clear(sdp, WAIT);
127
128 /* Unmount the locking protocol */
129 gfs2_lm_unmount(sdp);
130
131 /* At this point, we're through participating in the lockspace */
132
133 gfs2_sys_fs_del(sdp);
134
135 /* Get rid of any extra inodes */
136 while (invalidate_inodes(sb))
137 yield();
138
139 vfree(sdp);
140
141 set_v2sdp(sb, NULL);
142}
143
/**
 * gfs2_write_super - disk commit all incore transactions
 * @sb: the filesystem
 *
 * This function is called every time sync(2) is called.
 * After this exits, all dirty buffers are synced.
 */

static void gfs2_write_super(struct super_block *sb)
{
	struct gfs2_sbd *sd = get_v2sdp(sb);

	gfs2_log_flush(sd);
}
157
158/**
159 * gfs2_write_super_lockfs - prevent further writes to the filesystem
160 * @sb: the VFS structure for the filesystem
161 *
162 */
163
164static void gfs2_write_super_lockfs(struct super_block *sb)
165{
166 struct gfs2_sbd *sdp = get_v2sdp(sb);
167 int error;
168
169 for (;;) {
170 error = gfs2_freeze_fs(sdp);
171 if (!error)
172 break;
173
174 switch (error) {
175 case -EBUSY:
176 fs_err(sdp, "waiting for recovery before freeze\n");
177 break;
178
179 default:
180 fs_err(sdp, "error freezing FS: %d\n", error);
181 break;
182 }
183
184 fs_err(sdp, "retrying...\n");
185 msleep(1000);
186 }
187}
188
/**
 * gfs2_unlockfs - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 *
 * Hands the GFS2 superblock data of @sb to gfs2_unfreeze_fs().
 */

static void gfs2_unlockfs(struct super_block *sb)
{
	struct gfs2_sbd *sd = get_v2sdp(sb);

	gfs2_unfreeze_fs(sd);
}
200
201/**
202 * gfs2_statfs - Gather and return stats about the filesystem
203 * @sb: The superblock
204 * @statfsbuf: The buffer
205 *
206 * Returns: 0 on success or error code
207 */
208
209static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf)
210{
211 struct gfs2_sbd *sdp = get_v2sdp(sb);
212 struct gfs2_statfs_change sc;
213 int error;
214
215 if (gfs2_tune_get(sdp, gt_statfs_slow))
216 error = gfs2_statfs_slow(sdp, &sc);
217 else
218 error = gfs2_statfs_i(sdp, &sc);
219
220 if (error)
221 return error;
222
223 memset(buf, 0, sizeof(struct kstatfs));
224
225 buf->f_type = GFS2_MAGIC;
226 buf->f_bsize = sdp->sd_sb.sb_bsize;
227 buf->f_blocks = sc.sc_total;
228 buf->f_bfree = sc.sc_free;
229 buf->f_bavail = sc.sc_free;
230 buf->f_files = sc.sc_dinodes + sc.sc_free;
231 buf->f_ffree = sc.sc_free;
232 buf->f_namelen = GFS2_FNAMESIZE;
233
234 return 0;
235}
236
237/**
238 * gfs2_remount_fs - called when the FS is remounted
239 * @sb: the filesystem
240 * @flags: the remount flags
241 * @data: extra data passed in (not used right now)
242 *
243 * Returns: errno
244 */
245
246static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
247{
248 struct gfs2_sbd *sdp = get_v2sdp(sb);
249 int error;
250
251 error = gfs2_mount_args(sdp, data, 1);
252 if (error)
253 return error;
254
255 if (sdp->sd_args.ar_spectator)
256 *flags |= MS_RDONLY;
257 else {
258 if (*flags & MS_RDONLY) {
259 if (!(sb->s_flags & MS_RDONLY))
260 error = gfs2_make_fs_ro(sdp);
261 } else if (!(*flags & MS_RDONLY) &&
262 (sb->s_flags & MS_RDONLY)) {
263 error = gfs2_make_fs_rw(sdp);
264 }
265 }
266
267 if (*flags & (MS_NOATIME | MS_NODIRATIME))
268 set_bit(SDF_NOATIME, &sdp->sd_flags);
269 else
270 clear_bit(SDF_NOATIME, &sdp->sd_flags);
271
272 /* Don't let the VFS update atimes. GFS2 handles this itself. */
273 *flags |= MS_NOATIME | MS_NODIRATIME;
274
275 return error;
276}
277
278/**
279 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
280 * @inode: The VFS inode
281 *
282 */
283
284static void gfs2_clear_inode(struct inode *inode)
285{
286 struct gfs2_inode *ip = get_v2ip(inode);
287
288 if (ip) {
289 spin_lock(&ip->i_spin);
290 ip->i_vnode = NULL;
291 set_v2ip(inode, NULL);
292 spin_unlock(&ip->i_spin);
293
294 gfs2_glock_schedule_for_reclaim(ip->i_gl);
295 gfs2_inode_put(ip);
296 }
297}
298
299/**
300 * gfs2_show_options - Show mount options for /proc/mounts
301 * @s: seq_file structure
302 * @mnt: vfsmount
303 *
304 * Returns: 0 on success or error code
305 */
306
307static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
308{
309 struct gfs2_sbd *sdp = get_v2sdp(mnt->mnt_sb);
310 struct gfs2_args *args = &sdp->sd_args;
311
312 if (args->ar_lockproto[0])
313 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
314 if (args->ar_locktable[0])
315 seq_printf(s, ",locktable=%s", args->ar_locktable);
316 if (args->ar_hostdata[0])
317 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
318 if (args->ar_spectator)
319 seq_printf(s, ",spectator");
320 if (args->ar_ignore_local_fs)
321 seq_printf(s, ",ignore_local_fs");
322 if (args->ar_localflocks)
323 seq_printf(s, ",localflocks");
324 if (args->ar_localcaching)
325 seq_printf(s, ",localcaching");
326 if (args->ar_debug)
327 seq_printf(s, ",debug");
328 if (args->ar_upgrade)
329 seq_printf(s, ",upgrade");
330 if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
331 seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
332 if (args->ar_posix_acl)
333 seq_printf(s, ",acl");
334 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
335 char *state;
336 switch (args->ar_quota) {
337 case GFS2_QUOTA_OFF:
338 state = "off";
339 break;
340 case GFS2_QUOTA_ACCOUNT:
341 state = "account";
342 break;
343 case GFS2_QUOTA_ON:
344 state = "on";
345 break;
346 default:
347 state = "unknown";
348 break;
349 }
350 seq_printf(s, ",quota=%s", state);
351 }
352 if (args->ar_suiddir)
353 seq_printf(s, ",suiddir");
354 if (args->ar_data != GFS2_DATA_DEFAULT) {
355 char *state;
356 switch (args->ar_data) {
357 case GFS2_DATA_WRITEBACK:
358 state = "writeback";
359 break;
360 case GFS2_DATA_ORDERED:
361 state = "ordered";
362 break;
363 default:
364 state = "unknown";
365 break;
366 }
367 seq_printf(s, ",data=%s", state);
368 }
369
370 return 0;
371}
372
373struct super_operations gfs2_super_ops = {
374 .write_inode = gfs2_write_inode,
375 .put_super = gfs2_put_super,
376 .write_super = gfs2_write_super,
377 .write_super_lockfs = gfs2_write_super_lockfs,
378 .unlockfs = gfs2_unlockfs,
379 .statfs = gfs2_statfs,
380 .remount_fs = gfs2_remount_fs,
381 .clear_inode = gfs2_clear_inode,
382 .show_options = gfs2_show_options,
383};
384
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
new file mode 100644
index 000000000000..a41d208dc558
--- /dev/null
+++ b/fs/gfs2/ops_super.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_SUPER_DOT_H__
11#define __OPS_SUPER_DOT_H__
12
/* Superblock operations table for GFS2; defined in ops_super.c. */
13extern struct super_operations gfs2_super_ops;
14
15#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
new file mode 100644
index 000000000000..bfeb920dccee
--- /dev/null
+++ b/fs/gfs2/ops_vm.c
@@ -0,0 +1,194 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "bmap.h"
21#include "glock.h"
22#include "inode.h"
23#include "ops_vm.h"
24#include "page.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "trans.h"
28
29static void pfault_be_greedy(struct gfs2_inode *ip)
30{
31 unsigned int time;
32
33 spin_lock(&ip->i_spin);
34 time = ip->i_greedy;
35 ip->i_last_pfault = jiffies;
36 spin_unlock(&ip->i_spin);
37
38 gfs2_inode_hold(ip);
39 if (gfs2_glock_be_greedy(ip->i_gl, time))
40 gfs2_inode_put(ip);
41}
42
43static struct page *gfs2_private_nopage(struct vm_area_struct *area,
44 unsigned long address, int *type)
45{
46 struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host);
47 struct gfs2_holder i_gh;
48 struct page *result;
49 int error;
50
51 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
52 if (error)
53 return NULL;
54
55 set_bit(GIF_PAGED, &ip->i_flags);
56
57 result = filemap_nopage(area, address, type);
58
59 if (result && result != NOPAGE_OOM)
60 pfault_be_greedy(ip);
61
62 gfs2_glock_dq_uninit(&i_gh);
63
64 return result;
65}
66
67static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
68{
69 struct gfs2_sbd *sdp = ip->i_sbd;
70 unsigned long index = page->index;
71 uint64_t lblock = index << (PAGE_CACHE_SHIFT -
72 sdp->sd_sb.sb_bsize_shift);
73 unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
74 struct gfs2_alloc *al;
75 unsigned int data_blocks, ind_blocks;
76 unsigned int x;
77 int error;
78
79 al = gfs2_alloc_get(ip);
80
81 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
82 if (error)
83 goto out;
84
85 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
86 if (error)
87 goto out_gunlock_q;
88
89 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE,
90 &data_blocks, &ind_blocks);
91
92 al->al_requested = data_blocks + ind_blocks;
93
94 error = gfs2_inplace_reserve(ip);
95 if (error)
96 goto out_gunlock_q;
97
98 error = gfs2_trans_begin(sdp,
99 al->al_rgd->rd_ri.ri_length +
100 ind_blocks + RES_DINODE +
101 RES_STATFS + RES_QUOTA, 0);
102 if (error)
103 goto out_ipres;
104
105 if (gfs2_is_stuffed(ip)) {
106 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL);
107 if (error)
108 goto out_trans;
109 }
110
111 for (x = 0; x < blocks; ) {
112 uint64_t dblock;
113 unsigned int extlen;
114 int new = 1;
115
116 error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen);
117 if (error)
118 goto out_trans;
119
120 lblock += extlen;
121 x += extlen;
122 }
123
124 gfs2_assert_warn(sdp, al->al_alloced);
125
126 out_trans:
127 gfs2_trans_end(sdp);
128
129 out_ipres:
130 gfs2_inplace_release(ip);
131
132 out_gunlock_q:
133 gfs2_quota_unlock(ip);
134
135 out:
136 gfs2_alloc_put(ip);
137
138 return error;
139}
140
141static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
142 unsigned long address, int *type)
143{
144 struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host);
145 struct gfs2_holder i_gh;
146 struct page *result = NULL;
147 unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
148 area->vm_pgoff;
149 int alloc_required;
150 int error;
151
152 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
153 if (error)
154 return NULL;
155
156 set_bit(GIF_PAGED, &ip->i_flags);
157 set_bit(GIF_SW_PAGED, &ip->i_flags);
158
159 error = gfs2_write_alloc_required(ip,
160 (uint64_t)index << PAGE_CACHE_SHIFT,
161 PAGE_CACHE_SIZE, &alloc_required);
162 if (error)
163 goto out;
164
165 result = filemap_nopage(area, address, type);
166 if (!result || result == NOPAGE_OOM)
167 goto out;
168
169 if (alloc_required) {
170 error = alloc_page_backing(ip, result);
171 if (error) {
172 page_cache_release(result);
173 result = NULL;
174 goto out;
175 }
176 set_page_dirty(result);
177 }
178
179 pfault_be_greedy(ip);
180
181 out:
182 gfs2_glock_dq_uninit(&i_gh);
183
184 return result;
185}
186
187struct vm_operations_struct gfs2_vm_ops_private = {
188 .nopage = gfs2_private_nopage,
189};
190
191struct vm_operations_struct gfs2_vm_ops_sharewrite = {
192 .nopage = gfs2_sharewrite_nopage,
193};
194
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
new file mode 100644
index 000000000000..54e3a8769cbb
--- /dev/null
+++ b/fs/gfs2/ops_vm.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_VM_DOT_H__
11#define __OPS_VM_DOT_H__
12
/* VM operation tables defined in ops_vm.c */
extern struct vm_operations_struct gfs2_vm_ops_private;	   /* .nopage = gfs2_private_nopage */
extern struct vm_operations_struct gfs2_vm_ops_sharewrite; /* .nopage = gfs2_sharewrite_nopage */
15
16#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c
new file mode 100644
index 000000000000..3542aa6b01c4
--- /dev/null
+++ b/fs/gfs2/page.c
@@ -0,0 +1,279 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/mm.h>
17#include <asm/semaphore.h>
18
19#include "gfs2.h"
20#include "bmap.h"
21#include "inode.h"
22#include "page.h"
23#include "trans.h"
24#include "ops_address.h"
25
26/**
27 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
28 * @gl: the glock
29 *
30 */
31
32void gfs2_pte_inval(struct gfs2_glock *gl)
33{
34 struct gfs2_inode *ip;
35 struct inode *inode;
36
37 ip = get_gl2ip(gl);
38 if (!ip || !S_ISREG(ip->i_di.di_mode))
39 return;
40
41 if (!test_bit(GIF_PAGED, &ip->i_flags))
42 return;
43
44 inode = gfs2_ip2v_lookup(ip);
45 if (inode) {
46 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
47 iput(inode);
48
49 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
50 set_bit(GLF_DIRTY, &gl->gl_flags);
51 }
52
53 clear_bit(GIF_SW_PAGED, &ip->i_flags);
54}
55
56/**
57 * gfs2_page_inval - Invalidate all pages associated with a glock
58 * @gl: the glock
59 *
60 */
61
62void gfs2_page_inval(struct gfs2_glock *gl)
63{
64 struct gfs2_inode *ip;
65 struct inode *inode;
66
67 ip = get_gl2ip(gl);
68 if (!ip || !S_ISREG(ip->i_di.di_mode))
69 return;
70
71 inode = gfs2_ip2v_lookup(ip);
72 if (inode) {
73 struct address_space *mapping = inode->i_mapping;
74
75 truncate_inode_pages(mapping, 0);
76 gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages);
77
78 iput(inode);
79 }
80
81 clear_bit(GIF_PAGED, &ip->i_flags);
82}
83
84/**
85 * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock
86 * @gl: the glock
87 * @flags: DIO_START | DIO_WAIT
88 *
89 * Syncs data (not metadata) for a regular file.
90 * No-op for all other types.
91 */
92
93void gfs2_page_sync(struct gfs2_glock *gl, int flags)
94{
95 struct gfs2_inode *ip;
96 struct inode *inode;
97
98 ip = get_gl2ip(gl);
99 if (!ip || !S_ISREG(ip->i_di.di_mode))
100 return;
101
102 inode = gfs2_ip2v_lookup(ip);
103 if (inode) {
104 struct address_space *mapping = inode->i_mapping;
105 int error = 0;
106
107 if (flags & DIO_START)
108 filemap_fdatawrite(mapping);
109 if (!error && (flags & DIO_WAIT))
110 error = filemap_fdatawait(mapping);
111
112 /* Put back any errors cleared by filemap_fdatawait()
113 so they can be caught by someone who can pass them
114 up to user space. */
115
116 if (error == -ENOSPC)
117 set_bit(AS_ENOSPC, &mapping->flags);
118 else if (error)
119 set_bit(AS_EIO, &mapping->flags);
120
121 iput(inode);
122 }
123}
124
125/**
126 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
127 * @ip: the inode
128 * @dibh: the dinode buffer
129 * @block: the block number that was allocated
130 * @private: any locked page held by the caller process
131 *
132 * Returns: errno
133 */
134
135int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
136 uint64_t block, void *private)
137{
138 struct gfs2_sbd *sdp = ip->i_sbd;
139 struct inode *inode = ip->i_vnode;
140 struct page *page = (struct page *)private;
141 struct buffer_head *bh;
142 int release = 0;
143
144 if (!page || page->index) {
145 page = grab_cache_page(inode->i_mapping, 0);
146 if (!page)
147 return -ENOMEM;
148 release = 1;
149 }
150
151 if (!PageUptodate(page)) {
152 void *kaddr = kmap(page);
153
154 memcpy(kaddr,
155 dibh->b_data + sizeof(struct gfs2_dinode),
156 ip->i_di.di_size);
157 memset(kaddr + ip->i_di.di_size,
158 0,
159 PAGE_CACHE_SIZE - ip->i_di.di_size);
160 kunmap(page);
161
162 SetPageUptodate(page);
163 }
164
165 if (!page_has_buffers(page))
166 create_empty_buffers(page, 1 << inode->i_blkbits,
167 (1 << BH_Uptodate));
168
169 bh = page_buffers(page);
170
171 if (!buffer_mapped(bh))
172 map_bh(bh, inode->i_sb, block);
173
174 set_buffer_uptodate(bh);
175 if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
176 gfs2_trans_add_bh(ip->i_gl, bh, 0);
177 mark_buffer_dirty(bh);
178
179 if (release) {
180 unlock_page(page);
181 page_cache_release(page);
182 }
183
184 return 0;
185}
186
187/**
188 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
189 *
190 * This is partly borrowed from ext3.
191 */
192int gfs2_block_truncate_page(struct address_space *mapping)
193{
194 struct inode *inode = mapping->host;
195 struct gfs2_inode *ip = get_v2ip(inode);
196 struct gfs2_sbd *sdp = ip->i_sbd;
197 loff_t from = inode->i_size;
198 unsigned long index = from >> PAGE_CACHE_SHIFT;
199 unsigned offset = from & (PAGE_CACHE_SIZE-1);
200 unsigned blocksize, iblock, length, pos;
201 struct buffer_head *bh;
202 struct page *page;
203 void *kaddr;
204 int err;
205
206 page = grab_cache_page(mapping, index);
207 if (!page)
208 return 0;
209
210 blocksize = inode->i_sb->s_blocksize;
211 length = blocksize - (offset & (blocksize - 1));
212 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
213
214 if (!page_has_buffers(page))
215 create_empty_buffers(page, blocksize, 0);
216
217 /* Find the buffer that contains "offset" */
218 bh = page_buffers(page);
219 pos = blocksize;
220 while (offset >= pos) {
221 bh = bh->b_this_page;
222 iblock++;
223 pos += blocksize;
224 }
225
226 err = 0;
227
228 if (!buffer_mapped(bh)) {
229 gfs2_get_block(inode, iblock, bh, 0);
230 /* unmapped? It's a hole - nothing to do */
231 if (!buffer_mapped(bh))
232 goto unlock;
233 }
234
235 /* Ok, it's mapped. Make sure it's up-to-date */
236 if (PageUptodate(page))
237 set_buffer_uptodate(bh);
238
239 if (!buffer_uptodate(bh)) {
240 err = -EIO;
241 ll_rw_block(READ, 1, &bh);
242 wait_on_buffer(bh);
243 /* Uhhuh. Read error. Complain and punt. */
244 if (!buffer_uptodate(bh))
245 goto unlock;
246 }
247
248 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
249 gfs2_trans_add_bh(ip->i_gl, bh, 0);
250
251 kaddr = kmap_atomic(page, KM_USER0);
252 memset(kaddr + offset, 0, length);
253 flush_dcache_page(page);
254 kunmap_atomic(kaddr, KM_USER0);
255
256unlock:
257 unlock_page(page);
258 page_cache_release(page);
259 return err;
260}
261
262void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
263 unsigned int from, unsigned int to)
264{
265 struct buffer_head *head = page_buffers(page);
266 unsigned int bsize = head->b_size;
267 struct buffer_head *bh;
268 unsigned int start, end;
269
270 for (bh = head, start = 0;
271 bh != head || !start;
272 bh = bh->b_this_page, start = end) {
273 end = start + bsize;
274 if (end <= from || start >= to)
275 continue;
276 gfs2_trans_add_bh(ip->i_gl, bh, 0);
277 }
278}
279
diff --git a/fs/gfs2/page.h b/fs/gfs2/page.h
new file mode 100644
index 000000000000..346e296420c6
--- /dev/null
+++ b/fs/gfs2/page.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __PAGE_DOT_H__
11#define __PAGE_DOT_H__
12
13void gfs2_pte_inval(struct gfs2_glock *gl);
14void gfs2_page_inval(struct gfs2_glock *gl);
15void gfs2_page_sync(struct gfs2_glock *gl, int flags);
16
17int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
18 uint64_t block, void *private);
19int gfs2_block_truncate_page(struct address_space *mapping);
20void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
21 unsigned int from, unsigned int to);
22
23#endif /* __PAGE_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
new file mode 100644
index 000000000000..40c7cf87eb44
--- /dev/null
+++ b/fs/gfs2/quota.c
@@ -0,0 +1,1297 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controllable.
18 * Since quota tags are part of transactions, there is no need for a quota check
19 * program to be run on node crashes or anything like that.
20 *
21 * There are a couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite nodes with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent.
34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read.
37 */
38
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/spinlock.h>
42#include <linux/completion.h>
43#include <linux/buffer_head.h>
44#include <linux/tty.h>
45#include <linux/sort.h>
46#include <linux/fs.h>
47#include <asm/semaphore.h>
48
49#include "gfs2.h"
50#include "bmap.h"
51#include "glock.h"
52#include "glops.h"
53#include "log.h"
54#include "meta_io.h"
55#include "quota.h"
56#include "rgrp.h"
57#include "super.h"
58#include "trans.h"
59#include "inode.h"
60#include "ops_file.h"
61#include "ops_address.h"
62
63#define QUOTA_USER 1
64#define QUOTA_GROUP 0
65
66static uint64_t qd2offset(struct gfs2_quota_data *qd)
67{
68 uint64_t offset;
69
70 offset = 2 * (uint64_t)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
71 offset *= sizeof(struct gfs2_quota);
72
73 return offset;
74}
75
76static int qd_alloc(struct gfs2_sbd *sdp, int user, uint32_t id,
77 struct gfs2_quota_data **qdp)
78{
79 struct gfs2_quota_data *qd;
80 int error;
81
82 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
83 if (!qd)
84 return -ENOMEM;
85
86 qd->qd_count = 1;
87 qd->qd_id = id;
88 if (user)
89 set_bit(QDF_USER, &qd->qd_flags);
90 qd->qd_slot = -1;
91
92 error = gfs2_glock_get(sdp, 2 * (uint64_t)id + !user,
93 &gfs2_quota_glops, CREATE, &qd->qd_gl);
94 if (error)
95 goto fail;
96
97 error = gfs2_lvb_hold(qd->qd_gl);
98 gfs2_glock_put(qd->qd_gl);
99 if (error)
100 goto fail;
101
102 *qdp = qd;
103
104 return 0;
105
106 fail:
107 kfree(qd);
108 return error;
109}
110
111static int qd_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
112 struct gfs2_quota_data **qdp)
113{
114 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
115 int error, found;
116
117 *qdp = NULL;
118
119 for (;;) {
120 found = 0;
121 spin_lock(&sdp->sd_quota_spin);
122 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
123 if (qd->qd_id == id &&
124 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
125 qd->qd_count++;
126 found = 1;
127 break;
128 }
129 }
130
131 if (!found)
132 qd = NULL;
133
134 if (!qd && new_qd) {
135 qd = new_qd;
136 list_add(&qd->qd_list, &sdp->sd_quota_list);
137 atomic_inc(&sdp->sd_quota_count);
138 new_qd = NULL;
139 }
140
141 spin_unlock(&sdp->sd_quota_spin);
142
143 if (qd || !create) {
144 if (new_qd) {
145 gfs2_lvb_unhold(new_qd->qd_gl);
146 kfree(new_qd);
147 }
148 *qdp = qd;
149 return 0;
150 }
151
152 error = qd_alloc(sdp, user, id, &new_qd);
153 if (error)
154 return error;
155 }
156}
157
158static void qd_hold(struct gfs2_quota_data *qd)
159{
160 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
161
162 spin_lock(&sdp->sd_quota_spin);
163 gfs2_assert(sdp, qd->qd_count);
164 qd->qd_count++;
165 spin_unlock(&sdp->sd_quota_spin);
166}
167
168static void qd_put(struct gfs2_quota_data *qd)
169{
170 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
171 spin_lock(&sdp->sd_quota_spin);
172 gfs2_assert(sdp, qd->qd_count);
173 if (!--qd->qd_count)
174 qd->qd_last_touched = jiffies;
175 spin_unlock(&sdp->sd_quota_spin);
176}
177
178static int slot_get(struct gfs2_quota_data *qd)
179{
180 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
181 unsigned int c, o = 0, b;
182 unsigned char byte = 0;
183
184 spin_lock(&sdp->sd_quota_spin);
185
186 if (qd->qd_slot_count++) {
187 spin_unlock(&sdp->sd_quota_spin);
188 return 0;
189 }
190
191 for (c = 0; c < sdp->sd_quota_chunks; c++)
192 for (o = 0; o < PAGE_SIZE; o++) {
193 byte = sdp->sd_quota_bitmap[c][o];
194 if (byte != 0xFF)
195 goto found;
196 }
197
198 goto fail;
199
200 found:
201 for (b = 0; b < 8; b++)
202 if (!(byte & (1 << b)))
203 break;
204 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
205
206 if (qd->qd_slot >= sdp->sd_quota_slots)
207 goto fail;
208
209 sdp->sd_quota_bitmap[c][o] |= 1 << b;
210
211 spin_unlock(&sdp->sd_quota_spin);
212
213 return 0;
214
215 fail:
216 qd->qd_slot_count--;
217 spin_unlock(&sdp->sd_quota_spin);
218 return -ENOSPC;
219}
220
221static void slot_hold(struct gfs2_quota_data *qd)
222{
223 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
224
225 spin_lock(&sdp->sd_quota_spin);
226 gfs2_assert(sdp, qd->qd_slot_count);
227 qd->qd_slot_count++;
228 spin_unlock(&sdp->sd_quota_spin);
229}
230
231static void slot_put(struct gfs2_quota_data *qd)
232{
233 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
234
235 spin_lock(&sdp->sd_quota_spin);
236 gfs2_assert(sdp, qd->qd_slot_count);
237 if (!--qd->qd_slot_count) {
238 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
239 qd->qd_slot = -1;
240 }
241 spin_unlock(&sdp->sd_quota_spin);
242}
243
244static int bh_get(struct gfs2_quota_data *qd)
245{
246 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
247 struct gfs2_inode *ip = get_v2ip(sdp->sd_qc_inode);
248 unsigned int block, offset;
249 uint64_t dblock;
250 int new = 0;
251 struct buffer_head *bh;
252 int error;
253
254 mutex_lock(&sdp->sd_quota_mutex);
255
256 if (qd->qd_bh_count++) {
257 mutex_unlock(&sdp->sd_quota_mutex);
258 return 0;
259 }
260
261 block = qd->qd_slot / sdp->sd_qc_per_block;
262 offset = qd->qd_slot % sdp->sd_qc_per_block;;
263
264 error = gfs2_block_map(ip, block, &new, &dblock, NULL);
265 if (error)
266 goto fail;
267 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
268 if (error)
269 goto fail;
270 error = -EIO;
271 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
272 goto fail_brelse;
273
274 qd->qd_bh = bh;
275 qd->qd_bh_qc = (struct gfs2_quota_change *)
276 (bh->b_data + sizeof(struct gfs2_meta_header) +
277 offset * sizeof(struct gfs2_quota_change));
278
279 mutex_lock(&sdp->sd_quota_mutex);
280
281 return 0;
282
283 fail_brelse:
284 brelse(bh);
285
286 fail:
287 qd->qd_bh_count--;
288 mutex_unlock(&sdp->sd_quota_mutex);
289 return error;
290}
291
292static void bh_put(struct gfs2_quota_data *qd)
293{
294 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
295
296 mutex_lock(&sdp->sd_quota_mutex);
297 gfs2_assert(sdp, qd->qd_bh_count);
298 if (!--qd->qd_bh_count) {
299 brelse(qd->qd_bh);
300 qd->qd_bh = NULL;
301 qd->qd_bh_qc = NULL;
302 }
303 mutex_unlock(&sdp->sd_quota_mutex);
304}
305
306static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
307{
308 struct gfs2_quota_data *qd = NULL;
309 int error;
310 int found = 0;
311
312 *qdp = NULL;
313
314 if (sdp->sd_vfs->s_flags & MS_RDONLY)
315 return 0;
316
317 spin_lock(&sdp->sd_quota_spin);
318
319 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
320 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
321 !test_bit(QDF_CHANGE, &qd->qd_flags) ||
322 qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
323 continue;
324
325 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
326
327 set_bit(QDF_LOCKED, &qd->qd_flags);
328 gfs2_assert_warn(sdp, qd->qd_count);
329 qd->qd_count++;
330 qd->qd_change_sync = qd->qd_change;
331 gfs2_assert_warn(sdp, qd->qd_slot_count);
332 qd->qd_slot_count++;
333 found = 1;
334
335 break;
336 }
337
338 if (!found)
339 qd = NULL;
340
341 spin_unlock(&sdp->sd_quota_spin);
342
343 if (qd) {
344 gfs2_assert_warn(sdp, qd->qd_change_sync);
345 error = bh_get(qd);
346 if (error) {
347 clear_bit(QDF_LOCKED, &qd->qd_flags);
348 slot_put(qd);
349 qd_put(qd);
350 return error;
351 }
352 }
353
354 *qdp = qd;
355
356 return 0;
357}
358
359static int qd_trylock(struct gfs2_quota_data *qd)
360{
361 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
362
363 if (sdp->sd_vfs->s_flags & MS_RDONLY)
364 return 0;
365
366 spin_lock(&sdp->sd_quota_spin);
367
368 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
369 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
370 spin_unlock(&sdp->sd_quota_spin);
371 return 0;
372 }
373
374 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
375
376 set_bit(QDF_LOCKED, &qd->qd_flags);
377 gfs2_assert_warn(sdp, qd->qd_count);
378 qd->qd_count++;
379 qd->qd_change_sync = qd->qd_change;
380 gfs2_assert_warn(sdp, qd->qd_slot_count);
381 qd->qd_slot_count++;
382
383 spin_unlock(&sdp->sd_quota_spin);
384
385 gfs2_assert_warn(sdp, qd->qd_change_sync);
386 if (bh_get(qd)) {
387 clear_bit(QDF_LOCKED, &qd->qd_flags);
388 slot_put(qd);
389 qd_put(qd);
390 return 0;
391 }
392
393 return 1;
394}
395
396static void qd_unlock(struct gfs2_quota_data *qd)
397{
398 gfs2_assert_warn(qd->qd_gl->gl_sbd,
399 test_bit(QDF_LOCKED, &qd->qd_flags));
400 clear_bit(QDF_LOCKED, &qd->qd_flags);
401 bh_put(qd);
402 slot_put(qd);
403 qd_put(qd);
404}
405
406static int qdsb_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
407 struct gfs2_quota_data **qdp)
408{
409 int error;
410
411 error = qd_get(sdp, user, id, create, qdp);
412 if (error)
413 return error;
414
415 error = slot_get(*qdp);
416 if (error)
417 goto fail;
418
419 error = bh_get(*qdp);
420 if (error)
421 goto fail_slot;
422
423 return 0;
424
425 fail_slot:
426 slot_put(*qdp);
427
428 fail:
429 qd_put(*qdp);
430 return error;
431}
432
/*
 * Counterpart of qdsb_get(): release the buffer, the slot and finally the
 * quota data reference, i.e. the reverse of the acquisition order.
 */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);		/* taken last by qdsb_get() */
	slot_put(qd);
	qd_put(qd);		/* taken first by qdsb_get() */
}
439
440int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
441{
442 struct gfs2_sbd *sdp = ip->i_sbd;
443 struct gfs2_alloc *al = &ip->i_alloc;
444 struct gfs2_quota_data **qd = al->al_qd;
445 int error;
446
447 if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
448 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
449 return -EIO;
450
451 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
452 return 0;
453
454 error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
455 if (error)
456 goto out;
457 al->al_qd_num++;
458 qd++;
459
460 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
461 if (error)
462 goto out;
463 al->al_qd_num++;
464 qd++;
465
466 if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
467 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
468 if (error)
469 goto out;
470 al->al_qd_num++;
471 qd++;
472 }
473
474 if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
475 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
476 if (error)
477 goto out;
478 al->al_qd_num++;
479 qd++;
480 }
481
482 out:
483 if (error)
484 gfs2_quota_unhold(ip);
485
486 return error;
487}
488
489void gfs2_quota_unhold(struct gfs2_inode *ip)
490{
491 struct gfs2_sbd *sdp = ip->i_sbd;
492 struct gfs2_alloc *al = &ip->i_alloc;
493 unsigned int x;
494
495 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
496
497 for (x = 0; x < al->al_qd_num; x++) {
498 qdsb_put(al->al_qd[x]);
499 al->al_qd[x] = NULL;
500 }
501 al->al_qd_num = 0;
502}
503
504static int sort_qd(const void *a, const void *b)
505{
506 struct gfs2_quota_data *qd_a = *(struct gfs2_quota_data **)a;
507 struct gfs2_quota_data *qd_b = *(struct gfs2_quota_data **)b;
508 int ret = 0;
509
510 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
511 !test_bit(QDF_USER, &qd_b->qd_flags)) {
512 if (test_bit(QDF_USER, &qd_a->qd_flags))
513 ret = -1;
514 else
515 ret = 1;
516 } else {
517 if (qd_a->qd_id < qd_b->qd_id)
518 ret = -1;
519 else if (qd_a->qd_id > qd_b->qd_id)
520 ret = 1;
521 }
522
523 return ret;
524}
525
526static void do_qc(struct gfs2_quota_data *qd, int64_t change)
527{
528 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
529 struct gfs2_inode *ip = get_v2ip(sdp->sd_qc_inode);
530 struct gfs2_quota_change *qc = qd->qd_bh_qc;
531 int64_t x;
532
533 mutex_lock(&sdp->sd_quota_mutex);
534 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);
535
536 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
537 qc->qc_change = 0;
538 qc->qc_flags = 0;
539 if (test_bit(QDF_USER, &qd->qd_flags))
540 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
541 qc->qc_id = cpu_to_be32(qd->qd_id);
542 }
543
544 x = qc->qc_change;
545 x = be64_to_cpu(x) + change;
546 qc->qc_change = cpu_to_be64(x);
547
548 spin_lock(&sdp->sd_quota_spin);
549 qd->qd_change = x;
550 spin_unlock(&sdp->sd_quota_spin);
551
552 if (!x) {
553 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
554 clear_bit(QDF_CHANGE, &qd->qd_flags);
555 qc->qc_flags = 0;
556 qc->qc_id = 0;
557 slot_put(qd);
558 qd_put(qd);
559 } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
560 qd_hold(qd);
561 slot_hold(qd);
562 }
563
564 mutex_unlock(&sdp->sd_quota_mutex);
565}
566
567/**
568 * gfs2_adjust_quota
569 *
570 * This function was mostly borrowed from gfs2_block_truncate_page which was
571 * in turn mostly borrowed from ext3
572 */
573static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
574 int64_t change, struct gfs2_quota_data *qd)
575{
576 struct inode *inode = ip->i_vnode;
577 struct address_space *mapping = inode->i_mapping;
578 unsigned long index = loc >> PAGE_CACHE_SHIFT;
579 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
580 unsigned blocksize, iblock, pos;
581 struct buffer_head *bh;
582 struct page *page;
583 void *kaddr;
584 __be64 *ptr;
585 u64 value;
586 int err = -EIO;
587
588 page = grab_cache_page(mapping, index);
589 if (!page)
590 return -ENOMEM;
591
592 blocksize = inode->i_sb->s_blocksize;
593 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
594
595 if (!page_has_buffers(page))
596 create_empty_buffers(page, blocksize, 0);
597
598 bh = page_buffers(page);
599 pos = blocksize;
600 while (offset >= pos) {
601 bh = bh->b_this_page;
602 iblock++;
603 pos += blocksize;
604 }
605
606 if (!buffer_mapped(bh)) {
607 gfs2_get_block(inode, iblock, bh, 1);
608 if (!buffer_mapped(bh))
609 goto unlock;
610 }
611
612 if (PageUptodate(page))
613 set_buffer_uptodate(bh);
614
615 if (!buffer_uptodate(bh)) {
616 ll_rw_block(READ, 1, &bh);
617 wait_on_buffer(bh);
618 if (!buffer_uptodate(bh))
619 goto unlock;
620 }
621
622 gfs2_trans_add_bh(ip->i_gl, bh, 0);
623
624 kaddr = kmap_atomic(page, KM_USER0);
625 ptr = (__be64 *)(kaddr + offset);
626 value = *ptr = cpu_to_be64(be64_to_cpu(*ptr) + change);
627 flush_dcache_page(page);
628 kunmap_atomic(kaddr, KM_USER0);
629 err = 0;
630 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
631#if 0
632 qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
633 qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
634#endif
635 qd->qd_qb.qb_value = cpu_to_be64(value);
636unlock:
637 unlock_page(page);
638 page_cache_release(page);
639 return err;
640}
641
642static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
643{
644 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
645 struct gfs2_inode *ip = get_v2ip(sdp->sd_quota_inode);
646 unsigned int data_blocks, ind_blocks;
647 struct file_ra_state ra_state;
648 struct gfs2_holder *ghs, i_gh;
649 unsigned int qx, x;
650 struct gfs2_quota_data *qd;
651 loff_t offset;
652 unsigned int nalloc = 0;
653 struct gfs2_alloc *al = NULL;
654 int error;
655
656 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
657 &data_blocks, &ind_blocks);
658
659 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
660 if (!ghs)
661 return -ENOMEM;
662
663 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
664 for (qx = 0; qx < num_qd; qx++) {
665 error = gfs2_glock_nq_init(qda[qx]->qd_gl,
666 LM_ST_EXCLUSIVE,
667 GL_NOCACHE, &ghs[qx]);
668 if (error)
669 goto out;
670 }
671
672 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
673 if (error)
674 goto out;
675
676 for (x = 0; x < num_qd; x++) {
677 int alloc_required;
678
679 offset = qd2offset(qda[x]);
680 error = gfs2_write_alloc_required(ip, offset,
681 sizeof(struct gfs2_quota),
682 &alloc_required);
683 if (error)
684 goto out_gunlock;
685 if (alloc_required)
686 nalloc++;
687 }
688
689 if (nalloc) {
690 al = gfs2_alloc_get(ip);
691
692 al->al_requested = nalloc * (data_blocks + ind_blocks);
693
694 error = gfs2_inplace_reserve(ip);
695 if (error)
696 goto out_alloc;
697
698 error = gfs2_trans_begin(sdp,
699 al->al_rgd->rd_ri.ri_length +
700 num_qd * data_blocks +
701 nalloc * ind_blocks +
702 RES_DINODE + num_qd +
703 RES_STATFS, 0);
704 if (error)
705 goto out_ipres;
706 } else {
707 error = gfs2_trans_begin(sdp,
708 num_qd * data_blocks +
709 RES_DINODE + num_qd, 0);
710 if (error)
711 goto out_gunlock;
712 }
713
714 file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
715 for (x = 0; x < num_qd; x++) {
716 qd = qda[x];
717 offset = qd2offset(qd);
718 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
719 (struct gfs2_quota_data *)
720 qd->qd_gl->gl_lvb);
721 if (error)
722 goto out_end_trans;
723
724 do_qc(qd, -qd->qd_change_sync);
725 }
726
727 error = 0;
728
729 out_end_trans:
730 gfs2_trans_end(sdp);
731
732 out_ipres:
733 if (nalloc)
734 gfs2_inplace_release(ip);
735
736 out_alloc:
737 if (nalloc)
738 gfs2_alloc_put(ip);
739
740 out_gunlock:
741 gfs2_glock_dq_uninit(&i_gh);
742
743 out:
744 while (qx--)
745 gfs2_glock_dq_uninit(&ghs[qx]);
746 kfree(ghs);
747 gfs2_log_flush_glock(ip->i_gl);
748
749 return error;
750}
751
752static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
753 struct gfs2_holder *q_gh)
754{
755 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
756 struct gfs2_holder i_gh;
757 struct gfs2_quota q;
758 char buf[sizeof(struct gfs2_quota)];
759 struct file_ra_state ra_state;
760 int error;
761
762 file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
763 restart:
764 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
765 if (error)
766 return error;
767
768 gfs2_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);
769
770 if (force_refresh || qd->qd_qb.qb_magic != GFS2_MAGIC) {
771 loff_t pos;
772 gfs2_glock_dq_uninit(q_gh);
773 error = gfs2_glock_nq_init(qd->qd_gl,
774 LM_ST_EXCLUSIVE, GL_NOCACHE,
775 q_gh);
776 if (error)
777 return error;
778
779 error = gfs2_glock_nq_init(get_v2ip(sdp->sd_quota_inode)->i_gl,
780 LM_ST_SHARED, 0,
781 &i_gh);
782 if (error)
783 goto fail;
784
785 memset(buf, 0, sizeof(struct gfs2_quota));
786 pos = qd2offset(qd);
787 error = gfs2_internal_read(get_v2ip(sdp->sd_quota_inode),
788 &ra_state, buf,
789 &pos,
790 sizeof(struct gfs2_quota));
791 if (error < 0)
792 goto fail_gunlock;
793
794 gfs2_glock_dq_uninit(&i_gh);
795
796 gfs2_quota_in(&q, buf);
797
798 memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
799 qd->qd_qb.qb_magic = GFS2_MAGIC;
800 qd->qd_qb.qb_limit = q.qu_limit;
801 qd->qd_qb.qb_warn = q.qu_warn;
802 qd->qd_qb.qb_value = q.qu_value;
803
804 gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
805
806 if (gfs2_glock_is_blocking(qd->qd_gl)) {
807 gfs2_glock_dq_uninit(q_gh);
808 force_refresh = 0;
809 goto restart;
810 }
811 }
812
813 return 0;
814
815 fail_gunlock:
816 gfs2_glock_dq_uninit(&i_gh);
817
818 fail:
819 gfs2_glock_dq_uninit(q_gh);
820
821 return error;
822}
823
824int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
825{
826 struct gfs2_sbd *sdp = ip->i_sbd;
827 struct gfs2_alloc *al = &ip->i_alloc;
828 unsigned int x;
829 int error = 0;
830
831 gfs2_quota_hold(ip, uid, gid);
832
833 if (capable(CAP_SYS_RESOURCE) ||
834 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
835 return 0;
836
837 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
838 sort_qd, NULL);
839
840 for (x = 0; x < al->al_qd_num; x++) {
841 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
842 if (error)
843 break;
844 }
845
846 if (!error)
847 set_bit(GIF_QD_LOCKED, &ip->i_flags);
848 else {
849 while (x--)
850 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
851 gfs2_quota_unhold(ip);
852 }
853
854 return error;
855}
856
857static int need_sync(struct gfs2_quota_data *qd)
858{
859 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
860 struct gfs2_tune *gt = &sdp->sd_tune;
861 int64_t value;
862 unsigned int num, den;
863 int do_sync = 1;
864
865 if (!qd->qd_qb.qb_limit)
866 return 0;
867
868 spin_lock(&sdp->sd_quota_spin);
869 value = qd->qd_change;
870 spin_unlock(&sdp->sd_quota_spin);
871
872 spin_lock(&gt->gt_spin);
873 num = gt->gt_quota_scale_num;
874 den = gt->gt_quota_scale_den;
875 spin_unlock(&gt->gt_spin);
876
877 if (value < 0)
878 do_sync = 0;
879 else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
880 do_sync = 0;
881 else {
882 value *= gfs2_jindex_size(sdp) * num;
883 do_div(value, den);
884 value += qd->qd_qb.qb_value;
885 if (value < (int64_t)qd->qd_qb.qb_limit)
886 do_sync = 0;
887 }
888
889 return do_sync;
890}
891
892void gfs2_quota_unlock(struct gfs2_inode *ip)
893{
894 struct gfs2_alloc *al = &ip->i_alloc;
895 struct gfs2_quota_data *qda[4];
896 unsigned int count = 0;
897 unsigned int x;
898
899 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
900 goto out;
901
902 for (x = 0; x < al->al_qd_num; x++) {
903 struct gfs2_quota_data *qd;
904 int sync;
905
906 qd = al->al_qd[x];
907 sync = need_sync(qd);
908
909 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
910
911 if (sync && qd_trylock(qd))
912 qda[count++] = qd;
913 }
914
915 if (count) {
916 do_sync(count, qda);
917 for (x = 0; x < count; x++)
918 qd_unlock(qda[x]);
919 }
920
921 out:
922 gfs2_quota_unhold(ip);
923}
924
925#define MAX_LINE 256
926
927static int print_message(struct gfs2_quota_data *qd, char *type)
928{
929 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
930 char *line;
931 int len;
932
933 line = kmalloc(MAX_LINE, GFP_KERNEL);
934 if (!line)
935 return -ENOMEM;
936
937 len = snprintf(line, MAX_LINE-1,
938 "GFS2: fsid=%s: quota %s for %s %u\r\n",
939 sdp->sd_fsname, type,
940 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
941 qd->qd_id);
942 line[MAX_LINE-1] = 0;
943
944 if (current->signal) { /* Is this test still required? */
945 tty_write_message(current->signal->tty, line);
946 }
947
948 kfree(line);
949
950 return 0;
951}
952
953int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
954{
955 struct gfs2_sbd *sdp = ip->i_sbd;
956 struct gfs2_alloc *al = &ip->i_alloc;
957 struct gfs2_quota_data *qd;
958 int64_t value;
959 unsigned int x;
960 int error = 0;
961
962 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
963 return 0;
964
965 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
966 return 0;
967
968 for (x = 0; x < al->al_qd_num; x++) {
969 qd = al->al_qd[x];
970
971 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
972 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
973 continue;
974
975 value = qd->qd_qb.qb_value;
976 spin_lock(&sdp->sd_quota_spin);
977 value += qd->qd_change;
978 spin_unlock(&sdp->sd_quota_spin);
979
980 if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
981 print_message(qd, "exceeded");
982 error = -EDQUOT;
983 break;
984 } else if (qd->qd_qb.qb_warn &&
985 (int64_t)qd->qd_qb.qb_warn < value &&
986 time_after_eq(jiffies, qd->qd_last_warn +
987 gfs2_tune_get(sdp,
988 gt_quota_warn_period) * HZ)) {
989 error = print_message(qd, "warning");
990 qd->qd_last_warn = jiffies;
991 }
992 }
993
994 return error;
995}
996
997void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
998 uint32_t uid, uint32_t gid)
999{
1000 struct gfs2_alloc *al = &ip->i_alloc;
1001 struct gfs2_quota_data *qd;
1002 unsigned int x;
1003 unsigned int found = 0;
1004
1005 if (gfs2_assert_warn(ip->i_sbd, change))
1006 return;
1007 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
1008 return;
1009
1010 for (x = 0; x < al->al_qd_num; x++) {
1011 qd = al->al_qd[x];
1012
1013 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1014 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
1015 do_qc(qd, change);
1016 found++;
1017 }
1018 }
1019}
1020
1021int gfs2_quota_sync(struct gfs2_sbd *sdp)
1022{
1023 struct gfs2_quota_data **qda;
1024 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
1025 unsigned int num_qd;
1026 unsigned int x;
1027 int error = 0;
1028
1029 sdp->sd_quota_sync_gen++;
1030
1031 qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
1032 if (!qda)
1033 return -ENOMEM;
1034
1035 do {
1036 num_qd = 0;
1037
1038 for (;;) {
1039 error = qd_fish(sdp, qda + num_qd);
1040 if (error || !qda[num_qd])
1041 break;
1042 if (++num_qd == max_qd)
1043 break;
1044 }
1045
1046 if (num_qd) {
1047 if (!error)
1048 error = do_sync(num_qd, qda);
1049 if (!error)
1050 for (x = 0; x < num_qd; x++)
1051 qda[x]->qd_sync_gen =
1052 sdp->sd_quota_sync_gen;
1053
1054 for (x = 0; x < num_qd; x++)
1055 qd_unlock(qda[x]);
1056 }
1057 } while (!error && num_qd == max_qd);
1058
1059 kfree(qda);
1060
1061 return error;
1062}
1063
1064int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id)
1065{
1066 struct gfs2_quota_data *qd;
1067 struct gfs2_holder q_gh;
1068 int error;
1069
1070 error = qd_get(sdp, user, id, CREATE, &qd);
1071 if (error)
1072 return error;
1073
1074 error = do_glock(qd, FORCE, &q_gh);
1075 if (!error)
1076 gfs2_glock_dq_uninit(&q_gh);
1077
1078 qd_put(qd);
1079
1080 return error;
1081}
1082
1083int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
1084 struct gfs2_quota *q)
1085{
1086 struct gfs2_quota_data *qd;
1087 struct gfs2_holder q_gh;
1088 int error;
1089
1090 if (((user) ? (id != current->fsuid) : (!in_group_p(id))) &&
1091 !capable(CAP_SYS_ADMIN))
1092 return -EACCES;
1093
1094 error = qd_get(sdp, user, id, CREATE, &qd);
1095 if (error)
1096 return error;
1097
1098 error = do_glock(qd, NO_FORCE, &q_gh);
1099 if (error)
1100 goto out;
1101
1102 memset(q, 0, sizeof(struct gfs2_quota));
1103 q->qu_limit = qd->qd_qb.qb_limit;
1104 q->qu_warn = qd->qd_qb.qb_warn;
1105 q->qu_value = qd->qd_qb.qb_value;
1106
1107 spin_lock(&sdp->sd_quota_spin);
1108 q->qu_value += qd->qd_change;
1109 spin_unlock(&sdp->sd_quota_spin);
1110
1111 gfs2_glock_dq_uninit(&q_gh);
1112
1113 out:
1114 qd_put(qd);
1115
1116 return error;
1117}
1118
1119int gfs2_quota_init(struct gfs2_sbd *sdp)
1120{
1121 struct gfs2_inode *ip = get_v2ip(sdp->sd_qc_inode);
1122 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
1123 unsigned int x, slot = 0;
1124 unsigned int found = 0;
1125 uint64_t dblock;
1126 uint32_t extlen = 0;
1127 int error;
1128
1129 if (!ip->i_di.di_size ||
1130 ip->i_di.di_size > (64 << 20) ||
1131 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
1132 gfs2_consist_inode(ip);
1133 return -EIO;
1134 }
1135 sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
1136 sdp->sd_quota_chunks = DIV_RU(sdp->sd_quota_slots, 8 * PAGE_SIZE);
1137
1138 error = -ENOMEM;
1139
1140 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
1141 sizeof(unsigned char *), GFP_KERNEL);
1142 if (!sdp->sd_quota_bitmap)
1143 return error;
1144
1145 for (x = 0; x < sdp->sd_quota_chunks; x++) {
1146 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
1147 if (!sdp->sd_quota_bitmap[x])
1148 goto fail;
1149 }
1150
1151 for (x = 0; x < blocks; x++) {
1152 struct buffer_head *bh;
1153 unsigned int y;
1154
1155 if (!extlen) {
1156 int new = 0;
1157 error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
1158 if (error)
1159 goto fail;
1160 }
1161 gfs2_meta_ra(ip->i_gl, dblock, extlen);
1162 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
1163 &bh);
1164 if (error)
1165 goto fail;
1166 error = -EIO;
1167 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
1168 brelse(bh);
1169 goto fail;
1170 }
1171
1172 for (y = 0;
1173 y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
1174 y++, slot++) {
1175 struct gfs2_quota_change qc;
1176 struct gfs2_quota_data *qd;
1177
1178 gfs2_quota_change_in(&qc, bh->b_data +
1179 sizeof(struct gfs2_meta_header) +
1180 y * sizeof(struct gfs2_quota_change));
1181 if (!qc.qc_change)
1182 continue;
1183
1184 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
1185 qc.qc_id, &qd);
1186 if (error) {
1187 brelse(bh);
1188 goto fail;
1189 }
1190
1191 set_bit(QDF_CHANGE, &qd->qd_flags);
1192 qd->qd_change = qc.qc_change;
1193 qd->qd_slot = slot;
1194 qd->qd_slot_count = 1;
1195 qd->qd_last_touched = jiffies;
1196
1197 spin_lock(&sdp->sd_quota_spin);
1198 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1199 list_add(&qd->qd_list, &sdp->sd_quota_list);
1200 atomic_inc(&sdp->sd_quota_count);
1201 spin_unlock(&sdp->sd_quota_spin);
1202
1203 found++;
1204 }
1205
1206 brelse(bh);
1207 dblock++;
1208 extlen--;
1209 }
1210
1211 if (found)
1212 fs_info(sdp, "found %u quota changes\n", found);
1213
1214 return 0;
1215
1216 fail:
1217 gfs2_quota_cleanup(sdp);
1218 return error;
1219}
1220
1221void gfs2_quota_scan(struct gfs2_sbd *sdp)
1222{
1223 struct gfs2_quota_data *qd, *safe;
1224 LIST_HEAD(dead);
1225
1226 spin_lock(&sdp->sd_quota_spin);
1227 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1228 if (!qd->qd_count &&
1229 time_after_eq(jiffies, qd->qd_last_touched +
1230 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1231 list_move(&qd->qd_list, &dead);
1232 gfs2_assert_warn(sdp,
1233 atomic_read(&sdp->sd_quota_count) > 0);
1234 atomic_dec(&sdp->sd_quota_count);
1235 }
1236 }
1237 spin_unlock(&sdp->sd_quota_spin);
1238
1239 while (!list_empty(&dead)) {
1240 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1241 list_del(&qd->qd_list);
1242
1243 gfs2_assert_warn(sdp, !qd->qd_change);
1244 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1245 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1246
1247 gfs2_lvb_unhold(qd->qd_gl);
1248 kfree(qd);
1249 }
1250}
1251
1252void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1253{
1254 struct list_head *head = &sdp->sd_quota_list;
1255 struct gfs2_quota_data *qd;
1256 unsigned int x;
1257
1258 spin_lock(&sdp->sd_quota_spin);
1259 while (!list_empty(head)) {
1260 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1261
1262 if (qd->qd_count > 1 ||
1263 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1264 list_move(&qd->qd_list, head);
1265 spin_unlock(&sdp->sd_quota_spin);
1266 schedule();
1267 spin_lock(&sdp->sd_quota_spin);
1268 continue;
1269 }
1270
1271 list_del(&qd->qd_list);
1272 atomic_dec(&sdp->sd_quota_count);
1273 spin_unlock(&sdp->sd_quota_spin);
1274
1275 if (!qd->qd_count) {
1276 gfs2_assert_warn(sdp, !qd->qd_change);
1277 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1278 } else
1279 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1280 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1281
1282 gfs2_lvb_unhold(qd->qd_gl);
1283 kfree(qd);
1284
1285 spin_lock(&sdp->sd_quota_spin);
1286 }
1287 spin_unlock(&sdp->sd_quota_spin);
1288
1289 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1290
1291 if (sdp->sd_quota_bitmap) {
1292 for (x = 0; x < sdp->sd_quota_chunks; x++)
1293 kfree(sdp->sd_quota_bitmap[x]);
1294 kfree(sdp->sd_quota_bitmap);
1295 }
1296}
1297
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
new file mode 100644
index 000000000000..005529f6895d
--- /dev/null
+++ b/fs/gfs2/quota.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __QUOTA_DOT_H__
11#define __QUOTA_DOT_H__
12
13#define NO_QUOTA_CHANGE ((uint32_t)-1)
14
15int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
16void gfs2_quota_unhold(struct gfs2_inode *ip);
17
18int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
19void gfs2_quota_unlock(struct gfs2_inode *ip);
20
21int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
22void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
23 uint32_t uid, uint32_t gid);
24
25int gfs2_quota_sync(struct gfs2_sbd *sdp);
26int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id);
27int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
28 struct gfs2_quota *q);
29
30int gfs2_quota_init(struct gfs2_sbd *sdp);
31void gfs2_quota_scan(struct gfs2_sbd *sdp);
32void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
33
34#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
new file mode 100644
index 000000000000..e5f2b284fa54
--- /dev/null
+++ b/fs/gfs2/recovery.c
@@ -0,0 +1,571 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "glock.h"
20#include "glops.h"
21#include "lm.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "recovery.h"
25#include "super.h"
26
27int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
28 struct buffer_head **bh)
29{
30 struct gfs2_glock *gl = get_v2ip(jd->jd_inode)->i_gl;
31 int new = 0;
32 uint64_t dblock;
33 uint32_t extlen;
34 int error;
35
36 error = gfs2_block_map(get_v2ip(jd->jd_inode), blk, &new, &dblock,
37 &extlen);
38 if (error)
39 return error;
40 if (!dblock) {
41 gfs2_consist_inode(get_v2ip(jd->jd_inode));
42 return -EIO;
43 }
44
45 gfs2_meta_ra(gl, dblock, extlen);
46 error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
47
48 return error;
49}
50
51int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
52{
53 struct list_head *head = &sdp->sd_revoke_list;
54 struct gfs2_revoke_replay *rr;
55 int found = 0;
56
57 list_for_each_entry(rr, head, rr_list) {
58 if (rr->rr_blkno == blkno) {
59 found = 1;
60 break;
61 }
62 }
63
64 if (found) {
65 rr->rr_where = where;
66 return 0;
67 }
68
69 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
70 if (!rr)
71 return -ENOMEM;
72
73 rr->rr_blkno = blkno;
74 rr->rr_where = where;
75 list_add(&rr->rr_list, head);
76
77 return 1;
78}
79
80int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
81{
82 struct gfs2_revoke_replay *rr;
83 int wrap, a, b, revoke;
84 int found = 0;
85
86 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
87 if (rr->rr_blkno == blkno) {
88 found = 1;
89 break;
90 }
91 }
92
93 if (!found)
94 return 0;
95
96 wrap = (rr->rr_where < sdp->sd_replay_tail);
97 a = (sdp->sd_replay_tail < where);
98 b = (where < rr->rr_where);
99 revoke = (wrap) ? (a || b) : (a && b);
100
101 return revoke;
102}
103
104void gfs2_revoke_clean(struct gfs2_sbd *sdp)
105{
106 struct list_head *head = &sdp->sd_revoke_list;
107 struct gfs2_revoke_replay *rr;
108
109 while (!list_empty(head)) {
110 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
111 list_del(&rr->rr_list);
112 kfree(rr);
113 }
114}
115
116/**
117 * get_log_header - read the log header for a given segment
118 * @jd: the journal
119 * @blk: the block to look at
120 * @lh: the log header to return
121 *
122 * Read the log header for a given segement in a given journal. Do a few
123 * sanity checks on it.
124 *
125 * Returns: 0 on success,
126 * 1 if the header was invalid or incomplete,
127 * errno on error
128 */
129
130static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
131 struct gfs2_log_header *head)
132{
133 struct buffer_head *bh;
134 struct gfs2_log_header lh;
135 uint32_t hash;
136 int error;
137
138 error = gfs2_replay_read_block(jd, blk, &bh);
139 if (error)
140 return error;
141
142 memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
143 lh.lh_hash = 0;
144 hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
145 gfs2_log_header_in(&lh, bh->b_data);
146
147 brelse(bh);
148
149 if (lh.lh_header.mh_magic != GFS2_MAGIC ||
150 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
151 lh.lh_blkno != blk ||
152 lh.lh_hash != hash)
153 return 1;
154
155 *head = lh;
156
157 return 0;
158}
159
160/**
161 * find_good_lh - find a good log header
162 * @jd: the journal
163 * @blk: the segment to start searching from
164 * @lh: the log header to fill in
165 * @forward: if true search forward in the log, else search backward
166 *
167 * Call get_log_header() to get a log header for a segment, but if the
168 * segment is bad, either scan forward or backward until we find a good one.
169 *
170 * Returns: errno
171 */
172
173static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
174 struct gfs2_log_header *head)
175{
176 unsigned int orig_blk = *blk;
177 int error;
178
179 for (;;) {
180 error = get_log_header(jd, *blk, head);
181 if (error <= 0)
182 return error;
183
184 if (++*blk == jd->jd_blocks)
185 *blk = 0;
186
187 if (*blk == orig_blk) {
188 gfs2_consist_inode(get_v2ip(jd->jd_inode));
189 return -EIO;
190 }
191 }
192}
193
194/**
195 * jhead_scan - make sure we've found the head of the log
196 * @jd: the journal
197 * @head: this is filled in with the log descriptor of the head
198 *
199 * At this point, seg and lh should be either the head of the log or just
200 * before. Scan forward until we find the head.
201 *
202 * Returns: errno
203 */
204
205static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
206{
207 unsigned int blk = head->lh_blkno;
208 struct gfs2_log_header lh;
209 int error;
210
211 for (;;) {
212 if (++blk == jd->jd_blocks)
213 blk = 0;
214
215 error = get_log_header(jd, blk, &lh);
216 if (error < 0)
217 return error;
218 if (error == 1)
219 continue;
220
221 if (lh.lh_sequence == head->lh_sequence) {
222 gfs2_consist_inode(get_v2ip(jd->jd_inode));
223 return -EIO;
224 }
225 if (lh.lh_sequence < head->lh_sequence)
226 break;
227
228 *head = lh;
229 }
230
231 return 0;
232}
233
234/**
235 * gfs2_find_jhead - find the head of a log
236 * @jd: the journal
237 * @head: the log descriptor for the head of the log is returned here
238 *
239 * Do a binary search of a journal and find the valid log entry with the
240 * highest sequence number. (i.e. the log head)
241 *
242 * Returns: errno
243 */
244
245int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
246{
247 struct gfs2_log_header lh_1, lh_m;
248 uint32_t blk_1, blk_2, blk_m;
249 int error;
250
251 blk_1 = 0;
252 blk_2 = jd->jd_blocks - 1;
253
254 for (;;) {
255 blk_m = (blk_1 + blk_2) / 2;
256
257 error = find_good_lh(jd, &blk_1, &lh_1);
258 if (error)
259 return error;
260
261 error = find_good_lh(jd, &blk_m, &lh_m);
262 if (error)
263 return error;
264
265 if (blk_1 == blk_m || blk_m == blk_2)
266 break;
267
268 if (lh_1.lh_sequence <= lh_m.lh_sequence)
269 blk_1 = blk_m;
270 else
271 blk_2 = blk_m;
272 }
273
274 error = jhead_scan(jd, &lh_1);
275 if (error)
276 return error;
277
278 *head = lh_1;
279
280 return error;
281}
282
283/**
284 * foreach_descriptor - go through the active part of the log
285 * @jd: the journal
286 * @start: the first log header in the active region
287 * @end: the last log header (don't process the contents of this entry))
288 *
289 * Call a given function once for every log descriptor in the active
290 * portion of the log.
291 *
292 * Returns: errno
293 */
294
295static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
296 unsigned int end, int pass)
297{
298 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
299 struct buffer_head *bh;
300 struct gfs2_log_descriptor *ld;
301 int error = 0;
302 u32 length;
303 __be64 *ptr;
304 unsigned int offset = sizeof(struct gfs2_log_descriptor);
305 offset += (sizeof(__be64)-1);
306 offset &= ~(sizeof(__be64)-1);
307
308 while (start != end) {
309 error = gfs2_replay_read_block(jd, start, &bh);
310 if (error)
311 return error;
312 if (gfs2_meta_check(sdp, bh)) {
313 brelse(bh);
314 return -EIO;
315 }
316 ld = (struct gfs2_log_descriptor *)bh->b_data;
317 length = be32_to_cpu(ld->ld_length);
318
319 if (be16_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
320 struct gfs2_log_header lh;
321 error = get_log_header(jd, start, &lh);
322 if (!error) {
323 gfs2_replay_incr_blk(sdp, &start);
324 continue;
325 }
326 if (error == 1) {
327 gfs2_consist_inode(get_v2ip(jd->jd_inode));
328 error = -EIO;
329 }
330 brelse(bh);
331 return error;
332 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
333 brelse(bh);
334 return -EIO;
335 }
336 ptr = (__be64 *)(bh->b_data + offset);
337 error = lops_scan_elements(jd, start, ld, ptr, pass);
338 if (error) {
339 brelse(bh);
340 return error;
341 }
342
343 while (length--)
344 gfs2_replay_incr_blk(sdp, &start);
345
346 brelse(bh);
347 }
348
349 return 0;
350}
351
352/**
353 * clean_journal - mark a dirty journal as being clean
354 * @sdp: the filesystem
355 * @jd: the journal
356 * @gl: the journal's glock
357 * @head: the head journal to start from
358 *
359 * Returns: errno
360 */
361
362static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
363{
364 struct gfs2_inode *ip = get_v2ip(jd->jd_inode);
365 struct gfs2_sbd *sdp = ip->i_sbd;
366 unsigned int lblock;
367 int new = 0;
368 uint64_t dblock;
369 struct gfs2_log_header *lh;
370 uint32_t hash;
371 struct buffer_head *bh;
372 int error;
373
374 lblock = head->lh_blkno;
375 gfs2_replay_incr_blk(sdp, &lblock);
376 error = gfs2_block_map(ip, lblock, &new, &dblock, NULL);
377 if (error)
378 return error;
379 if (!dblock) {
380 gfs2_consist_inode(ip);
381 return -EIO;
382 }
383
384 bh = sb_getblk(sdp->sd_vfs, dblock);
385 lock_buffer(bh);
386 memset(bh->b_data, 0, bh->b_size);
387 set_buffer_uptodate(bh);
388 clear_buffer_dirty(bh);
389 unlock_buffer(bh);
390
391 lh = (struct gfs2_log_header *)bh->b_data;
392 memset(lh, 0, sizeof(struct gfs2_log_header));
393 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
394 lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
395 lh->lh_header.mh_format = cpu_to_be16(GFS2_FORMAT_LH);
396 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
397 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
398 lh->lh_blkno = cpu_to_be32(lblock);
399 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
400 lh->lh_hash = cpu_to_be32(hash);
401
402 set_buffer_dirty(bh);
403 if (sync_dirty_buffer(bh))
404 gfs2_io_error_bh(sdp, bh);
405 brelse(bh);
406
407 return error;
408}
409
410/**
411 * gfs2_recover_journal - recovery a given journal
412 * @jd: the struct gfs2_jdesc describing the journal
413 * @wait: Don't return until the journal is clean (or an error is encountered)
414 *
415 * Acquire the journal's lock, check to see if the journal is clean, and
416 * do recovery if necessary.
417 *
418 * Returns: errno
419 */
420
421int gfs2_recover_journal(struct gfs2_jdesc *jd, int wait)
422{
423 struct gfs2_sbd *sdp = get_v2ip(jd->jd_inode)->i_sbd;
424 struct gfs2_log_header head;
425 struct gfs2_holder j_gh, ji_gh, t_gh;
426 unsigned long t;
427 int ro = 0;
428 unsigned int pass;
429 int error;
430
431 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid);
432
433 /* Aquire the journal lock so we can do recovery */
434
435 error = gfs2_glock_nq_num(sdp,
436 jd->jd_jid, &gfs2_journal_glops,
437 LM_ST_EXCLUSIVE,
438 LM_FLAG_NOEXP |
439 ((wait) ? 0 : LM_FLAG_TRY) |
440 GL_NOCACHE, &j_gh);
441 switch (error) {
442 case 0:
443 break;
444
445 case GLR_TRYFAILED:
446 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
447 error = 0;
448
449 default:
450 goto fail;
451 };
452
453 error = gfs2_glock_nq_init(get_v2ip(jd->jd_inode)->i_gl, LM_ST_SHARED,
454 LM_FLAG_NOEXP, &ji_gh);
455 if (error)
456 goto fail_gunlock_j;
457
458 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
459
460 error = gfs2_jdesc_check(jd);
461 if (error)
462 goto fail_gunlock_ji;
463
464 error = gfs2_find_jhead(jd, &head);
465 if (error)
466 goto fail_gunlock_ji;
467
468 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
469 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
470 jd->jd_jid);
471
472 t = jiffies;
473
474 /* Acquire a shared hold on the transaction lock */
475
476 error = gfs2_glock_nq_init(sdp->sd_trans_gl,
477 LM_ST_SHARED,
478 LM_FLAG_NOEXP |
479 LM_FLAG_PRIORITY |
480 GL_NEVER_RECURSE |
481 GL_NOCANCEL |
482 GL_NOCACHE,
483 &t_gh);
484 if (error)
485 goto fail_gunlock_ji;
486
487 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
488 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
489 ro = 1;
490 } else {
491 if (sdp->sd_vfs->s_flags & MS_RDONLY)
492 ro = 1;
493 }
494
495 if (ro) {
496 fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
497 jd->jd_jid);
498 error = -EROFS;
499 goto fail_gunlock_tr;
500 }
501
502 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
503
504 for (pass = 0; pass < 2; pass++) {
505 lops_before_scan(jd, &head, pass);
506 error = foreach_descriptor(jd, head.lh_tail,
507 head.lh_blkno, pass);
508 lops_after_scan(jd, error, pass);
509 if (error)
510 goto fail_gunlock_tr;
511 }
512
513 error = clean_journal(jd, &head);
514 if (error)
515 goto fail_gunlock_tr;
516
517 gfs2_glock_dq_uninit(&t_gh);
518
519 t = DIV_RU(jiffies - t, HZ);
520
521 fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
522 jd->jd_jid, t);
523 }
524
525 gfs2_glock_dq_uninit(&ji_gh);
526
527 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
528
529 gfs2_glock_dq_uninit(&j_gh);
530
531 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
532
533 return 0;
534
535 fail_gunlock_tr:
536 gfs2_glock_dq_uninit(&t_gh);
537
538 fail_gunlock_ji:
539 gfs2_glock_dq_uninit(&ji_gh);
540
541 fail_gunlock_j:
542 gfs2_glock_dq_uninit(&j_gh);
543
544 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
545
546 fail:
547 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
548
549 return error;
550}
551
552/**
553 * gfs2_check_journals - Recover any dirty journals
554 * @sdp: the filesystem
555 *
556 */
557
558void gfs2_check_journals(struct gfs2_sbd *sdp)
559{
560 struct gfs2_jdesc *jd;
561
562 for (;;) {
563 jd = gfs2_jdesc_find_dirty(sdp);
564 if (!jd)
565 break;
566
567 if (jd != sdp->sd_jdesc)
568 gfs2_recover_journal(jd, NO_WAIT);
569 }
570}
571
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
new file mode 100644
index 000000000000..50d7eb57881c
--- /dev/null
+++ b/fs/gfs2/recovery.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RECOVERY_DOT_H__
11#define __RECOVERY_DOT_H__
12
13static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
14{
15 if (++*blk == sdp->sd_jdesc->jd_blocks)
16 *blk = 0;
17}
18
19int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
20 struct buffer_head **bh);
21
22int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
23int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
24void gfs2_revoke_clean(struct gfs2_sbd *sdp);
25
26int gfs2_find_jhead(struct gfs2_jdesc *jd,
27 struct gfs2_log_header *head);
28int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, int wait);
29void gfs2_check_journals(struct gfs2_sbd *sdp);
30
31#endif /* __RECOVERY_DOT_H__ */
32
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
new file mode 100644
index 000000000000..9525b176f502
--- /dev/null
+++ b/fs/gfs2/rgrp.c
@@ -0,0 +1,1365 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/fs.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "bits.h"
20#include "glock.h"
21#include "glops.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "quota.h"
25#include "rgrp.h"
26#include "super.h"
27#include "trans.h"
28#include "ops_file.h"
29
30/**
31 * gfs2_rgrp_verify - Verify that a resource group is consistent
32 * @sdp: the filesystem
33 * @rgd: the rgrp
34 *
35 */
36
37void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
38{
39 struct gfs2_sbd *sdp = rgd->rd_sbd;
40 struct gfs2_bitmap *bi = NULL;
41 uint32_t length = rgd->rd_ri.ri_length;
42 uint32_t count[4], tmp;
43 int buf, x;
44
45 memset(count, 0, 4 * sizeof(uint32_t));
46
47 /* Count # blocks in each of 4 possible allocation states */
48 for (buf = 0; buf < length; buf++) {
49 bi = rgd->rd_bits + buf;
50 for (x = 0; x < 4; x++)
51 count[x] += gfs2_bitcount(rgd,
52 bi->bi_bh->b_data +
53 bi->bi_offset,
54 bi->bi_len, x);
55 }
56
57 if (count[0] != rgd->rd_rg.rg_free) {
58 if (gfs2_consist_rgrpd(rgd))
59 fs_err(sdp, "free data mismatch: %u != %u\n",
60 count[0], rgd->rd_rg.rg_free);
61 return;
62 }
63
64 tmp = rgd->rd_ri.ri_data -
65 rgd->rd_rg.rg_free -
66 rgd->rd_rg.rg_dinodes;
67 if (count[1] != tmp) {
68 if (gfs2_consist_rgrpd(rgd))
69 fs_err(sdp, "used data mismatch: %u != %u\n",
70 count[1], tmp);
71 return;
72 }
73
74 if (count[2]) {
75 if (gfs2_consist_rgrpd(rgd))
76 fs_err(sdp, "free metadata mismatch: %u != 0\n",
77 count[2]);
78 return;
79 }
80
81 if (count[3] != rgd->rd_rg.rg_dinodes) {
82 if (gfs2_consist_rgrpd(rgd))
83 fs_err(sdp, "used metadata mismatch: %u != %u\n",
84 count[3], rgd->rd_rg.rg_dinodes);
85 return;
86 }
87}
88
89static inline int rgrp_contains_block(struct gfs2_rindex *ri, uint64_t block)
90{
91 uint64_t first = ri->ri_data0;
92 uint64_t last = first + ri->ri_data;
93 return !!(first <= block && block < last);
94}
95
96/**
97 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
98 * @sdp: The GFS2 superblock
99 * @n: The data block number
100 *
101 * Returns: The resource group, or NULL if not found
102 */
103
104struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
105{
106 struct gfs2_rgrpd *rgd;
107
108 spin_lock(&sdp->sd_rindex_spin);
109
110 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
111 if (rgrp_contains_block(&rgd->rd_ri, blk)) {
112 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
113 spin_unlock(&sdp->sd_rindex_spin);
114 return rgd;
115 }
116 }
117
118 spin_unlock(&sdp->sd_rindex_spin);
119
120 return NULL;
121}
122
123/**
124 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
125 * @sdp: The GFS2 superblock
126 *
127 * Returns: The first rgrp in the filesystem
128 */
129
130struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
131{
132 gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
133 return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
134}
135
136/**
137 * gfs2_rgrpd_get_next - get the next RG
138 * @rgd: A RG
139 *
140 * Returns: The next rgrp
141 */
142
143struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
144{
145 if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
146 return NULL;
147 return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
148}
149
150static void clear_rgrpdi(struct gfs2_sbd *sdp)
151{
152 struct list_head *head;
153 struct gfs2_rgrpd *rgd;
154 struct gfs2_glock *gl;
155
156 spin_lock(&sdp->sd_rindex_spin);
157 sdp->sd_rindex_forward = NULL;
158 head = &sdp->sd_rindex_recent_list;
159 while (!list_empty(head)) {
160 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
161 list_del(&rgd->rd_recent);
162 }
163 spin_unlock(&sdp->sd_rindex_spin);
164
165 head = &sdp->sd_rindex_list;
166 while (!list_empty(head)) {
167 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
168 gl = rgd->rd_gl;
169
170 list_del(&rgd->rd_list);
171 list_del(&rgd->rd_list_mru);
172
173 if (gl) {
174 set_gl2rgd(gl, NULL);
175 gfs2_glock_put(gl);
176 }
177
178 kfree(rgd->rd_bits);
179 kfree(rgd);
180 }
181}
182
183void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
184{
185 mutex_lock(&sdp->sd_rindex_mutex);
186 clear_rgrpdi(sdp);
187 mutex_unlock(&sdp->sd_rindex_mutex);
188}
189
190/**
191 * gfs2_compute_bitstructs - Compute the bitmap sizes
192 * @rgd: The resource group descriptor
193 *
194 * Calculates bitmap descriptors, one for each block that contains bitmap data
195 *
196 * Returns: errno
197 */
198
199static int compute_bitstructs(struct gfs2_rgrpd *rgd)
200{
201 struct gfs2_sbd *sdp = rgd->rd_sbd;
202 struct gfs2_bitmap *bi;
203 uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
204 uint32_t bytes_left, bytes;
205 int x;
206
207 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_KERNEL);
208 if (!rgd->rd_bits)
209 return -ENOMEM;
210
211 bytes_left = rgd->rd_ri.ri_bitbytes;
212
213 for (x = 0; x < length; x++) {
214 bi = rgd->rd_bits + x;
215
216 /* small rgrp; bitmap stored completely in header block */
217 if (length == 1) {
218 bytes = bytes_left;
219 bi->bi_offset = sizeof(struct gfs2_rgrp);
220 bi->bi_start = 0;
221 bi->bi_len = bytes;
222 /* header block */
223 } else if (x == 0) {
224 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
225 bi->bi_offset = sizeof(struct gfs2_rgrp);
226 bi->bi_start = 0;
227 bi->bi_len = bytes;
228 /* last block */
229 } else if (x + 1 == length) {
230 bytes = bytes_left;
231 bi->bi_offset = sizeof(struct gfs2_meta_header);
232 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
233 bi->bi_len = bytes;
234 /* other blocks */
235 } else {
236 bytes = sdp->sd_sb.sb_bsize -
237 sizeof(struct gfs2_meta_header);
238 bi->bi_offset = sizeof(struct gfs2_meta_header);
239 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
240 bi->bi_len = bytes;
241 }
242
243 bytes_left -= bytes;
244 }
245
246 if (bytes_left) {
247 gfs2_consist_rgrpd(rgd);
248 return -EIO;
249 }
250 bi = rgd->rd_bits + (length - 1);
251 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
252 if (gfs2_consist_rgrpd(rgd)) {
253 gfs2_rindex_print(&rgd->rd_ri);
254 fs_err(sdp, "start=%u len=%u offset=%u\n",
255 bi->bi_start, bi->bi_len, bi->bi_offset);
256 }
257 return -EIO;
258 }
259
260 return 0;
261}
262
263/**
264 * gfs2_ri_update - Pull in a new resource index from the disk
265 * @gl: The glock covering the rindex inode
266 *
267 * Returns: 0 on successful update, error code otherwise
268 */
269
270static int gfs2_ri_update(struct gfs2_inode *ip)
271{
272 struct gfs2_sbd *sdp = ip->i_sbd;
273 struct inode *inode = ip->i_vnode;
274 struct gfs2_rgrpd *rgd;
275 char buf[sizeof(struct gfs2_rindex)];
276 struct file_ra_state ra_state;
277 uint64_t junk = ip->i_di.di_size;
278 int error;
279
280 if (do_div(junk, sizeof(struct gfs2_rindex))) {
281 gfs2_consist_inode(ip);
282 return -EIO;
283 }
284
285 clear_rgrpdi(sdp);
286
287 file_ra_state_init(&ra_state, inode->i_mapping);
288 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
289 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
290 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
291 sizeof(struct gfs2_rindex));
292 if (!error)
293 break;
294 if (error != sizeof(struct gfs2_rindex)) {
295 if (error > 0)
296 error = -EIO;
297 goto fail;
298 }
299
300 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_KERNEL);
301 error = -ENOMEM;
302 if (!rgd)
303 goto fail;
304
305 mutex_init(&rgd->rd_mutex);
306 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
307 rgd->rd_sbd = sdp;
308
309 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
310 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
311
312 gfs2_rindex_in(&rgd->rd_ri, buf);
313
314 error = compute_bitstructs(rgd);
315 if (error)
316 goto fail;
317
318 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
319 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
320 if (error)
321 goto fail;
322
323 set_gl2rgd(rgd->rd_gl, rgd);
324 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
325 }
326
327 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
328
329 return 0;
330
331 fail:
332 clear_rgrpdi(sdp);
333
334 return error;
335}
336
337/**
338 * gfs2_rindex_hold - Grab a lock on the rindex
339 * @sdp: The GFS2 superblock
340 * @ri_gh: the glock holder
341 *
342 * We grab a lock on the rindex inode to make sure that it doesn't
343 * change whilst we are performing an operation. We keep this lock
344 * for quite long periods of time compared to other locks. This
345 * doesn't matter, since it is shared and it is very, very rarely
346 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
347 *
348 * This makes sure that we're using the latest copy of the resource index
349 * special file, which might have been updated if someone expanded the
350 * filesystem (via gfs2_grow utility), which adds new resource groups.
351 *
352 * Returns: 0 on success, error code otherwise
353 */
354
355int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
356{
357 struct gfs2_inode *ip = get_v2ip(sdp->sd_rindex);
358 struct gfs2_glock *gl = ip->i_gl;
359 int error;
360
361 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
362 if (error)
363 return error;
364
365 /* Read new copy from disk if we don't have the latest */
366 if (sdp->sd_rindex_vn != gl->gl_vn) {
367 mutex_lock(&sdp->sd_rindex_mutex);
368 if (sdp->sd_rindex_vn != gl->gl_vn) {
369 error = gfs2_ri_update(ip);
370 if (error)
371 gfs2_glock_dq_uninit(ri_gh);
372 }
373 mutex_unlock(&sdp->sd_rindex_mutex);
374 }
375
376 return error;
377}
378
379/**
380 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
381 * @rgd: the struct gfs2_rgrpd describing the RG to read in
382 *
383 * Read in all of a Resource Group's header and bitmap blocks.
384 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
385 *
386 * Returns: errno
387 */
388
389int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
390{
391 struct gfs2_sbd *sdp = rgd->rd_sbd;
392 struct gfs2_glock *gl = rgd->rd_gl;
393 unsigned int length = rgd->rd_ri.ri_length;
394 struct gfs2_bitmap *bi;
395 unsigned int x, y;
396 int error;
397
398 mutex_lock(&rgd->rd_mutex);
399
400 spin_lock(&sdp->sd_rindex_spin);
401 if (rgd->rd_bh_count) {
402 rgd->rd_bh_count++;
403 spin_unlock(&sdp->sd_rindex_spin);
404 mutex_unlock(&rgd->rd_mutex);
405 return 0;
406 }
407 spin_unlock(&sdp->sd_rindex_spin);
408
409 for (x = 0; x < length; x++) {
410 bi = rgd->rd_bits + x;
411 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, DIO_START,
412 &bi->bi_bh);
413 if (error)
414 goto fail;
415 }
416
417 for (y = length; y--;) {
418 bi = rgd->rd_bits + y;
419 error = gfs2_meta_reread(sdp, bi->bi_bh, DIO_WAIT);
420 if (error)
421 goto fail;
422 if (gfs2_metatype_check(sdp, bi->bi_bh,
423 (y) ? GFS2_METATYPE_RB :
424 GFS2_METATYPE_RG)) {
425 error = -EIO;
426 goto fail;
427 }
428 }
429
430 if (rgd->rd_rg_vn != gl->gl_vn) {
431 gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
432 rgd->rd_rg_vn = gl->gl_vn;
433 }
434
435 spin_lock(&sdp->sd_rindex_spin);
436 rgd->rd_free_clone = rgd->rd_rg.rg_free;
437 rgd->rd_bh_count++;
438 spin_unlock(&sdp->sd_rindex_spin);
439
440 mutex_unlock(&rgd->rd_mutex);
441
442 return 0;
443
444 fail:
445 while (x--) {
446 bi = rgd->rd_bits + x;
447 brelse(bi->bi_bh);
448 bi->bi_bh = NULL;
449 gfs2_assert_warn(sdp, !bi->bi_clone);
450 }
451 mutex_unlock(&rgd->rd_mutex);
452
453 return error;
454}
455
456void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
457{
458 struct gfs2_sbd *sdp = rgd->rd_sbd;
459
460 spin_lock(&sdp->sd_rindex_spin);
461 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
462 rgd->rd_bh_count++;
463 spin_unlock(&sdp->sd_rindex_spin);
464}
465
466/**
467 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
468 * @rgd: the struct gfs2_rgrpd describing the RG to read in
469 *
470 */
471
472void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
473{
474 struct gfs2_sbd *sdp = rgd->rd_sbd;
475 int x, length = rgd->rd_ri.ri_length;
476
477 spin_lock(&sdp->sd_rindex_spin);
478 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
479 if (--rgd->rd_bh_count) {
480 spin_unlock(&sdp->sd_rindex_spin);
481 return;
482 }
483
484 for (x = 0; x < length; x++) {
485 struct gfs2_bitmap *bi = rgd->rd_bits + x;
486 kfree(bi->bi_clone);
487 bi->bi_clone = NULL;
488 brelse(bi->bi_bh);
489 bi->bi_bh = NULL;
490 }
491
492 spin_unlock(&sdp->sd_rindex_spin);
493}
494
495void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
496{
497 struct gfs2_sbd *sdp = rgd->rd_sbd;
498 unsigned int length = rgd->rd_ri.ri_length;
499 unsigned int x;
500
501 for (x = 0; x < length; x++) {
502 struct gfs2_bitmap *bi = rgd->rd_bits + x;
503 if (!bi->bi_clone)
504 continue;
505 memcpy(bi->bi_clone + bi->bi_offset,
506 bi->bi_bh->b_data + bi->bi_offset,
507 bi->bi_len);
508 }
509
510 spin_lock(&sdp->sd_rindex_spin);
511 rgd->rd_free_clone = rgd->rd_rg.rg_free;
512 spin_unlock(&sdp->sd_rindex_spin);
513}
514
515/**
516 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
517 * @ip: the incore GFS2 inode structure
518 *
519 * Returns: the struct gfs2_alloc
520 */
521
522struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
523{
524 struct gfs2_alloc *al = &ip->i_alloc;
525
526 /* FIXME: Should assert that the correct locks are held here... */
527 memset(al, 0, sizeof(*al));
528 return al;
529}
530
/**
 * gfs2_alloc_put - throw away the struct gfs2_alloc for an inode
 * @ip: the inode
 *
 * Currently does nothing.
 */

void gfs2_alloc_put(struct gfs2_inode *ip)
{
}
541
542/**
543 * try_rgrp_fit - See if a given reservation will fit in a given RG
544 * @rgd: the RG data
545 * @al: the struct gfs2_alloc structure describing the reservation
546 *
547 * If there's room for the requested blocks to be allocated from the RG:
548 * Sets the $al_reserved_data field in @al.
549 * Sets the $al_reserved_meta field in @al.
550 * Sets the $al_rgd field in @al.
551 *
552 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
553 */
554
555static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
556{
557 struct gfs2_sbd *sdp = rgd->rd_sbd;
558 int ret = 0;
559
560 spin_lock(&sdp->sd_rindex_spin);
561 if (rgd->rd_free_clone >= al->al_requested) {
562 al->al_rgd = rgd;
563 ret = 1;
564 }
565 spin_unlock(&sdp->sd_rindex_spin);
566
567 return ret;
568}
569
570/**
571 * recent_rgrp_first - get first RG from "recent" list
572 * @sdp: The GFS2 superblock
573 * @rglast: address of the rgrp used last
574 *
575 * Returns: The first rgrp in the recent list
576 */
577
578static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
579 uint64_t rglast)
580{
581 struct gfs2_rgrpd *rgd = NULL;
582
583 spin_lock(&sdp->sd_rindex_spin);
584
585 if (list_empty(&sdp->sd_rindex_recent_list))
586 goto out;
587
588 if (!rglast)
589 goto first;
590
591 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
592 if (rgd->rd_ri.ri_addr == rglast)
593 goto out;
594 }
595
596 first:
597 rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
598 rd_recent);
599
600 out:
601 spin_unlock(&sdp->sd_rindex_spin);
602
603 return rgd;
604}
605
606/**
607 * recent_rgrp_next - get next RG from "recent" list
608 * @cur_rgd: current rgrp
609 * @remove:
610 *
611 * Returns: The next rgrp in the recent list
612 */
613
614static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
615 int remove)
616{
617 struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
618 struct list_head *head;
619 struct gfs2_rgrpd *rgd;
620
621 spin_lock(&sdp->sd_rindex_spin);
622
623 head = &sdp->sd_rindex_recent_list;
624
625 list_for_each_entry(rgd, head, rd_recent) {
626 if (rgd == cur_rgd) {
627 if (cur_rgd->rd_recent.next != head)
628 rgd = list_entry(cur_rgd->rd_recent.next,
629 struct gfs2_rgrpd, rd_recent);
630 else
631 rgd = NULL;
632
633 if (remove)
634 list_del(&cur_rgd->rd_recent);
635
636 goto out;
637 }
638 }
639
640 rgd = NULL;
641 if (!list_empty(head))
642 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
643
644 out:
645 spin_unlock(&sdp->sd_rindex_spin);
646
647 return rgd;
648}
649
650/**
651 * recent_rgrp_add - add an RG to tail of "recent" list
652 * @new_rgd: The rgrp to add
653 *
654 */
655
656static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
657{
658 struct gfs2_sbd *sdp = new_rgd->rd_sbd;
659 struct gfs2_rgrpd *rgd;
660 unsigned int count = 0;
661 unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
662
663 spin_lock(&sdp->sd_rindex_spin);
664
665 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
666 if (rgd == new_rgd)
667 goto out;
668
669 if (++count >= max)
670 goto out;
671 }
672 list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
673
674 out:
675 spin_unlock(&sdp->sd_rindex_spin);
676}
677
678/**
679 * forward_rgrp_get - get an rgrp to try next from full list
680 * @sdp: The GFS2 superblock
681 *
682 * Returns: The rgrp to try next
683 */
684
685static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
686{
687 struct gfs2_rgrpd *rgd;
688 unsigned int journals = gfs2_jindex_size(sdp);
689 unsigned int rg = 0, x;
690
691 spin_lock(&sdp->sd_rindex_spin);
692
693 rgd = sdp->sd_rindex_forward;
694 if (!rgd) {
695 if (sdp->sd_rgrps >= journals)
696 rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
697
698 for (x = 0, rgd = gfs2_rgrpd_get_first(sdp);
699 x < rg;
700 x++, rgd = gfs2_rgrpd_get_next(rgd))
701 /* Do Nothing */;
702
703 sdp->sd_rindex_forward = rgd;
704 }
705
706 spin_unlock(&sdp->sd_rindex_spin);
707
708 return rgd;
709}
710
711/**
712 * forward_rgrp_set - set the forward rgrp pointer
713 * @sdp: the filesystem
714 * @rgd: The new forward rgrp
715 *
716 */
717
718static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
719{
720 spin_lock(&sdp->sd_rindex_spin);
721 sdp->sd_rindex_forward = rgd;
722 spin_unlock(&sdp->sd_rindex_spin);
723}
724
725/**
726 * get_local_rgrp - Choose and lock a rgrp for allocation
727 * @ip: the inode to reserve space for
728 * @rgp: the chosen and locked rgrp
729 *
730 * Try to acquire rgrp in way which avoids contending with others.
731 *
732 * Returns: errno
733 */
734
735static int get_local_rgrp(struct gfs2_inode *ip)
736{
737 struct gfs2_sbd *sdp = ip->i_sbd;
738 struct gfs2_rgrpd *rgd, *begin = NULL;
739 struct gfs2_alloc *al = &ip->i_alloc;
740 int flags = LM_FLAG_TRY;
741 int skipped = 0;
742 int loops = 0;
743 int error;
744
745 /* Try recently successful rgrps */
746
747 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
748
749 while (rgd) {
750 error = gfs2_glock_nq_init(rgd->rd_gl,
751 LM_ST_EXCLUSIVE, LM_FLAG_TRY,
752 &al->al_rgd_gh);
753 switch (error) {
754 case 0:
755 if (try_rgrp_fit(rgd, al))
756 goto out;
757 gfs2_glock_dq_uninit(&al->al_rgd_gh);
758 rgd = recent_rgrp_next(rgd, 1);
759 break;
760
761 case GLR_TRYFAILED:
762 rgd = recent_rgrp_next(rgd, 0);
763 break;
764
765 default:
766 return error;
767 }
768 }
769
770 /* Go through full list of rgrps */
771
772 begin = rgd = forward_rgrp_get(sdp);
773
774 for (;;) {
775 error = gfs2_glock_nq_init(rgd->rd_gl,
776 LM_ST_EXCLUSIVE, flags,
777 &al->al_rgd_gh);
778 switch (error) {
779 case 0:
780 if (try_rgrp_fit(rgd, al))
781 goto out;
782 gfs2_glock_dq_uninit(&al->al_rgd_gh);
783 break;
784
785 case GLR_TRYFAILED:
786 skipped++;
787 break;
788
789 default:
790 return error;
791 }
792
793 rgd = gfs2_rgrpd_get_next(rgd);
794 if (!rgd)
795 rgd = gfs2_rgrpd_get_first(sdp);
796
797 if (rgd == begin) {
798 if (++loops >= 2 || !skipped)
799 return -ENOSPC;
800 flags = 0;
801 }
802 }
803
804 out:
805 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
806
807 if (begin) {
808 recent_rgrp_add(rgd);
809 rgd = gfs2_rgrpd_get_next(rgd);
810 if (!rgd)
811 rgd = gfs2_rgrpd_get_first(sdp);
812 forward_rgrp_set(sdp, rgd);
813 }
814
815 return 0;
816}
817
818/**
819 * gfs2_inplace_reserve_i - Reserve space in the filesystem
820 * @ip: the inode to reserve space for
821 *
822 * Returns: errno
823 */
824
825int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
826{
827 struct gfs2_sbd *sdp = ip->i_sbd;
828 struct gfs2_alloc *al = &ip->i_alloc;
829 int error;
830
831 if (gfs2_assert_warn(sdp, al->al_requested))
832 return -EINVAL;
833
834 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
835 if (error)
836 return error;
837
838 error = get_local_rgrp(ip);
839 if (error) {
840 gfs2_glock_dq_uninit(&al->al_ri_gh);
841 return error;
842 }
843
844 al->al_file = file;
845 al->al_line = line;
846
847 return 0;
848}
849
850/**
851 * gfs2_inplace_release - release an inplace reservation
852 * @ip: the inode the reservation was taken out on
853 *
854 * Release a reservation made by gfs2_inplace_reserve().
855 */
856
857void gfs2_inplace_release(struct gfs2_inode *ip)
858{
859 struct gfs2_sbd *sdp = ip->i_sbd;
860 struct gfs2_alloc *al = &ip->i_alloc;
861
862 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
863 fs_warn(sdp, "al_alloced = %u, al_requested = %u "
864 "al_file = %s, al_line = %u\n",
865 al->al_alloced, al->al_requested, al->al_file,
866 al->al_line);
867
868 al->al_rgd = NULL;
869 gfs2_glock_dq_uninit(&al->al_rgd_gh);
870 gfs2_glock_dq_uninit(&al->al_ri_gh);
871}
872
873/**
874 * gfs2_get_block_type - Check a block in a RG is of given type
875 * @rgd: the resource group holding the block
876 * @block: the block number
877 *
878 * Returns: The block type (GFS2_BLKST_*)
879 */
880
881unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block)
882{
883 struct gfs2_bitmap *bi = NULL;
884 uint32_t length, rgrp_block, buf_block;
885 unsigned int buf;
886 unsigned char type;
887
888 length = rgd->rd_ri.ri_length;
889 rgrp_block = block - rgd->rd_ri.ri_data0;
890
891 for (buf = 0; buf < length; buf++) {
892 bi = rgd->rd_bits + buf;
893 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
894 break;
895 }
896
897 gfs2_assert(rgd->rd_sbd, buf < length);
898 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
899
900 type = gfs2_testbit(rgd,
901 bi->bi_bh->b_data + bi->bi_offset,
902 bi->bi_len, buf_block);
903
904 return type;
905}
906
907/**
908 * rgblk_search - find a block in @old_state, change allocation
909 * state to @new_state
910 * @rgd: the resource group descriptor
911 * @goal: the goal block within the RG (start here to search for avail block)
912 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
913 * @new_state: GFS2_BLKST_XXX the after-allocation block state
914 *
915 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
916 * Add the found bitmap buffer to the transaction.
917 * Set the found bits to @new_state to change block's allocation state.
918 *
 * When a bitmap has a clone (bi_clone), the search is run against the
 * clone rather than the buffer, and the new state is then written to
 * both the buffer and the clone.
 *
919 * This function never fails, because we wouldn't call it unless we
920 * know (from reservation results, etc.) that a block is available.
 * If the search nevertheless finds nothing, gfs2_assert_withdraw() is
 * invoked and bit 0 of the bitmap the loop stopped on is used.
921 *
922 * Scope of @goal and returned block is just within rgrp, not the whole
923 * filesystem.
924 *
925 * Returns: the block number allocated
926 */
927
928static uint32_t rgblk_search(struct gfs2_rgrpd *rgd, uint32_t goal,
929 unsigned char old_state, unsigned char new_state)
930{
931 struct gfs2_bitmap *bi = NULL;
932 uint32_t length = rgd->rd_ri.ri_length;
933 uint32_t blk = 0;
934 unsigned int buf, x;
935
936 /* Find bitmap block that contains bits for goal block */
937 for (buf = 0; buf < length; buf++) {
938 bi = rgd->rd_bits + buf;
939 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
940 break;
941 }
942
/* goal must fall inside one of the rgrp's bitmap blocks */
943 gfs2_assert(rgd->rd_sbd, buf < length);
944
945 /* Convert scope of "goal" from rgrp-wide to within found bit block */
946 goal -= bi->bi_start * GFS2_NBBY;
947
948 /* Search (up to entire) bitmap in this rgrp for allocatable block.
949 "x <= length", instead of "x < length", because we typically start
950 the search in the middle of a bit block, but if we can't find an
951 allocatable block anywhere else, we want to be able to wrap around and
952 search in the first part of our first-searched bit block. */
953 for (x = 0; x <= length; x++) {
/* Prefer the clone when one exists; otherwise search the buffer */
954 if (bi->bi_clone)
955 blk = gfs2_bitfit(rgd,
956 bi->bi_clone + bi->bi_offset,
957 bi->bi_len, goal, old_state);
958 else
959 blk = gfs2_bitfit(rgd,
960 bi->bi_bh->b_data + bi->bi_offset,
961 bi->bi_len, goal, old_state);
/* BFITNOENT means no matching block in this bitmap block */
962 if (blk != BFITNOENT)
963 break;
964
965 /* Try next bitmap block (wrap back to rgrp header if at end) */
966 buf = (buf + 1) % length;
967 bi = rgd->rd_bits + buf;
968 goal = 0;
969 }
970
/* x == length + 1 here only if every pass came back BFITNOENT; the
   nonzero return of gfs2_assert_withdraw() is presumably the "assertion
   failed" case (TODO confirm), in which case fall back to bit 0 */
971 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
972 blk = 0;
973
/* Add the bitmap buffer to the transaction before modifying it */
974 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
975 gfs2_setbit(rgd,
976 bi->bi_bh->b_data + bi->bi_offset,
977 bi->bi_len, blk, new_state);
/* Apply the same state change to the clone, if any */
978 if (bi->bi_clone)
979 gfs2_setbit(rgd,
980 bi->bi_clone + bi->bi_offset,
981 bi->bi_len, blk, new_state);
982
/* Convert back from bit-block scope to rgrp-wide scope */
983 return bi->bi_start * GFS2_NBBY + blk;
984}
985
986/**
987 * rgblk_free - Change alloc state of given block(s)
988 * @sdp: the filesystem
989 * @bstart: the start of a run of blocks to free
990 * @blen: the length of the block run (all must lie within ONE RG!)
991 * @new_state: GFS2_BLKST_XXX the after-allocation block state
992 *
993 * Returns: Resource group containing the block(s)
994 */
995
996static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, uint64_t bstart,
997 uint32_t blen, unsigned char new_state)
998{
999 struct gfs2_rgrpd *rgd;
1000 struct gfs2_bitmap *bi = NULL;
1001 uint32_t length, rgrp_blk, buf_blk;
1002 unsigned int buf;
1003
1004 rgd = gfs2_blk2rgrpd(sdp, bstart);
1005 if (!rgd) {
1006 if (gfs2_consist(sdp))
1007 fs_err(sdp, "block = %llu\n", bstart);
1008 return NULL;
1009 }
1010
1011 length = rgd->rd_ri.ri_length;
1012
1013 rgrp_blk = bstart - rgd->rd_ri.ri_data0;
1014
1015 while (blen--) {
1016 for (buf = 0; buf < length; buf++) {
1017 bi = rgd->rd_bits + buf;
1018 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1019 break;
1020 }
1021
1022 gfs2_assert(rgd->rd_sbd, buf < length);
1023
1024 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1025 rgrp_blk++;
1026
1027 if (!bi->bi_clone) {
1028 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1029 GFP_KERNEL | __GFP_NOFAIL);
1030 memcpy(bi->bi_clone + bi->bi_offset,
1031 bi->bi_bh->b_data + bi->bi_offset,
1032 bi->bi_len);
1033 }
1034 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1035 gfs2_setbit(rgd,
1036 bi->bi_bh->b_data + bi->bi_offset,
1037 bi->bi_len, buf_blk, new_state);
1038 }
1039
1040 return rgd;
1041}
1042
1043/**
1044 * gfs2_alloc_data - Allocate a data block
1045 * @ip: the inode to allocate the data block for
1046 *
1047 * Returns: the allocated block
1048 */
1049
1050uint64_t gfs2_alloc_data(struct gfs2_inode *ip)
1051{
1052 struct gfs2_sbd *sdp = ip->i_sbd;
1053 struct gfs2_alloc *al = &ip->i_alloc;
1054 struct gfs2_rgrpd *rgd = al->al_rgd;
1055 uint32_t goal, blk;
1056 uint64_t block;
1057
1058 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
1059 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
1060 else
1061 goal = rgd->rd_last_alloc_data;
1062
1063 blk = rgblk_search(rgd, goal,
1064 GFS2_BLKST_FREE, GFS2_BLKST_USED);
1065 rgd->rd_last_alloc_data = blk;
1066
1067 block = rgd->rd_ri.ri_data0 + blk;
1068 ip->i_di.di_goal_data = block;
1069
1070 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1071 rgd->rd_rg.rg_free--;
1072
1073 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1074 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1075
1076 al->al_alloced++;
1077
1078 gfs2_statfs_change(sdp, 0, -1, 0);
1079 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1080
1081 spin_lock(&sdp->sd_rindex_spin);
1082 rgd->rd_free_clone--;
1083 spin_unlock(&sdp->sd_rindex_spin);
1084
1085 return block;
1086}
1087
1088/**
1089 * gfs2_alloc_meta - Allocate a metadata block
1090 * @ip: the inode to allocate the metadata block for
1091 *
1092 * Returns: the allocated block
1093 */
1094
1095uint64_t gfs2_alloc_meta(struct gfs2_inode *ip)
1096{
1097 struct gfs2_sbd *sdp = ip->i_sbd;
1098 struct gfs2_alloc *al = &ip->i_alloc;
1099 struct gfs2_rgrpd *rgd = al->al_rgd;
1100 uint32_t goal, blk;
1101 uint64_t block;
1102
1103 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
1104 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
1105 else
1106 goal = rgd->rd_last_alloc_meta;
1107
1108 blk = rgblk_search(rgd, goal,
1109 GFS2_BLKST_FREE, GFS2_BLKST_USED);
1110 rgd->rd_last_alloc_meta = blk;
1111
1112 block = rgd->rd_ri.ri_data0 + blk;
1113 ip->i_di.di_goal_meta = block;
1114
1115 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1116 rgd->rd_rg.rg_free--;
1117
1118 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1119 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1120
1121 al->al_alloced++;
1122
1123 gfs2_statfs_change(sdp, 0, -1, 0);
1124 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1125 gfs2_trans_add_unrevoke(sdp, block);
1126
1127 spin_lock(&sdp->sd_rindex_spin);
1128 rgd->rd_free_clone--;
1129 spin_unlock(&sdp->sd_rindex_spin);
1130
1131 return block;
1132}
1133
1134/**
1135 * gfs2_alloc_di - Allocate a dinode
1136 * @dip: the directory that the inode is going in
1137 *
1138 * Returns: the block allocated
1139 */
1140
1141uint64_t gfs2_alloc_di(struct gfs2_inode *dip)
1142{
1143 struct gfs2_sbd *sdp = dip->i_sbd;
1144 struct gfs2_alloc *al = &dip->i_alloc;
1145 struct gfs2_rgrpd *rgd = al->al_rgd;
1146 uint32_t blk;
1147 uint64_t block;
1148
1149 blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
1150 GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
1151
1152 rgd->rd_last_alloc_meta = blk;
1153
1154 block = rgd->rd_ri.ri_data0 + blk;
1155
1156 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1157 rgd->rd_rg.rg_free--;
1158 rgd->rd_rg.rg_dinodes++;
1159
1160 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1161 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1162
1163 al->al_alloced++;
1164
1165 gfs2_statfs_change(sdp, 0, -1, +1);
1166 gfs2_trans_add_unrevoke(sdp, block);
1167
1168 spin_lock(&sdp->sd_rindex_spin);
1169 rgd->rd_free_clone--;
1170 spin_unlock(&sdp->sd_rindex_spin);
1171
1172 return block;
1173}
1174
1175/**
1176 * gfs2_free_data - free a contiguous run of data block(s)
1177 * @ip: the inode these blocks are being freed from
1178 * @bstart: first block of a run of contiguous blocks
1179 * @blen: the length of the block run
1180 *
1181 */
1182
1183void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1184{
1185 struct gfs2_sbd *sdp = ip->i_sbd;
1186 struct gfs2_rgrpd *rgd;
1187
1188 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1189 if (!rgd)
1190 return;
1191
1192 rgd->rd_rg.rg_free += blen;
1193
1194 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1195 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1196
1197 gfs2_trans_add_rg(rgd);
1198
1199 gfs2_statfs_change(sdp, 0, +blen, 0);
1200 gfs2_quota_change(ip, -(int64_t)blen,
1201 ip->i_di.di_uid, ip->i_di.di_gid);
1202}
1203
1204/**
1205 * gfs2_free_meta - free a contiguous run of data block(s)
1206 * @ip: the inode these blocks are being freed from
1207 * @bstart: first block of a run of contiguous blocks
1208 * @blen: the length of the block run
1209 *
1210 */
1211
1212void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1213{
1214 struct gfs2_sbd *sdp = ip->i_sbd;
1215 struct gfs2_rgrpd *rgd;
1216
1217 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1218 if (!rgd)
1219 return;
1220
1221 rgd->rd_rg.rg_free += blen;
1222
1223 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1224 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1225
1226 gfs2_trans_add_rg(rgd);
1227
1228 gfs2_statfs_change(sdp, 0, +blen, 0);
1229 gfs2_quota_change(ip, -(int64_t)blen,
1230 ip->i_di.di_uid, ip->i_di.di_gid);
1231 gfs2_meta_wipe(ip, bstart, blen);
1232}
1233
1234void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno)
1235{
1236 struct gfs2_sbd *sdp = rgd->rd_sbd;
1237 struct gfs2_rgrpd *tmp_rgd;
1238
1239 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1240 if (!tmp_rgd)
1241 return;
1242 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1243
1244 if (!rgd->rd_rg.rg_dinodes)
1245 gfs2_consist_rgrpd(rgd);
1246 rgd->rd_rg.rg_dinodes--;
1247 rgd->rd_rg.rg_free++;
1248
1249 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1250 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1251
1252 gfs2_statfs_change(sdp, 0, +1, -1);
1253 gfs2_trans_add_rg(rgd);
1254}
1255
1256/**
1257 * gfs2_free_uninit_di - free a dinode block
1258 * @rgd: the resource group that contains the dinode
1259 * @ip: the inode
1260 *
1261 */
1262
1263void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1264{
1265 gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
1266 gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
1267 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
1268}
1269
1270/**
1271 * gfs2_rlist_add - add a RG to a list of RGs
1272 * @sdp: the filesystem
1273 * @rlist: the list of resource groups
1274 * @block: the block
1275 *
1276 * Figure out what RG a block belongs to and add that RG to the list
1277 *
1278 * FIXME: Don't use NOFAIL
1279 *
1280 */
1281
1282void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1283 uint64_t block)
1284{
1285 struct gfs2_rgrpd *rgd;
1286 struct gfs2_rgrpd **tmp;
1287 unsigned int new_space;
1288 unsigned int x;
1289
1290 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1291 return;
1292
1293 rgd = gfs2_blk2rgrpd(sdp, block);
1294 if (!rgd) {
1295 if (gfs2_consist(sdp))
1296 fs_err(sdp, "block = %llu\n", block);
1297 return;
1298 }
1299
1300 for (x = 0; x < rlist->rl_rgrps; x++)
1301 if (rlist->rl_rgd[x] == rgd)
1302 return;
1303
1304 if (rlist->rl_rgrps == rlist->rl_space) {
1305 new_space = rlist->rl_space + 10;
1306
1307 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1308 GFP_KERNEL | __GFP_NOFAIL);
1309
1310 if (rlist->rl_rgd) {
1311 memcpy(tmp, rlist->rl_rgd,
1312 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1313 kfree(rlist->rl_rgd);
1314 }
1315
1316 rlist->rl_space = new_space;
1317 rlist->rl_rgd = tmp;
1318 }
1319
1320 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1321}
1322
1323/**
1324 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1325 * and initialize an array of glock holders for them
1326 * @rlist: the list of resource groups
1327 * @state: the lock state to acquire the RG lock in
1328 * @flags: the modifier flags for the holder structures
1329 *
1330 * FIXME: Don't use NOFAIL
1331 *
1332 */
1333
1334void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
1335 int flags)
1336{
1337 unsigned int x;
1338
1339 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1340 GFP_KERNEL | __GFP_NOFAIL);
1341 for (x = 0; x < rlist->rl_rgrps; x++)
1342 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1343 state, flags,
1344 &rlist->rl_ghs[x]);
1345}
1346
1347/**
1348 * gfs2_rlist_free - free a resource group list
1349 * @list: the list of resource groups
1350 *
1351 */
1352
1353void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1354{
1355 unsigned int x;
1356
1357 kfree(rlist->rl_rgd);
1358
1359 if (rlist->rl_ghs) {
1360 for (x = 0; x < rlist->rl_rgrps; x++)
1361 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1362 kfree(rlist->rl_ghs);
1363 }
1364}
1365
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
new file mode 100644
index 000000000000..4c44a191b1c1
--- /dev/null
+++ b/fs/gfs2/rgrp.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RGRP_DOT_H__
11#define __RGRP_DOT_H__
12
13void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
14
15struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
16struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
17struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
18
19void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
20int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
21
22int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
23void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
24void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
25
26void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
27
28struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
29void gfs2_alloc_put(struct gfs2_inode *ip);
30
31int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
32 char *file, unsigned int line);
33#define gfs2_inplace_reserve(ip) \
34gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
35
36void gfs2_inplace_release(struct gfs2_inode *ip);
37
38unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block);
39
40uint64_t gfs2_alloc_data(struct gfs2_inode *ip);
41uint64_t gfs2_alloc_meta(struct gfs2_inode *ip);
42uint64_t gfs2_alloc_di(struct gfs2_inode *ip);
43
44void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
45void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
46void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno);
47void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
48
/*
 * A growable list of resource groups together with an optional array of
 * glock holders, one per listed resource group.  Filled in by
 * gfs2_rlist_add(), holders set up by gfs2_rlist_alloc(), released by
 * gfs2_rlist_free().
 */
49struct gfs2_rgrp_list {
50 unsigned int rl_rgrps; /* number of entries in use in rl_rgd */
51 unsigned int rl_space; /* number of entries allocated for rl_rgd */
52 struct gfs2_rgrpd **rl_rgd; /* array of resource group pointers */
53 struct gfs2_holder *rl_ghs; /* array of rl_rgrps holders; NULL until gfs2_rlist_alloc() */
54};
55
56void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
57 uint64_t block);
58void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
59 int flags);
60void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
61
62#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
new file mode 100644
index 000000000000..2c1c6aa1c077
--- /dev/null
+++ b/fs/gfs2/super.c
@@ -0,0 +1,955 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "bmap.h"
19#include "dir.h"
20#include "format.h"
21#include "glock.h"
22#include "glops.h"
23#include "inode.h"
24#include "log.h"
25#include "meta_io.h"
26#include "quota.h"
27#include "recovery.h"
28#include "rgrp.h"
29#include "super.h"
30#include "trans.h"
31#include "unlinked.h"
32
33/**
34 * gfs2_tune_init - Fill a gfs2_tune structure with default values
35 * @gt: tune
36 *
37 */
38
39void gfs2_tune_init(struct gfs2_tune *gt)
40{
41 spin_lock_init(&gt->gt_spin);
42
43 gt->gt_ilimit = 100;
44 gt->gt_ilimit_tries = 3;
45 gt->gt_ilimit_min = 1;
46 gt->gt_demote_secs = 300;
47 gt->gt_incore_log_blocks = 1024;
48 gt->gt_log_flush_secs = 60;
49 gt->gt_jindex_refresh_secs = 60;
50 gt->gt_scand_secs = 15;
51 gt->gt_recoverd_secs = 60;
52 gt->gt_logd_secs = 1;
53 gt->gt_quotad_secs = 5;
54 gt->gt_inoded_secs = 15;
55 gt->gt_quota_simul_sync = 64;
56 gt->gt_quota_warn_period = 10;
57 gt->gt_quota_scale_num = 1;
58 gt->gt_quota_scale_den = 1;
59 gt->gt_quota_cache_secs = 300;
60 gt->gt_quota_quantum = 60;
61 gt->gt_atime_quantum = 3600;
62 gt->gt_new_files_jdata = 0;
63 gt->gt_new_files_directio = 0;
64 gt->gt_max_atomic_write = 4 << 20;
65 gt->gt_max_readahead = 1 << 18;
66 gt->gt_lockdump_size = 131072;
67 gt->gt_stall_secs = 600;
68 gt->gt_complain_secs = 10;
69 gt->gt_reclaim_limit = 5000;
70 gt->gt_entries_per_readdir = 32;
71 gt->gt_prefetch_secs = 10;
72 gt->gt_greedy_default = HZ / 10;
73 gt->gt_greedy_quantum = HZ / 40;
74 gt->gt_greedy_max = HZ / 4;
75 gt->gt_statfs_quantum = 30;
76 gt->gt_statfs_slow = 0;
77}
78
79/**
80 * gfs2_check_sb - Check superblock
81 * @sdp: the filesystem
82 * @sb: The superblock
83 * @silent: Don't print a message if the check fails
84 *
85 * Checks the version code of the FS is one that we understand how to
86 * read and that the sizes of the various on-disk structures have not
87 * changed.
88 */
89
90int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
91{
92 unsigned int x;
93
94 if (sb->sb_header.mh_magic != GFS2_MAGIC ||
95 sb->sb_header.mh_type != GFS2_METATYPE_SB) {
96 if (!silent)
97 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
98 return -EINVAL;
99 }
100
101 /* If format numbers match exactly, we're done. */
102
103 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
104 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
105 return 0;
106
107 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
108 for (x = 0; gfs2_old_fs_formats[x]; x++)
109 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
110 break;
111
112 if (!gfs2_old_fs_formats[x]) {
113 printk(KERN_WARNING
114 "GFS2: code version (%u, %u) is incompatible "
115 "with ondisk format (%u, %u)\n",
116 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
117 sb->sb_fs_format, sb->sb_multihost_format);
118 printk(KERN_WARNING
119 "GFS2: I don't know how to upgrade this FS\n");
120 return -EINVAL;
121 }
122 }
123
124 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
125 for (x = 0; gfs2_old_multihost_formats[x]; x++)
126 if (gfs2_old_multihost_formats[x] ==
127 sb->sb_multihost_format)
128 break;
129
130 if (!gfs2_old_multihost_formats[x]) {
131 printk(KERN_WARNING
132 "GFS2: code version (%u, %u) is incompatible "
133 "with ondisk format (%u, %u)\n",
134 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
135 sb->sb_fs_format, sb->sb_multihost_format);
136 printk(KERN_WARNING
137 "GFS2: I don't know how to upgrade this FS\n");
138 return -EINVAL;
139 }
140 }
141
142 if (!sdp->sd_args.ar_upgrade) {
143 printk(KERN_WARNING
144 "GFS2: code version (%u, %u) is incompatible "
145 "with ondisk format (%u, %u)\n",
146 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
147 sb->sb_fs_format, sb->sb_multihost_format);
148 printk(KERN_INFO
149 "GFS2: Use the \"upgrade\" mount option to upgrade "
150 "the FS\n");
151 printk(KERN_INFO "GFS2: See the manual for more details\n");
152 return -EINVAL;
153 }
154
155 return 0;
156}
157
158/**
159 * gfs2_read_sb - Read super block
160 * @sdp: The GFS2 superblock
161 * @gl: the glock for the superblock (assumed to be held)
162 * @silent: Don't print message if mount fails
163 *
 * Reads the on-disk superblock into sdp->sd_sb, validates it with
 * gfs2_check_sb(), and then derives the block-size dependent values
 * kept in @sdp: shift/size conversions, pointers per block, the
 * maximum directory reservation, and the per-height size tables for
 * regular (sd_heightsize) and journaled (sd_jheightsize) files.
 *
 * Returns: 0 on success, or the error from the read or from
 * gfs2_check_sb()
164 */
165
166int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
167{
168 struct buffer_head *bh;
169 uint32_t hash_blocks, ind_blocks, leaf_blocks;
170 uint32_t tmp_blocks;
171 unsigned int x;
172 int error;
173
/* NOTE(review): the read address uses sd_fsb2bb_shift as set by the
   caller; this function only recomputes it further down */
174 error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift,
175 DIO_FORCE | DIO_START | DIO_WAIT, &bh);
176 if (error) {
177 if (!silent)
178 fs_err(sdp, "can't read superblock\n");
179 return error;
180 }
181
/* Convert the on-disk superblock into sd_sb, then drop the buffer */
182 gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size);
183 gfs2_sb_in(&sdp->sd_sb, bh->b_data);
184 brelse(bh);
185
186 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
187 if (error)
188 return error;
189
/* Values derived from the filesystem block size */
190 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
191 GFS2_BASIC_BLOCK_SHIFT;
192 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
/* 64-bit pointers that fit after a dinode / after a metadata header */
193 sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
194 sizeof(struct gfs2_dinode)) / sizeof(uint64_t);
195 sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
196 sizeof(struct gfs2_meta_header)) / sizeof(uint64_t);
/* Bytes left in a block after the metadata header */
197 sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
198 sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
199 sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
200 sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
/* Unlinked tags / quota change records per block (after the header) */
201 sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize -
202 sizeof(struct gfs2_meta_header)) /
203 sizeof(struct gfs2_unlinked_tag);
204 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
205 sizeof(struct gfs2_meta_header)) /
206 sizeof(struct gfs2_quota_change);
207
208 /* Compute maximum reservation required to add a entry to a directory */
209
/* Blocks needed for a hash table of 2^GFS2_DIR_MAX_DEPTH pointers */
210 hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH),
211 sdp->sd_jbsize);
212
/* Add indirect blocks, one level per pass, until what remains fits
   in the dinode's own pointers */
213 ind_blocks = 0;
214 for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
215 tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs);
216 ind_blocks += tmp_blocks;
217 }
218
219 leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
220
221 sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
222
/* sd_heightsize[h]: bytes addressable by a file of metadata height h */
223 sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
224 sizeof(struct gfs2_dinode);
225 sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
226 for (x = 2;; x++) {
227 uint64_t space, d;
228 uint32_t m;
229
230 space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
231 d = space;
232 m = do_div(d, sdp->sd_inptrs);
233
/* Stop once dividing the product back no longer yields the previous
   size, i.e. the 64-bit multiplication wrapped */
234 if (d != sdp->sd_heightsize[x - 1] || m)
235 break;
236 sdp->sd_heightsize[x] = space;
237 }
238 sdp->sd_max_height = x;
239 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
240
/* Same table for journaled data; height 1 uses sd_jbsize per block */
241 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
242 sizeof(struct gfs2_dinode);
243 sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
244 for (x = 2;; x++) {
245 uint64_t space, d;
246 uint32_t m;
247
248 space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
249 d = space;
250 m = do_div(d, sdp->sd_inptrs);
251
/* Same wrap-around check as for sd_heightsize */
252 if (d != sdp->sd_jheightsize[x - 1] || m)
253 break;
254 sdp->sd_jheightsize[x] = space;
255 }
256 sdp->sd_max_jheight = x;
257 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
258
259 return 0;
260}
261
/**
 * gfs2_do_upgrade - upgrade hook for the on-disk format
 * @sdp: the filesystem (unused)
 * @sb_gl: the superblock glock (unused)
 *
 * Currently does nothing.
 *
 * Returns: 0 always
 */
262int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *sb_gl)
263{
264 return 0;
265}
266
267/**
268 * gfs2_jindex_hold - Grab a lock on the jindex
269 * @sdp: The GFS2 superblock
270 * @ji_gh: the holder for the jindex glock
271 *
272 * This is very similar to the gfs2_rindex_hold() function, except that
273 * in general we hold the jindex lock for longer periods of time and
274 * we grab it far less frequently (in general) than the rgrp lock.
275 *
 * Each pass of the loop takes the jindex glock and looks up the entry
 * "journal<N>", where N is the number of journals already known
 * (sd_journals).  If the entry does not exist the loop ends with the
 * glock still held through @ji_gh and 0 is returned.  If it exists,
 * the glock is dropped, a journal descriptor is allocated, its inode is
 * looked up, and the descriptor is appended to sd_jindex_list before
 * the next pass.  On every error path that follows the directory
 * search, the glock has already been dropped.
 *
276 * Returns: errno
277 */
278
279int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
280{
281 struct gfs2_inode *dip = get_v2ip(sdp->sd_jindex);
282 struct qstr name;
283 char buf[20];
284 struct gfs2_jdesc *jd;
285 int error;
286
287 name.name = buf;
288
289 mutex_lock(&sdp->sd_jindex_mutex);
290
291 for (;;) {
292 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
293 GL_LOCAL_EXCL, ji_gh);
294 if (error)
295 break;
296
/* Name of the next journal we have not seen yet */
297 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
298
299 error = gfs2_dir_search(get_v2ip(sdp->sd_jindex),
300 &name, NULL, NULL);
/* No such journal: we are up to date; leave with the glock held */
301 if (error == -ENOENT) {
302 error = 0;
303 break;
304 }
305
/* Drop the glock before the allocation and the lookup below */
306 gfs2_glock_dq_uninit(ji_gh);
307
308 if (error)
309 break;
310
311 error = -ENOMEM;
312 jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
313 if (!jd)
314 break;
315
316 error = gfs2_lookupi(sdp->sd_jindex, &name, 1, &jd->jd_inode);
317 if (error) {
318 kfree(jd);
319 break;
320 }
321
/* Publish the new descriptor; its id is the old journal count */
322 spin_lock(&sdp->sd_jindex_spin);
323 jd->jd_jid = sdp->sd_journals++;
324 list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
325 spin_unlock(&sdp->sd_jindex_spin);
326 }
327
328 mutex_unlock(&sdp->sd_jindex_mutex);
329
330 return error;
331}
332
333/**
334 * gfs2_jindex_free - Clear all the journal index information
335 * @sdp: The GFS2 superblock
336 *
337 */
338
339void gfs2_jindex_free(struct gfs2_sbd *sdp)
340{
341 struct list_head list;
342 struct gfs2_jdesc *jd;
343
344 spin_lock(&sdp->sd_jindex_spin);
345 list_add(&list, &sdp->sd_jindex_list);
346 list_del_init(&sdp->sd_jindex_list);
347 sdp->sd_journals = 0;
348 spin_unlock(&sdp->sd_jindex_spin);
349
350 while (!list_empty(&list)) {
351 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
352 list_del(&jd->jd_list);
353 iput(jd->jd_inode);
354 kfree(jd);
355 }
356}
357
358static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
359{
360 struct gfs2_jdesc *jd;
361 int found = 0;
362
363 list_for_each_entry(jd, head, jd_list) {
364 if (jd->jd_jid == jid) {
365 found = 1;
366 break;
367 }
368 }
369
370 if (!found)
371 jd = NULL;
372
373 return jd;
374}
375
376struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
377{
378 struct gfs2_jdesc *jd;
379
380 spin_lock(&sdp->sd_jindex_spin);
381 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
382 spin_unlock(&sdp->sd_jindex_spin);
383
384 return jd;
385}
386
387void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
388{
389 struct gfs2_jdesc *jd;
390
391 spin_lock(&sdp->sd_jindex_spin);
392 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
393 if (jd)
394 jd->jd_dirty = 1;
395 spin_unlock(&sdp->sd_jindex_spin);
396}
397
398struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
399{
400 struct gfs2_jdesc *jd;
401 int found = 0;
402
403 spin_lock(&sdp->sd_jindex_spin);
404
405 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
406 if (jd->jd_dirty) {
407 jd->jd_dirty = 0;
408 found = 1;
409 break;
410 }
411 }
412 spin_unlock(&sdp->sd_jindex_spin);
413
414 if (!found)
415 jd = NULL;
416
417 return jd;
418}
419
420int gfs2_jdesc_check(struct gfs2_jdesc *jd)
421{
422 struct gfs2_inode *ip = get_v2ip(jd->jd_inode);
423 struct gfs2_sbd *sdp = ip->i_sbd;
424 int ar;
425 int error;
426
427 if (ip->i_di.di_size < (8 << 20) ||
428 ip->i_di.di_size > (1 << 30) ||
429 (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
430 gfs2_consist_inode(ip);
431 return -EIO;
432 }
433 jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
434
435 error = gfs2_write_alloc_required(ip,
436 0, ip->i_di.di_size,
437 &ar);
438 if (!error && ar) {
439 gfs2_consist_inode(ip);
440 error = -EIO;
441 }
442
443 return error;
444}
445
446int gfs2_lookup_master_dir(struct gfs2_sbd *sdp)
447{
448 struct inode *inode = NULL;
449 struct gfs2_glock *gl;
450 int error;
451
452 error = gfs2_glock_get(sdp,
453 sdp->sd_sb.sb_master_dir.no_addr,
454 &gfs2_inode_glops, CREATE, &gl);
455 if (!error) {
456 error = gfs2_lookup_simple(sdp->sd_root_dir, ".gfs2_admin",
457 &inode);
458 sdp->sd_master_dir = inode;
459 gfs2_glock_put(gl);
460 }
461
462 return error;
463}
464
465/**
466 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
467 * @sdp: the filesystem
468 *
 * Holds the transaction glock in shared mode while it flushes and
 * invalidates cached journal data, finds the journal head (which must
 * carry the GFS2_LOG_HEAD_UNMOUNT flag), initializes the log pointers
 * and sequence number, and sets up the unlinked and quota subsystems.
 * On success SDF_JOURNAL_LIVE is set.
 *
469 * Returns: errno
470 */
471
472int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
473{
474 struct gfs2_glock *j_gl = get_v2ip(sdp->sd_jdesc->jd_inode)->i_gl;
475 struct gfs2_holder t_gh;
476 struct gfs2_log_header head;
477 int error;
478
479 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
480 GL_LOCAL_EXCL | GL_NEVER_RECURSE, &t_gh);
481 if (error)
482 return error;
483
/* Discard cached journal contents before the journal head is read */
484 gfs2_meta_cache_flush(get_v2ip(sdp->sd_jdesc->jd_inode));
485 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
486
487 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
488 if (error)
489 goto fail;
490
/* A journal head without the unmount flag is treated as an inconsistency */
491 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
492 gfs2_consist(sdp);
493 error = -EIO;
494 goto fail;
495 }
496
497 /* Initialize some head of the log stuff */
498 sdp->sd_log_sequence = head.lh_sequence + 1;
499 gfs2_log_pointers_init(sdp, head.lh_blkno);
500
501 error = gfs2_unlinked_init(sdp);
502 if (error)
503 goto fail;
504 error = gfs2_quota_init(sdp);
505 if (error)
506 goto fail_unlinked;
507
508 set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
509
510 gfs2_glock_dq_uninit(&t_gh);
511
512 return 0;
513
/* Error unwinding: undo gfs2_unlinked_init(), then release the
   transaction glock with GL_NOCACHE added to the holder flags */
514 fail_unlinked:
515 gfs2_unlinked_cleanup(sdp);
516
517 fail:
518 t_gh.gh_flags |= GL_NOCACHE;
519 gfs2_glock_dq_uninit(&t_gh);
520
521 return error;
522}
523
524/**
525 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
526 * @sdp: the filesystem
527 *
 * Deallocates unlinked inodes and syncs quota and statfs data, then
 * takes the transaction glock, syncs metadata, shuts the log down and
 * clears SDF_JOURNAL_LIVE before cleaning up the unlinked and quota
 * subsystems.  If the transaction glock cannot be acquired, the
 * function gives up early unless SDF_SHUTDOWN is set, in which case it
 * carries on without the glock and still returns the acquisition
 * error.  The return values of the initial dealloc and sync calls are
 * ignored.
 *
528 * Returns: errno
529 */
530
531int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
532{
533 struct gfs2_holder t_gh;
534 int error;
535
536 gfs2_unlinked_dealloc(sdp);
537 gfs2_quota_sync(sdp);
538 gfs2_statfs_sync(sdp);
539
540 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
541 GL_LOCAL_EXCL | GL_NEVER_RECURSE | GL_NOCACHE,
542 &t_gh);
/* A lock failure is fatal only when SDF_SHUTDOWN is not set */
543 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
544 return error;
545
546 gfs2_meta_syncfs(sdp);
547 gfs2_log_shutdown(sdp);
548
549 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
550
/* NOTE(review): relies on gfs2_glock_nq_init() leaving gh_gl NULL when
   it fails, since t_gh is not otherwise initialized - confirm */
551 if (t_gh.gh_gl)
552 gfs2_glock_dq_uninit(&t_gh);
553
554 gfs2_unlinked_cleanup(sdp);
555 gfs2_quota_cleanup(sdp);
556
557 return error;
558}
559
560int gfs2_statfs_init(struct gfs2_sbd *sdp)
561{
562 struct gfs2_inode *m_ip = get_v2ip(sdp->sd_statfs_inode);
563 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
564 struct gfs2_inode *l_ip = get_v2ip(sdp->sd_sc_inode);
565 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
566 struct buffer_head *m_bh, *l_bh;
567 struct gfs2_holder gh;
568 int error;
569
570 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
571 &gh);
572 if (error)
573 return error;
574
575 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
576 if (error)
577 goto out;
578
579 if (sdp->sd_args.ar_spectator) {
580 spin_lock(&sdp->sd_statfs_spin);
581 gfs2_statfs_change_in(m_sc, m_bh->b_data +
582 sizeof(struct gfs2_dinode));
583 spin_unlock(&sdp->sd_statfs_spin);
584 } else {
585 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
586 if (error)
587 goto out_m_bh;
588
589 spin_lock(&sdp->sd_statfs_spin);
590 gfs2_statfs_change_in(m_sc, m_bh->b_data +
591 sizeof(struct gfs2_dinode));
592 gfs2_statfs_change_in(l_sc, l_bh->b_data +
593 sizeof(struct gfs2_dinode));
594 spin_unlock(&sdp->sd_statfs_spin);
595
596 brelse(l_bh);
597 }
598
599 out_m_bh:
600 brelse(m_bh);
601
602 out:
603 gfs2_glock_dq_uninit(&gh);
604
605 return 0;
606}
607
608void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free,
609 int64_t dinodes)
610{
611 struct gfs2_inode *l_ip = get_v2ip(sdp->sd_sc_inode);
612 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
613 struct buffer_head *l_bh;
614 int error;
615
616 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
617 if (error)
618 return;
619
620 mutex_lock(&sdp->sd_statfs_mutex);
621 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
622 mutex_unlock(&sdp->sd_statfs_mutex);
623
624 spin_lock(&sdp->sd_statfs_spin);
625 l_sc->sc_total += total;
626 l_sc->sc_free += free;
627 l_sc->sc_dinodes += dinodes;
628 gfs2_statfs_change_out(l_sc, l_bh->b_data +
629 sizeof(struct gfs2_dinode));
630 spin_unlock(&sdp->sd_statfs_spin);
631
632 brelse(l_bh);
633}
634
/**
 * gfs2_statfs_sync - fold the local statfs changes into the master record
 * @sdp: the filesystem
 *
 * Holds the master statfs inode's glock exclusively, re-reads the
 * master counters from the master inode's buffer and, if any local
 * counter is non-zero, adds the local counters to the master ones
 * inside a transaction.  The local counters are then zeroed both in
 * memory and in the local inode's buffer, and the new master counters
 * are written into the master inode's buffer.
 *
 * Returns: errno
 */
635int gfs2_statfs_sync(struct gfs2_sbd *sdp)
636{
637 struct gfs2_inode *m_ip = get_v2ip(sdp->sd_statfs_inode);
638 struct gfs2_inode *l_ip = get_v2ip(sdp->sd_sc_inode);
639 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
640 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
641 struct gfs2_holder gh;
642 struct buffer_head *m_bh, *l_bh;
643 int error;
644
645 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
646 &gh);
647 if (error)
648 return error;
649
650 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
651 if (error)
652 goto out;
653
/* Refresh the in-core master counters from the buffer; if there are
   no local changes there is nothing to sync (error is 0 here) */
654 spin_lock(&sdp->sd_statfs_spin);
655 gfs2_statfs_change_in(m_sc, m_bh->b_data +
656 sizeof(struct gfs2_dinode));
657 if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
658 spin_unlock(&sdp->sd_statfs_spin);
659 goto out_bh;
660 }
661 spin_unlock(&sdp->sd_statfs_spin);
662
663 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
664 if (error)
665 goto out_bh;
666
/* Reserve transaction space for the two dinode blocks being modified */
667 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
668 if (error)
669 goto out_bh2;
670
671 mutex_lock(&sdp->sd_statfs_mutex);
672 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
673 mutex_unlock(&sdp->sd_statfs_mutex);
674
/* Move the local deltas into the master counters and zero the local
   record both in memory and in its buffer */
675 spin_lock(&sdp->sd_statfs_spin);
676 m_sc->sc_total += l_sc->sc_total;
677 m_sc->sc_free += l_sc->sc_free;
678 m_sc->sc_dinodes += l_sc->sc_dinodes;
679 memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
680 memset(l_bh->b_data + sizeof(struct gfs2_dinode),
681 0, sizeof(struct gfs2_statfs_change));
682 spin_unlock(&sdp->sd_statfs_spin);
683
/* Write the updated master counters into the master inode's buffer */
684 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
685 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
686
687 gfs2_trans_end(sdp);
688
689 out_bh2:
690 brelse(l_bh);
691
692 out_bh:
693 brelse(m_bh);
694
695 out:
696 gfs2_glock_dq_uninit(&gh);
697
698 return error;
699}
700
701/**
702 * gfs2_statfs_i - Do a statfs
703 * @sdp: the filesystem
704 * @sg: the sg structure
705 *
706 * Returns: errno
707 */
708
709int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
710{
711 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
712 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
713
714 spin_lock(&sdp->sd_statfs_spin);
715
716 *sc = *m_sc;
717 sc->sc_total += l_sc->sc_total;
718 sc->sc_free += l_sc->sc_free;
719 sc->sc_dinodes += l_sc->sc_dinodes;
720
721 spin_unlock(&sdp->sd_statfs_spin);
722
723 if (sc->sc_free < 0)
724 sc->sc_free = 0;
725 if (sc->sc_free > sc->sc_total)
726 sc->sc_free = sc->sc_total;
727 if (sc->sc_dinodes < 0)
728 sc->sc_dinodes = 0;
729
730 return 0;
731}
732
733/**
734 * statfs_fill - fill in the sg for a given RG
735 * @rgd: the RG
736 * @sc: the sc structure
737 *
738 * Returns: 0 on success, -ESTALE if the LVB is invalid
739 */
740
741static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
742 struct gfs2_statfs_change *sc)
743{
744 gfs2_rgrp_verify(rgd);
745 sc->sc_total += rgd->rd_ri.ri_data;
746 sc->sc_free += rgd->rd_rg.rg_free;
747 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
748 return 0;
749}
750
751/**
752 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
753 * @sdp: the filesystem
754 * @sc: the sc info that will be returned
755 *
756 * Any error (other than a signal) will cause this routine to fall back
757 * to the synchronous version.
758 *
759 * FIXME: This really shouldn't busy wait like this.
760 *
761 * Returns: errno
762 */
763
764int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
765{
766 struct gfs2_holder ri_gh;
767 struct gfs2_rgrpd *rgd_next;
768 struct gfs2_holder *gha, *gh;
769 unsigned int slots = 64;
770 unsigned int x;
771 int done;
772 int error = 0, err;
773
774 memset(sc, 0, sizeof(struct gfs2_statfs_change));
775 gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
776 if (!gha)
777 return -ENOMEM;
778
779 error = gfs2_rindex_hold(sdp, &ri_gh);
780 if (error)
781 goto out;
782
783 rgd_next = gfs2_rgrpd_get_first(sdp);
784
785 for (;;) {
786 done = 1;
787
788 for (x = 0; x < slots; x++) {
789 gh = gha + x;
790
791 if (gh->gh_gl && gfs2_glock_poll(gh)) {
792 err = gfs2_glock_wait(gh);
793 if (err) {
794 gfs2_holder_uninit(gh);
795 error = err;
796 } else {
797 if (!error)
798 error = statfs_slow_fill(get_gl2rgd(gh->gh_gl), sc);
799 gfs2_glock_dq_uninit(gh);
800 }
801 }
802
803 if (gh->gh_gl)
804 done = 0;
805 else if (rgd_next && !error) {
806 error = gfs2_glock_nq_init(rgd_next->rd_gl,
807 LM_ST_SHARED,
808 GL_ASYNC,
809 gh);
810 rgd_next = gfs2_rgrpd_get_next(rgd_next);
811 done = 0;
812 }
813
814 if (signal_pending(current))
815 error = -ERESTARTSYS;
816 }
817
818 if (done)
819 break;
820
821 yield();
822 }
823
824 gfs2_glock_dq_uninit(&ri_gh);
825
826 out:
827 kfree(gha);
828
829 return error;
830}
831
832struct lfcc {
833 struct list_head list;
834 struct gfs2_holder gh;
835};
836
837/**
838 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
839 * journals are clean
840 * @sdp: the file system
841 * @state: the state to put the transaction lock into
842 * @t_gh: the hold on the transaction lock
843 *
844 * Returns: errno
845 */
846
847int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh)
848{
849 struct gfs2_holder ji_gh;
850 struct gfs2_jdesc *jd;
851 struct lfcc *lfcc;
852 LIST_HEAD(list);
853 struct gfs2_log_header lh;
854 int error;
855
856 error = gfs2_jindex_hold(sdp, &ji_gh);
857 if (error)
858 return error;
859
860 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
861 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
862 if (!lfcc) {
863 error = -ENOMEM;
864 goto out;
865 }
866 error = gfs2_glock_nq_init(get_v2ip(jd->jd_inode)->i_gl,
867 LM_ST_SHARED, 0,
868 &lfcc->gh);
869 if (error) {
870 kfree(lfcc);
871 goto out;
872 }
873 list_add(&lfcc->list, &list);
874 }
875
876 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
877 LM_FLAG_PRIORITY | GL_NEVER_RECURSE | GL_NOCACHE,
878 t_gh);
879
880 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
881 error = gfs2_jdesc_check(jd);
882 if (error)
883 break;
884 error = gfs2_find_jhead(jd, &lh);
885 if (error)
886 break;
887 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
888 error = -EBUSY;
889 break;
890 }
891 }
892
893 if (error)
894 gfs2_glock_dq_uninit(t_gh);
895
896 out:
897 while (!list_empty(&list)) {
898 lfcc = list_entry(list.next, struct lfcc, list);
899 list_del(&lfcc->list);
900 gfs2_glock_dq_uninit(&lfcc->gh);
901 kfree(lfcc);
902 }
903 gfs2_glock_dq_uninit(&ji_gh);
904
905 return error;
906}
907
908/**
909 * gfs2_freeze_fs - freezes the file system
910 * @sdp: the file system
911 *
912 * This function flushes data and meta data for all machines by
913 * aquiring the transaction log exclusively. All journals are
914 * ensured to be in a clean state as well.
915 *
916 * Returns: errno
917 */
918
919int gfs2_freeze_fs(struct gfs2_sbd *sdp)
920{
921 int error = 0;
922
923 mutex_lock(&sdp->sd_freeze_lock);
924
925 if (!sdp->sd_freeze_count++) {
926 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
927 if (error)
928 sdp->sd_freeze_count--;
929 }
930
931 mutex_unlock(&sdp->sd_freeze_lock);
932
933 return error;
934}
935
936/**
937 * gfs2_unfreeze_fs - unfreezes the file system
938 * @sdp: the file system
939 *
940 * This function allows the file system to proceed by unlocking
941 * the exclusively held transaction lock. Other GFS2 nodes are
942 * now free to acquire the lock shared and go on with their lives.
943 *
944 */
945
946void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
947{
948 mutex_lock(&sdp->sd_freeze_lock);
949
950 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
951 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
952
953 mutex_unlock(&sdp->sd_freeze_lock);
954}
955
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
new file mode 100644
index 000000000000..cc1a3df1949a
--- /dev/null
+++ b/fs/gfs2/super.h
@@ -0,0 +1,55 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SUPER_DOT_H__
11#define __SUPER_DOT_H__
12
13void gfs2_tune_init(struct gfs2_tune *gt);
14
15int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
16int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
17int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *gl_sb);
18
19static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
20{
21 unsigned int x;
22 spin_lock(&sdp->sd_jindex_spin);
23 x = sdp->sd_journals;
24 spin_unlock(&sdp->sd_jindex_spin);
25 return x;
26}
27
28int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
29void gfs2_jindex_free(struct gfs2_sbd *sdp);
30
31struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
32void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
33struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
34int gfs2_jdesc_check(struct gfs2_jdesc *jd);
35
36int gfs2_lookup_master_dir(struct gfs2_sbd *sdp);
37int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
38 struct gfs2_inode **ipp);
39
40int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
41int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
42
43int gfs2_statfs_init(struct gfs2_sbd *sdp);
44void gfs2_statfs_change(struct gfs2_sbd *sdp,
45 int64_t total, int64_t free, int64_t dinodes);
46int gfs2_statfs_sync(struct gfs2_sbd *sdp);
47int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
48int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
49
50int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh);
51int gfs2_freeze_fs(struct gfs2_sbd *sdp);
52void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
53
54#endif /* __SUPER_DOT_H__ */
55
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
new file mode 100644
index 000000000000..f87df8ec041e
--- /dev/null
+++ b/fs/gfs2/sys.c
@@ -0,0 +1,578 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/kobject.h>
17#include <asm/semaphore.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "lm.h"
22#include "sys.h"
23#include "super.h"
24#include "glock.h"
25#include "quota.h"
26
27char *gfs2_sys_margs;
28spinlock_t gfs2_sys_margs_lock;
29
30static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
31{
32 return sprintf(buf, "%s\n", sdp->sd_vfs->s_id);
33}
34
35static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
36{
37 return sprintf(buf, "%s\n", sdp->sd_fsname);
38}
39
40static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
41{
42 unsigned int count;
43
44 mutex_lock(&sdp->sd_freeze_lock);
45 count = sdp->sd_freeze_count;
46 mutex_unlock(&sdp->sd_freeze_lock);
47
48 return sprintf(buf, "%u\n", count);
49}
50
51static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
52{
53 ssize_t ret = len;
54 int error = 0;
55 int n = simple_strtol(buf, NULL, 0);
56
57 if (!capable(CAP_SYS_ADMIN))
58 return -EACCES;
59
60 switch (n) {
61 case 0:
62 gfs2_unfreeze_fs(sdp);
63 break;
64 case 1:
65 error = gfs2_freeze_fs(sdp);
66 break;
67 default:
68 ret = -EINVAL;
69 }
70
71 if (error)
72 fs_warn(sdp, "freeze %d error %d", n, error);
73
74 return ret;
75}
76
77static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
78{
79 unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
80 return sprintf(buf, "%u\n", b);
81}
82
83static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
84{
85 if (!capable(CAP_SYS_ADMIN))
86 return -EACCES;
87
88 if (simple_strtol(buf, NULL, 0) != 1)
89 return -EINVAL;
90
91 gfs2_lm_withdraw(sdp,
92 "GFS2: fsid=%s: withdrawing from cluster at user's request\n",
93 sdp->sd_fsname);
94 return len;
95}
96
97static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
98 size_t len)
99{
100 if (!capable(CAP_SYS_ADMIN))
101 return -EACCES;
102
103 if (simple_strtol(buf, NULL, 0) != 1)
104 return -EINVAL;
105
106 gfs2_statfs_sync(sdp);
107 return len;
108}
109
110static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
111{
112 if (!capable(CAP_SYS_ADMIN))
113 return -EACCES;
114
115 if (simple_strtol(buf, NULL, 0) != 1)
116 return -EINVAL;
117
118 gfs2_gl_hash_clear(sdp, NO_WAIT);
119 return len;
120}
121
122static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
123 size_t len)
124{
125 if (!capable(CAP_SYS_ADMIN))
126 return -EACCES;
127
128 if (simple_strtol(buf, NULL, 0) != 1)
129 return -EINVAL;
130
131 gfs2_quota_sync(sdp);
132 return len;
133}
134
135static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
136 size_t len)
137{
138 uint32_t id;
139
140 if (!capable(CAP_SYS_ADMIN))
141 return -EACCES;
142
143 id = simple_strtoul(buf, NULL, 0);
144
145 gfs2_quota_refresh(sdp, 1, id);
146 return len;
147}
148
149static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
150 size_t len)
151{
152 uint32_t id;
153
154 if (!capable(CAP_SYS_ADMIN))
155 return -EACCES;
156
157 id = simple_strtoul(buf, NULL, 0);
158
159 gfs2_quota_refresh(sdp, 0, id);
160 return len;
161}
162
163struct gfs2_attr {
164 struct attribute attr;
165 ssize_t (*show)(struct gfs2_sbd *, char *);
166 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
167};
168
169#define GFS2_ATTR(name, mode, show, store) \
170static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
171
172GFS2_ATTR(id, 0444, id_show, NULL);
173GFS2_ATTR(fsname, 0444, fsname_show, NULL);
174GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
175GFS2_ATTR(shrink, 0200, NULL, shrink_store);
176GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
177GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
178GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
179GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
180GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
181
182static struct attribute *gfs2_attrs[] = {
183 &gfs2_attr_id.attr,
184 &gfs2_attr_fsname.attr,
185 &gfs2_attr_freeze.attr,
186 &gfs2_attr_shrink.attr,
187 &gfs2_attr_withdraw.attr,
188 &gfs2_attr_statfs_sync.attr,
189 &gfs2_attr_quota_sync.attr,
190 &gfs2_attr_quota_refresh_user.attr,
191 &gfs2_attr_quota_refresh_group.attr,
192 NULL,
193};
194
195static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
196 char *buf)
197{
198 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
199 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
200 return a->show ? a->show(sdp, buf) : 0;
201}
202
203static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
204 const char *buf, size_t len)
205{
206 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
207 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
208 return a->store ? a->store(sdp, buf, len) : len;
209}
210
211static struct sysfs_ops gfs2_attr_ops = {
212 .show = gfs2_attr_show,
213 .store = gfs2_attr_store,
214};
215
216static struct kobj_type gfs2_ktype = {
217 .default_attrs = gfs2_attrs,
218 .sysfs_ops = &gfs2_attr_ops,
219};
220
221static struct kset gfs2_kset = {
222 .subsys = &fs_subsys,
223 .kobj = {.name = "gfs2",},
224 .ktype = &gfs2_ktype,
225};
226
227/*
228 * display struct lm_lockstruct fields
229 */
230
231struct lockstruct_attr {
232 struct attribute attr;
233 ssize_t (*show)(struct gfs2_sbd *, char *);
234};
235
236#define LOCKSTRUCT_ATTR(name, fmt) \
237static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
238{ \
239 return sprintf(buf, fmt, sdp->sd_lockstruct.ls_##name); \
240} \
241static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
242
243LOCKSTRUCT_ATTR(jid, "%u\n");
244LOCKSTRUCT_ATTR(first, "%u\n");
245LOCKSTRUCT_ATTR(lvb_size, "%u\n");
246LOCKSTRUCT_ATTR(flags, "%d\n");
247
248static struct attribute *lockstruct_attrs[] = {
249 &lockstruct_attr_jid.attr,
250 &lockstruct_attr_first.attr,
251 &lockstruct_attr_lvb_size.attr,
252 &lockstruct_attr_flags.attr,
253 NULL
254};
255
256/*
257 * display struct gfs2_args fields
258 */
259
260struct args_attr {
261 struct attribute attr;
262 ssize_t (*show)(struct gfs2_sbd *, char *);
263};
264
265#define ARGS_ATTR(name, fmt) \
266static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
267{ \
268 return sprintf(buf, fmt, sdp->sd_args.ar_##name); \
269} \
270static struct args_attr args_attr_##name = __ATTR_RO(name)
271
272ARGS_ATTR(lockproto, "%s\n");
273ARGS_ATTR(locktable, "%s\n");
274ARGS_ATTR(hostdata, "%s\n");
275ARGS_ATTR(spectator, "%d\n");
276ARGS_ATTR(ignore_local_fs, "%d\n");
277ARGS_ATTR(localcaching, "%d\n");
278ARGS_ATTR(localflocks, "%d\n");
279ARGS_ATTR(debug, "%d\n");
280ARGS_ATTR(upgrade, "%d\n");
281ARGS_ATTR(num_glockd, "%u\n");
282ARGS_ATTR(posix_acl, "%d\n");
283ARGS_ATTR(quota, "%u\n");
284ARGS_ATTR(suiddir, "%d\n");
285ARGS_ATTR(data, "%d\n");
286
287/* one oddball doesn't fit the macro mold */
288static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
289{
290 return sprintf(buf, "%d\n", !!test_bit(SDF_NOATIME, &sdp->sd_flags));
291}
292static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
293
294static struct attribute *args_attrs[] = {
295 &args_attr_lockproto.attr,
296 &args_attr_locktable.attr,
297 &args_attr_hostdata.attr,
298 &args_attr_spectator.attr,
299 &args_attr_ignore_local_fs.attr,
300 &args_attr_localcaching.attr,
301 &args_attr_localflocks.attr,
302 &args_attr_debug.attr,
303 &args_attr_upgrade.attr,
304 &args_attr_num_glockd.attr,
305 &args_attr_posix_acl.attr,
306 &args_attr_quota.attr,
307 &args_attr_suiddir.attr,
308 &args_attr_data.attr,
309 &args_attr_noatime.attr,
310 NULL
311};
312
313/*
314 * display counters from superblock
315 */
316
317struct counters_attr {
318 struct attribute attr;
319 ssize_t (*show)(struct gfs2_sbd *, char *);
320};
321
322#define COUNTERS_ATTR(name, fmt) \
323static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
324{ \
325 return sprintf(buf, fmt, (unsigned int)atomic_read(&sdp->sd_##name)); \
326} \
327static struct counters_attr counters_attr_##name = __ATTR_RO(name)
328
329COUNTERS_ATTR(glock_count, "%u\n");
330COUNTERS_ATTR(glock_held_count, "%u\n");
331COUNTERS_ATTR(inode_count, "%u\n");
332COUNTERS_ATTR(reclaimed, "%u\n");
333
334static struct attribute *counters_attrs[] = {
335 &counters_attr_glock_count.attr,
336 &counters_attr_glock_held_count.attr,
337 &counters_attr_inode_count.attr,
338 &counters_attr_reclaimed.attr,
339 NULL
340};
341
342/*
343 * get and set struct gfs2_tune fields
344 */
345
346static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
347{
348 return sprintf(buf, "%u %u\n", sdp->sd_tune.gt_quota_scale_num,
349 sdp->sd_tune.gt_quota_scale_den);
350}
351
352static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
353 size_t len)
354{
355 struct gfs2_tune *gt = &sdp->sd_tune;
356 unsigned int x, y;
357
358 if (!capable(CAP_SYS_ADMIN))
359 return -EACCES;
360
361 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
362 return -EINVAL;
363
364 spin_lock(&gt->gt_spin);
365 gt->gt_quota_scale_num = x;
366 gt->gt_quota_scale_den = y;
367 spin_unlock(&gt->gt_spin);
368 return len;
369}
370
371static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
372 int check_zero, const char *buf, size_t len)
373{
374 struct gfs2_tune *gt = &sdp->sd_tune;
375 unsigned int x;
376
377 if (!capable(CAP_SYS_ADMIN))
378 return -EACCES;
379
380 x = simple_strtoul(buf, NULL, 0);
381
382 if (check_zero && !x)
383 return -EINVAL;
384
385 spin_lock(&gt->gt_spin);
386 *field = x;
387 spin_unlock(&gt->gt_spin);
388 return len;
389}
390
391struct tune_attr {
392 struct attribute attr;
393 ssize_t (*show)(struct gfs2_sbd *, char *);
394 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
395};
396
397#define TUNE_ATTR_3(name, show, store) \
398static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
399
400#define TUNE_ATTR_2(name, store) \
401static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
402{ \
403 return sprintf(buf, "%u\n", sdp->sd_tune.gt_##name); \
404} \
405TUNE_ATTR_3(name, name##_show, store)
406
407#define TUNE_ATTR(name, check_zero) \
408static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
409{ \
410 return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
411} \
412TUNE_ATTR_2(name, name##_store)
413
414#define TUNE_ATTR_DAEMON(name, process) \
415static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
416{ \
417 ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
418 wake_up_process(sdp->sd_##process); \
419 return r; \
420} \
421TUNE_ATTR_2(name, name##_store)
422
423TUNE_ATTR(ilimit, 0);
424TUNE_ATTR(ilimit_tries, 0);
425TUNE_ATTR(ilimit_min, 0);
426TUNE_ATTR(demote_secs, 0);
427TUNE_ATTR(incore_log_blocks, 0);
428TUNE_ATTR(log_flush_secs, 0);
429TUNE_ATTR(jindex_refresh_secs, 0);
430TUNE_ATTR(quota_warn_period, 0);
431TUNE_ATTR(quota_quantum, 0);
432TUNE_ATTR(atime_quantum, 0);
433TUNE_ATTR(max_readahead, 0);
434TUNE_ATTR(complain_secs, 0);
435TUNE_ATTR(reclaim_limit, 0);
436TUNE_ATTR(prefetch_secs, 0);
437TUNE_ATTR(statfs_slow, 0);
438TUNE_ATTR(new_files_jdata, 0);
439TUNE_ATTR(new_files_directio, 0);
440TUNE_ATTR(quota_simul_sync, 1);
441TUNE_ATTR(quota_cache_secs, 1);
442TUNE_ATTR(max_atomic_write, 1);
443TUNE_ATTR(stall_secs, 1);
444TUNE_ATTR(entries_per_readdir, 1);
445TUNE_ATTR(greedy_default, 1);
446TUNE_ATTR(greedy_quantum, 1);
447TUNE_ATTR(greedy_max, 1);
448TUNE_ATTR(statfs_quantum, 1);
449TUNE_ATTR_DAEMON(scand_secs, scand_process);
450TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
451TUNE_ATTR_DAEMON(logd_secs, logd_process);
452TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
453TUNE_ATTR_DAEMON(inoded_secs, inoded_process);
454TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
455
456static struct attribute *tune_attrs[] = {
457 &tune_attr_ilimit.attr,
458 &tune_attr_ilimit_tries.attr,
459 &tune_attr_ilimit_min.attr,
460 &tune_attr_demote_secs.attr,
461 &tune_attr_incore_log_blocks.attr,
462 &tune_attr_log_flush_secs.attr,
463 &tune_attr_jindex_refresh_secs.attr,
464 &tune_attr_quota_warn_period.attr,
465 &tune_attr_quota_quantum.attr,
466 &tune_attr_atime_quantum.attr,
467 &tune_attr_max_readahead.attr,
468 &tune_attr_complain_secs.attr,
469 &tune_attr_reclaim_limit.attr,
470 &tune_attr_prefetch_secs.attr,
471 &tune_attr_statfs_slow.attr,
472 &tune_attr_quota_simul_sync.attr,
473 &tune_attr_quota_cache_secs.attr,
474 &tune_attr_max_atomic_write.attr,
475 &tune_attr_stall_secs.attr,
476 &tune_attr_entries_per_readdir.attr,
477 &tune_attr_greedy_default.attr,
478 &tune_attr_greedy_quantum.attr,
479 &tune_attr_greedy_max.attr,
480 &tune_attr_statfs_quantum.attr,
481 &tune_attr_scand_secs.attr,
482 &tune_attr_recoverd_secs.attr,
483 &tune_attr_logd_secs.attr,
484 &tune_attr_quotad_secs.attr,
485 &tune_attr_inoded_secs.attr,
486 &tune_attr_quota_scale.attr,
487 &tune_attr_new_files_jdata.attr,
488 &tune_attr_new_files_directio.attr,
489 NULL
490};
491
492static struct attribute_group lockstruct_group = {
493 .name = "lockstruct",
494 .attrs = lockstruct_attrs
495};
496
497static struct attribute_group counters_group = {
498 .name = "counters",
499 .attrs = counters_attrs
500};
501
502static struct attribute_group args_group = {
503 .name = "args",
504 .attrs = args_attrs
505};
506
507static struct attribute_group tune_group = {
508 .name = "tune",
509 .attrs = tune_attrs
510};
511
512int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
513{
514 int error;
515
516 sdp->sd_kobj.kset = &gfs2_kset;
517 sdp->sd_kobj.ktype = &gfs2_ktype;
518
519 error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
520 if (error)
521 goto fail;
522
523 error = kobject_register(&sdp->sd_kobj);
524 if (error)
525 goto fail;
526
527 error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
528 if (error)
529 goto fail_reg;
530
531 error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
532 if (error)
533 goto fail_lockstruct;
534
535 error = sysfs_create_group(&sdp->sd_kobj, &args_group);
536 if (error)
537 goto fail_counters;
538
539 error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
540 if (error)
541 goto fail_args;
542
543 return 0;
544
545 fail_args:
546 sysfs_remove_group(&sdp->sd_kobj, &args_group);
547 fail_counters:
548 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
549 fail_lockstruct:
550 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
551 fail_reg:
552 kobject_unregister(&sdp->sd_kobj);
553 fail:
554 return error;
555}
556
557void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
558{
559 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
560 sysfs_remove_group(&sdp->sd_kobj, &args_group);
561 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
562 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
563 kobject_unregister(&sdp->sd_kobj);
564}
565
566int gfs2_sys_init(void)
567{
568 gfs2_sys_margs = NULL;
569 spin_lock_init(&gfs2_sys_margs_lock);
570 return kset_register(&gfs2_kset);
571}
572
573void gfs2_sys_uninit(void)
574{
575 kfree(gfs2_sys_margs);
576 kset_unregister(&gfs2_kset);
577}
578
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
new file mode 100644
index 000000000000..62c8ed89ab9c
--- /dev/null
+++ b/fs/gfs2/sys.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SYS_DOT_H__
11#define __SYS_DOT_H__
12
13/* Allow args to be passed to GFS2 when using an initial ram disk */
14extern char *gfs2_sys_margs;
15extern spinlock_t gfs2_sys_margs_lock;
16
17int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
18void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
19
20int gfs2_sys_init(void);
21void gfs2_sys_uninit(void);
22
23#endif /* __SYS_DOT_H__ */
24
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
new file mode 100644
index 000000000000..0a0ea70eac4c
--- /dev/null
+++ b/fs/gfs2/trans.c
@@ -0,0 +1,198 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <asm/semaphore.h>
16
17#include "gfs2.h"
18#include "glock.h"
19#include "log.h"
20#include "lops.h"
21#include "meta_io.h"
22#include "trans.h"
23
24int gfs2_trans_begin_i(struct gfs2_sbd *sdp, unsigned int blocks,
25 unsigned int revokes, char *file, unsigned int line)
26{
27 struct gfs2_trans *tr;
28 int error;
29
30 if (gfs2_assert_warn(sdp, !get_transaction) ||
31 gfs2_assert_warn(sdp, blocks || revokes)) {
32 fs_warn(sdp, "(%s, %u)\n", file, line);
33 return -EINVAL;
34 }
35
36 tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
37 if (!tr)
38 return -ENOMEM;
39
40 tr->tr_file = file;
41 tr->tr_line = line;
42 tr->tr_blocks = blocks;
43 tr->tr_revokes = revokes;
44 tr->tr_reserved = 1;
45 if (blocks)
46 tr->tr_reserved += 1 + blocks;
47 if (revokes)
48 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
49 sizeof(uint64_t));
50 INIT_LIST_HEAD(&tr->tr_list_buf);
51
52 error = -ENOMEM;
53 tr->tr_t_gh = gfs2_holder_get(sdp->sd_trans_gl, LM_ST_SHARED,
54 GL_NEVER_RECURSE, GFP_NOFS);
55 if (!tr->tr_t_gh)
56 goto fail;
57
58 error = gfs2_glock_nq(tr->tr_t_gh);
59 if (error)
60 goto fail_holder_put;
61
62 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
63 tr->tr_t_gh->gh_flags |= GL_NOCACHE;
64 error = -EROFS;
65 goto fail_gunlock;
66 }
67
68 error = gfs2_log_reserve(sdp, tr->tr_reserved);
69 if (error)
70 goto fail_gunlock;
71
72 set_transaction(tr);
73
74 return 0;
75
76 fail_gunlock:
77 gfs2_glock_dq(tr->tr_t_gh);
78
79 fail_holder_put:
80 gfs2_holder_put(tr->tr_t_gh);
81
82 fail:
83 kfree(tr);
84
85 return error;
86}
87
88void gfs2_trans_end(struct gfs2_sbd *sdp)
89{
90 struct gfs2_trans *tr;
91 struct gfs2_holder *t_gh;
92
93 tr = get_transaction;
94 set_transaction(NULL);
95
96 if (gfs2_assert_warn(sdp, tr))
97 return;
98
99 t_gh = tr->tr_t_gh;
100 tr->tr_t_gh = NULL;
101
102 if (!tr->tr_touched) {
103 gfs2_log_release(sdp, tr->tr_reserved);
104 kfree(tr);
105
106 gfs2_glock_dq(t_gh);
107 gfs2_holder_put(t_gh);
108
109 return;
110 }
111
112 if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks))
113 fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u "
114 "tr_file = %s, tr_line = %u\n",
115 tr->tr_num_buf, tr->tr_blocks,
116 tr->tr_file, tr->tr_line);
117 if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes))
118 fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u "
119 "tr_file = %s, tr_line = %u\n",
120 tr->tr_num_revoke, tr->tr_revokes,
121 tr->tr_file, tr->tr_line);
122
123 gfs2_log_commit(sdp, tr);
124
125 gfs2_glock_dq(t_gh);
126 gfs2_holder_put(t_gh);
127
128 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
129 gfs2_log_flush(sdp);
130}
131
132void gfs2_trans_add_gl(struct gfs2_glock *gl)
133{
134 lops_add(gl->gl_sbd, &gl->gl_le);
135}
136
137/**
138 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
139 * @gl: the glock the buffer belongs to
140 * @bh: The buffer to add
141 * @meta: True in the case of adding metadata
142 *
143 */
144
145void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
146{
147 struct gfs2_sbd *sdp = gl->gl_sbd;
148 struct gfs2_bufdata *bd;
149
150 bd = get_v2bd(bh);
151 if (bd)
152 gfs2_assert(sdp, bd->bd_gl == gl);
153 else {
154 gfs2_attach_bufdata(gl, bh, meta);
155 bd = get_v2bd(bh);
156 }
157 lops_add(sdp, &bd->bd_le);
158}
159
160void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
161{
162 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
163 GFP_NOFS | __GFP_NOFAIL);
164 lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
165 rv->rv_blkno = blkno;
166 lops_add(sdp, &rv->rv_le);
167}
168
169void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno)
170{
171 struct gfs2_revoke *rv;
172 int found = 0;
173
174 gfs2_log_lock(sdp);
175
176 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
177 if (rv->rv_blkno == blkno) {
178 list_del(&rv->rv_le.le_list);
179 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
180 sdp->sd_log_num_revoke--;
181 found = 1;
182 break;
183 }
184 }
185
186 gfs2_log_unlock(sdp);
187
188 if (found) {
189 kfree(rv);
190 get_transaction->tr_num_revoke_rm++;
191 }
192}
193
194void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
195{
196 lops_add(rgd->rd_sbd, &rgd->rd_le);
197}
198
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
new file mode 100644
index 000000000000..f7f3e2a3d590
--- /dev/null
+++ b/fs/gfs2/trans.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __TRANS_DOT_H__
11#define __TRANS_DOT_H__
12
13#define RES_DINODE 1
14#define RES_INDIRECT 1
15#define RES_JDATA 1
16#define RES_DATA 1
17#define RES_LEAF 1
18#define RES_RG_BIT 2
19#define RES_EATTR 1
20#define RES_UNLINKED 1
21#define RES_STATFS 1
22#define RES_QUOTA 2
23
24#define gfs2_trans_begin(sdp, blocks, revokes) \
25gfs2_trans_begin_i((sdp), (blocks), (revokes), __FILE__, __LINE__)
26
27int gfs2_trans_begin_i(struct gfs2_sbd *sdp,
28 unsigned int blocks, unsigned int revokes,
29 char *file, unsigned int line);
30
31void gfs2_trans_end(struct gfs2_sbd *sdp);
32
33void gfs2_trans_add_gl(struct gfs2_glock *gl);
34void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
35void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
36void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
37void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
38
39#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/unlinked.c b/fs/gfs2/unlinked.c
new file mode 100644
index 000000000000..e92a3a11815b
--- /dev/null
+++ b/fs/gfs2/unlinked.c
@@ -0,0 +1,453 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <asm/semaphore.h>
17
18#include "gfs2.h"
19#include "bmap.h"
20#include "inode.h"
21#include "meta_io.h"
22#include "trans.h"
23#include "unlinked.h"
24
25static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot,
26 struct gfs2_unlinked_tag *ut)
27{
28 struct gfs2_inode *ip = get_v2ip(sdp->sd_ut_inode);
29 unsigned int block, offset;
30 uint64_t dblock;
31 int new = 0;
32 struct buffer_head *bh;
33 int error;
34
35 block = slot / sdp->sd_ut_per_block;
36 offset = slot % sdp->sd_ut_per_block;
37
38 error = gfs2_block_map(ip, block, &new, &dblock, NULL);
39 if (error)
40 return error;
41 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
42 if (error)
43 return error;
44 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
45 error = -EIO;
46 goto out;
47 }
48
49 mutex_lock(&sdp->sd_unlinked_mutex);
50 gfs2_trans_add_bh(ip->i_gl, bh, 1);
51 gfs2_unlinked_tag_out(ut, bh->b_data +
52 sizeof(struct gfs2_meta_header) +
53 offset * sizeof(struct gfs2_unlinked_tag));
54 mutex_unlock(&sdp->sd_unlinked_mutex);
55
56 out:
57 brelse(bh);
58
59 return error;
60}
61
62static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
63{
64 spin_lock(&sdp->sd_unlinked_spin);
65 list_add(&ul->ul_list, &sdp->sd_unlinked_list);
66 gfs2_assert(sdp, ul->ul_count);
67 ul->ul_count++;
68 atomic_inc(&sdp->sd_unlinked_count);
69 spin_unlock(&sdp->sd_unlinked_spin);
70}
71
72static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
73{
74 spin_lock(&sdp->sd_unlinked_spin);
75 list_del_init(&ul->ul_list);
76 gfs2_assert(sdp, ul->ul_count > 1);
77 ul->ul_count--;
78 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0);
79 atomic_dec(&sdp->sd_unlinked_count);
80 spin_unlock(&sdp->sd_unlinked_spin);
81}
82
83static struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp)
84{
85 struct list_head *head;
86 struct gfs2_unlinked *ul;
87 int found = 0;
88
89 if (sdp->sd_vfs->s_flags & MS_RDONLY)
90 return NULL;
91
92 spin_lock(&sdp->sd_unlinked_spin);
93
94 head = &sdp->sd_unlinked_list;
95
96 list_for_each_entry(ul, head, ul_list) {
97 if (test_bit(ULF_LOCKED, &ul->ul_flags))
98 continue;
99
100 list_move_tail(&ul->ul_list, head);
101 ul->ul_count++;
102 set_bit(ULF_LOCKED, &ul->ul_flags);
103 found = 1;
104
105 break;
106 }
107
108 if (!found)
109 ul = NULL;
110
111 spin_unlock(&sdp->sd_unlinked_spin);
112
113 return ul;
114}
115
116/**
117 * enforce_limit - limit the number of inodes waiting to be deallocated
118 * @sdp: the filesystem
119 *
120 * Returns: errno
121 */
122
123static void enforce_limit(struct gfs2_sbd *sdp)
124{
125 unsigned int tries = 0, min = 0;
126 int error;
127
128 if (atomic_read(&sdp->sd_unlinked_count) >=
129 gfs2_tune_get(sdp, gt_ilimit)) {
130 tries = gfs2_tune_get(sdp, gt_ilimit_tries);
131 min = gfs2_tune_get(sdp, gt_ilimit_min);
132 }
133
134 while (tries--) {
135 struct gfs2_unlinked *ul = ul_fish(sdp);
136 if (!ul)
137 break;
138 error = gfs2_inode_dealloc(sdp, ul);
139 gfs2_unlinked_put(sdp, ul);
140
141 if (!error) {
142 if (!--min)
143 break;
144 } else if (error != 1)
145 break;
146 }
147}
148
149static struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp)
150{
151 struct gfs2_unlinked *ul;
152
153 ul = kzalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL);
154 if (ul) {
155 INIT_LIST_HEAD(&ul->ul_list);
156 ul->ul_count = 1;
157 set_bit(ULF_LOCKED, &ul->ul_flags);
158 }
159
160 return ul;
161}
162
163int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul)
164{
165 unsigned int c, o = 0, b;
166 unsigned char byte = 0;
167
168 enforce_limit(sdp);
169
170 *ul = ul_alloc(sdp);
171 if (!*ul)
172 return -ENOMEM;
173
174 spin_lock(&sdp->sd_unlinked_spin);
175
176 for (c = 0; c < sdp->sd_unlinked_chunks; c++)
177 for (o = 0; o < PAGE_SIZE; o++) {
178 byte = sdp->sd_unlinked_bitmap[c][o];
179 if (byte != 0xFF)
180 goto found;
181 }
182
183 goto fail;
184
185 found:
186 for (b = 0; b < 8; b++)
187 if (!(byte & (1 << b)))
188 break;
189 (*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
190
191 if ((*ul)->ul_slot >= sdp->sd_unlinked_slots)
192 goto fail;
193
194 sdp->sd_unlinked_bitmap[c][o] |= 1 << b;
195
196 spin_unlock(&sdp->sd_unlinked_spin);
197
198 return 0;
199
200 fail:
201 spin_unlock(&sdp->sd_unlinked_spin);
202 kfree(*ul);
203 return -ENOSPC;
204}
205
206void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
207{
208 gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags));
209
210 spin_lock(&sdp->sd_unlinked_spin);
211 gfs2_assert(sdp, ul->ul_count);
212 ul->ul_count--;
213 if (!ul->ul_count) {
214 gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0);
215 spin_unlock(&sdp->sd_unlinked_spin);
216 kfree(ul);
217 } else
218 spin_unlock(&sdp->sd_unlinked_spin);
219}
220
221int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
222{
223 int error;
224
225 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
226 gfs2_assert_warn(sdp, list_empty(&ul->ul_list));
227
228 error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
229 if (!error)
230 ul_hash(sdp, ul);
231
232 return error;
233}
234
235int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
236{
237 int error;
238
239 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
240 gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
241
242 error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
243
244 return error;
245}
246
247int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
248{
249 struct gfs2_unlinked_tag ut;
250 int error;
251
252 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
253 gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
254
255 memset(&ut, 0, sizeof(struct gfs2_unlinked_tag));
256
257 error = munge_ondisk(sdp, ul->ul_slot, &ut);
258 if (error)
259 return error;
260
261 ul_unhash(sdp, ul);
262
263 return 0;
264}
265
266/**
267 * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated
268 * @sdp: the filesystem
269 *
270 * Returns: errno
271 */
272
273int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp)
274{
275 unsigned int hits, strikes;
276 int error;
277
278 for (;;) {
279 hits = 0;
280 strikes = 0;
281
282 for (;;) {
283 struct gfs2_unlinked *ul = ul_fish(sdp);
284 if (!ul)
285 return 0;
286 error = gfs2_inode_dealloc(sdp, ul);
287 gfs2_unlinked_put(sdp, ul);
288
289 if (!error) {
290 hits++;
291 if (strikes)
292 strikes--;
293 } else if (error == 1) {
294 strikes++;
295 if (strikes >=
296 atomic_read(&sdp->sd_unlinked_count)) {
297 error = 0;
298 break;
299 }
300 } else
301 return error;
302 }
303
304 if (!hits || kthread_should_stop())
305 break;
306
307 cond_resched();
308 }
309
310 return 0;
311}
312
/**
 * gfs2_unlinked_init - build the in-core unlinked-inode state from disk
 * @sdp: the filesystem
 *
 * Sizes and allocates the slot bitmap from the size of the unlinked-tag
 * inode (sd_ut_inode), then reads every block of that inode and creates a
 * struct gfs2_unlinked for each tag whose inode address is non-zero.
 *
 * On any failure after the bitmap pointer array has been allocated,
 * gfs2_unlinked_cleanup() is called before returning.
 *
 * Returns: 0 on success, -EIO for an inconsistent inode size or a block of
 * the wrong metadata type, -ENOMEM on allocation failure, or the error from
 * gfs2_block_map()/gfs2_meta_read()
 */
int gfs2_unlinked_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip = get_v2ip(sdp->sd_ut_inode);
	unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
	unsigned int x, slot = 0;
	unsigned int found = 0;
	uint64_t dblock;
	uint32_t extlen = 0;
	int error;

	/* Size must be non-zero, at most 64MB and a whole number of blocks */
	if (!ip->i_di.di_size ||
	    ip->i_di.di_size > (64 << 20) ||
	    ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
		gfs2_consist_inode(ip);
		return -EIO;
	}
	sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block;
	/* One bit per slot; each bitmap chunk is PAGE_SIZE bytes */
	sdp->sd_unlinked_chunks = DIV_RU(sdp->sd_unlinked_slots, 8 * PAGE_SIZE);

	error = -ENOMEM;

	sdp->sd_unlinked_bitmap = kcalloc(sdp->sd_unlinked_chunks,
					  sizeof(unsigned char *),
					  GFP_KERNEL);
	if (!sdp->sd_unlinked_bitmap)
		return error;

	for (x = 0; x < sdp->sd_unlinked_chunks; x++) {
		sdp->sd_unlinked_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!sdp->sd_unlinked_bitmap[x])
			goto fail;
	}

	for (x = 0; x < blocks; x++) {
		struct buffer_head *bh;
		unsigned int y;

		/* Map a new extent only when the previous one is used up */
		if (!extlen) {
			int new = 0;
			error = gfs2_block_map(ip, x, &new, &dblock, &extlen);
			if (error)
				goto fail;
		}
		gfs2_meta_ra(ip->i_gl, dblock, extlen);
		error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
				       &bh);
		if (error)
			goto fail;
		error = -EIO;
		if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
			brelse(bh);
			goto fail;
		}

		/* Walk the tags in this block; slot counts across blocks */
		for (y = 0;
		     y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots;
		     y++, slot++) {
			struct gfs2_unlinked_tag ut;
			struct gfs2_unlinked *ul;

			gfs2_unlinked_tag_in(&ut, bh->b_data +
					  sizeof(struct gfs2_meta_header) +
					  y * sizeof(struct gfs2_unlinked_tag));
			/* A zero inode address marks an unused slot */
			if (!ut.ut_inum.no_addr)
				continue;

			error = -ENOMEM;
			ul = ul_alloc(sdp);
			if (!ul) {
				brelse(bh);
				goto fail;
			}
			ul->ul_ut = ut;
			ul->ul_slot = slot;

			spin_lock(&sdp->sd_unlinked_spin);
			gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1);
			spin_unlock(&sdp->sd_unlinked_spin);
			ul_hash(sdp, ul);

			/*
			 * Drop the lock bit and the count from ul_alloc();
			 * the count taken by ul_hash() keeps ul alive.
			 */
			gfs2_unlinked_put(sdp, ul);
			found++;
		}

		brelse(bh);
		dblock++;
		extlen--;
	}

	if (found)
		fs_info(sdp, "found %u unlinked inodes\n", found);

	return 0;

 fail:
	gfs2_unlinked_cleanup(sdp);
	return error;
}
411
412/**
413 * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures
414 * @sdp: the filesystem
415 *
416 */
417
418void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp)
419{
420 struct list_head *head = &sdp->sd_unlinked_list;
421 struct gfs2_unlinked *ul;
422 unsigned int x;
423
424 spin_lock(&sdp->sd_unlinked_spin);
425 while (!list_empty(head)) {
426 ul = list_entry(head->next, struct gfs2_unlinked, ul_list);
427
428 if (ul->ul_count > 1) {
429 list_move_tail(&ul->ul_list, head);
430 spin_unlock(&sdp->sd_unlinked_spin);
431 schedule();
432 spin_lock(&sdp->sd_unlinked_spin);
433 continue;
434 }
435
436 list_del_init(&ul->ul_list);
437 atomic_dec(&sdp->sd_unlinked_count);
438
439 gfs2_assert_warn(sdp, ul->ul_count == 1);
440 gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags));
441 kfree(ul);
442 }
443 spin_unlock(&sdp->sd_unlinked_spin);
444
445 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count));
446
447 if (sdp->sd_unlinked_bitmap) {
448 for (x = 0; x < sdp->sd_unlinked_chunks; x++)
449 kfree(sdp->sd_unlinked_bitmap[x]);
450 kfree(sdp->sd_unlinked_bitmap);
451 }
452}
453
diff --git a/fs/gfs2/unlinked.h b/fs/gfs2/unlinked.h
new file mode 100644
index 000000000000..51e77f88d74f
--- /dev/null
+++ b/fs/gfs2/unlinked.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UNLINKED_DOT_H__
11#define __UNLINKED_DOT_H__
12
/* Allocate a locked structure with a reserved slot / unlock it and drop a count. */
int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul);
void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);

/* Write, rewrite or clear the structure's on-disk tag. */
int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);

/* Try to deallocate the inodes on the in-core unlinked list. */
int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp);

/* Build the in-core list and slot bitmap from disk / free them again. */
int gfs2_unlinked_init(struct gfs2_sbd *sdp);
void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp);
24
25#endif /* __UNLINKED_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
new file mode 100644
index 000000000000..4fb1704aac10
--- /dev/null
+++ b/fs/gfs2/util.c
@@ -0,0 +1,246 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <asm/semaphore.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "glock.h"
21#include "lm.h"
22
/*
 * Slab cache pointers, declared extern in util.h.
 * NOTE(review): presumably created at module init elsewhere - they are only
 * defined here, not initialised.
 */
kmem_cache_t *gfs2_glock_cachep __read_mostly;
kmem_cache_t *gfs2_inode_cachep __read_mostly;
kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
26
27uint32_t gfs2_disk_hash(const char *data, int len)
28{
29 return crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF;
30}
31
/**
 * gfs2_assert_i - log that a fatal assertion failed
 * @sdp: the filesystem, whose sd_fsname is printed
 *
 * Only prints at KERN_EMERG level; the gfs2_assert() macro in util.h calls
 * BUG() after this returns.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
	printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
	       sdp->sd_fsname);
}
37
38/**
39 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
40 * Returns: -1 if this call withdrew the machine,
41 * -2 if it was already withdrawn
42 */
43
44int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
45 const char *function, char *file, unsigned int line)
46{
47 int me;
48 me = gfs2_lm_withdraw(sdp,
49 "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
50 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
51 sdp->sd_fsname, assertion,
52 sdp->sd_fsname, function, file, line);
53 dump_stack();
54 return (me) ? -1 : -2;
55}
56
57/**
58 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
59 * Returns: -1 if we printed something
60 * -2 if we didn't
61 */
62
63int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
64 const char *function, char *file, unsigned int line)
65{
66 if (time_before(jiffies,
67 sdp->sd_last_warning +
68 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
69 return -2;
70
71 printk(KERN_WARNING
72 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
73 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
74 sdp->sd_fsname, assertion,
75 sdp->sd_fsname, function, file, line);
76
77 if (sdp->sd_args.ar_debug)
78 BUG();
79 else
80 dump_stack();
81
82 sdp->sd_last_warning = jiffies;
83
84 return -1;
85}
86
87/**
88 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
89 * Returns: -1 if this call withdrew the machine,
90 * 0 if it was already withdrawn
91 */
92
93int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
94 char *file, unsigned int line)
95{
96 int rv;
97 rv = gfs2_lm_withdraw(sdp,
98 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
99 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
100 sdp->sd_fsname,
101 sdp->sd_fsname, function, file, line);
102 return rv;
103}
104
105/**
106 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
107 * Returns: -1 if this call withdrew the machine,
108 * 0 if it was already withdrawn
109 */
110
111int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
112 const char *function, char *file, unsigned int line)
113{
114 struct gfs2_sbd *sdp = ip->i_sbd;
115 int rv;
116 rv = gfs2_lm_withdraw(sdp,
117 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
118 "GFS2: fsid=%s: inode = %llu %llu\n"
119 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
120 sdp->sd_fsname,
121 sdp->sd_fsname, ip->i_num.no_formal_ino, ip->i_num.no_addr,
122 sdp->sd_fsname, function, file, line);
123 return rv;
124}
125
126/**
127 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
128 * Returns: -1 if this call withdrew the machine,
129 * 0 if it was already withdrawn
130 */
131
132int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
133 const char *function, char *file, unsigned int line)
134{
135 struct gfs2_sbd *sdp = rgd->rd_sbd;
136 int rv;
137 rv = gfs2_lm_withdraw(sdp,
138 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
139 "GFS2: fsid=%s: RG = %llu\n"
140 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
141 sdp->sd_fsname,
142 sdp->sd_fsname, rgd->rd_ri.ri_addr,
143 sdp->sd_fsname, function, file, line);
144 return rv;
145}
146
147/**
148 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
149 * Returns: -1 if this call withdrew the machine,
150 * -2 if it was already withdrawn
151 */
152
153int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
154 const char *type, const char *function, char *file,
155 unsigned int line)
156{
157 int me;
158 me = gfs2_lm_withdraw(sdp,
159 "GFS2: fsid=%s: fatal: invalid metadata block\n"
160 "GFS2: fsid=%s: bh = %llu (%s)\n"
161 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
162 sdp->sd_fsname,
163 sdp->sd_fsname, (uint64_t)bh->b_blocknr, type,
164 sdp->sd_fsname, function, file, line);
165 return (me) ? -1 : -2;
166}
167
168/**
169 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
170 * Returns: -1 if this call withdrew the machine,
171 * -2 if it was already withdrawn
172 */
173
174int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
175 uint16_t type, uint16_t t, const char *function,
176 char *file, unsigned int line)
177{
178 int me;
179 me = gfs2_lm_withdraw(sdp,
180 "GFS2: fsid=%s: fatal: invalid metadata block\n"
181 "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
182 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
183 sdp->sd_fsname,
184 sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, t,
185 sdp->sd_fsname, function, file, line);
186 return (me) ? -1 : -2;
187}
188
189/**
190 * gfs2_io_error_i - Flag an I/O error and withdraw
191 * Returns: -1 if this call withdrew the machine,
192 * 0 if it was already withdrawn
193 */
194
195int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
196 unsigned int line)
197{
198 int rv;
199 rv = gfs2_lm_withdraw(sdp,
200 "GFS2: fsid=%s: fatal: I/O error\n"
201 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
202 sdp->sd_fsname,
203 sdp->sd_fsname, function, file, line);
204 return rv;
205}
206
207/**
208 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
209 * Returns: -1 if this call withdrew the machine,
210 * 0 if it was already withdrawn
211 */
212
213int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
214 const char *function, char *file, unsigned int line)
215{
216 int rv;
217 rv = gfs2_lm_withdraw(sdp,
218 "GFS2: fsid=%s: fatal: I/O error\n"
219 "GFS2: fsid=%s: block = %llu\n"
220 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
221 sdp->sd_fsname,
222 sdp->sd_fsname, (uint64_t)bh->b_blocknr,
223 sdp->sd_fsname, function, file, line);
224 return rv;
225}
226
227void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
228 unsigned int bit, int new_value)
229{
230 unsigned int c, o, b = bit;
231 int old_value;
232
233 c = b / (8 * PAGE_SIZE);
234 b %= 8 * PAGE_SIZE;
235 o = b / 8;
236 b %= 8;
237
238 old_value = (bitmap[c][o] & (1 << b));
239 gfs2_assert_withdraw(sdp, !old_value != !new_value);
240
241 if (new_value)
242 bitmap[c][o] |= 1 << b;
243 else
244 bitmap[c][o] &= ~(1 << b);
245}
246
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
new file mode 100644
index 000000000000..8d6eba3bdf0a
--- /dev/null
+++ b/fs/gfs2/util.h
@@ -0,0 +1,172 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__
12
/* CRC32-based hash of @len bytes at @data; defined in util.c. */
uint32_t gfs2_disk_hash(const char *data, int len);
14
15
/*
 * fs_printk - printk() with a "GFS2: fsid=<name>: " prefix
 * @level: a KERN_* level string
 * @fs: the filesystem; its sd_fsname member supplies <name>
 * @fmt: format string literal (it is pasted onto the prefix)
 */
#define fs_printk(level, fs, fmt, arg...) \
	printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg)

/* fs_info/fs_warn/fs_err - fs_printk() at KERN_INFO/KERN_WARNING/KERN_ERR */
#define fs_info(fs, fmt, arg...) \
	fs_printk(KERN_INFO , fs , fmt , ## arg)

#define fs_warn(fs, fmt, arg...) \
	fs_printk(KERN_WARNING , fs , fmt , ## arg)

#define fs_err(fs, fmt, arg...) \
	fs_printk(KERN_ERR, fs , fmt , ## arg)
27
28
void gfs2_assert_i(struct gfs2_sbd *sdp);

/*
 * gfs2_assert - fatal assertion
 * If @assertion is false, logs via gfs2_assert_i() and then calls BUG().
 * Statement-like: yields no value.
 */
#define gfs2_assert(sdp, assertion) \
do { \
	if (unlikely(!(assertion))) { \
		gfs2_assert_i(sdp); \
		BUG(); \
	} \
} while (0)
38
39
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
			   const char *function, char *file, unsigned int line);

/*
 * gfs2_assert_withdraw - assertion that withdraws the filesystem on failure
 * Evaluates @assertion once.  Yields 0 if it holds, otherwise the result of
 * gfs2_assert_withdraw_i(), which is passed the assertion's source text and
 * the caller's function, file and line.
 */
#define gfs2_assert_withdraw(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
					__FUNCTION__, __FILE__, __LINE__))
46
47
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
		       const char *function, char *file, unsigned int line);

/*
 * gfs2_assert_warn - assertion that only warns on failure
 * Evaluates @assertion once (callers in this codebase rely on that for
 * side effects).  Yields 0 if it holds, otherwise the result of
 * gfs2_assert_warn_i(), which is passed the assertion's source text and the
 * caller's function, file and line.
 */
#define gfs2_assert_warn(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
					__FUNCTION__, __FILE__, __LINE__))
54
55
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
		   const char *function, char *file, unsigned int line);

/* Report a filesystem consistency error from the calling location (cluster_wide = 0). */
#define gfs2_consist(sdp) \
gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)


int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
			 const char *function, char *file, unsigned int line);

/* As gfs2_consist(), but the report also identifies inode @ip. */
#define gfs2_consist_inode(ip) \
gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)


int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
			 const char *function, char *file, unsigned int line);

/* As gfs2_consist(), but the report also identifies resource group @rgd. */
#define gfs2_consist_rgrpd(rgd) \
gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
75
76
77int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
78 const char *type, const char *function,
79 char *file, unsigned int line);
80
81static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
82 struct buffer_head *bh,
83 const char *function,
84 char *file, unsigned int line)
85{
86 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
87 uint32_t magic = mh->mh_magic;
88 magic = be32_to_cpu(magic);
89 if (unlikely(magic != GFS2_MAGIC))
90 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
91 file, line);
92 return 0;
93}
94
95#define gfs2_meta_check(sdp, bh) \
96gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
97
98
99int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
100 uint16_t type, uint16_t t,
101 const char *function,
102 char *file, unsigned int line);
103
104static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
105 struct buffer_head *bh,
106 uint16_t type,
107 const char *function,
108 char *file, unsigned int line)
109{
110 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
111 uint32_t magic = mh->mh_magic;
112 uint16_t t = mh->mh_type;
113 magic = be32_to_cpu(magic);
114 if (unlikely(magic != GFS2_MAGIC))
115 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
116 file, line);
117 t = be16_to_cpu(t);
118 if (unlikely(t != type))
119 return gfs2_metatype_check_ii(sdp, bh, type, t, function,
120 file, line);
121 return 0;
122}
123
124#define gfs2_metatype_check(sdp, bh, type) \
125gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
126
127static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type,
128 uint16_t format)
129{
130 struct gfs2_meta_header *mh;
131 mh = (struct gfs2_meta_header *)bh->b_data;
132 mh->mh_type = cpu_to_be16(type);
133 mh->mh_format = cpu_to_be16(format);
134}
135
136
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);

/*
 * gfs2_io_error - report an I/O error from the calling location
 *
 * Expands to a plain expression with no trailing semicolon, so it can be
 * used as the body of an if/else and wherever its value is wanted.
 */
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)
142
143
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);

/*
 * gfs2_io_error_bh - report an I/O error on buffer @bh from the calling
 * location
 *
 * Expands to a plain expression with no trailing semicolon, so it can be
 * used as the body of an if/else and wherever its value is wanted.
 */
#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
149
150
/* Slab cache pointers defined in util.c. */
extern kmem_cache_t *gfs2_glock_cachep;
extern kmem_cache_t *gfs2_inode_cachep;
extern kmem_cache_t *gfs2_bufdata_cachep;
154
155static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
156 unsigned int *p)
157{
158 unsigned int x;
159 spin_lock(&gt->gt_spin);
160 x = *p;
161 spin_unlock(&gt->gt_spin);
162 return x;
163}
164
165#define gfs2_tune_get(sdp, field) \
166gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
167
/* Set (@new_value != 0) or clear bit @bit in a bitmap made of PAGE_SIZE-byte chunks. */
void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
		      unsigned int bit, int new_value);
170
171#endif /* __UTIL_DOT_H__ */
172