aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2')
-rw-r--r--fs/gfs2/Kconfig44
-rw-r--r--fs/gfs2/Makefile10
-rw-r--r--fs/gfs2/acl.c315
-rw-r--r--fs/gfs2/acl.h37
-rw-r--r--fs/gfs2/bmap.c1103
-rw-r--r--fs/gfs2/bmap.h32
-rw-r--r--fs/gfs2/daemon.c223
-rw-r--r--fs/gfs2/daemon.h20
-rw-r--r--fs/gfs2/dir.c1974
-rw-r--r--fs/gfs2/dir.h73
-rw-r--r--fs/gfs2/eaops.c230
-rw-r--r--fs/gfs2/eaops.h31
-rw-r--r--fs/gfs2/eattr.c1549
-rw-r--r--fs/gfs2/eattr.h97
-rw-r--r--fs/gfs2/format.h21
-rw-r--r--fs/gfs2/gfs2.h31
-rw-r--r--fs/gfs2/glock.c2340
-rw-r--r--fs/gfs2/glock.h155
-rw-r--r--fs/gfs2/glops.c491
-rw-r--r--fs/gfs2/glops.h23
-rw-r--r--fs/gfs2/incore.h687
-rw-r--r--fs/gfs2/inode.c1820
-rw-r--r--fs/gfs2/inode.h72
-rw-r--r--fs/gfs2/lm.c244
-rw-r--r--fs/gfs2/lm.h41
-rw-r--r--fs/gfs2/lm_interface.h295
-rw-r--r--fs/gfs2/locking.c191
-rw-r--r--fs/gfs2/locking/dlm/Makefile3
-rw-r--r--fs/gfs2/locking/dlm/lock.c541
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h188
-rw-r--r--fs/gfs2/locking/dlm/main.c64
-rw-r--r--fs/gfs2/locking/dlm/mount.c256
-rw-r--r--fs/gfs2/locking/dlm/plock.c299
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c225
-rw-r--r--fs/gfs2/locking/dlm/thread.c352
-rw-r--r--fs/gfs2/locking/nolock/Makefile3
-rw-r--r--fs/gfs2/locking/nolock/main.c259
-rw-r--r--fs/gfs2/log.c598
-rw-r--r--fs/gfs2/log.h61
-rw-r--r--fs/gfs2/lops.c804
-rw-r--r--fs/gfs2/lops.h96
-rw-r--r--fs/gfs2/lvb.c45
-rw-r--r--fs/gfs2/lvb.h19
-rw-r--r--fs/gfs2/main.c129
-rw-r--r--fs/gfs2/meta_io.c892
-rw-r--r--fs/gfs2/meta_io.h89
-rw-r--r--fs/gfs2/mount.c214
-rw-r--r--fs/gfs2/mount.h15
-rw-r--r--fs/gfs2/ondisk.c321
-rw-r--r--fs/gfs2/ops_address.c670
-rw-r--r--fs/gfs2/ops_address.h17
-rw-r--r--fs/gfs2/ops_dentry.c123
-rw-r--r--fs/gfs2/ops_dentry.h15
-rw-r--r--fs/gfs2/ops_export.c297
-rw-r--r--fs/gfs2/ops_export.h15
-rw-r--r--fs/gfs2/ops_file.c1000
-rw-r--r--fs/gfs2/ops_file.h20
-rw-r--r--fs/gfs2/ops_fstype.c901
-rw-r--r--fs/gfs2/ops_fstype.h16
-rw-r--r--fs/gfs2/ops_inode.c1194
-rw-r--r--fs/gfs2/ops_inode.h18
-rw-r--r--fs/gfs2/ops_super.c399
-rw-r--r--fs/gfs2/ops_super.h15
-rw-r--r--fs/gfs2/ops_vm.c195
-rw-r--r--fs/gfs2/ops_vm.h16
-rw-r--r--fs/gfs2/page.c280
-rw-r--r--fs/gfs2/page.h23
-rw-r--r--fs/gfs2/quota.c1305
-rw-r--r--fs/gfs2/quota.h32
-rw-r--r--fs/gfs2/recovery.c576
-rw-r--r--fs/gfs2/recovery.h32
-rw-r--r--fs/gfs2/rgrp.c1524
-rw-r--r--fs/gfs2/rgrp.h62
-rw-r--r--fs/gfs2/super.c945
-rw-r--r--fs/gfs2/super.h52
-rw-r--r--fs/gfs2/sys.c581
-rw-r--r--fs/gfs2/sys.h24
-rw-r--r--fs/gfs2/trans.c184
-rw-r--r--fs/gfs2/trans.h35
-rw-r--r--fs/gfs2/unlinked.c459
-rw-r--r--fs/gfs2/unlinked.h25
-rw-r--r--fs/gfs2/util.c245
-rw-r--r--fs/gfs2/util.h169
83 files changed, 29086 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
new file mode 100644
index 000000000000..115f30d8c22e
--- /dev/null
+++ b/fs/gfs2/Kconfig
@@ -0,0 +1,44 @@
1config GFS2_FS
2 tristate "GFS2 file system support"
3 depends on EXPERIMENTAL
4 select FS_POSIX_ACL
5 help
6 A cluster filesystem.
7
8 Allows a cluster of computers to simultaneously use a block device
9 that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
10 and writes to the block device like a local filesystem, but also uses
11 a lock module to allow the computers to coordinate their I/O so
12 filesystem consistency is maintained. One of the nifty features of
13 GFS is perfect consistency -- changes made to the filesystem on one
14 machine show up immediately on all other machines in the cluster.
15
16 To use the GFS2 filesystem, you will need to enable one or more of
17 the below locking modules. Documentation and utilities for GFS2 can
18 be found here: http://sources.redhat.com/cluster/gfs/
19
20config GFS2_FS_LOCKING_NOLOCK
21 tristate "GFS2 \"nolock\" locking module"
22 depends on GFS2_FS
23 help
24 Single node locking module for GFS2.
25
26 Use this module if you want to use GFS2 on a single node without
27 its clustering features. You can still take advantage of the
28 large file support, and upgrade to running a full cluster later on
29 if required.
30
31 If you will only be using GFS2 in cluster mode, you do not need this
32 module.
33
34config GFS2_FS_LOCKING_DLM
35 tristate "GFS2 DLM locking module"
36 depends on GFS2_FS
37 select DLM
38 help
39 Multiple node locking module for GFS2
40
41 Most users of GFS2 will require this module. It provides the locking
42 interface between GFS2 and the DLM, which is required to use GFS2
43 in a cluster environment.
44
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
new file mode 100644
index 000000000000..9974201aa16c
--- /dev/null
+++ b/fs/gfs2/Makefile
@@ -0,0 +1,10 @@
1obj-$(CONFIG_GFS2_FS) += gfs2.o
2gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
3 glops.o inode.o lm.o log.o lops.o locking.o lvb.o main.o meta_io.o \
4 mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
5 ops_fstype.o ops_inode.o ops_super.o ops_vm.o page.o quota.o \
6 recovery.o rgrp.o super.o sys.o trans.o unlinked.o util.o
7
8obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
9obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/
10
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
new file mode 100644
index 000000000000..343dbe3e87bb
--- /dev/null
+++ b/fs/gfs2/acl.c
@@ -0,0 +1,315 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "glock.h"
26#include "inode.h"
27#include "meta_io.h"
28#include "trans.h"
29#include "util.h"
30
31#define ACL_ACCESS 1
32#define ACL_DEFAULT 0
33
34int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
35 struct gfs2_ea_request *er,
36 int *remove, mode_t *mode)
37{
38 struct posix_acl *acl;
39 int error;
40
41 error = gfs2_acl_validate_remove(ip, access);
42 if (error)
43 return error;
44
45 if (!er->er_data)
46 return -EINVAL;
47
48 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
49 if (IS_ERR(acl))
50 return PTR_ERR(acl);
51 if (!acl) {
52 *remove = 1;
53 return 0;
54 }
55
56 error = posix_acl_valid(acl);
57 if (error)
58 goto out;
59
60 if (access) {
61 error = posix_acl_equiv_mode(acl, mode);
62 if (!error)
63 *remove = 1;
64 else if (error > 0)
65 error = 0;
66 }
67
68 out:
69 posix_acl_release(acl);
70
71 return error;
72}
73
74int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
75{
76 if (!ip->i_sbd->sd_args.ar_posix_acl)
77 return -EOPNOTSUPP;
78 if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
79 return -EPERM;
80 if (S_ISLNK(ip->i_di.di_mode))
81 return -EOPNOTSUPP;
82 if (!access && !S_ISDIR(ip->i_di.di_mode))
83 return -EACCES;
84
85 return 0;
86}
87
88static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
89 struct gfs2_ea_location *el, char **data, unsigned int *len)
90{
91 struct gfs2_ea_request er;
92 struct gfs2_ea_location el_this;
93 int error;
94
95 if (!ip->i_di.di_eattr)
96 return 0;
97
98 memset(&er, 0, sizeof(struct gfs2_ea_request));
99 if (access) {
100 er.er_name = GFS2_POSIX_ACL_ACCESS;
101 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
102 } else {
103 er.er_name = GFS2_POSIX_ACL_DEFAULT;
104 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
105 }
106 er.er_type = GFS2_EATYPE_SYS;
107
108 if (!el)
109 el = &el_this;
110
111 error = gfs2_ea_find(ip, &er, el);
112 if (error)
113 return error;
114 if (!el->el_ea)
115 return 0;
116 if (!GFS2_EA_DATA_LEN(el->el_ea))
117 goto out;
118
119 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
120 er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
121 error = -ENOMEM;
122 if (!er.er_data)
123 goto out;
124
125 error = gfs2_ea_get_copy(ip, el, er.er_data);
126 if (error)
127 goto out_kfree;
128
129 if (acl) {
130 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
131 if (IS_ERR(*acl))
132 error = PTR_ERR(*acl);
133 }
134
135 out_kfree:
136 if (error || !data)
137 kfree(er.er_data);
138 else {
139 *data = er.er_data;
140 *len = er.er_data_len;
141 }
142
143 out:
144 if (error || el == &el_this)
145 brelse(el->el_bh);
146
147 return error;
148}
149
150/**
151 * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something
152 * @inode: the file we want to do something to
153 * @mask: what we want to do
154 *
155 * Returns: errno
156 */
157
158int gfs2_check_acl_locked(struct inode *inode, int mask)
159{
160 struct posix_acl *acl = NULL;
161 int error;
162
163 error = acl_get(inode->u.generic_ip, ACL_ACCESS, &acl, NULL, NULL, NULL);
164 if (error)
165 return error;
166
167 if (acl) {
168 error = posix_acl_permission(inode, acl, mask);
169 posix_acl_release(acl);
170 return error;
171 }
172
173 return -EAGAIN;
174}
175
176int gfs2_check_acl(struct inode *inode, int mask)
177{
178 struct gfs2_inode *ip = inode->u.generic_ip;
179 struct gfs2_holder i_gh;
180 int error;
181
182 error = gfs2_glock_nq_init(ip->i_gl,
183 LM_ST_SHARED, LM_FLAG_ANY,
184 &i_gh);
185 if (!error) {
186 error = gfs2_check_acl_locked(inode, mask);
187 gfs2_glock_dq_uninit(&i_gh);
188 }
189
190 return error;
191}
192
193static int munge_mode(struct gfs2_inode *ip, mode_t mode)
194{
195 struct gfs2_sbd *sdp = ip->i_sbd;
196 struct buffer_head *dibh;
197 int error;
198
199 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
200 if (error)
201 return error;
202
203 error = gfs2_meta_inode_buffer(ip, &dibh);
204 if (!error) {
205 gfs2_assert_withdraw(sdp,
206 (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT));
207 ip->i_di.di_mode = mode;
208 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
209 gfs2_dinode_out(&ip->i_di, dibh->b_data);
210 brelse(dibh);
211 }
212
213 gfs2_trans_end(sdp);
214
215 return 0;
216}
217
218int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
219{
220 struct gfs2_sbd *sdp = dip->i_sbd;
221 struct posix_acl *acl = NULL, *clone;
222 struct gfs2_ea_request er;
223 mode_t mode = ip->i_di.di_mode;
224 int error;
225
226 if (!sdp->sd_args.ar_posix_acl)
227 return 0;
228 if (S_ISLNK(ip->i_di.di_mode))
229 return 0;
230
231 memset(&er, 0, sizeof(struct gfs2_ea_request));
232 er.er_type = GFS2_EATYPE_SYS;
233
234 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
235 &er.er_data, &er.er_data_len);
236 if (error)
237 return error;
238 if (!acl) {
239 mode &= ~current->fs->umask;
240 if (mode != ip->i_di.di_mode)
241 error = munge_mode(ip, mode);
242 return error;
243 }
244
245 clone = posix_acl_clone(acl, GFP_KERNEL);
246 error = -ENOMEM;
247 if (!clone)
248 goto out;
249 posix_acl_release(acl);
250 acl = clone;
251
252 if (S_ISDIR(ip->i_di.di_mode)) {
253 er.er_name = GFS2_POSIX_ACL_DEFAULT;
254 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
255 error = gfs2_system_eaops.eo_set(ip, &er);
256 if (error)
257 goto out;
258 }
259
260 error = posix_acl_create_masq(acl, &mode);
261 if (error < 0)
262 goto out;
263 if (error > 0) {
264 er.er_name = GFS2_POSIX_ACL_ACCESS;
265 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
266 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
267 er.er_mode = mode;
268 er.er_flags = GFS2_ERF_MODE;
269 error = gfs2_system_eaops.eo_set(ip, &er);
270 if (error)
271 goto out;
272 } else
273 munge_mode(ip, mode);
274
275 out:
276 posix_acl_release(acl);
277 kfree(er.er_data);
278 return error;
279}
280
281int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
282{
283 struct posix_acl *acl = NULL, *clone;
284 struct gfs2_ea_location el;
285 char *data;
286 unsigned int len;
287 int error;
288
289 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
290 if (error)
291 return error;
292 if (!acl)
293 return gfs2_setattr_simple(ip, attr);
294
295 clone = posix_acl_clone(acl, GFP_KERNEL);
296 error = -ENOMEM;
297 if (!clone)
298 goto out;
299 posix_acl_release(acl);
300 acl = clone;
301
302 error = posix_acl_chmod_masq(acl, attr->ia_mode);
303 if (!error) {
304 posix_acl_to_xattr(acl, data, len);
305 error = gfs2_ea_acl_chmod(ip, &el, attr, data);
306 }
307
308 out:
309 posix_acl_release(acl);
310 brelse(el.el_bh);
311 kfree(data);
312
313 return error;
314}
315
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
new file mode 100644
index 000000000000..067105786eaa
--- /dev/null
+++ b/fs/gfs2/acl.h
@@ -0,0 +1,37 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __ACL_DOT_H__
#define __ACL_DOT_H__

/*
 * Names of the extended attributes that hold the POSIX ACLs, and their
 * lengths without the terminating NUL.
 */
#define GFS2_POSIX_ACL_ACCESS		"posix_acl_access"
#define GFS2_POSIX_ACL_ACCESS_LEN	16
#define GFS2_POSIX_ACL_DEFAULT		"posix_acl_default"
#define GFS2_POSIX_ACL_DEFAULT_LEN	17

/* True when the @len bytes at @name are exactly the access ACL name */
#define GFS2_ACL_IS_ACCESS(name, len) \
	 ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
	 !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))

/* True when the @len bytes at @name are exactly the default ACL name */
#define GFS2_ACL_IS_DEFAULT(name, len) \
	 ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
	 !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))

/* Only pointers to these are used below, so forward declarations suffice */
struct gfs2_inode;
struct gfs2_ea_request;
struct inode;
struct iattr;

int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode);
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
int gfs2_check_acl_locked(struct inode *inode, int mask);
int gfs2_check_acl(struct inode *inode, int mask);
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);

#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
new file mode 100644
index 000000000000..41abd3f4fc73
--- /dev/null
+++ b/fs/gfs2/bmap.c
@@ -0,0 +1,1103 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "inode.h"
24#include "meta_io.h"
25#include "page.h"
26#include "quota.h"
27#include "rgrp.h"
28#include "trans.h"
29#include "dir.h"
30#include "util.h"
31
32/* This doesn't need to be that large as max 64 bit pointers in a 4k
33 * block is 512, so __u16 is fine for that. It saves stack space to
34 * keep it small.
35 */
36struct metapath {
37 __u16 mp_list[GFS2_MAX_META_HEIGHT];
38};
39
40typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
41 struct buffer_head *bh, uint64_t *top,
42 uint64_t *bottom, unsigned int height,
43 void *data);
44
45struct strip_mine {
46 int sm_first;
47 unsigned int sm_height;
48};
49
50/**
51 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
52 * @ip: The GFS2 inode to unstuff
53 * @unstuffer: the routine that handles unstuffing a non-zero length file
54 * @private: private data for the unstuffer
55 *
56 * This routine unstuffs a dinode and returns it to a "normal" state such
57 * that the height can be grown in the traditional way.
58 *
59 * Returns: errno
60 */
61
62int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
63 void *private)
64{
65 struct buffer_head *bh, *dibh;
66 uint64_t block = 0;
67 int isdir = gfs2_is_dir(ip);
68 int error;
69
70 down_write(&ip->i_rw_mutex);
71
72 error = gfs2_meta_inode_buffer(ip, &dibh);
73 if (error)
74 goto out;
75
76 if (ip->i_di.di_size) {
77 /* Get a free block, fill it with the stuffed data,
78 and write it out to disk */
79
80 if (isdir) {
81 block = gfs2_alloc_meta(ip);
82
83 error = gfs2_dir_get_new_buffer(ip, block, &bh);
84 if (error)
85 goto out_brelse;
86 gfs2_buffer_copy_tail(bh,
87 sizeof(struct gfs2_meta_header),
88 dibh, sizeof(struct gfs2_dinode));
89 brelse(bh);
90 } else {
91 block = gfs2_alloc_data(ip);
92
93 error = unstuffer(ip, dibh, block, private);
94 if (error)
95 goto out_brelse;
96 }
97 }
98
99 /* Set up the pointer to the new block */
100
101 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
102
103 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
104
105 if (ip->i_di.di_size) {
106 *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) =
107 cpu_to_be64(block);
108 ip->i_di.di_blocks++;
109 }
110
111 ip->i_di.di_height = 1;
112
113 gfs2_dinode_out(&ip->i_di, dibh->b_data);
114
115 out_brelse:
116 brelse(dibh);
117
118 out:
119 up_write(&ip->i_rw_mutex);
120
121 return error;
122}
123
124/**
125 * calc_tree_height - Calculate the height of a metadata tree
126 * @ip: The GFS2 inode
127 * @size: The proposed size of the file
128 *
129 * Work out how tall a metadata tree needs to be in order to accommodate a
130 * file of a particular size. If size is less than the current size of
131 * the inode, then the current size of the inode is used instead of the
132 * supplied one.
133 *
134 * Returns: the height the tree should be
135 */
136
137static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size)
138{
139 struct gfs2_sbd *sdp = ip->i_sbd;
140 uint64_t *arr;
141 unsigned int max, height;
142
143 if (ip->i_di.di_size > size)
144 size = ip->i_di.di_size;
145
146 if (gfs2_is_dir(ip)) {
147 arr = sdp->sd_jheightsize;
148 max = sdp->sd_max_jheight;
149 } else {
150 arr = sdp->sd_heightsize;
151 max = sdp->sd_max_height;
152 }
153
154 for (height = 0; height < max; height++)
155 if (arr[height] >= size)
156 break;
157
158 return height;
159}
160
161/**
162 * build_height - Build a metadata tree of the requested height
163 * @ip: The GFS2 inode
164 * @height: The height to build to
165 *
166 *
167 * Returns: errno
168 */
169
170static int build_height(struct inode *inode, unsigned height)
171{
172 struct gfs2_inode *ip = inode->u.generic_ip;
173 unsigned new_height = height - ip->i_di.di_height;
174 struct buffer_head *dibh;
175 struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
176 int error;
177 u64 *bp;
178 u64 bn;
179 unsigned n;
180
181 if (height <= ip->i_di.di_height)
182 return 0;
183
184 error = gfs2_meta_inode_buffer(ip, &dibh);
185 if (error)
186 return error;
187
188 for(n = 0; n < new_height; n++) {
189 bn = gfs2_alloc_meta(ip);
190 blocks[n] = gfs2_meta_new(ip->i_gl, bn);
191 gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
192 }
193
194 n = 0;
195 bn = blocks[0]->b_blocknr;
196 if (new_height > 1) {
197 for(; n < new_height-1; n++) {
198 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
199 GFS2_FORMAT_IN);
200 gfs2_buffer_clear_tail(blocks[n],
201 sizeof(struct gfs2_meta_header));
202 bp = (u64 *)(blocks[n]->b_data +
203 sizeof(struct gfs2_meta_header));
204 *bp = cpu_to_be64(blocks[n+1]->b_blocknr);
205 brelse(blocks[n]);
206 blocks[n] = NULL;
207 }
208 }
209 gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
210 gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
211 dibh, sizeof(struct gfs2_dinode));
212 brelse(blocks[n]);
213 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
214 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
215 bp = (u64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
216 *bp = cpu_to_be64(bn);
217 ip->i_di.di_height += new_height;
218 ip->i_di.di_blocks += new_height;
219 gfs2_dinode_out(&ip->i_di, dibh->b_data);
220 brelse(dibh);
221 return error;
222}
223
224/**
225 * find_metapath - Find path through the metadata tree
226 * @ip: The inode pointer
227 * @mp: The metapath to return the result in
228 * @block: The disk block to look up
229 *
230 * This routine returns a struct metapath structure that defines a path
231 * through the metadata of inode "ip" to get to block "block".
232 *
233 * Example:
234 * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
235 * filesystem with a blocksize of 4096.
236 *
237 * find_metapath() would return a struct metapath structure set to:
238 * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
239 * and mp_list[2] = 165.
240 *
241 * That means that in order to get to the block containing the byte at
242 * offset 101342453, we would load the indirect block pointed to by pointer
243 * 0 in the dinode. We would then load the indirect block pointed to by
244 * pointer 48 in that indirect block. We would then load the data block
245 * pointed to by pointer 165 in that indirect block.
246 *
247 * ----------------------------------------
248 * | Dinode | |
249 * | | 4|
250 * | |0 1 2 3 4 5 9|
251 * | | 6|
252 * ----------------------------------------
253 * |
254 * |
255 * V
256 * ----------------------------------------
257 * | Indirect Block |
258 * | 5|
259 * | 4 4 4 4 4 5 5 1|
260 * |0 5 6 7 8 9 0 1 2|
261 * ----------------------------------------
262 * |
263 * |
264 * V
265 * ----------------------------------------
266 * | Indirect Block |
267 * | 1 1 1 1 1 5|
268 * | 6 6 6 6 6 1|
269 * |0 3 4 5 6 7 2|
270 * ----------------------------------------
271 * |
272 * |
273 * V
274 * ----------------------------------------
275 * | Data block containing offset |
276 * | 101342453 |
277 * | |
278 * | |
279 * ----------------------------------------
280 *
281 */
282
283static void find_metapath(struct gfs2_inode *ip, uint64_t block,
284 struct metapath *mp)
285{
286 struct gfs2_sbd *sdp = ip->i_sbd;
287 uint64_t b = block;
288 unsigned int i;
289
290 for (i = ip->i_di.di_height; i--;)
291 mp->mp_list[i] = (__u16)do_div(b, sdp->sd_inptrs);
292
293}
294
295/**
296 * metapointer - Return pointer to start of metadata in a buffer
297 * @bh: The buffer
298 * @height: The metadata height (0 = dinode)
299 * @mp: The metapath
300 *
301 * Return a pointer to the block number of the next height of the metadata
302 * tree given a buffer containing the pointer to the current height of the
303 * metadata tree.
304 */
305
306static inline u64 *metapointer(struct buffer_head *bh, int *boundary,
307 unsigned int height, const struct metapath *mp)
308{
309 unsigned int head_size = (height > 0) ?
310 sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
311 u64 *ptr;
312 *boundary = 0;
313 ptr = ((u64 *)(bh->b_data + head_size)) + mp->mp_list[height];
314 if (ptr + 1 == (u64*)(bh->b_data + bh->b_size))
315 *boundary = 1;
316 return ptr;
317}
318
319/**
320 * lookup_block - Get the next metadata block in metadata tree
321 * @ip: The GFS2 inode
322 * @bh: Buffer containing the pointers to metadata blocks
323 * @height: The height of the tree (0 = dinode)
324 * @mp: The metapath
325 * @create: Non-zero if we may create a new meatdata block
326 * @new: Used to indicate if we did create a new metadata block
327 * @block: the returned disk block number
328 *
329 * Given a metatree, complete to a particular height, checks to see if the next
330 * height of the tree exists. If not the next height of the tree is created.
331 * The block number of the next height of the metadata tree is returned.
332 *
333 */
334
335static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
336 unsigned int height, struct metapath *mp, int create,
337 int *new, uint64_t *block)
338{
339 int boundary;
340 uint64_t *ptr = metapointer(bh, &boundary, height, mp);
341
342 if (*ptr) {
343 *block = be64_to_cpu(*ptr);
344 return boundary;
345 }
346
347 *block = 0;
348
349 if (!create)
350 return 0;
351
352 if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip))
353 *block = gfs2_alloc_data(ip);
354 else
355 *block = gfs2_alloc_meta(ip);
356
357 gfs2_trans_add_bh(ip->i_gl, bh, 1);
358
359 *ptr = cpu_to_be64(*block);
360 ip->i_di.di_blocks++;
361
362 *new = 1;
363 return 0;
364}
365
366/**
367 * gfs2_block_pointers - Map a block from an inode to a disk block
368 * @inode: The inode
369 * @lblock: The logical block number
370 * @new: Value/Result argument (1 = may create/did create new blocks)
371 * @boundary: gets set if we've hit a block boundary
372 * @mp: metapath to use
373 *
374 * Find the block number on the current device which corresponds to an
375 * inode's block. If the block had to be created, "new" will be set.
376 *
377 * Returns: errno
378 */
379
380static struct buffer_head *gfs2_block_pointers(struct inode *inode, u64 lblock,
381 int *new, u64 *dblock,
382 int *boundary,
383 struct metapath *mp)
384{
385 struct gfs2_inode *ip = inode->u.generic_ip;
386 struct gfs2_sbd *sdp = ip->i_sbd;
387 struct buffer_head *bh;
388 int create = *new;
389 unsigned int bsize;
390 unsigned int height;
391 unsigned int end_of_metadata;
392 unsigned int x;
393 int error = 0;
394
395 *new = 0;
396 *dblock = 0;
397
398 if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
399 goto out;
400
401 bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
402
403 height = calc_tree_height(ip, (lblock + 1) * bsize);
404 if (ip->i_di.di_height < height) {
405 if (!create)
406 goto out;
407
408 error = build_height(inode, height);
409 if (error)
410 goto out;
411 }
412
413 find_metapath(ip, lblock, mp);
414 end_of_metadata = ip->i_di.di_height - 1;
415
416 error = gfs2_meta_inode_buffer(ip, &bh);
417 if (error)
418 goto out;
419
420 for (x = 0; x < end_of_metadata; x++) {
421 lookup_block(ip, bh, x, mp, create, new, dblock);
422 brelse(bh);
423 if (!*dblock)
424 goto out;
425
426 error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh);
427 if (error)
428 goto out;
429 }
430
431 *boundary = lookup_block(ip, bh, end_of_metadata, mp, create, new, dblock);
432 if (*new) {
433 struct buffer_head *dibh;
434 error = gfs2_meta_inode_buffer(ip, &dibh);
435 if (!error) {
436 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
437 gfs2_dinode_out(&ip->i_di, dibh->b_data);
438 brelse(dibh);
439 }
440 }
441 return bh;
442out:
443 return ERR_PTR(error);
444}
445
446
447static inline void bmap_lock(struct inode *inode, int create)
448{
449 struct gfs2_inode *ip = inode->u.generic_ip;
450 if (create)
451 down_write(&ip->i_rw_mutex);
452 else
453 down_read(&ip->i_rw_mutex);
454}
455
456static inline void bmap_unlock(struct inode *inode, int create)
457{
458 struct gfs2_inode *ip = inode->u.generic_ip;
459 if (create)
460 up_write(&ip->i_rw_mutex);
461 else
462 up_read(&ip->i_rw_mutex);
463}
464
465int gfs2_block_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, int *boundary)
466{
467 struct metapath mp;
468 struct buffer_head *bh;
469 int create = *new;
470
471 bmap_lock(inode, create);
472 bh = gfs2_block_pointers(inode, lblock, new, dblock, boundary, &mp);
473 bmap_unlock(inode, create);
474 if (!bh)
475 return 0;
476 if (IS_ERR(bh))
477 return PTR_ERR(bh);
478 brelse(bh);
479 return 0;
480}
481
482int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
483{
484 struct gfs2_inode *ip = inode->u.generic_ip;
485 struct gfs2_sbd *sdp = ip->i_sbd;
486 struct metapath mp;
487 struct buffer_head *bh;
488 int boundary;
489 int create = *new;
490
491 BUG_ON(!extlen);
492 BUG_ON(!dblock);
493 BUG_ON(!new);
494
495 bmap_lock(inode, create);
496 bh = gfs2_block_pointers(inode, lblock, new, dblock, &boundary, &mp);
497 *extlen = 1;
498
499 if (bh && !IS_ERR(bh) && *dblock && !*new) {
500 u64 tmp_dblock;
501 int tmp_new;
502 unsigned int nptrs;
503 unsigned end_of_metadata = ip->i_di.di_height - 1;
504
505 nptrs = (end_of_metadata) ? sdp->sd_inptrs : sdp->sd_diptrs;
506 while (++mp.mp_list[end_of_metadata] < nptrs) {
507 lookup_block(ip, bh, end_of_metadata, &mp, 0, &tmp_new, &tmp_dblock);
508 if (*dblock + *extlen != tmp_dblock)
509 break;
510 (*extlen)++;
511 }
512 }
513 bmap_unlock(inode, create);
514 if (!bh)
515 return 0;
516 if (IS_ERR(bh))
517 return PTR_ERR(bh);
518 brelse(bh);
519 return 0;
520}
521
522/**
523 * recursive_scan - recursively scan through the end of a file
524 * @ip: the inode
525 * @dibh: the dinode buffer
526 * @mp: the path through the metadata to the point to start
527 * @height: the height the recursion is at
528 * @block: the indirect block to look at
529 * @first: 1 if this is the first block
530 * @bc: the call to make for each piece of metadata
531 * @data: data opaque to this function to pass to @bc
532 *
533 * When this is first called @height and @block should be zero and
534 * @first should be 1.
535 *
536 * Returns: errno
537 */
538
539static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
540 struct metapath *mp, unsigned int height,
541 uint64_t block, int first, block_call_t bc,
542 void *data)
543{
544 struct gfs2_sbd *sdp = ip->i_sbd;
545 struct buffer_head *bh = NULL;
546 uint64_t *top, *bottom;
547 uint64_t bn;
548 int error;
549 int mh_size = sizeof(struct gfs2_meta_header);
550
551 if (!height) {
552 error = gfs2_meta_inode_buffer(ip, &bh);
553 if (error)
554 return error;
555 dibh = bh;
556
557 top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
558 mp->mp_list[0];
559 bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) +
560 sdp->sd_diptrs;
561 } else {
562 error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
563 if (error)
564 return error;
565
566 top = (uint64_t *)(bh->b_data + mh_size) +
567 ((first) ? mp->mp_list[height] : 0);
568
569 bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs;
570 }
571
572 error = bc(ip, dibh, bh, top, bottom, height, data);
573 if (error)
574 goto out;
575
576 if (height < ip->i_di.di_height - 1)
577 for (; top < bottom; top++, first = 0) {
578 if (!*top)
579 continue;
580
581 bn = be64_to_cpu(*top);
582
583 error = recursive_scan(ip, dibh, mp, height + 1, bn,
584 first, bc, data);
585 if (error)
586 break;
587 }
588
589 out:
590 brelse(bh);
591
592 return error;
593}
594
595/**
596 * do_strip - Look for a layer a particular layer of the file and strip it off
597 * @ip: the inode
598 * @dibh: the dinode buffer
599 * @bh: A buffer of pointers
600 * @top: The first pointer in the buffer
601 * @bottom: One more than the last pointer
602 * @height: the height this buffer is at
603 * @data: a pointer to a struct strip_mine
604 *
605 * Returns: errno
606 */
607
608static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
609 struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
610 unsigned int height, void *data)
611{
612 struct strip_mine *sm = (struct strip_mine *)data;
613 struct gfs2_sbd *sdp = ip->i_sbd;
614 struct gfs2_rgrp_list rlist;
615 uint64_t bn, bstart;
616 uint32_t blen;
617 uint64_t *p;
618 unsigned int rg_blocks = 0;
619 int metadata;
620 unsigned int revokes = 0;
621 int x;
622 int error;
623
624 if (!*top)
625 sm->sm_first = 0;
626
627 if (height != sm->sm_height)
628 return 0;
629
630 if (sm->sm_first) {
631 top++;
632 sm->sm_first = 0;
633 }
634
635 metadata = (height != ip->i_di.di_height - 1);
636 if (metadata)
637 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
638
639 error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh);
640 if (error)
641 return error;
642
643 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
644 bstart = 0;
645 blen = 0;
646
647 for (p = top; p < bottom; p++) {
648 if (!*p)
649 continue;
650
651 bn = be64_to_cpu(*p);
652
653 if (bstart + blen == bn)
654 blen++;
655 else {
656 if (bstart)
657 gfs2_rlist_add(sdp, &rlist, bstart);
658
659 bstart = bn;
660 blen = 1;
661 }
662 }
663
664 if (bstart)
665 gfs2_rlist_add(sdp, &rlist, bstart);
666 else
667 goto out; /* Nothing to do */
668
669 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
670
671 for (x = 0; x < rlist.rl_rgrps; x++) {
672 struct gfs2_rgrpd *rgd;
673 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
674 rg_blocks += rgd->rd_ri.ri_length;
675 }
676
677 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
678 if (error)
679 goto out_rlist;
680
681 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
682 RES_INDIRECT + RES_STATFS + RES_QUOTA,
683 revokes);
684 if (error)
685 goto out_rg_gunlock;
686
687 down_write(&ip->i_rw_mutex);
688
689 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
690 gfs2_trans_add_bh(ip->i_gl, bh, 1);
691
692 bstart = 0;
693 blen = 0;
694
695 for (p = top; p < bottom; p++) {
696 if (!*p)
697 continue;
698
699 bn = be64_to_cpu(*p);
700
701 if (bstart + blen == bn)
702 blen++;
703 else {
704 if (bstart) {
705 if (metadata)
706 gfs2_free_meta(ip, bstart, blen);
707 else
708 gfs2_free_data(ip, bstart, blen);
709 }
710
711 bstart = bn;
712 blen = 1;
713 }
714
715 *p = 0;
716 if (!ip->i_di.di_blocks)
717 gfs2_consist_inode(ip);
718 ip->i_di.di_blocks--;
719 }
720 if (bstart) {
721 if (metadata)
722 gfs2_free_meta(ip, bstart, blen);
723 else
724 gfs2_free_data(ip, bstart, blen);
725 }
726
727 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
728
729 gfs2_dinode_out(&ip->i_di, dibh->b_data);
730
731 up_write(&ip->i_rw_mutex);
732
733 gfs2_trans_end(sdp);
734
735 out_rg_gunlock:
736 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
737
738 out_rlist:
739 gfs2_rlist_free(&rlist);
740
741 out:
742 gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh);
743
744 return error;
745}
746
747/**
748 * do_grow - Make a file look bigger than it is
749 * @ip: the inode
750 * @size: the size to set the file to
751 *
752 * Called with an exclusive lock on @ip.
753 *
754 * Returns: errno
755 */
756
757static int do_grow(struct gfs2_inode *ip, uint64_t size)
758{
759 struct gfs2_sbd *sdp = ip->i_sbd;
760 struct gfs2_alloc *al;
761 struct buffer_head *dibh;
762 unsigned int h;
763 int error;
764
765 al = gfs2_alloc_get(ip);
766
767 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
768 if (error)
769 goto out;
770
771 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
772 if (error)
773 goto out_gunlock_q;
774
775 al->al_requested = sdp->sd_max_height + RES_DATA;
776
777 error = gfs2_inplace_reserve(ip);
778 if (error)
779 goto out_gunlock_q;
780
781 error = gfs2_trans_begin(sdp,
782 sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
783 RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
784 if (error)
785 goto out_ipres;
786
787 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
788 if (gfs2_is_stuffed(ip)) {
789 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
790 NULL);
791 if (error)
792 goto out_end_trans;
793 }
794
795 h = calc_tree_height(ip, size);
796 if (ip->i_di.di_height < h) {
797 down_write(&ip->i_rw_mutex);
798 error = build_height(ip->i_vnode, h);
799 up_write(&ip->i_rw_mutex);
800 if (error)
801 goto out_end_trans;
802 }
803 }
804
805 ip->i_di.di_size = size;
806 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
807
808 error = gfs2_meta_inode_buffer(ip, &dibh);
809 if (error)
810 goto out_end_trans;
811
812 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
813 gfs2_dinode_out(&ip->i_di, dibh->b_data);
814 brelse(dibh);
815
816 out_end_trans:
817 gfs2_trans_end(sdp);
818
819 out_ipres:
820 gfs2_inplace_release(ip);
821
822 out_gunlock_q:
823 gfs2_quota_unlock(ip);
824
825 out:
826 gfs2_alloc_put(ip);
827
828 return error;
829}
830
831static int trunc_start(struct gfs2_inode *ip, uint64_t size)
832{
833 struct gfs2_sbd *sdp = ip->i_sbd;
834 struct buffer_head *dibh;
835 int journaled = gfs2_is_jdata(ip);
836 int error;
837
838 error = gfs2_trans_begin(sdp,
839 RES_DINODE + ((journaled) ? RES_JDATA : 0), 0);
840 if (error)
841 return error;
842
843 error = gfs2_meta_inode_buffer(ip, &dibh);
844 if (error)
845 goto out;
846
847 if (gfs2_is_stuffed(ip)) {
848 ip->i_di.di_size = size;
849 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
850 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
851 gfs2_dinode_out(&ip->i_di, dibh->b_data);
852 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
853 error = 1;
854
855 } else {
856 if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
857 error = gfs2_block_truncate_page(ip->i_vnode->i_mapping);
858
859 if (!error) {
860 ip->i_di.di_size = size;
861 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
862 ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
863 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
864 gfs2_dinode_out(&ip->i_di, dibh->b_data);
865 }
866 }
867
868 brelse(dibh);
869
870 out:
871 gfs2_trans_end(sdp);
872
873 return error;
874}
875
876static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size)
877{
878 unsigned int height = ip->i_di.di_height;
879 uint64_t lblock;
880 struct metapath mp;
881 int error;
882
883 if (!size)
884 lblock = 0;
885 else
886 lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
887
888 find_metapath(ip, lblock, &mp);
889 gfs2_alloc_get(ip);
890
891 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
892 if (error)
893 goto out;
894
895 while (height--) {
896 struct strip_mine sm;
897 sm.sm_first = !!size;
898 sm.sm_height = height;
899
900 error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
901 if (error)
902 break;
903 }
904
905 gfs2_quota_unhold(ip);
906
907 out:
908 gfs2_alloc_put(ip);
909 return error;
910}
911
912static int trunc_end(struct gfs2_inode *ip)
913{
914 struct gfs2_sbd *sdp = ip->i_sbd;
915 struct buffer_head *dibh;
916 int error;
917
918 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
919 if (error)
920 return error;
921
922 down_write(&ip->i_rw_mutex);
923
924 error = gfs2_meta_inode_buffer(ip, &dibh);
925 if (error)
926 goto out;
927
928 if (!ip->i_di.di_size) {
929 ip->i_di.di_height = 0;
930 ip->i_di.di_goal_meta =
931 ip->i_di.di_goal_data =
932 ip->i_num.no_addr;
933 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
934 }
935 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
936 ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
937
938 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
939 gfs2_dinode_out(&ip->i_di, dibh->b_data);
940 brelse(dibh);
941
942 out:
943 up_write(&ip->i_rw_mutex);
944
945 gfs2_trans_end(sdp);
946
947 return error;
948}
949
/**
 * do_shrink - make a file smaller
 * @ip: the inode
 * @size: the size to make the file
 *
 * Called with an exclusive lock on @ip.
 *
 * Runs trunc_start(), and unless that already completed the truncate,
 * trunc_dealloc() followed by trunc_end().
 *
 * Returns: errno
 */

static int do_shrink(struct gfs2_inode *ip, uint64_t size)
{
	int rc = trunc_start(ip, size);

	/* Negative: failure.  Positive: trunc_start() did all the work. */
	if (rc)
		return (rc < 0) ? rc : 0;

	rc = trunc_dealloc(ip, size);
	if (rc)
		return rc;

	return trunc_end(ip);
}
977
978/**
979 * gfs2_truncatei - make a file a given size
980 * @ip: the inode
981 * @size: the size to make the file
982 * @truncator: function to truncate the last partial block
983 *
984 * The file size can grow, shrink, or stay the same size.
985 *
986 * Returns: errno
987 */
988
989int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size)
990{
991 int error;
992
993 if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode)))
994 return -EINVAL;
995
996 if (size > ip->i_di.di_size)
997 error = do_grow(ip, size);
998 else
999 error = do_shrink(ip, size);
1000
1001 return error;
1002}
1003
1004int gfs2_truncatei_resume(struct gfs2_inode *ip)
1005{
1006 int error;
1007 error = trunc_dealloc(ip, ip->i_di.di_size);
1008 if (!error)
1009 error = trunc_end(ip);
1010 return error;
1011}
1012
/*
 * gfs2_file_dealloc - strip the whole pointer tree of a file
 * @ip: the inode
 *
 * Returns: errno
 */
int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	const uint64_t new_size = 0;

	return trunc_dealloc(ip, new_size);
}
1017
1018/**
1019 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
1020 * @ip: the file
1021 * @len: the number of bytes to be written to the file
1022 * @data_blocks: returns the number of data blocks required
1023 * @ind_blocks: returns the number of indirect blocks required
1024 *
1025 */
1026
1027void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
1028 unsigned int *data_blocks, unsigned int *ind_blocks)
1029{
1030 struct gfs2_sbd *sdp = ip->i_sbd;
1031 unsigned int tmp;
1032
1033 if (gfs2_is_dir(ip)) {
1034 *data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
1035 *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
1036 } else {
1037 *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
1038 *ind_blocks = 3 * (sdp->sd_max_height - 1);
1039 }
1040
1041 for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1042 tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
1043 *ind_blocks += tmp;
1044 }
1045}
1046
1047/**
1048 * gfs2_write_alloc_required - figure out if a write will require an allocation
1049 * @ip: the file being written to
1050 * @offset: the offset to write to
1051 * @len: the number of bytes being written
1052 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
1053 *
1054 * Returns: errno
1055 */
1056
1057int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
1058 unsigned int len, int *alloc_required)
1059{
1060 struct gfs2_sbd *sdp = ip->i_sbd;
1061 uint64_t lblock, lblock_stop, dblock;
1062 uint32_t extlen;
1063 int new = 0;
1064 int error = 0;
1065
1066 *alloc_required = 0;
1067
1068 if (!len)
1069 return 0;
1070
1071 if (gfs2_is_stuffed(ip)) {
1072 if (offset + len >
1073 sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
1074 *alloc_required = 1;
1075 return 0;
1076 }
1077
1078 if (gfs2_is_dir(ip)) {
1079 unsigned int bsize = sdp->sd_jbsize;
1080 lblock = offset;
1081 do_div(lblock, bsize);
1082 lblock_stop = offset + len + bsize - 1;
1083 do_div(lblock_stop, bsize);
1084 } else {
1085 unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1086 lblock = offset >> shift;
1087 lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1088 }
1089
1090 for (; lblock < lblock_stop; lblock += extlen) {
1091 error = gfs2_extent_map(ip->i_vnode, lblock, &new, &dblock, &extlen);
1092 if (error)
1093 return error;
1094
1095 if (!dblock) {
1096 *alloc_required = 1;
1097 return 0;
1098 }
1099 }
1100
1101 return 0;
1102}
1103
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
new file mode 100644
index 000000000000..06ccb2d808ad
--- /dev/null
+++ b/fs/gfs2/bmap.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __BMAP_DOT_H__
11#define __BMAP_DOT_H__
12
13typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip,
14 struct buffer_head * dibh, uint64_t block,
15 void *private);
16int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer,
17 void *private);
18
19int gfs2_block_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, int *boundary);
20int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
21
22int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size);
23int gfs2_truncatei_resume(struct gfs2_inode *ip);
24int gfs2_file_dealloc(struct gfs2_inode *ip);
25
26void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
27 unsigned int *data_blocks,
28 unsigned int *ind_blocks);
29int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset,
30 unsigned int len, int *alloc_required);
31
32#endif /* __BMAP_DOT_H__ */
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
new file mode 100644
index 000000000000..9e7b9f296786
--- /dev/null
+++ b/fs/gfs2/daemon.c
@@ -0,0 +1,223 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/delay.h>
17#include <linux/gfs2_ondisk.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "daemon.h"
23#include "glock.h"
24#include "log.h"
25#include "quota.h"
26#include "recovery.h"
27#include "super.h"
28#include "unlinked.h"
29#include "util.h"
30
31/* This uses schedule_timeout() instead of msleep() because it's good for
32 the daemons to wake up more often than the timeout when unmounting so
33 the user's unmount doesn't sit there forever.
34
35 The kthread functions used to start these daemons block and flush signals. */
36
37/**
38 * gfs2_scand - Look for cached glocks and inodes to toss from memory
39 * @sdp: Pointer to GFS2 superblock
40 *
41 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
42 * See gfs2_glockd()
43 */
44
45int gfs2_scand(void *data)
46{
47 struct gfs2_sbd *sdp = data;
48 unsigned long t;
49
50 while (!kthread_should_stop()) {
51 gfs2_scand_internal(sdp);
52 t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
53 schedule_timeout_interruptible(t);
54 }
55
56 return 0;
57}
58
59/**
60 * gfs2_glockd - Reclaim unused glock structures
61 * @sdp: Pointer to GFS2 superblock
62 *
63 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
64 * Number of daemons can be set by user, with num_glockd mount option.
65 */
66
67int gfs2_glockd(void *data)
68{
69 struct gfs2_sbd *sdp = data;
70
71 while (!kthread_should_stop()) {
72 while (atomic_read(&sdp->sd_reclaim_count))
73 gfs2_reclaim_glock(sdp);
74
75 wait_event_interruptible(sdp->sd_reclaim_wq,
76 (atomic_read(&sdp->sd_reclaim_count) ||
77 kthread_should_stop()));
78 }
79
80 return 0;
81}
82
83/**
84 * gfs2_recoverd - Recover dead machine's journals
85 * @sdp: Pointer to GFS2 superblock
86 *
87 */
88
89int gfs2_recoverd(void *data)
90{
91 struct gfs2_sbd *sdp = data;
92 unsigned long t;
93
94 while (!kthread_should_stop()) {
95 gfs2_check_journals(sdp);
96 t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
97 schedule_timeout_interruptible(t);
98 }
99
100 return 0;
101}
102
103/**
104 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
105 * @sdp: Pointer to GFS2 superblock
106 *
107 * Also, periodically check to make sure that we're using the most recent
108 * journal index.
109 */
110
111int gfs2_logd(void *data)
112{
113 struct gfs2_sbd *sdp = data;
114 struct gfs2_holder ji_gh;
115 unsigned long t;
116
117 while (!kthread_should_stop()) {
118 /* Advance the log tail */
119
120 t = sdp->sd_log_flush_time +
121 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
122
123 gfs2_ail1_empty(sdp, DIO_ALL);
124
125 if (time_after_eq(jiffies, t)) {
126 gfs2_log_flush(sdp, NULL);
127 sdp->sd_log_flush_time = jiffies;
128 }
129
130 /* Check for latest journal index */
131
132 t = sdp->sd_jindex_refresh_time +
133 gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
134
135 if (time_after_eq(jiffies, t)) {
136 if (!gfs2_jindex_hold(sdp, &ji_gh))
137 gfs2_glock_dq_uninit(&ji_gh);
138 sdp->sd_jindex_refresh_time = jiffies;
139 }
140
141 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
142 schedule_timeout_interruptible(t);
143 }
144
145 return 0;
146}
147
148/**
149 * gfs2_quotad - Write cached quota changes into the quota file
150 * @sdp: Pointer to GFS2 superblock
151 *
152 */
153
154int gfs2_quotad(void *data)
155{
156 struct gfs2_sbd *sdp = data;
157 unsigned long t;
158 int error;
159
160 while (!kthread_should_stop()) {
161 /* Update the master statfs file */
162
163 t = sdp->sd_statfs_sync_time +
164 gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
165
166 if (time_after_eq(jiffies, t)) {
167 error = gfs2_statfs_sync(sdp);
168 if (error &&
169 error != -EROFS &&
170 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
171 fs_err(sdp, "quotad: (1) error=%d\n", error);
172 sdp->sd_statfs_sync_time = jiffies;
173 }
174
175 /* Update quota file */
176
177 t = sdp->sd_quota_sync_time +
178 gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
179
180 if (time_after_eq(jiffies, t)) {
181 error = gfs2_quota_sync(sdp);
182 if (error &&
183 error != -EROFS &&
184 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
185 fs_err(sdp, "quotad: (2) error=%d\n", error);
186 sdp->sd_quota_sync_time = jiffies;
187 }
188
189 gfs2_quota_scan(sdp);
190
191 t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
192 schedule_timeout_interruptible(t);
193 }
194
195 return 0;
196}
197
198/**
199 * gfs2_inoded - Deallocate unlinked inodes
200 * @sdp: Pointer to GFS2 superblock
201 *
202 */
203
204int gfs2_inoded(void *data)
205{
206 struct gfs2_sbd *sdp = data;
207 unsigned long t;
208 int error;
209
210 while (!kthread_should_stop()) {
211 error = gfs2_unlinked_dealloc(sdp);
212 if (error &&
213 error != -EROFS &&
214 !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
215 fs_err(sdp, "inoded: error = %d\n", error);
216
217 t = gfs2_tune_get(sdp, gt_inoded_secs) * HZ;
218 schedule_timeout_interruptible(t);
219 }
220
221 return 0;
222}
223
diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h
new file mode 100644
index 000000000000..aa68e7a1b0b7
--- /dev/null
+++ b/fs/gfs2/daemon.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__

/*
 * Thread functions of the GFS2 daemons.  Each one receives the
 * struct gfs2_sbd pointer through its void *data argument.
 */
int gfs2_scand(void *data);
int gfs2_glockd(void *data);
int gfs2_recoverd(void *data);
int gfs2_logd(void *data);
int gfs2_quotad(void *data);
int gfs2_inoded(void *data);

#endif /* __DAEMON_DOT_H__ */
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
new file mode 100644
index 000000000000..6918a58261e2
--- /dev/null
+++ b/fs/gfs2/dir.c
@@ -0,0 +1,1974 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * Implements Extendible Hashing as described in:
12 * "Extendible Hashing" by Fagin, et al in
13 * __ACM Trans. on Database Systems__, Sept 1979.
14 *
15 *
16 * Here's the layout of dirents which is essentially the same as that of ext2
17 * within a single block. The field de_name_len is the number of bytes
18 * actually required for the name (no null terminator). The field de_rec_len
19 * is the number of bytes allocated to the dirent. The offset of the next
20 * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21 * deleted, the preceding dirent inherits its allocated space, ie
22 * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23 * by adding de_rec_len to the current dirent, this essentially causes the
24 * deleted dirent to get jumped over when iterating through all the dirents.
25 *
26 * When deleting the first dirent in a block, there is no previous dirent so
27 * the field de_ino is set to zero to designate it as deleted. When allocating
28 * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29 * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30 * dirent is allocated. Otherwise it must go through all the 'used' dirents
31 * searching for one in which the amount of total space minus the amount of
32 * used space will provide enough space for the new dirent.
33 *
34 * There are two types of blocks in which dirents reside. In a stuffed dinode,
35 * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36 * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37 * beginning of the leaf block. The dirents reside in leaves when
38 *
39 * dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40 *
41 * Otherwise, the dirents are "linear", within a single stuffed dinode block.
42 *
43 * When the dirents are in leaves, the actual contents of the directory file are
44 * used as an array of 64-bit block pointers pointing to the leaf blocks. The
45 * dirents are NOT in the directory file itself. There can be more than one
46 * block pointer in the array that points to the same leaf. In fact, when a
47 * directory is first converted from linear to exhash, all of the pointers
48 * point to the same leaf.
49 *
50 * When a leaf is completely full, the size of the hash table can be
51 * doubled unless it is already at the maximum size which is hard coded into
52 * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53 * but never before the maximum hash table size has been reached.
54 */
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/buffer_head.h>
60#include <linux/sort.h>
61#include <linux/gfs2_ondisk.h>
62#include <linux/crc32.h>
63#include <linux/vmalloc.h>
64
65#include "gfs2.h"
66#include "lm_interface.h"
67#include "incore.h"
68#include "dir.h"
69#include "glock.h"
70#include "inode.h"
71#include "meta_io.h"
72#include "quota.h"
73#include "rgrp.h"
74#include "trans.h"
75#include "bmap.h"
76#include "util.h"
77
/* Values returned by dirent_first() to say which kind of block was given */
#define IS_LEAF		1	/* Hashed (leaf) directory */
#define IS_DINODE	2	/* Linear (stuffed dinode block) directory */

/*
 * Conversion between an on-disk hash value and a directory offset.
 * The enabled variant drops / restores the lowest bit of the hash.
 */
#if 1
#define gfs2_disk_hash2offset(h)	((uint64_t)(h) >> 1)
#define gfs2_dir_offset2hash(p)		((uint32_t)((uint64_t)(p) << 1))
#else
#define gfs2_disk_hash2offset(h)	((uint64_t)(h))
#define gfs2_dir_offset2hash(p)		((uint32_t)((uint64_t)(p)))
#endif

/* Callback type taking a hash table index range and a leaf block number */
typedef int (*leaf_call_t) (struct gfs2_inode *dip,
			    uint32_t index,
			    uint32_t len,
			    uint64_t leaf_no,
			    void *data);
92
93
94int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, uint64_t block,
95 struct buffer_head **bhp)
96{
97 struct buffer_head *bh;
98
99 bh = gfs2_meta_new(ip->i_gl, block);
100 gfs2_trans_add_bh(ip->i_gl, bh, 1);
101 gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
102 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
103 *bhp = bh;
104 return 0;
105}
106
107static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, uint64_t block,
108 struct buffer_head **bhp)
109{
110 struct buffer_head *bh;
111 int error;
112
113 error = gfs2_meta_read(ip->i_gl, block, DIO_START | DIO_WAIT, &bh);
114 if (error)
115 return error;
116 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
117 brelse(bh);
118 return -EIO;
119 }
120 *bhp = bh;
121 return 0;
122}
123
124static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
125 unsigned int offset, unsigned int size)
126
127{
128 struct buffer_head *dibh;
129 int error;
130
131 error = gfs2_meta_inode_buffer(ip, &dibh);
132 if (error)
133 return error;
134
135 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
136 memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
137 if (ip->i_di.di_size < offset + size)
138 ip->i_di.di_size = offset + size;
139 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
140 gfs2_dinode_out(&ip->i_di, dibh->b_data);
141
142 brelse(dibh);
143
144 return size;
145}
146
147
148
149/**
150 * gfs2_dir_write_data - Write directory information to the inode
151 * @ip: The GFS2 inode
152 * @buf: The buffer containing information to be written
153 * @offset: The file offset to start writing at
154 * @size: The amount of data to write
155 *
156 * Returns: The number of bytes correctly written or error code
157 */
158static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
159 uint64_t offset, unsigned int size)
160{
161 struct gfs2_sbd *sdp = ip->i_sbd;
162 struct buffer_head *dibh;
163 uint64_t lblock, dblock;
164 uint32_t extlen = 0;
165 unsigned int o;
166 int copied = 0;
167 int error = 0;
168
169 if (!size)
170 return 0;
171
172 if (gfs2_is_stuffed(ip) &&
173 offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
174 return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
175 size);
176
177 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
178 return -EINVAL;
179
180 if (gfs2_is_stuffed(ip)) {
181 error = gfs2_unstuff_dinode(ip, NULL, NULL);
182 if (error)
183 return error;
184 }
185
186 lblock = offset;
187 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
188
189 while (copied < size) {
190 unsigned int amount;
191 struct buffer_head *bh;
192 int new;
193
194 amount = size - copied;
195 if (amount > sdp->sd_sb.sb_bsize - o)
196 amount = sdp->sd_sb.sb_bsize - o;
197
198 if (!extlen) {
199 new = 1;
200 error = gfs2_extent_map(ip->i_vnode, lblock, &new,
201 &dblock, &extlen);
202 if (error)
203 goto fail;
204 error = -EIO;
205 if (gfs2_assert_withdraw(sdp, dblock))
206 goto fail;
207 }
208
209 if (amount == sdp->sd_jbsize || new)
210 error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
211 else
212 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
213
214 if (error)
215 goto fail;
216
217 gfs2_trans_add_bh(ip->i_gl, bh, 1);
218 memcpy(bh->b_data + o, buf, amount);
219 brelse(bh);
220 if (error)
221 goto fail;
222
223 copied += amount;
224 lblock++;
225 dblock++;
226 extlen--;
227
228 o = sizeof(struct gfs2_meta_header);
229 }
230
231out:
232 error = gfs2_meta_inode_buffer(ip, &dibh);
233 if (error)
234 return error;
235
236 if (ip->i_di.di_size < offset + copied)
237 ip->i_di.di_size = offset + copied;
238 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
239
240 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
241 gfs2_dinode_out(&ip->i_di, dibh->b_data);
242 brelse(dibh);
243
244 return copied;
245fail:
246 if (copied)
247 goto out;
248 return error;
249}
250
251static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
252 unsigned int offset, unsigned int size)
253{
254 struct buffer_head *dibh;
255 int error;
256
257 error = gfs2_meta_inode_buffer(ip, &dibh);
258 if (!error) {
259 offset += sizeof(struct gfs2_dinode);
260 memcpy(buf, dibh->b_data + offset, size);
261 brelse(dibh);
262 }
263
264 return (error) ? error : size;
265}
266
267
268/**
269 * gfs2_dir_read_data - Read a data from a directory inode
270 * @ip: The GFS2 Inode
271 * @buf: The buffer to place result into
272 * @offset: File offset to begin jdata_readng from
273 * @size: Amount of data to transfer
274 *
275 * Returns: The amount of data actually copied or the error
276 */
277static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf,
278 uint64_t offset, unsigned int size)
279{
280 struct gfs2_sbd *sdp = ip->i_sbd;
281 uint64_t lblock, dblock;
282 uint32_t extlen = 0;
283 unsigned int o;
284 int copied = 0;
285 int error = 0;
286
287 if (offset >= ip->i_di.di_size)
288 return 0;
289
290 if ((offset + size) > ip->i_di.di_size)
291 size = ip->i_di.di_size - offset;
292
293 if (!size)
294 return 0;
295
296 if (gfs2_is_stuffed(ip))
297 return gfs2_dir_read_stuffed(ip, buf, (unsigned int)offset,
298 size);
299
300 if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
301 return -EINVAL;
302
303 lblock = offset;
304 o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
305
306 while (copied < size) {
307 unsigned int amount;
308 struct buffer_head *bh;
309 int new;
310
311 amount = size - copied;
312 if (amount > sdp->sd_sb.sb_bsize - o)
313 amount = sdp->sd_sb.sb_bsize - o;
314
315 if (!extlen) {
316 new = 0;
317 error = gfs2_extent_map(ip->i_vnode, lblock, &new,
318 &dblock, &extlen);
319 if (error)
320 goto fail;
321 }
322
323 if (extlen > 1)
324 gfs2_meta_ra(ip->i_gl, dblock, extlen);
325
326 if (dblock) {
327 if (new)
328 error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
329 else
330 error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
331 if (error)
332 goto fail;
333 dblock++;
334 extlen--;
335 } else
336 bh = NULL;
337
338 memcpy(buf, bh->b_data + o, amount);
339 brelse(bh);
340 if (error)
341 goto fail;
342
343 copied += amount;
344 lblock++;
345
346 o = sizeof(struct gfs2_meta_header);
347 }
348
349 return copied;
350fail:
351 return (copied) ? copied : error;
352}
353
/*
 * Per-dirent callback used by gfs2_dirent_scan().  A non-zero return stops
 * the scan; see gfs2_dirent_scan() for how the value is interpreted.
 */
typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque);
357
358static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
359 const struct qstr *name, int ret)
360{
361 if (dent->de_inum.no_addr != 0 &&
362 be32_to_cpu(dent->de_hash) == name->hash &&
363 be16_to_cpu(dent->de_name_len) == name->len &&
364 memcmp((char *)(dent+1), name->name, name->len) == 0)
365 return ret;
366 return 0;
367}
368
/* Scan callback: returns 1 when @dent matches @name; @opaque is unused */
static int gfs2_dirent_find(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque)
{
	const int on_match = 1;

	return __gfs2_dirent_find(dent, name, on_match);
}
375
/* Scan callback: returns 2 when @dent matches @name; @opaque is unused */
static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
			    const struct qstr *name, void *opaque)
{
	const int on_match = 2;

	return __gfs2_dirent_find(dent, name, on_match);
}
382
383/*
384 * name->name holds ptr to start of block.
385 * name->len holds size of block.
386 */
387static int gfs2_dirent_last(const struct gfs2_dirent *dent,
388 const struct qstr *name,
389 void *opaque)
390{
391 const char *start = name->name;
392 const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
393 if (name->len == (end - start))
394 return 1;
395 return 0;
396}
397
398static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
399 const struct qstr *name,
400 void *opaque)
401{
402 unsigned required = GFS2_DIRENT_SIZE(name->len);
403 unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
404 unsigned totlen = be16_to_cpu(dent->de_rec_len);
405
406 if (!dent->de_inum.no_addr)
407 actual = GFS2_DIRENT_SIZE(0);
408 if ((totlen - actual) >= required)
409 return 1;
410 return 0;
411}
412
413struct dirent_gather {
414 const struct gfs2_dirent **pdent;
415 unsigned offset;
416};
417
418static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
419 const struct qstr *name,
420 void *opaque)
421{
422 struct dirent_gather *g = opaque;
423 if (dent->de_inum.no_addr) {
424 g->pdent[g->offset++] = dent;
425 }
426 return 0;
427}
428
429/*
430 * Other possible things to check:
431 * - Inode located within filesystem size (and on valid block)
432 * - Valid directory entry type
433 * Not sure how heavy-weight we want to make this... could also check
434 * hash is correct for example, but that would take a lot of extra time.
435 * For now the most important thing is to check that the various sizes
436 * are correct.
437 */
438static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
439 unsigned int size, unsigned int len, int first)
440{
441 const char *msg = "gfs2_dirent too small";
442 if (unlikely(size < sizeof(struct gfs2_dirent)))
443 goto error;
444 msg = "gfs2_dirent misaligned";
445 if (unlikely(offset & 0x7))
446 goto error;
447 msg = "gfs2_dirent points beyond end of block";
448 if (unlikely(offset + size > len))
449 goto error;
450 msg = "zero inode number";
451 if (unlikely(!first && !dent->de_inum.no_addr))
452 goto error;
453 msg = "name length is greater than space in dirent";
454 if (dent->de_inum.no_addr &&
455 unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
456 size))
457 goto error;
458 return 0;
459error:
460 printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg,
461 first ? "first in block" : "not first in block");
462 return -EIO;
463}
464
465static int gfs2_dirent_offset(const void *buf)
466{
467 const struct gfs2_meta_header *h = buf;
468 int offset;
469
470 BUG_ON(buf == NULL);
471
472 switch(be32_to_cpu(h->mh_type)) {
473 case GFS2_METATYPE_LF:
474 offset = sizeof(struct gfs2_leaf);
475 break;
476 case GFS2_METATYPE_DI:
477 offset = sizeof(struct gfs2_dinode);
478 break;
479 default:
480 goto wrong_type;
481 }
482 return offset;
483wrong_type:
484 printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
485 be32_to_cpu(h->mh_type));
486 return -1;
487}
488
489static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode,
490 void *buf,
491 unsigned int len, gfs2_dscan_t scan,
492 const struct qstr *name,
493 void *opaque)
494{
495 struct gfs2_dirent *dent, *prev;
496 unsigned offset;
497 unsigned size;
498 int ret = 0;
499
500 ret = gfs2_dirent_offset(buf);
501 if (ret < 0)
502 goto consist_inode;
503
504 offset = ret;
505 prev = NULL;
506 dent = (struct gfs2_dirent *)(buf + offset);
507 size = be16_to_cpu(dent->de_rec_len);
508 if (gfs2_check_dirent(dent, offset, size, len, 1))
509 goto consist_inode;
510 do {
511 ret = scan(dent, name, opaque);
512 if (ret)
513 break;
514 offset += size;
515 if (offset == len)
516 break;
517 prev = dent;
518 dent = (struct gfs2_dirent *)(buf + offset);
519 size = be16_to_cpu(dent->de_rec_len);
520 if (gfs2_check_dirent(dent, offset, size, len, 0))
521 goto consist_inode;
522 } while(1);
523
524 switch(ret) {
525 case 0:
526 return NULL;
527 case 1:
528 return dent;
529 case 2:
530 return prev ? prev : dent;
531 default:
532 BUG_ON(ret > 0);
533 return ERR_PTR(ret);
534 }
535
536consist_inode:
537 gfs2_consist_inode(inode->u.generic_ip);
538 return ERR_PTR(-EIO);
539}
540
541
542/**
543 * dirent_first - Return the first dirent
544 * @dip: the directory
545 * @bh: The buffer
546 * @dent: Pointer to list of dirents
547 *
548 * return first dirent whether bh points to leaf or stuffed dinode
549 *
550 * Returns: IS_LEAF, IS_DINODE, or -errno
551 */
552
553static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
554 struct gfs2_dirent **dent)
555{
556 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
557
558 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
559 if (gfs2_meta_check(dip->i_sbd, bh))
560 return -EIO;
561 *dent = (struct gfs2_dirent *)(bh->b_data +
562 sizeof(struct gfs2_leaf));
563 return IS_LEAF;
564 } else {
565 if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
566 return -EIO;
567 *dent = (struct gfs2_dirent *)(bh->b_data +
568 sizeof(struct gfs2_dinode));
569 return IS_DINODE;
570 }
571}
572
573/**
574 * dirent_next - Next dirent
575 * @dip: the directory
576 * @bh: The buffer
577 * @dent: Pointer to list of dirents
578 *
579 * Returns: 0 on success, error code otherwise
580 */
581
582static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
583 struct gfs2_dirent **dent)
584{
585 struct gfs2_dirent *tmp, *cur;
586 char *bh_end;
587 uint16_t cur_rec_len;
588
589 cur = *dent;
590 bh_end = bh->b_data + bh->b_size;
591 cur_rec_len = be16_to_cpu(cur->de_rec_len);
592
593 if ((char *)cur + cur_rec_len >= bh_end) {
594 if ((char *)cur + cur_rec_len > bh_end) {
595 gfs2_consist_inode(dip);
596 return -EIO;
597 }
598 return -ENOENT;
599 }
600
601 tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
602
603 if ((char *)tmp + be16_to_cpu(tmp->de_rec_len) > bh_end) {
604 gfs2_consist_inode(dip);
605 return -EIO;
606 }
607
608 if (cur_rec_len == 0) {
609 gfs2_consist_inode(dip);
610 return -EIO;
611 }
612
613 /* Only the first dent could ever have de_inum.no_addr == 0 */
614 if (!tmp->de_inum.no_addr) {
615 gfs2_consist_inode(dip);
616 return -EIO;
617 }
618
619 *dent = tmp;
620
621 return 0;
622}
623
624/**
625 * dirent_del - Delete a dirent
626 * @dip: The GFS2 inode
627 * @bh: The buffer
628 * @prev: The previous dirent
629 * @cur: The current dirent
630 *
631 */
632
633static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
634 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
635{
636 uint16_t cur_rec_len, prev_rec_len;
637
638 if (!cur->de_inum.no_addr) {
639 gfs2_consist_inode(dip);
640 return;
641 }
642
643 gfs2_trans_add_bh(dip->i_gl, bh, 1);
644
645 /* If there is no prev entry, this is the first entry in the block.
646 The de_rec_len is already as big as it needs to be. Just zero
647 out the inode number and return. */
648
649 if (!prev) {
650 cur->de_inum.no_addr = 0; /* No endianess worries */
651 return;
652 }
653
654 /* Combine this dentry with the previous one. */
655
656 prev_rec_len = be16_to_cpu(prev->de_rec_len);
657 cur_rec_len = be16_to_cpu(cur->de_rec_len);
658
659 if ((char *)prev + prev_rec_len != (char *)cur)
660 gfs2_consist_inode(dip);
661 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
662 gfs2_consist_inode(dip);
663
664 prev_rec_len += cur_rec_len;
665 prev->de_rec_len = cpu_to_be16(prev_rec_len);
666}
667
668/*
669 * Takes a dent from which to grab space as an argument. Returns the
670 * newly created dent.
671 */
672static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
673 struct gfs2_dirent *dent,
674 const struct qstr *name,
675 struct buffer_head *bh)
676{
677 struct gfs2_inode *ip = inode->u.generic_ip;
678 struct gfs2_dirent *ndent;
679 unsigned offset = 0, totlen;
680
681 if (dent->de_inum.no_addr)
682 offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
683 totlen = be16_to_cpu(dent->de_rec_len);
684 BUG_ON(offset + name->len > totlen);
685 gfs2_trans_add_bh(ip->i_gl, bh, 1);
686 ndent = (struct gfs2_dirent *)((char *)dent + offset);
687 dent->de_rec_len = cpu_to_be16(offset);
688 gfs2_qstr2dirent(name, totlen - offset, ndent);
689 return ndent;
690}
691
692static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
693 struct buffer_head *bh,
694 const struct qstr *name)
695{
696 struct gfs2_dirent *dent;
697 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
698 gfs2_dirent_find_space, name, NULL);
699 if (!dent || IS_ERR(dent))
700 return dent;
701 return gfs2_init_dirent(inode, dent, name, bh);
702}
703
704static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
705 struct buffer_head **bhp)
706{
707 int error;
708
709 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
710 if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
711 error = -EIO;
712
713 return error;
714}
715
716/**
717 * get_leaf_nr - Get a leaf number associated with the index
718 * @dip: The GFS2 inode
719 * @index:
720 * @leaf_out:
721 *
722 * Returns: 0 on success, error code otherwise
723 */
724
725static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
726 uint64_t *leaf_out)
727{
728 uint64_t leaf_no;
729 int error;
730
731 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
732 index * sizeof(uint64_t),
733 sizeof(uint64_t));
734 if (error != sizeof(uint64_t))
735 return (error < 0) ? error : -EIO;
736
737 *leaf_out = be64_to_cpu(leaf_no);
738
739 return 0;
740}
741
742static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
743 struct buffer_head **bh_out)
744{
745 uint64_t leaf_no;
746 int error;
747
748 error = get_leaf_nr(dip, index, &leaf_no);
749 if (!error)
750 error = get_leaf(dip, leaf_no, bh_out);
751
752 return error;
753}
754
755static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
756 const struct qstr *name,
757 gfs2_dscan_t scan,
758 struct buffer_head **pbh)
759{
760 struct buffer_head *bh;
761 struct gfs2_dirent *dent;
762 struct gfs2_inode *ip = inode->u.generic_ip;
763 int error;
764
765 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
766 struct gfs2_leaf *leaf;
767 unsigned hsize = 1 << ip->i_di.di_depth;
768 unsigned index;
769 u64 ln;
770 if (hsize * sizeof(u64) != ip->i_di.di_size) {
771 gfs2_consist_inode(ip);
772 return ERR_PTR(-EIO);
773 }
774
775 index = name->hash >> (32 - ip->i_di.di_depth);
776 error = get_first_leaf(ip, index, &bh);
777 if (error)
778 return ERR_PTR(error);
779 do {
780 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
781 scan, name, NULL);
782 if (dent)
783 goto got_dent;
784 leaf = (struct gfs2_leaf *)bh->b_data;
785 ln = be64_to_cpu(leaf->lf_next);
786 brelse(bh);
787 if (!ln)
788 break;
789 error = get_leaf(ip, ln, &bh);
790 } while(!error);
791
792 return error ? ERR_PTR(error) : NULL;
793 }
794
795 error = gfs2_meta_inode_buffer(ip, &bh);
796 if (error)
797 return ERR_PTR(error);
798 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
799got_dent:
800 if (unlikely(dent == NULL || IS_ERR(dent))) {
801 brelse(bh);
802 bh = NULL;
803 }
804 *pbh = bh;
805 return dent;
806}
807
808static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
809{
810 struct gfs2_inode *ip = inode->u.generic_ip;
811 u64 bn = gfs2_alloc_meta(ip);
812 struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn);
813 struct gfs2_leaf *leaf;
814 struct gfs2_dirent *dent;
815 struct qstr name = { .name = "", .len = 0, .hash = 0 };
816 if (!bh)
817 return NULL;
818 gfs2_trans_add_bh(ip->i_gl, bh, 1);
819 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
820 leaf = (struct gfs2_leaf *)bh->b_data;
821 leaf->lf_depth = cpu_to_be16(depth);
822 leaf->lf_entries = cpu_to_be16(0);
823 leaf->lf_dirent_format = cpu_to_be16(GFS2_FORMAT_DE);
824 leaf->lf_next = cpu_to_be64(0);
825 memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
826 dent = (struct gfs2_dirent *)(leaf+1);
827 gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
828 *pbh = bh;
829 return leaf;
830}
831
832/**
833 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
834 * @dip: The GFS2 inode
835 *
836 * Returns: 0 on success, error code otherwise
837 */
838
839static int dir_make_exhash(struct inode *inode)
840{
841 struct gfs2_inode *dip = inode->u.generic_ip;
842 struct gfs2_sbd *sdp = dip->i_sbd;
843 struct gfs2_dirent *dent;
844 struct qstr args;
845 struct buffer_head *bh, *dibh;
846 struct gfs2_leaf *leaf;
847 int y;
848 uint32_t x;
849 uint64_t *lp, bn;
850 int error;
851
852 error = gfs2_meta_inode_buffer(dip, &dibh);
853 if (error)
854 return error;
855
856 /* Turn over a new leaf */
857
858 leaf = new_leaf(inode, &bh, 0);
859 if (!leaf)
860 return -ENOSPC;
861 bn = bh->b_blocknr;
862
863 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
864 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
865
866 /* Copy dirents */
867
868 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
869 sizeof(struct gfs2_dinode));
870
871 /* Find last entry */
872
873 x = 0;
874 args.len = bh->b_size - sizeof(struct gfs2_dinode) +
875 sizeof(struct gfs2_leaf);
876 args.name = bh->b_data;
877 dent = gfs2_dirent_scan(dip->i_vnode, bh->b_data, bh->b_size,
878 gfs2_dirent_last, &args, NULL);
879 if (!dent) {
880 brelse(bh);
881 brelse(dibh);
882 return -EIO;
883 }
884 if (IS_ERR(dent)) {
885 brelse(bh);
886 brelse(dibh);
887 return PTR_ERR(dent);
888 }
889
890 /* Adjust the last dirent's record length
891 (Remember that dent still points to the last entry.) */
892
893 dent->de_rec_len = cpu_to_be16(be16_to_cpu(dent->de_rec_len) +
894 sizeof(struct gfs2_dinode) -
895 sizeof(struct gfs2_leaf));
896
897 brelse(bh);
898
899 /* We're done with the new leaf block, now setup the new
900 hash table. */
901
902 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
903 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
904
905 lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
906
907 for (x = sdp->sd_hash_ptrs; x--; lp++)
908 *lp = cpu_to_be64(bn);
909
910 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
911 dip->i_di.di_blocks++;
912 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
913 dip->i_di.di_payload_format = 0;
914
915 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
916 dip->i_di.di_depth = y;
917
918 gfs2_dinode_out(&dip->i_di, dibh->b_data);
919
920 brelse(dibh);
921
922 return 0;
923}
924
925/**
926 * dir_split_leaf - Split a leaf block into two
927 * @dip: The GFS2 inode
928 * @index:
929 * @leaf_no:
930 *
931 * Returns: 0 on success, error code on failure
932 */
933
934static int dir_split_leaf(struct inode *inode, const struct qstr *name)
935{
936 struct gfs2_inode *dip = inode->u.generic_ip;
937 struct buffer_head *nbh, *obh, *dibh;
938 struct gfs2_leaf *nleaf, *oleaf;
939 struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
940 uint32_t start, len, half_len, divider;
941 uint64_t bn, *lp, leaf_no;
942 uint32_t index;
943 int x, moved = 0;
944 int error;
945
946 index = name->hash >> (32 - dip->i_di.di_depth);
947 error = get_leaf_nr(dip, index, &leaf_no);
948 if (error)
949 return error;
950
951 /* Get the old leaf block */
952 error = get_leaf(dip, leaf_no, &obh);
953 if (error)
954 return error;
955
956 oleaf = (struct gfs2_leaf *)obh->b_data;
957 if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) {
958 brelse(obh);
959 return 1; /* can't split */
960 }
961
962 gfs2_trans_add_bh(dip->i_gl, obh, 1);
963
964 nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
965 if (!nleaf) {
966 brelse(obh);
967 return -ENOSPC;
968 }
969 bn = nbh->b_blocknr;
970
971 /* Compute the start and len of leaf pointers in the hash table. */
972 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
973 half_len = len >> 1;
974 if (!half_len) {
975 printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index);
976 gfs2_consist_inode(dip);
977 error = -EIO;
978 goto fail_brelse;
979 }
980
981 start = (index & ~(len - 1));
982
983 /* Change the pointers.
984 Don't bother distinguishing stuffed from non-stuffed.
985 This code is complicated enough already. */
986 lp = kmalloc(half_len * sizeof(uint64_t), GFP_NOFS | __GFP_NOFAIL);
987 /* Change the pointers */
988 for (x = 0; x < half_len; x++)
989 lp[x] = cpu_to_be64(bn);
990
991 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(uint64_t),
992 half_len * sizeof(uint64_t));
993 if (error != half_len * sizeof(uint64_t)) {
994 if (error >= 0)
995 error = -EIO;
996 goto fail_lpfree;
997 }
998
999 kfree(lp);
1000
1001 /* Compute the divider */
1002 divider = (start + half_len) << (32 - dip->i_di.di_depth);
1003
1004 /* Copy the entries */
1005 dirent_first(dip, obh, &dent);
1006
1007 do {
1008 next = dent;
1009 if (dirent_next(dip, obh, &next))
1010 next = NULL;
1011
1012 if (dent->de_inum.no_addr &&
1013 be32_to_cpu(dent->de_hash) < divider) {
1014 struct qstr str;
1015 str.name = (char*)(dent+1);
1016 str.len = be16_to_cpu(dent->de_name_len);
1017 str.hash = be32_to_cpu(dent->de_hash);
1018 new = gfs2_dirent_alloc(inode, nbh, &str);
1019 if (IS_ERR(new)) {
1020 error = PTR_ERR(new);
1021 break;
1022 }
1023
1024 new->de_inum = dent->de_inum; /* No endian worries */
1025 new->de_type = dent->de_type; /* No endian worries */
1026 nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1);
1027
1028 dirent_del(dip, obh, prev, dent);
1029
1030 if (!oleaf->lf_entries)
1031 gfs2_consist_inode(dip);
1032 oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1);
1033
1034 if (!prev)
1035 prev = dent;
1036
1037 moved = 1;
1038 } else {
1039 prev = dent;
1040 }
1041 dent = next;
1042 } while (dent);
1043
1044 oleaf->lf_depth = nleaf->lf_depth;
1045
1046 error = gfs2_meta_inode_buffer(dip, &dibh);
1047 if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
1048 dip->i_di.di_blocks++;
1049 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1050 brelse(dibh);
1051 }
1052
1053 brelse(obh);
1054 brelse(nbh);
1055
1056 return error;
1057
1058fail_lpfree:
1059 kfree(lp);
1060
1061fail_brelse:
1062 brelse(obh);
1063 brelse(nbh);
1064 return error;
1065}
1066
1067/**
1068 * dir_double_exhash - Double size of ExHash table
1069 * @dip: The GFS2 dinode
1070 *
1071 * Returns: 0 on success, error code on failure
1072 */
1073
1074static int dir_double_exhash(struct gfs2_inode *dip)
1075{
1076 struct gfs2_sbd *sdp = dip->i_sbd;
1077 struct buffer_head *dibh;
1078 uint32_t hsize;
1079 uint64_t *buf;
1080 uint64_t *from, *to;
1081 uint64_t block;
1082 int x;
1083 int error = 0;
1084
1085 hsize = 1 << dip->i_di.di_depth;
1086 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1087 gfs2_consist_inode(dip);
1088 return -EIO;
1089 }
1090
1091 /* Allocate both the "from" and "to" buffers in one big chunk */
1092
1093 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
1094
1095 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
1096 error = gfs2_dir_read_data(dip, (char *)buf,
1097 block * sdp->sd_hash_bsize,
1098 sdp->sd_hash_bsize);
1099 if (error != sdp->sd_hash_bsize) {
1100 if (error >= 0)
1101 error = -EIO;
1102 goto fail;
1103 }
1104
1105 from = buf;
1106 to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
1107
1108 for (x = sdp->sd_hash_ptrs; x--; from++) {
1109 *to++ = *from; /* No endianess worries */
1110 *to++ = *from;
1111 }
1112
1113 error = gfs2_dir_write_data(dip,
1114 (char *)buf + sdp->sd_hash_bsize,
1115 block * sdp->sd_sb.sb_bsize,
1116 sdp->sd_sb.sb_bsize);
1117 if (error != sdp->sd_sb.sb_bsize) {
1118 if (error >= 0)
1119 error = -EIO;
1120 goto fail;
1121 }
1122 }
1123
1124 kfree(buf);
1125
1126 error = gfs2_meta_inode_buffer(dip, &dibh);
1127 if (!gfs2_assert_withdraw(sdp, !error)) {
1128 dip->i_di.di_depth++;
1129 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1130 brelse(dibh);
1131 }
1132
1133 return error;
1134
1135 fail:
1136 kfree(buf);
1137
1138 return error;
1139}
1140
1141/**
1142 * compare_dents - compare directory entries by hash value
1143 * @a: first dent
1144 * @b: second dent
1145 *
1146 * When comparing the hash entries of @a to @b:
1147 * gt: returns 1
1148 * lt: returns -1
1149 * eq: returns 0
1150 */
1151
1152static int compare_dents(const void *a, const void *b)
1153{
1154 struct gfs2_dirent *dent_a, *dent_b;
1155 uint32_t hash_a, hash_b;
1156 int ret = 0;
1157
1158 dent_a = *(struct gfs2_dirent **)a;
1159 hash_a = be32_to_cpu(dent_a->de_hash);
1160
1161 dent_b = *(struct gfs2_dirent **)b;
1162 hash_b = be32_to_cpu(dent_b->de_hash);
1163
1164 if (hash_a > hash_b)
1165 ret = 1;
1166 else if (hash_a < hash_b)
1167 ret = -1;
1168 else {
1169 unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1170 unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1171
1172 if (len_a > len_b)
1173 ret = 1;
1174 else if (len_a < len_b)
1175 ret = -1;
1176 else
1177 ret = memcmp((char *)(dent_a + 1),
1178 (char *)(dent_b + 1),
1179 len_a);
1180 }
1181
1182 return ret;
1183}
1184
1185/**
1186 * do_filldir_main - read out directory entries
1187 * @dip: The GFS2 inode
1188 * @offset: The offset in the file to read from
1189 * @opaque: opaque data to pass to filldir
1190 * @filldir: The function to pass entries to
1191 * @darr: an array of struct gfs2_dirent pointers to read
1192 * @entries: the number of entries in darr
1193 * @copied: pointer to int that's non-zero if a entry has been copied out
1194 *
1195 * Jump through some hoops to make sure that if there are hash collsions,
1196 * they are read out at the beginning of a buffer. We want to minimize
1197 * the possibility that they will fall into different readdir buffers or
1198 * that someone will want to seek to that location.
1199 *
1200 * Returns: errno, >0 on exception from filldir
1201 */
1202
1203static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
1204 void *opaque, gfs2_filldir_t filldir,
1205 const struct gfs2_dirent **darr, uint32_t entries,
1206 int *copied)
1207{
1208 const struct gfs2_dirent *dent, *dent_next;
1209 struct gfs2_inum inum;
1210 uint64_t off, off_next;
1211 unsigned int x, y;
1212 int run = 0;
1213 int error = 0;
1214
1215 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1216
1217 dent_next = darr[0];
1218 off_next = be32_to_cpu(dent_next->de_hash);
1219 off_next = gfs2_disk_hash2offset(off_next);
1220
1221 for (x = 0, y = 1; x < entries; x++, y++) {
1222 dent = dent_next;
1223 off = off_next;
1224
1225 if (y < entries) {
1226 dent_next = darr[y];
1227 off_next = be32_to_cpu(dent_next->de_hash);
1228 off_next = gfs2_disk_hash2offset(off_next);
1229
1230 if (off < *offset)
1231 continue;
1232 *offset = off;
1233
1234 if (off_next == off) {
1235 if (*copied && !run)
1236 return 1;
1237 run = 1;
1238 } else
1239 run = 0;
1240 } else {
1241 if (off < *offset)
1242 continue;
1243 *offset = off;
1244 }
1245
1246 gfs2_inum_in(&inum, (char *)&dent->de_inum);
1247
1248 error = filldir(opaque, (char *)(dent + 1),
1249 be16_to_cpu(dent->de_name_len),
1250 off, &inum,
1251 be16_to_cpu(dent->de_type));
1252 if (error)
1253 return 1;
1254
1255 *copied = 1;
1256 }
1257
1258 /* Increment the *offset by one, so the next time we come into the
1259 do_filldir fxn, we get the next entry instead of the last one in the
1260 current leaf */
1261
1262 (*offset)++;
1263
1264 return 0;
1265}
1266
1267static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1268 gfs2_filldir_t filldir, int *copied,
1269 unsigned *depth, u64 leaf_no)
1270{
1271 struct gfs2_inode *ip = inode->u.generic_ip;
1272 struct buffer_head *bh;
1273 struct gfs2_leaf *lf;
1274 unsigned entries = 0;
1275 unsigned leaves = 0;
1276 const struct gfs2_dirent **darr, *dent;
1277 struct dirent_gather g;
1278 struct buffer_head **larr;
1279 int leaf = 0;
1280 int error, i;
1281 u64 lfn = leaf_no;
1282
1283 do {
1284 error = get_leaf(ip, lfn, &bh);
1285 if (error)
1286 goto out;
1287 lf = (struct gfs2_leaf *)bh->b_data;
1288 if (leaves == 0)
1289 *depth = be16_to_cpu(lf->lf_depth);
1290 entries += be16_to_cpu(lf->lf_entries);
1291 leaves++;
1292 lfn = be64_to_cpu(lf->lf_next);
1293 brelse(bh);
1294 } while(lfn);
1295
1296 if (!entries)
1297 return 0;
1298
1299 error = -ENOMEM;
1300 larr = vmalloc((leaves + entries) * sizeof(void*));
1301 if (!larr)
1302 goto out;
1303 darr = (const struct gfs2_dirent **)(larr + leaves);
1304 g.pdent = darr;
1305 g.offset = 0;
1306 lfn = leaf_no;
1307
1308 do {
1309 error = get_leaf(ip, lfn, &bh);
1310 if (error)
1311 goto out_kfree;
1312 lf = (struct gfs2_leaf *)bh->b_data;
1313 lfn = be64_to_cpu(lf->lf_next);
1314 if (lf->lf_entries) {
1315 dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1316 gfs2_dirent_gather, NULL, &g);
1317 error = PTR_ERR(dent);
1318 if (IS_ERR(dent)) {
1319 goto out_kfree;
1320 }
1321 error = 0;
1322 larr[leaf++] = bh;
1323 } else {
1324 brelse(bh);
1325 }
1326 } while(lfn);
1327
1328 error = do_filldir_main(ip, offset, opaque, filldir, darr,
1329 entries, copied);
1330out_kfree:
1331 for(i = 0; i < leaf; i++)
1332 brelse(larr[i]);
1333 vfree(larr);
1334out:
1335 return error;
1336}
1337
1338/**
1339 * dir_e_read - Reads the entries from a directory into a filldir buffer
1340 * @dip: dinode pointer
1341 * @offset: the hash of the last entry read shifted to the right once
1342 * @opaque: buffer for the filldir function to fill
1343 * @filldir: points to the filldir function to use
1344 *
1345 * Returns: errno
1346 */
1347
1348static int dir_e_read(struct inode *inode, uint64_t *offset, void *opaque,
1349 gfs2_filldir_t filldir)
1350{
1351 struct gfs2_inode *dip = inode->u.generic_ip;
1352 struct gfs2_sbd *sdp = dip->i_sbd;
1353 uint32_t hsize, len = 0;
1354 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1355 uint32_t hash, index;
1356 uint64_t *lp;
1357 int copied = 0;
1358 int error = 0;
1359 unsigned depth;
1360
1361 hsize = 1 << dip->i_di.di_depth;
1362 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1363 gfs2_consist_inode(dip);
1364 return -EIO;
1365 }
1366
1367 hash = gfs2_dir_offset2hash(*offset);
1368 index = hash >> (32 - dip->i_di.di_depth);
1369
1370 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1371 if (!lp)
1372 return -ENOMEM;
1373
1374 while (index < hsize) {
1375 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1376 ht_offset = index - lp_offset;
1377
1378 if (ht_offset_cur != ht_offset) {
1379 error = gfs2_dir_read_data(dip, (char *)lp,
1380 ht_offset * sizeof(uint64_t),
1381 sdp->sd_hash_bsize);
1382 if (error != sdp->sd_hash_bsize) {
1383 if (error >= 0)
1384 error = -EIO;
1385 goto out;
1386 }
1387 ht_offset_cur = ht_offset;
1388 }
1389
1390 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1391 &copied, &depth,
1392 be64_to_cpu(lp[lp_offset]));
1393 if (error)
1394 break;
1395
1396 len = 1 << (dip->i_di.di_depth - depth);
1397 index = (index & ~(len - 1)) + len;
1398 }
1399
1400out:
1401 kfree(lp);
1402 if (error > 0)
1403 error = 0;
1404 return error;
1405}
1406
1407int gfs2_dir_read(struct inode *inode, uint64_t *offset, void *opaque,
1408 gfs2_filldir_t filldir)
1409{
1410 struct gfs2_inode *dip = inode->u.generic_ip;
1411 struct dirent_gather g;
1412 const struct gfs2_dirent **darr, *dent;
1413 struct buffer_head *dibh;
1414 int copied = 0;
1415 int error;
1416
1417 if (!dip->i_di.di_entries)
1418 return 0;
1419
1420 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1421 return dir_e_read(inode, offset, opaque, filldir);
1422
1423 if (!gfs2_is_stuffed(dip)) {
1424 gfs2_consist_inode(dip);
1425 return -EIO;
1426 }
1427
1428 error = gfs2_meta_inode_buffer(dip, &dibh);
1429 if (error)
1430 return error;
1431
1432 error = -ENOMEM;
1433 darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *),
1434 GFP_KERNEL);
1435 if (darr) {
1436 g.pdent = darr;
1437 g.offset = 0;
1438 dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
1439 gfs2_dirent_gather, NULL, &g);
1440 if (IS_ERR(dent)) {
1441 error = PTR_ERR(dent);
1442 goto out;
1443 }
1444 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1445 dip->i_di.di_entries, &copied);
1446out:
1447 kfree(darr);
1448 }
1449
1450 if (error > 0)
1451 error = 0;
1452
1453 brelse(dibh);
1454
1455 return error;
1456}
1457
1458/**
1459 * gfs2_dir_search - Search a directory
1460 * @dip: The GFS2 inode
1461 * @filename:
1462 * @inode:
1463 *
1464 * This routine searches a directory for a file or another directory.
1465 * Assumes a glock is held on dip.
1466 *
1467 * Returns: errno
1468 */
1469
1470int gfs2_dir_search(struct inode *dir, const struct qstr *name,
1471 struct gfs2_inum *inum, unsigned int *type)
1472{
1473 struct buffer_head *bh;
1474 struct gfs2_dirent *dent;
1475
1476 dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1477 if (dent) {
1478 if (IS_ERR(dent))
1479 return PTR_ERR(dent);
1480 if (inum)
1481 gfs2_inum_in(inum, (char *)&dent->de_inum);
1482 if (type)
1483 *type = be16_to_cpu(dent->de_type);
1484 brelse(bh);
1485 return 0;
1486 }
1487 return -ENOENT;
1488}
1489
1490static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1491{
1492 struct buffer_head *bh, *obh;
1493 struct gfs2_inode *ip = inode->u.generic_ip;
1494 struct gfs2_leaf *leaf, *oleaf;
1495 int error;
1496 u32 index;
1497 u64 bn;
1498
1499 index = name->hash >> (32 - ip->i_di.di_depth);
1500 error = get_first_leaf(ip, index, &obh);
1501 if (error)
1502 return error;
1503 do {
1504 oleaf = (struct gfs2_leaf *)obh->b_data;
1505 bn = be64_to_cpu(oleaf->lf_next);
1506 if (!bn)
1507 break;
1508 brelse(obh);
1509 error = get_leaf(ip, bn, &obh);
1510 if (error)
1511 return error;
1512 } while(1);
1513
1514 gfs2_trans_add_bh(ip->i_gl, obh, 1);
1515
1516 leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1517 if (!leaf) {
1518 brelse(obh);
1519 return -ENOSPC;
1520 }
1521 oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
1522 brelse(bh);
1523 brelse(obh);
1524
1525 error = gfs2_meta_inode_buffer(ip, &bh);
1526 if (error)
1527 return error;
1528 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1529 ip->i_di.di_blocks++;
1530 gfs2_dinode_out(&ip->i_di, bh->b_data);
1531 brelse(bh);
1532 return 0;
1533}
1534
1535/**
1536 * gfs2_dir_add - Add new filename into directory
1537 * @dip: The GFS2 inode
1538 * @filename: The new name
1539 * @inode: The inode number of the entry
1540 * @type: The type of the entry
1541 *
1542 * Returns: 0 on success, error code on failure
1543 */
1544
1545int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1546 const struct gfs2_inum *inum, unsigned type)
1547{
1548 struct gfs2_inode *ip = inode->u.generic_ip;
1549 struct buffer_head *bh;
1550 struct gfs2_dirent *dent;
1551 struct gfs2_leaf *leaf;
1552 int error;
1553
1554 while(1) {
1555 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
1556 &bh);
1557 if (dent) {
1558 if (IS_ERR(dent))
1559 return PTR_ERR(dent);
1560 dent = gfs2_init_dirent(inode, dent, name, bh);
1561 gfs2_inum_out(inum, (char *)&dent->de_inum);
1562 dent->de_type = cpu_to_be16(type);
1563 if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
1564 leaf = (struct gfs2_leaf *)bh->b_data;
1565 leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1);
1566 }
1567 brelse(bh);
1568 error = gfs2_meta_inode_buffer(ip, &bh);
1569 if (error)
1570 break;
1571 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1572 ip->i_di.di_entries++;
1573 ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1574 gfs2_dinode_out(&ip->i_di, bh->b_data);
1575 brelse(bh);
1576 error = 0;
1577 break;
1578 }
1579 if (!(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
1580 error = dir_make_exhash(inode);
1581 if (error)
1582 break;
1583 continue;
1584 }
1585 error = dir_split_leaf(inode, name);
1586 if (error == 0)
1587 continue;
1588 if (error < 0)
1589 break;
1590 if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1591 error = dir_double_exhash(ip);
1592 if (error)
1593 break;
1594 error = dir_split_leaf(inode, name);
1595 if (error < 0)
1596 break;
1597 if (error == 0)
1598 continue;
1599 }
1600 error = dir_new_leaf(inode, name);
1601 if (!error)
1602 continue;
1603 error = -ENOSPC;
1604 break;
1605 }
1606 return error;
1607}
1608
1609
1610/**
1611 * gfs2_dir_del - Delete a directory entry
1612 * @dip: The GFS2 inode
1613 * @filename: The filename
1614 *
1615 * Returns: 0 on success, error code on failure
1616 */
1617
1618int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1619{
1620 struct gfs2_dirent *dent, *prev = NULL;
1621 struct buffer_head *bh;
1622 int error;
1623
1624 /* Returns _either_ the entry (if its first in block) or the
1625 previous entry otherwise */
1626 dent = gfs2_dirent_search(dip->i_vnode, name, gfs2_dirent_prev, &bh);
1627 if (!dent) {
1628 gfs2_consist_inode(dip);
1629 return -EIO;
1630 }
1631 if (IS_ERR(dent)) {
1632 gfs2_consist_inode(dip);
1633 return PTR_ERR(dent);
1634 }
1635 /* If not first in block, adjust pointers accordingly */
1636 if (gfs2_dirent_find(dent, name, NULL) == 0) {
1637 prev = dent;
1638 dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1639 }
1640
1641 dirent_del(dip, bh, prev, dent);
1642 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1643 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1644 u16 entries = be16_to_cpu(leaf->lf_entries);
1645 if (!entries)
1646 gfs2_consist_inode(dip);
1647 leaf->lf_entries = cpu_to_be16(--entries);
1648 }
1649 brelse(bh);
1650
1651 error = gfs2_meta_inode_buffer(dip, &bh);
1652 if (error)
1653 return error;
1654
1655 if (!dip->i_di.di_entries)
1656 gfs2_consist_inode(dip);
1657 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1658 dip->i_di.di_entries--;
1659 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1660 gfs2_dinode_out(&dip->i_di, bh->b_data);
1661 brelse(bh);
1662
1663 return error;
1664}
1665
1666/**
1667 * gfs2_dir_mvino - Change inode number of directory entry
1668 * @dip: The GFS2 inode
1669 * @filename:
1670 * @new_inode:
1671 *
1672 * This routine changes the inode number of a directory entry. It's used
1673 * by rename to change ".." when a directory is moved.
1674 * Assumes a glock is held on dvp.
1675 *
1676 * Returns: errno
1677 */
1678
1679int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1680 struct gfs2_inum *inum, unsigned int new_type)
1681{
1682 struct buffer_head *bh;
1683 struct gfs2_dirent *dent;
1684 int error;
1685
1686 dent = gfs2_dirent_search(dip->i_vnode, filename, gfs2_dirent_find, &bh);
1687 if (!dent) {
1688 gfs2_consist_inode(dip);
1689 return -EIO;
1690 }
1691 if (IS_ERR(dent))
1692 return PTR_ERR(dent);
1693
1694 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1695 gfs2_inum_out(inum, (char *)&dent->de_inum);
1696 dent->de_type = cpu_to_be16(new_type);
1697
1698 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
1699 brelse(bh);
1700 error = gfs2_meta_inode_buffer(dip, &bh);
1701 if (error)
1702 return error;
1703 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1704 }
1705
1706 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1707 gfs2_dinode_out(&dip->i_di, bh->b_data);
1708 brelse(bh);
1709 return 0;
1710}
1711
1712/**
1713 * foreach_leaf - call a function for each leaf in a directory
1714 * @dip: the directory
1715 * @lc: the function to call for each each
1716 * @data: private data to pass to it
1717 *
1718 * Returns: errno
1719 */
1720
1721static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1722{
1723 struct gfs2_sbd *sdp = dip->i_sbd;
1724 struct buffer_head *bh;
1725 struct gfs2_leaf *leaf;
1726 uint32_t hsize, len;
1727 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1728 uint32_t index = 0;
1729 uint64_t *lp;
1730 uint64_t leaf_no;
1731 int error = 0;
1732
1733 hsize = 1 << dip->i_di.di_depth;
1734 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1735 gfs2_consist_inode(dip);
1736 return -EIO;
1737 }
1738
1739 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1740 if (!lp)
1741 return -ENOMEM;
1742
1743 while (index < hsize) {
1744 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1745 ht_offset = index - lp_offset;
1746
1747 if (ht_offset_cur != ht_offset) {
1748 error = gfs2_dir_read_data(dip, (char *)lp,
1749 ht_offset * sizeof(uint64_t),
1750 sdp->sd_hash_bsize);
1751 if (error != sdp->sd_hash_bsize) {
1752 if (error >= 0)
1753 error = -EIO;
1754 goto out;
1755 }
1756 ht_offset_cur = ht_offset;
1757 }
1758
1759 leaf_no = be64_to_cpu(lp[lp_offset]);
1760 if (leaf_no) {
1761 error = get_leaf(dip, leaf_no, &bh);
1762 if (error)
1763 goto out;
1764 leaf = (struct gfs2_leaf *)bh->b_data;
1765 brelse(bh);
1766
1767 len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth));
1768
1769 error = lc(dip, index, len, leaf_no, data);
1770 if (error)
1771 goto out;
1772
1773 index = (index & ~(len - 1)) + len;
1774 } else
1775 index++;
1776 }
1777
1778 if (index != hsize) {
1779 gfs2_consist_inode(dip);
1780 error = -EIO;
1781 }
1782
1783 out:
1784 kfree(lp);
1785
1786 return error;
1787}
1788
1789/**
1790 * leaf_dealloc - Deallocate a directory leaf
1791 * @dip: the directory
1792 * @index: the hash table offset in the directory
1793 * @len: the number of pointers to this leaf
1794 * @leaf_no: the leaf number
1795 * @data: not used
1796 *
1797 * Returns: errno
1798 */
1799
1800static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
1801 uint64_t leaf_no, void *data)
1802{
1803 struct gfs2_sbd *sdp = dip->i_sbd;
1804 struct gfs2_leaf *tmp_leaf;
1805 struct gfs2_rgrp_list rlist;
1806 struct buffer_head *bh, *dibh;
1807 uint64_t blk, nblk;
1808 unsigned int rg_blocks = 0, l_blocks = 0;
1809 char *ht;
1810 unsigned int x, size = len * sizeof(uint64_t);
1811 int error;
1812
1813 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1814
1815 ht = kzalloc(size, GFP_KERNEL);
1816 if (!ht)
1817 return -ENOMEM;
1818
1819 gfs2_alloc_get(dip);
1820
1821 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1822 if (error)
1823 goto out;
1824
1825 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
1826 if (error)
1827 goto out_qs;
1828
1829 /* Count the number of leaves */
1830
1831 for (blk = leaf_no; blk; blk = nblk) {
1832 error = get_leaf(dip, blk, &bh);
1833 if (error)
1834 goto out_rlist;
1835 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1836 nblk = be64_to_cpu(tmp_leaf->lf_next);
1837 brelse(bh);
1838
1839 gfs2_rlist_add(sdp, &rlist, blk);
1840 l_blocks++;
1841 }
1842
1843 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1844
1845 for (x = 0; x < rlist.rl_rgrps; x++) {
1846 struct gfs2_rgrpd *rgd;
1847 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1848 rg_blocks += rgd->rd_ri.ri_length;
1849 }
1850
1851 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1852 if (error)
1853 goto out_rlist;
1854
1855 error = gfs2_trans_begin(sdp,
1856 rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
1857 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1858 if (error)
1859 goto out_rg_gunlock;
1860
1861 for (blk = leaf_no; blk; blk = nblk) {
1862 error = get_leaf(dip, blk, &bh);
1863 if (error)
1864 goto out_end_trans;
1865 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1866 nblk = be64_to_cpu(tmp_leaf->lf_next);
1867 brelse(bh);
1868
1869 gfs2_free_meta(dip, blk, 1);
1870
1871 if (!dip->i_di.di_blocks)
1872 gfs2_consist_inode(dip);
1873 dip->i_di.di_blocks--;
1874 }
1875
1876 error = gfs2_dir_write_data(dip, ht, index * sizeof(uint64_t), size);
1877 if (error != size) {
1878 if (error >= 0)
1879 error = -EIO;
1880 goto out_end_trans;
1881 }
1882
1883 error = gfs2_meta_inode_buffer(dip, &dibh);
1884 if (error)
1885 goto out_end_trans;
1886
1887 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1888 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1889 brelse(dibh);
1890
1891 out_end_trans:
1892 gfs2_trans_end(sdp);
1893
1894 out_rg_gunlock:
1895 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1896
1897 out_rlist:
1898 gfs2_rlist_free(&rlist);
1899 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
1900
1901 out_qs:
1902 gfs2_quota_unhold(dip);
1903
1904 out:
1905 gfs2_alloc_put(dip);
1906 kfree(ht);
1907
1908 return error;
1909}
1910
1911/**
1912 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
1913 * @dip: the directory
1914 *
1915 * Dealloc all on-disk directory leaves to FREEMETA state
1916 * Change on-disk inode type to "regular file"
1917 *
1918 * Returns: errno
1919 */
1920
1921int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1922{
1923 struct gfs2_sbd *sdp = dip->i_sbd;
1924 struct buffer_head *bh;
1925 int error;
1926
1927 /* Dealloc on-disk leaves to FREEMETA state */
1928 error = foreach_leaf(dip, leaf_dealloc, NULL);
1929 if (error)
1930 return error;
1931
1932 /* Make this a regular file in case we crash.
1933 (We don't want to free these blocks a second time.) */
1934
1935 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1936 if (error)
1937 return error;
1938
1939 error = gfs2_meta_inode_buffer(dip, &bh);
1940 if (!error) {
1941 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1942 ((struct gfs2_dinode *)bh->b_data)->di_mode =
1943 cpu_to_be32(S_IFREG);
1944 brelse(bh);
1945 }
1946
1947 gfs2_trans_end(sdp);
1948
1949 return error;
1950}
1951
1952/**
1953 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
1954 * @ip: the file being written to
1955 * @filname: the filename that's going to be added
1956 *
1957 * Returns: 1 if alloc required, 0 if not, -ve on error
1958 */
1959
1960int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name)
1961{
1962 struct gfs2_dirent *dent;
1963 struct buffer_head *bh;
1964
1965 dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
1966 if (!dent) {
1967 return 1;
1968 }
1969 if (IS_ERR(dent))
1970 return PTR_ERR(dent);
1971 brelse(bh);
1972 return 0;
1973}
1974
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
new file mode 100644
index 000000000000..173403095eb2
--- /dev/null
+++ b/fs/gfs2/dir.h
@@ -0,0 +1,73 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIR_DOT_H__
11#define __DIR_DOT_H__
12
13/**
14 * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
15 * @opaque: opaque data used by the function
16 * @name: the name of the directory entry
17 * @length: the length of the name
18 * @offset: the entry's offset in the directory
19 * @inum: the inode number the entry points to
20 * @type: the type of inode the entry points to
21 *
22 * Returns: 0 on success, 1 if buffer full
23 */
24
25typedef int (*gfs2_filldir_t) (void *opaque,
26 const char *name, unsigned int length,
27 uint64_t offset,
28 struct gfs2_inum *inum, unsigned int type);
29
30int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
31 struct gfs2_inum *inum, unsigned int *type);
32int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
33 const struct gfs2_inum *inum, unsigned int type);
34int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
35int gfs2_dir_read(struct inode *inode, uint64_t * offset, void *opaque,
36 gfs2_filldir_t filldir);
37int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
38 struct gfs2_inum *new_inum, unsigned int new_type);
39
40int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
41
42int gfs2_diradd_alloc_required(struct inode *dir,
43 const struct qstr *filename);
44int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, uint64_t block,
45 struct buffer_head **bhp);
46
47static inline uint32_t gfs2_disk_hash(const char *data, int len)
48{
49 return crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF;
50}
51
52
53static inline void gfs2_str2qstr(struct qstr *name, const char *fname)
54{
55 name->name = fname;
56 name->len = strlen(fname);
57 name->hash = gfs2_disk_hash(name->name, name->len);
58}
59
60/* N.B. This probably ought to take inum & type as args as well */
61static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent)
62{
63 dent->de_inum.no_addr = cpu_to_be64(0);
64 dent->de_inum.no_formal_ino = cpu_to_be64(0);
65 dent->de_hash = cpu_to_be32(name->hash);
66 dent->de_rec_len = cpu_to_be16(reclen);
67 dent->de_name_len = cpu_to_be16(name->len);
68 dent->de_type = cpu_to_be16(0);
69 memset(dent->__pad, 0, sizeof(dent->__pad));
70 memcpy((char*)(dent+1), name->name, name->len);
71}
72
73#endif /* __DIR_DOT_H__ */
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
new file mode 100644
index 000000000000..1c5ac3160b3b
--- /dev/null
+++ b/fs/gfs2/eaops.c
@@ -0,0 +1,230 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "util.h"
26
27/**
28 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
29 * @namep: ea name, possibly with type appended
30 *
31 * Returns: GFS2_EATYPE_XXX
32 */
33
34unsigned int gfs2_ea_name2type(const char *name, char **truncated_name)
35{
36 unsigned int type;
37
38 if (strncmp(name, "system.", 7) == 0) {
39 type = GFS2_EATYPE_SYS;
40 if (truncated_name)
41 *truncated_name = strchr(name, '.') + 1;
42 } else if (strncmp(name, "user.", 5) == 0) {
43 type = GFS2_EATYPE_USR;
44 if (truncated_name)
45 *truncated_name = strchr(name, '.') + 1;
46 } else if (strncmp(name, "security.", 9) == 0) {
47 type = GFS2_EATYPE_SECURITY;
48 if (truncated_name)
49 *truncated_name = strchr(name, '.') + 1;
50 } else {
51 type = GFS2_EATYPE_UNUSED;
52 if (truncated_name)
53 *truncated_name = NULL;
54 }
55
56 return type;
57}
58
59static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
60{
61 struct inode *inode = ip->i_vnode;
62 int error = permission(inode, MAY_READ, NULL);
63 if (error)
64 return error;
65
66 return gfs2_ea_get_i(ip, er);
67}
68
69static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
70{
71 struct inode *inode = ip->i_vnode;
72
73 if (S_ISREG(inode->i_mode) ||
74 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
75 int error = permission(inode, MAY_WRITE, NULL);
76 if (error)
77 return error;
78 } else
79 return -EPERM;
80
81 return gfs2_ea_set_i(ip, er);
82}
83
84static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
85{
86 struct inode *inode = ip->i_vnode;
87
88 if (S_ISREG(inode->i_mode) ||
89 (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
90 int error = permission(inode, MAY_WRITE, NULL);
91 if (error)
92 return error;
93 } else
94 return -EPERM;
95
96 return gfs2_ea_remove_i(ip, er);
97}
98
99static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
100{
101 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
102 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
103 !capable(CAP_SYS_ADMIN))
104 return -EPERM;
105
106 if (ip->i_sbd->sd_args.ar_posix_acl == 0 &&
107 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
108 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
109 return -EOPNOTSUPP;
110
111
112
113 return gfs2_ea_get_i(ip, er);
114}
115
116static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
117{
118 int remove = 0;
119 int error;
120
121 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
122 if (!(er->er_flags & GFS2_ERF_MODE)) {
123 er->er_mode = ip->i_di.di_mode;
124 er->er_flags |= GFS2_ERF_MODE;
125 }
126 error = gfs2_acl_validate_set(ip, 1, er,
127 &remove, &er->er_mode);
128 if (error)
129 return error;
130 error = gfs2_ea_set_i(ip, er);
131 if (error)
132 return error;
133 if (remove)
134 gfs2_ea_remove_i(ip, er);
135 return 0;
136
137 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
138 error = gfs2_acl_validate_set(ip, 0, er,
139 &remove, NULL);
140 if (error)
141 return error;
142 if (!remove)
143 error = gfs2_ea_set_i(ip, er);
144 else {
145 error = gfs2_ea_remove_i(ip, er);
146 if (error == -ENODATA)
147 error = 0;
148 }
149 return error;
150 }
151
152 return -EPERM;
153}
154
155static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
156{
157 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
158 int error = gfs2_acl_validate_remove(ip, 1);
159 if (error)
160 return error;
161
162 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
163 int error = gfs2_acl_validate_remove(ip, 0);
164 if (error)
165 return error;
166
167 } else
168 return -EPERM;
169
170 return gfs2_ea_remove_i(ip, er);
171}
172
173static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
174{
175 struct inode *inode = ip->i_vnode;
176 int error = permission(inode, MAY_READ, NULL);
177 if (error)
178 return error;
179
180 return gfs2_ea_get_i(ip, er);
181}
182
183static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
184{
185 struct inode *inode = ip->i_vnode;
186 int error = permission(inode, MAY_WRITE, NULL);
187 if (error)
188 return error;
189
190 return gfs2_ea_set_i(ip, er);
191}
192
193static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
194{
195 struct inode *inode = ip->i_vnode;
196 int error = permission(inode, MAY_WRITE, NULL);
197 if (error)
198 return error;
199
200 return gfs2_ea_remove_i(ip, er);
201}
202
203static struct gfs2_eattr_operations gfs2_user_eaops = {
204 .eo_get = user_eo_get,
205 .eo_set = user_eo_set,
206 .eo_remove = user_eo_remove,
207 .eo_name = "user",
208};
209
210struct gfs2_eattr_operations gfs2_system_eaops = {
211 .eo_get = system_eo_get,
212 .eo_set = system_eo_set,
213 .eo_remove = system_eo_remove,
214 .eo_name = "system",
215};
216
217struct gfs2_eattr_operations gfs2_security_eaops = {
218 .eo_get = security_eo_get,
219 .eo_set = security_eo_set,
220 .eo_remove = security_eo_remove,
221 .eo_name = "security",
222};
223
224struct gfs2_eattr_operations *gfs2_ea_ops[] = {
225 NULL,
226 &gfs2_user_eaops,
227 &gfs2_system_eaops,
228 &gfs2_security_eaops,
229};
230
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
new file mode 100644
index 000000000000..965a235c96e8
--- /dev/null
+++ b/fs/gfs2/eaops.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14
/*
 * Per-namespace extended attribute handlers.  Each callback receives the
 * inode and the request describing the attribute and returns an int
 * (negative errno on failure).
 */
struct gfs2_eattr_operations {
	/* read an attribute */
	int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
	/* create or replace an attribute */
	int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
	/* delete an attribute */
	int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
	/* namespace name without the trailing '.', e.g. "user" */
	char *eo_name;
};
21
22unsigned int gfs2_ea_name2type(const char *name, char **truncated_name);
23
24extern struct gfs2_eattr_operations gfs2_system_eaops;
25
26extern struct gfs2_eattr_operations gfs2_security_eaops;
27
28extern struct gfs2_eattr_operations *gfs2_ea_ops[];
29
30#endif /* __EAOPS_DOT_H__ */
31
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
new file mode 100644
index 000000000000..346601538ac7
--- /dev/null
+++ b/fs/gfs2/eattr.c
@@ -0,0 +1,1549 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "acl.h"
23#include "eaops.h"
24#include "eattr.h"
25#include "glock.h"
26#include "inode.h"
27#include "meta_io.h"
28#include "quota.h"
29#include "rgrp.h"
30#include "trans.h"
31#include "util.h"
32
33/**
34 * ea_calc_size - returns the acutal number of bytes the request will take up
35 * (not counting any unstuffed data blocks)
36 * @sdp:
37 * @er:
38 * @size:
39 *
40 * Returns: 1 if the EA should be stuffed
41 */
42
43static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er,
44 unsigned int *size)
45{
46 *size = GFS2_EAREQ_SIZE_STUFFED(er);
47 if (*size <= sdp->sd_jbsize)
48 return 1;
49
50 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er);
51
52 return 0;
53}
54
55static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er)
56{
57 unsigned int size;
58
59 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN)
60 return -ERANGE;
61
62 ea_calc_size(sdp, er, &size);
63
64 /* This can only happen with 512 byte blocks */
65 if (size > sdp->sd_jbsize)
66 return -ERANGE;
67
68 return 0;
69}
70
71typedef int (*ea_call_t) (struct gfs2_inode *ip,
72 struct buffer_head *bh,
73 struct gfs2_ea_header *ea,
74 struct gfs2_ea_header *prev,
75 void *private);
76
77static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
78 ea_call_t ea_call, void *data)
79{
80 struct gfs2_ea_header *ea, *prev = NULL;
81 int error = 0;
82
83 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_EA))
84 return -EIO;
85
86 for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) {
87 if (!GFS2_EA_REC_LEN(ea))
88 goto fail;
89 if (!(bh->b_data <= (char *)ea &&
90 (char *)GFS2_EA2NEXT(ea) <=
91 bh->b_data + bh->b_size))
92 goto fail;
93 if (!GFS2_EATYPE_VALID(ea->ea_type))
94 goto fail;
95
96 error = ea_call(ip, bh, ea, prev, data);
97 if (error)
98 return error;
99
100 if (GFS2_EA_IS_LAST(ea)) {
101 if ((char *)GFS2_EA2NEXT(ea) !=
102 bh->b_data + bh->b_size)
103 goto fail;
104 break;
105 }
106 }
107
108 return error;
109
110 fail:
111 gfs2_consist_inode(ip);
112 return -EIO;
113}
114
115static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data)
116{
117 struct buffer_head *bh, *eabh;
118 uint64_t *eablk, *end;
119 int error;
120
121 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
122 DIO_START | DIO_WAIT, &bh);
123 if (error)
124 return error;
125
126 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) {
127 error = ea_foreach_i(ip, bh, ea_call, data);
128 goto out;
129 }
130
131 if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) {
132 error = -EIO;
133 goto out;
134 }
135
136 eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs2_meta_header));
137 end = eablk + ip->i_sbd->sd_inptrs;
138
139 for (; eablk < end; eablk++) {
140 uint64_t bn;
141
142 if (!*eablk)
143 break;
144 bn = be64_to_cpu(*eablk);
145
146 error = gfs2_meta_read(ip->i_gl, bn, DIO_START | DIO_WAIT,
147 &eabh);
148 if (error)
149 break;
150 error = ea_foreach_i(ip, eabh, ea_call, data);
151 brelse(eabh);
152 if (error)
153 break;
154 }
155 out:
156 brelse(bh);
157
158 return error;
159}
160
161struct ea_find {
162 struct gfs2_ea_request *ef_er;
163 struct gfs2_ea_location *ef_el;
164};
165
166static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
167 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
168 void *private)
169{
170 struct ea_find *ef = private;
171 struct gfs2_ea_request *er = ef->ef_er;
172
173 if (ea->ea_type == GFS2_EATYPE_UNUSED)
174 return 0;
175
176 if (ea->ea_type == er->er_type) {
177 if (ea->ea_name_len == er->er_name_len &&
178 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
179 struct gfs2_ea_location *el = ef->ef_el;
180 get_bh(bh);
181 el->el_bh = bh;
182 el->el_ea = ea;
183 el->el_prev = prev;
184 return 1;
185 }
186 }
187
188#if 0
189 else if ((ip->i_di.di_flags & GFS2_DIF_EA_PACKED) &&
190 er->er_type == GFS2_EATYPE_SYS)
191 return 1;
192#endif
193
194 return 0;
195}
196
197int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
198 struct gfs2_ea_location *el)
199{
200 struct ea_find ef;
201 int error;
202
203 ef.ef_er = er;
204 ef.ef_el = el;
205
206 memset(el, 0, sizeof(struct gfs2_ea_location));
207
208 error = ea_foreach(ip, ea_find_i, &ef);
209 if (error > 0)
210 return 0;
211
212 return error;
213}
214
215/**
216 * ea_dealloc_unstuffed -
217 * @ip:
218 * @bh:
219 * @ea:
220 * @prev:
221 * @private:
222 *
223 * Take advantage of the fact that all unstuffed blocks are
224 * allocated from the same RG. But watch, this may not always
225 * be true.
226 *
227 * Returns: errno
228 */
229
230static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
231 struct gfs2_ea_header *ea,
232 struct gfs2_ea_header *prev, void *private)
233{
234 int *leave = private;
235 struct gfs2_sbd *sdp = ip->i_sbd;
236 struct gfs2_rgrpd *rgd;
237 struct gfs2_holder rg_gh;
238 struct buffer_head *dibh;
239 uint64_t *dataptrs, bn = 0;
240 uint64_t bstart = 0;
241 unsigned int blen = 0;
242 unsigned int blks = 0;
243 unsigned int x;
244 int error;
245
246 if (GFS2_EA_IS_STUFFED(ea))
247 return 0;
248
249 dataptrs = GFS2_EA2DATAPTRS(ea);
250 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++)
251 if (*dataptrs) {
252 blks++;
253 bn = be64_to_cpu(*dataptrs);
254 }
255 if (!blks)
256 return 0;
257
258 rgd = gfs2_blk2rgrpd(sdp, bn);
259 if (!rgd) {
260 gfs2_consist_inode(ip);
261 return -EIO;
262 }
263
264 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
265 if (error)
266 return error;
267
268 error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length +
269 RES_DINODE + RES_EATTR + RES_STATFS +
270 RES_QUOTA, blks);
271 if (error)
272 goto out_gunlock;
273
274 gfs2_trans_add_bh(ip->i_gl, bh, 1);
275
276 dataptrs = GFS2_EA2DATAPTRS(ea);
277 for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
278 if (!*dataptrs)
279 break;
280 bn = be64_to_cpu(*dataptrs);
281
282 if (bstart + blen == bn)
283 blen++;
284 else {
285 if (bstart)
286 gfs2_free_meta(ip, bstart, blen);
287 bstart = bn;
288 blen = 1;
289 }
290
291 *dataptrs = 0;
292 if (!ip->i_di.di_blocks)
293 gfs2_consist_inode(ip);
294 ip->i_di.di_blocks--;
295 }
296 if (bstart)
297 gfs2_free_meta(ip, bstart, blen);
298
299 if (prev && !leave) {
300 uint32_t len;
301
302 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
303 prev->ea_rec_len = cpu_to_be32(len);
304
305 if (GFS2_EA_IS_LAST(ea))
306 prev->ea_flags |= GFS2_EAFLAG_LAST;
307 } else {
308 ea->ea_type = GFS2_EATYPE_UNUSED;
309 ea->ea_num_ptrs = 0;
310 }
311
312 error = gfs2_meta_inode_buffer(ip, &dibh);
313 if (!error) {
314 ip->i_di.di_ctime = get_seconds();
315 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
316 gfs2_dinode_out(&ip->i_di, dibh->b_data);
317 brelse(dibh);
318 }
319
320 gfs2_trans_end(sdp);
321
322 out_gunlock:
323 gfs2_glock_dq_uninit(&rg_gh);
324
325 return error;
326}
327
328static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
329 struct gfs2_ea_header *ea,
330 struct gfs2_ea_header *prev, int leave)
331{
332 struct gfs2_alloc *al;
333 int error;
334
335 al = gfs2_alloc_get(ip);
336
337 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
338 if (error)
339 goto out_alloc;
340
341 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
342 if (error)
343 goto out_quota;
344
345 error = ea_dealloc_unstuffed(ip,
346 bh, ea, prev,
347 (leave) ? &error : NULL);
348
349 gfs2_glock_dq_uninit(&al->al_ri_gh);
350
351 out_quota:
352 gfs2_quota_unhold(ip);
353
354 out_alloc:
355 gfs2_alloc_put(ip);
356
357 return error;
358}
359
360struct ea_list {
361 struct gfs2_ea_request *ei_er;
362 unsigned int ei_size;
363};
364
365static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
366 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
367 void *private)
368{
369 struct ea_list *ei = private;
370 struct gfs2_ea_request *er = ei->ei_er;
371 unsigned int ea_size = gfs2_ea_strlen(ea);
372
373 if (ea->ea_type == GFS2_EATYPE_UNUSED)
374 return 0;
375
376 if (er->er_data_len) {
377 char *prefix;
378 unsigned int l;
379 char c = 0;
380
381 if (ei->ei_size + ea_size > er->er_data_len)
382 return -ERANGE;
383
384 switch (ea->ea_type) {
385 case GFS2_EATYPE_USR:
386 prefix = "user.";
387 l = 5;
388 break;
389 case GFS2_EATYPE_SYS:
390 prefix = "system.";
391 l = 7;
392 break;
393 case GFS2_EATYPE_SECURITY:
394 prefix = "security.";
395 l = 9;
396 break;
397 default:
398 /* FIXME: Needs looking at again */
399 break;
400 }
401
402 memcpy(er->er_data + ei->ei_size, prefix, l);
403 memcpy(er->er_data + ei->ei_size + l, GFS2_EA2NAME(ea),
404 ea->ea_name_len);
405 memcpy(er->er_data + ei->ei_size + ea_size - 1, &c, 1);
406 }
407
408 ei->ei_size += ea_size;
409
410 return 0;
411}
412
413/**
414 * gfs2_ea_list -
415 * @ip:
416 * @er:
417 *
418 * Returns: actual size of data on success, -errno on error
419 */
420
421int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
422{
423 struct gfs2_holder i_gh;
424 int error;
425
426 if (!er->er_data || !er->er_data_len) {
427 er->er_data = NULL;
428 er->er_data_len = 0;
429 }
430
431 error = gfs2_glock_nq_init(ip->i_gl,
432 LM_ST_SHARED, LM_FLAG_ANY,
433 &i_gh);
434 if (error)
435 return error;
436
437 if (ip->i_di.di_eattr) {
438 struct ea_list ei = { .ei_er = er, .ei_size = 0 };
439
440 error = ea_foreach(ip, ea_list_i, &ei);
441 if (!error)
442 error = ei.ei_size;
443 }
444
445 gfs2_glock_dq_uninit(&i_gh);
446
447 return error;
448}
449
450/**
451 * ea_get_unstuffed - actually copies the unstuffed data into the
452 * request buffer
453 * @ip:
454 * @ea:
455 * @data:
456 *
457 * Returns: errno
458 */
459
460static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
461 char *data)
462{
463 struct gfs2_sbd *sdp = ip->i_sbd;
464 struct buffer_head **bh;
465 unsigned int amount = GFS2_EA_DATA_LEN(ea);
466 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
467 uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
468 unsigned int x;
469 int error = 0;
470
471 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
472 if (!bh)
473 return -ENOMEM;
474
475 for (x = 0; x < nptrs; x++) {
476 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
477 DIO_START, bh + x);
478 if (error) {
479 while (x--)
480 brelse(bh[x]);
481 goto out;
482 }
483 dataptrs++;
484 }
485
486 for (x = 0; x < nptrs; x++) {
487 error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
488 if (error) {
489 for (; x < nptrs; x++)
490 brelse(bh[x]);
491 goto out;
492 }
493 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
494 for (; x < nptrs; x++)
495 brelse(bh[x]);
496 error = -EIO;
497 goto out;
498 }
499
500 memcpy(data,
501 bh[x]->b_data + sizeof(struct gfs2_meta_header),
502 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
503
504 amount -= sdp->sd_jbsize;
505 data += sdp->sd_jbsize;
506
507 brelse(bh[x]);
508 }
509
510 out:
511 kfree(bh);
512
513 return error;
514}
515
516int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
517 char *data)
518{
519 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
520 memcpy(data,
521 GFS2_EA2DATA(el->el_ea),
522 GFS2_EA_DATA_LEN(el->el_ea));
523 return 0;
524 } else
525 return ea_get_unstuffed(ip, el->el_ea, data);
526}
527
528/**
529 * gfs2_ea_get_i -
530 * @ip:
531 * @er:
532 *
533 * Returns: actual size of data on success, -errno on error
534 */
535
536int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
537{
538 struct gfs2_ea_location el;
539 int error;
540
541 if (!ip->i_di.di_eattr)
542 return -ENODATA;
543
544 error = gfs2_ea_find(ip, er, &el);
545 if (error)
546 return error;
547 if (!el.el_ea)
548 return -ENODATA;
549
550 if (er->er_data_len) {
551 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len)
552 error = -ERANGE;
553 else
554 error = gfs2_ea_get_copy(ip, &el, er->er_data);
555 }
556 if (!error)
557 error = GFS2_EA_DATA_LEN(el.el_ea);
558
559 brelse(el.el_bh);
560
561 return error;
562}
563
564/**
565 * gfs2_ea_get -
566 * @ip:
567 * @er:
568 *
569 * Returns: actual size of data on success, -errno on error
570 */
571
572int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
573{
574 struct gfs2_holder i_gh;
575 int error;
576
577 if (!er->er_name_len ||
578 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
579 return -EINVAL;
580 if (!er->er_data || !er->er_data_len) {
581 er->er_data = NULL;
582 er->er_data_len = 0;
583 }
584
585 error = gfs2_glock_nq_init(ip->i_gl,
586 LM_ST_SHARED, LM_FLAG_ANY,
587 &i_gh);
588 if (error)
589 return error;
590
591 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
592
593 gfs2_glock_dq_uninit(&i_gh);
594
595 return error;
596}
597
598/**
599 * ea_alloc_blk - allocates a new block for extended attributes.
600 * @ip: A pointer to the inode that's getting extended attributes
601 * @bhp:
602 *
603 * Returns: errno
604 */
605
606static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
607{
608 struct gfs2_sbd *sdp = ip->i_sbd;
609 struct gfs2_ea_header *ea;
610 uint64_t block;
611
612 block = gfs2_alloc_meta(ip);
613
614 *bhp = gfs2_meta_new(ip->i_gl, block);
615 gfs2_trans_add_bh(ip->i_gl, *bhp, 1);
616 gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA);
617 gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header));
618
619 ea = GFS2_EA_BH2FIRST(*bhp);
620 ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize);
621 ea->ea_type = GFS2_EATYPE_UNUSED;
622 ea->ea_flags = GFS2_EAFLAG_LAST;
623 ea->ea_num_ptrs = 0;
624
625 ip->i_di.di_blocks++;
626
627 return 0;
628}
629
630/**
631 * ea_write - writes the request info to an ea, creating new blocks if
632 * necessary
633 * @ip: inode that is being modified
634 * @ea: the location of the new ea in a block
635 * @er: the write request
636 *
637 * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags
638 *
639 * returns : errno
640 */
641
642static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
643 struct gfs2_ea_request *er)
644{
645 struct gfs2_sbd *sdp = ip->i_sbd;
646
647 ea->ea_data_len = cpu_to_be32(er->er_data_len);
648 ea->ea_name_len = er->er_name_len;
649 ea->ea_type = er->er_type;
650 ea->__pad = 0;
651
652 memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len);
653
654 if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
655 ea->ea_num_ptrs = 0;
656 memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len);
657 } else {
658 uint64_t *dataptr = GFS2_EA2DATAPTRS(ea);
659 const char *data = er->er_data;
660 unsigned int data_len = er->er_data_len;
661 unsigned int copy;
662 unsigned int x;
663
664 ea->ea_num_ptrs = DIV_ROUND_UP(er->er_data_len, sdp->sd_jbsize);
665 for (x = 0; x < ea->ea_num_ptrs; x++) {
666 struct buffer_head *bh;
667 uint64_t block;
668 int mh_size = sizeof(struct gfs2_meta_header);
669
670 block = gfs2_alloc_meta(ip);
671
672 bh = gfs2_meta_new(ip->i_gl, block);
673 gfs2_trans_add_bh(ip->i_gl, bh, 1);
674 gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED);
675
676 ip->i_di.di_blocks++;
677
678 copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize :
679 data_len;
680 memcpy(bh->b_data + mh_size, data, copy);
681 if (copy < sdp->sd_jbsize)
682 memset(bh->b_data + mh_size + copy, 0,
683 sdp->sd_jbsize - copy);
684
685 *dataptr++ = cpu_to_be64((uint64_t)bh->b_blocknr);
686 data += copy;
687 data_len -= copy;
688
689 brelse(bh);
690 }
691
692 gfs2_assert_withdraw(sdp, !data_len);
693 }
694
695 return 0;
696}
697
/*
 * Callback type invoked by ea_alloc_skeleton() after its transaction has
 * been started.  @private is handed through unchanged from the caller of
 * ea_alloc_skeleton(); a non-zero return value aborts the update.
 */
typedef int (*ea_skeleton_call_t)(struct gfs2_inode *ip,
				  struct gfs2_ea_request *er, void *private);
701
702static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
703 unsigned int blks,
704 ea_skeleton_call_t skeleton_call,
705 void *private)
706{
707 struct gfs2_alloc *al;
708 struct buffer_head *dibh;
709 int error;
710
711 al = gfs2_alloc_get(ip);
712
713 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
714 if (error)
715 goto out;
716
717 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
718 if (error)
719 goto out_gunlock_q;
720
721 al->al_requested = blks;
722
723 error = gfs2_inplace_reserve(ip);
724 if (error)
725 goto out_gunlock_q;
726
727 error = gfs2_trans_begin(ip->i_sbd,
728 blks + al->al_rgd->rd_ri.ri_length +
729 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
730 if (error)
731 goto out_ipres;
732
733 error = skeleton_call(ip, er, private);
734 if (error)
735 goto out_end_trans;
736
737 error = gfs2_meta_inode_buffer(ip, &dibh);
738 if (!error) {
739 if (er->er_flags & GFS2_ERF_MODE) {
740 gfs2_assert_withdraw(ip->i_sbd,
741 (ip->i_di.di_mode & S_IFMT) ==
742 (er->er_mode & S_IFMT));
743 ip->i_di.di_mode = er->er_mode;
744 }
745 ip->i_di.di_ctime = get_seconds();
746 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
747 gfs2_dinode_out(&ip->i_di, dibh->b_data);
748 brelse(dibh);
749 }
750
751 out_end_trans:
752 gfs2_trans_end(ip->i_sbd);
753
754 out_ipres:
755 gfs2_inplace_release(ip);
756
757 out_gunlock_q:
758 gfs2_quota_unlock(ip);
759
760 out:
761 gfs2_alloc_put(ip);
762
763 return error;
764}
765
766static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
767 void *private)
768{
769 struct buffer_head *bh;
770 int error;
771
772 error = ea_alloc_blk(ip, &bh);
773 if (error)
774 return error;
775
776 ip->i_di.di_eattr = bh->b_blocknr;
777 error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er);
778
779 brelse(bh);
780
781 return error;
782}
783
784/**
785 * ea_init - initializes a new eattr block
786 * @ip:
787 * @er:
788 *
789 * Returns: errno
790 */
791
792static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er)
793{
794 unsigned int jbsize = ip->i_sbd->sd_jbsize;
795 unsigned int blks = 1;
796
797 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize)
798 blks += DIV_ROUND_UP(er->er_data_len, jbsize);
799
800 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL);
801}
802
803static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
804{
805 uint32_t ea_size = GFS2_EA_SIZE(ea);
806 struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea +
807 ea_size);
808 uint32_t new_size = GFS2_EA_REC_LEN(ea) - ea_size;
809 int last = ea->ea_flags & GFS2_EAFLAG_LAST;
810
811 ea->ea_rec_len = cpu_to_be32(ea_size);
812 ea->ea_flags ^= last;
813
814 new->ea_rec_len = cpu_to_be32(new_size);
815 new->ea_flags = last;
816
817 return new;
818}
819
820static void ea_set_remove_stuffed(struct gfs2_inode *ip,
821 struct gfs2_ea_location *el)
822{
823 struct gfs2_ea_header *ea = el->el_ea;
824 struct gfs2_ea_header *prev = el->el_prev;
825 uint32_t len;
826
827 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
828
829 if (!prev || !GFS2_EA_IS_STUFFED(ea)) {
830 ea->ea_type = GFS2_EATYPE_UNUSED;
831 return;
832 } else if (GFS2_EA2NEXT(prev) != ea) {
833 prev = GFS2_EA2NEXT(prev);
834 gfs2_assert_withdraw(ip->i_sbd, GFS2_EA2NEXT(prev) == ea);
835 }
836
837 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
838 prev->ea_rec_len = cpu_to_be32(len);
839
840 if (GFS2_EA_IS_LAST(ea))
841 prev->ea_flags |= GFS2_EAFLAG_LAST;
842}
843
/* State shared between ea_set_i(), ea_set_simple() and its helpers. */
struct ea_set {
	/* 1: carve the new ea out of an in-use record's slack; 0: reuse an unused record */
	int ea_split;

	struct gfs2_ea_request *es_er;	/* the write request */
	struct gfs2_ea_location *es_el;	/* old copy to remove afterwards, or NULL */

	/* Filled in by ea_set_simple() for the ea_set_simple_alloc() callback */
	struct buffer_head *es_bh;
	struct gfs2_ea_header *es_ea;
};
853
854static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
855 struct gfs2_ea_header *ea, struct ea_set *es)
856{
857 struct gfs2_ea_request *er = es->es_er;
858 struct buffer_head *dibh;
859 int error;
860
861 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + 2 * RES_EATTR, 0);
862 if (error)
863 return error;
864
865 gfs2_trans_add_bh(ip->i_gl, bh, 1);
866
867 if (es->ea_split)
868 ea = ea_split_ea(ea);
869
870 ea_write(ip, ea, er);
871
872 if (es->es_el)
873 ea_set_remove_stuffed(ip, es->es_el);
874
875 error = gfs2_meta_inode_buffer(ip, &dibh);
876 if (error)
877 goto out;
878
879 if (er->er_flags & GFS2_ERF_MODE) {
880 gfs2_assert_withdraw(ip->i_sbd,
881 (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT));
882 ip->i_di.di_mode = er->er_mode;
883 }
884 ip->i_di.di_ctime = get_seconds();
885 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
886 gfs2_dinode_out(&ip->i_di, dibh->b_data);
887 brelse(dibh);
888 out:
889 gfs2_trans_end(ip->i_sbd);
890
891 return error;
892}
893
894static int ea_set_simple_alloc(struct gfs2_inode *ip,
895 struct gfs2_ea_request *er, void *private)
896{
897 struct ea_set *es = private;
898 struct gfs2_ea_header *ea = es->es_ea;
899 int error;
900
901 gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1);
902
903 if (es->ea_split)
904 ea = ea_split_ea(ea);
905
906 error = ea_write(ip, ea, er);
907 if (error)
908 return error;
909
910 if (es->es_el)
911 ea_set_remove_stuffed(ip, es->es_el);
912
913 return 0;
914}
915
916static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
917 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
918 void *private)
919{
920 struct ea_set *es = private;
921 unsigned int size;
922 int stuffed;
923 int error;
924
925 stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size);
926
927 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
928 if (GFS2_EA_REC_LEN(ea) < size)
929 return 0;
930 if (!GFS2_EA_IS_STUFFED(ea)) {
931 error = ea_remove_unstuffed(ip, bh, ea, prev, 1);
932 if (error)
933 return error;
934 }
935 es->ea_split = 0;
936 } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size)
937 es->ea_split = 1;
938 else
939 return 0;
940
941 if (stuffed) {
942 error = ea_set_simple_noalloc(ip, bh, ea, es);
943 if (error)
944 return error;
945 } else {
946 unsigned int blks;
947
948 es->es_bh = bh;
949 es->es_ea = ea;
950 blks = 2 + DIV_ROUND_UP(es->es_er->er_data_len,
951 ip->i_sbd->sd_jbsize);
952
953 error = ea_alloc_skeleton(ip, es->es_er, blks,
954 ea_set_simple_alloc, es);
955 if (error)
956 return error;
957 }
958
959 return 1;
960}
961
962static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
963 void *private)
964{
965 struct gfs2_sbd *sdp = ip->i_sbd;
966 struct buffer_head *indbh, *newbh;
967 uint64_t *eablk;
968 int error;
969 int mh_size = sizeof(struct gfs2_meta_header);
970
971 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
972 uint64_t *end;
973
974 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
975 DIO_START | DIO_WAIT, &indbh);
976 if (error)
977 return error;
978
979 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
980 error = -EIO;
981 goto out;
982 }
983
984 eablk = (uint64_t *)(indbh->b_data + mh_size);
985 end = eablk + sdp->sd_inptrs;
986
987 for (; eablk < end; eablk++)
988 if (!*eablk)
989 break;
990
991 if (eablk == end) {
992 error = -ENOSPC;
993 goto out;
994 }
995
996 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
997 } else {
998 uint64_t blk;
999
1000 blk = gfs2_alloc_meta(ip);
1001
1002 indbh = gfs2_meta_new(ip->i_gl, blk);
1003 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1004 gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
1005 gfs2_buffer_clear_tail(indbh, mh_size);
1006
1007 eablk = (uint64_t *)(indbh->b_data + mh_size);
1008 *eablk = cpu_to_be64(ip->i_di.di_eattr);
1009 ip->i_di.di_eattr = blk;
1010 ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT;
1011 ip->i_di.di_blocks++;
1012
1013 eablk++;
1014 }
1015
1016 error = ea_alloc_blk(ip, &newbh);
1017 if (error)
1018 goto out;
1019
1020 *eablk = cpu_to_be64((uint64_t)newbh->b_blocknr);
1021 error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er);
1022 brelse(newbh);
1023 if (error)
1024 goto out;
1025
1026 if (private)
1027 ea_set_remove_stuffed(ip, (struct gfs2_ea_location *)private);
1028
1029 out:
1030 brelse(indbh);
1031
1032 return error;
1033}
1034
1035static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1036 struct gfs2_ea_location *el)
1037{
1038 struct ea_set es;
1039 unsigned int blks = 2;
1040 int error;
1041
1042 memset(&es, 0, sizeof(struct ea_set));
1043 es.es_er = er;
1044 es.es_el = el;
1045
1046 error = ea_foreach(ip, ea_set_simple, &es);
1047 if (error > 0)
1048 return 0;
1049 if (error)
1050 return error;
1051
1052 if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT))
1053 blks++;
1054 if (GFS2_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize)
1055 blks += DIV_ROUND_UP(er->er_data_len, ip->i_sbd->sd_jbsize);
1056
1057 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
1058}
1059
1060static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1061 struct gfs2_ea_location *el)
1062{
1063 if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) {
1064 el->el_prev = GFS2_EA2NEXT(el->el_prev);
1065 gfs2_assert_withdraw(ip->i_sbd,
1066 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1067 }
1068
1069 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0);
1070}
1071
1072int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1073{
1074 struct gfs2_ea_location el;
1075 int error;
1076
1077 if (!ip->i_di.di_eattr) {
1078 if (er->er_flags & XATTR_REPLACE)
1079 return -ENODATA;
1080 return ea_init(ip, er);
1081 }
1082
1083 error = gfs2_ea_find(ip, er, &el);
1084 if (error)
1085 return error;
1086
1087 if (el.el_ea) {
1088 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) {
1089 brelse(el.el_bh);
1090 return -EPERM;
1091 }
1092
1093 error = -EEXIST;
1094 if (!(er->er_flags & XATTR_CREATE)) {
1095 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1096 error = ea_set_i(ip, er, &el);
1097 if (!error && unstuffed)
1098 ea_set_remove_unstuffed(ip, &el);
1099 }
1100
1101 brelse(el.el_bh);
1102 } else {
1103 error = -ENODATA;
1104 if (!(er->er_flags & XATTR_REPLACE))
1105 error = ea_set_i(ip, er, NULL);
1106 }
1107
1108 return error;
1109}
1110
1111int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1112{
1113 struct gfs2_holder i_gh;
1114 int error;
1115
1116 if (!er->er_name_len ||
1117 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1118 return -EINVAL;
1119 if (!er->er_data || !er->er_data_len) {
1120 er->er_data = NULL;
1121 er->er_data_len = 0;
1122 }
1123 error = ea_check_size(ip->i_sbd, er);
1124 if (error)
1125 return error;
1126
1127 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1128 if (error)
1129 return error;
1130
1131 if (IS_IMMUTABLE(ip->i_vnode))
1132 error = -EPERM;
1133 else
1134 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1135
1136 gfs2_glock_dq_uninit(&i_gh);
1137
1138 return error;
1139}
1140
1141static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1142{
1143 struct gfs2_ea_header *ea = el->el_ea;
1144 struct gfs2_ea_header *prev = el->el_prev;
1145 struct buffer_head *dibh;
1146 int error;
1147
1148 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
1149 if (error)
1150 return error;
1151
1152 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1153
1154 if (prev) {
1155 uint32_t len;
1156
1157 len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea);
1158 prev->ea_rec_len = cpu_to_be32(len);
1159
1160 if (GFS2_EA_IS_LAST(ea))
1161 prev->ea_flags |= GFS2_EAFLAG_LAST;
1162 } else
1163 ea->ea_type = GFS2_EATYPE_UNUSED;
1164
1165 error = gfs2_meta_inode_buffer(ip, &dibh);
1166 if (!error) {
1167 ip->i_di.di_ctime = get_seconds();
1168 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1169 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1170 brelse(dibh);
1171 }
1172
1173 gfs2_trans_end(ip->i_sbd);
1174
1175 return error;
1176}
1177
1178int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1179{
1180 struct gfs2_ea_location el;
1181 int error;
1182
1183 if (!ip->i_di.di_eattr)
1184 return -ENODATA;
1185
1186 error = gfs2_ea_find(ip, er, &el);
1187 if (error)
1188 return error;
1189 if (!el.el_ea)
1190 return -ENODATA;
1191
1192 if (GFS2_EA_IS_STUFFED(el.el_ea))
1193 error = ea_remove_stuffed(ip, &el);
1194 else
1195 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev,
1196 0);
1197
1198 brelse(el.el_bh);
1199
1200 return error;
1201}
1202
1203/**
1204 * gfs2_ea_remove - sets (or creates or replaces) an extended attribute
1205 * @ip: pointer to the inode of the target file
1206 * @er: request information
1207 *
1208 * Returns: errno
1209 */
1210
1211int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1212{
1213 struct gfs2_holder i_gh;
1214 int error;
1215
1216 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1217 return -EINVAL;
1218
1219 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1220 if (error)
1221 return error;
1222
1223 if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1224 error = -EPERM;
1225 else
1226 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er);
1227
1228 gfs2_glock_dq_uninit(&i_gh);
1229
1230 return error;
1231}
1232
1233static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
1234 struct gfs2_ea_header *ea, char *data)
1235{
1236 struct gfs2_sbd *sdp = ip->i_sbd;
1237 struct buffer_head **bh;
1238 unsigned int amount = GFS2_EA_DATA_LEN(ea);
1239 unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
1240 uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea);
1241 unsigned int x;
1242 int error;
1243
1244 bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL);
1245 if (!bh)
1246 return -ENOMEM;
1247
1248 error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
1249 if (error)
1250 goto out;
1251
1252 for (x = 0; x < nptrs; x++) {
1253 error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs),
1254 DIO_START, bh + x);
1255 if (error) {
1256 while (x--)
1257 brelse(bh[x]);
1258 goto fail;
1259 }
1260 dataptrs++;
1261 }
1262
1263 for (x = 0; x < nptrs; x++) {
1264 error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT);
1265 if (error) {
1266 for (; x < nptrs; x++)
1267 brelse(bh[x]);
1268 goto fail;
1269 }
1270 if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
1271 for (; x < nptrs; x++)
1272 brelse(bh[x]);
1273 error = -EIO;
1274 goto fail;
1275 }
1276
1277 gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
1278
1279 memcpy(bh[x]->b_data + sizeof(struct gfs2_meta_header),
1280 data,
1281 (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
1282
1283 amount -= sdp->sd_jbsize;
1284 data += sdp->sd_jbsize;
1285
1286 brelse(bh[x]);
1287 }
1288
1289 out:
1290 kfree(bh);
1291
1292 return error;
1293
1294 fail:
1295 gfs2_trans_end(sdp);
1296 kfree(bh);
1297
1298 return error;
1299}
1300
1301int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
1302 struct iattr *attr, char *data)
1303{
1304 struct buffer_head *dibh;
1305 int error;
1306
1307 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
1308 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0);
1309 if (error)
1310 return error;
1311
1312 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1);
1313 memcpy(GFS2_EA2DATA(el->el_ea),
1314 data,
1315 GFS2_EA_DATA_LEN(el->el_ea));
1316 } else
1317 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
1318
1319 if (error)
1320 return error;
1321
1322 error = gfs2_meta_inode_buffer(ip, &dibh);
1323 if (!error) {
1324 error = inode_setattr(ip->i_vnode, attr);
1325 gfs2_assert_warn(ip->i_sbd, !error);
1326 gfs2_inode_attr_out(ip);
1327 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1328 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1329 brelse(dibh);
1330 }
1331
1332 gfs2_trans_end(ip->i_sbd);
1333
1334 return error;
1335}
1336
1337static int ea_dealloc_indirect(struct gfs2_inode *ip)
1338{
1339 struct gfs2_sbd *sdp = ip->i_sbd;
1340 struct gfs2_rgrp_list rlist;
1341 struct buffer_head *indbh, *dibh;
1342 uint64_t *eablk, *end;
1343 unsigned int rg_blocks = 0;
1344 uint64_t bstart = 0;
1345 unsigned int blen = 0;
1346 unsigned int blks = 0;
1347 unsigned int x;
1348 int error;
1349
1350 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1351
1352 error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr,
1353 DIO_START | DIO_WAIT, &indbh);
1354 if (error)
1355 return error;
1356
1357 if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) {
1358 error = -EIO;
1359 goto out;
1360 }
1361
1362 eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1363 end = eablk + sdp->sd_inptrs;
1364
1365 for (; eablk < end; eablk++) {
1366 uint64_t bn;
1367
1368 if (!*eablk)
1369 break;
1370 bn = be64_to_cpu(*eablk);
1371
1372 if (bstart + blen == bn)
1373 blen++;
1374 else {
1375 if (bstart)
1376 gfs2_rlist_add(sdp, &rlist, bstart);
1377 bstart = bn;
1378 blen = 1;
1379 }
1380 blks++;
1381 }
1382 if (bstart)
1383 gfs2_rlist_add(sdp, &rlist, bstart);
1384 else
1385 goto out;
1386
1387 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1388
1389 for (x = 0; x < rlist.rl_rgrps; x++) {
1390 struct gfs2_rgrpd *rgd;
1391 rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1392 rg_blocks += rgd->rd_ri.ri_length;
1393 }
1394
1395 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1396 if (error)
1397 goto out_rlist_free;
1398
1399 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
1400 RES_INDIRECT + RES_STATFS +
1401 RES_QUOTA, blks);
1402 if (error)
1403 goto out_gunlock;
1404
1405 gfs2_trans_add_bh(ip->i_gl, indbh, 1);
1406
1407 eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs2_meta_header));
1408 bstart = 0;
1409 blen = 0;
1410
1411 for (; eablk < end; eablk++) {
1412 uint64_t bn;
1413
1414 if (!*eablk)
1415 break;
1416 bn = be64_to_cpu(*eablk);
1417
1418 if (bstart + blen == bn)
1419 blen++;
1420 else {
1421 if (bstart)
1422 gfs2_free_meta(ip, bstart, blen);
1423 bstart = bn;
1424 blen = 1;
1425 }
1426
1427 *eablk = 0;
1428 if (!ip->i_di.di_blocks)
1429 gfs2_consist_inode(ip);
1430 ip->i_di.di_blocks--;
1431 }
1432 if (bstart)
1433 gfs2_free_meta(ip, bstart, blen);
1434
1435 ip->i_di.di_flags &= ~GFS2_DIF_EA_INDIRECT;
1436
1437 error = gfs2_meta_inode_buffer(ip, &dibh);
1438 if (!error) {
1439 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1440 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1441 brelse(dibh);
1442 }
1443
1444 gfs2_trans_end(sdp);
1445
1446 out_gunlock:
1447 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1448
1449 out_rlist_free:
1450 gfs2_rlist_free(&rlist);
1451
1452 out:
1453 brelse(indbh);
1454
1455 return error;
1456}
1457
1458static int ea_dealloc_block(struct gfs2_inode *ip)
1459{
1460 struct gfs2_sbd *sdp = ip->i_sbd;
1461 struct gfs2_alloc *al = &ip->i_alloc;
1462 struct gfs2_rgrpd *rgd;
1463 struct buffer_head *dibh;
1464 int error;
1465
1466 rgd = gfs2_blk2rgrpd(sdp, ip->i_di.di_eattr);
1467 if (!rgd) {
1468 gfs2_consist_inode(ip);
1469 return -EIO;
1470 }
1471
1472 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1473 &al->al_rgd_gh);
1474 if (error)
1475 return error;
1476
1477 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_DINODE +
1478 RES_STATFS + RES_QUOTA, 1);
1479 if (error)
1480 goto out_gunlock;
1481
1482 gfs2_free_meta(ip, ip->i_di.di_eattr, 1);
1483
1484 ip->i_di.di_eattr = 0;
1485 if (!ip->i_di.di_blocks)
1486 gfs2_consist_inode(ip);
1487 ip->i_di.di_blocks--;
1488
1489 error = gfs2_meta_inode_buffer(ip, &dibh);
1490 if (!error) {
1491 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1492 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1493 brelse(dibh);
1494 }
1495
1496 gfs2_trans_end(sdp);
1497
1498 out_gunlock:
1499 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1500
1501 return error;
1502}
1503
1504/**
1505 * gfs2_ea_dealloc - deallocate the extended attribute fork
1506 * @ip: the inode
1507 *
1508 * Returns: errno
1509 */
1510
1511int gfs2_ea_dealloc(struct gfs2_inode *ip)
1512{
1513 struct gfs2_alloc *al;
1514 int error;
1515
1516 al = gfs2_alloc_get(ip);
1517
1518 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1519 if (error)
1520 goto out_alloc;
1521
1522 error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh);
1523 if (error)
1524 goto out_quota;
1525
1526 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
1527 if (error)
1528 goto out_rindex;
1529
1530 if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) {
1531 error = ea_dealloc_indirect(ip);
1532 if (error)
1533 goto out_rindex;
1534 }
1535
1536 error = ea_dealloc_block(ip);
1537
1538 out_rindex:
1539 gfs2_glock_dq_uninit(&al->al_ri_gh);
1540
1541 out_quota:
1542 gfs2_quota_unhold(ip);
1543
1544 out_alloc:
1545 gfs2_alloc_put(ip);
1546
1547 return error;
1548}
1549
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/eattr.h
new file mode 100644
index 000000000000..ae199692e51d
--- /dev/null
+++ b/fs/gfs2/eattr.h
@@ -0,0 +1,97 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __EATTR_DOT_H__
11#define __EATTR_DOT_H__
12
/* Host-endian views of the big-endian on-disk length fields. */
#define GFS2_EA_REC_LEN(ea) (be32_to_cpu((ea)->ea_rec_len))
#define GFS2_EA_DATA_LEN(ea) (be32_to_cpu((ea)->ea_data_len))

/* A record without data pointers carries its data inline ("stuffed"). */
#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)

/* Bytes a record actually occupies: header + name + data or pointers, 8-aligned. */
#define GFS2_EA_SIZE(ea) \
	(ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
	       (GFS2_EA_IS_STUFFED(ea) ? \
			GFS2_EA_DATA_LEN(ea) : \
			(sizeof(uint64_t) * (ea)->ea_num_ptrs)), 8))

/* Record size a request needs when its data is stored inline. */
#define GFS2_EAREQ_SIZE_STUFFED(er) \
	(ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
	       (er)->er_data_len, 8))

/* Record size a request needs when its data lives in separate blocks. */
#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
	(ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
	       sizeof(uint64_t) * \
	       DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8))

/* The name starts right behind the header; inline data right behind the name. */
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)

/* The pointer array starts behind the name, padded to 8 bytes. */
#define GFS2_EA2DATAPTRS(ea) \
	((uint64_t *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))

/* The following record, found by skipping ea_rec_len bytes. */
#define GFS2_EA2NEXT(ea) \
	((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))

/* First record of an EA block, directly behind the meta header. */
#define GFS2_EA_BH2FIRST(bh) \
	((struct gfs2_ea_header *)((bh)->b_data + \
				   sizeof(struct gfs2_meta_header)))

/* er_flags bit: also store er_mode in the inode. */
#define GFS2_ERF_MODE 0x80000000
44
45struct gfs2_ea_request {
46 char *er_name;
47 char *er_data;
48 unsigned int er_name_len;
49 unsigned int er_data_len;
50 unsigned int er_type; /* GFS2_EATYPE_... */
51 int er_flags;
52 mode_t er_mode;
53};
54
/* Result of an attribute lookup (see gfs2_ea_find()). */
struct gfs2_ea_location {
	struct buffer_head *el_bh;	/* buffer containing el_ea */
	struct gfs2_ea_header *el_ea;	/* the record found, NULL if none */
	struct gfs2_ea_header *el_prev;	/* record in front of el_ea, may be NULL */
};
60
/* Per-type workers; they do not take the inode glock themselves. */
int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);

/* Entry points operating on a whole request. */
int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);

/* Frees the inode's whole extended attribute fork. */
int gfs2_ea_dealloc(struct gfs2_inode *ip);

/* Exported to acl.c */

int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er,
		 struct gfs2_ea_location *el);
int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		     char *data);
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
		      struct iattr *attr, char *data);
82
83static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
84{
85 switch (ea->ea_type) {
86 case GFS2_EATYPE_USR:
87 return (5 + (ea->ea_name_len + 1));
88 case GFS2_EATYPE_SYS:
89 return (7 + (ea->ea_name_len + 1));
90 case GFS2_EATYPE_SECURITY:
91 return (9 + (ea->ea_name_len + 1));
92 default:
93 return (0);
94 }
95}
96
97#endif /* __EATTR_DOT_H__ */
diff --git a/fs/gfs2/format.h b/fs/gfs2/format.h
new file mode 100644
index 000000000000..239f0c3553fc
--- /dev/null
+++ b/fs/gfs2/format.h
@@ -0,0 +1,21 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __FORMAT_DOT_H__
11#define __FORMAT_DOT_H__
12
/*
 * Tables of older format numbers.  Both currently hold only the value 0
 * (presumably a list terminator - confirm against the code that scans them).
 */
static const uint32_t gfs2_old_fs_formats[] = { 0 };

static const uint32_t gfs2_old_multihost_formats[] = { 0 };
20
21#endif /* __FORMAT_DOT_H__ */
diff --git a/fs/gfs2/gfs2.h b/fs/gfs2/gfs2.h
new file mode 100644
index 000000000000..6edbd551a4c0
--- /dev/null
+++ b/fs/gfs2/gfs2.h
@@ -0,0 +1,31 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GFS2_DOT_H__
11#define __GFS2_DOT_H__
12
/* Named values for boolean-style function arguments. */
enum { NO_CREATE = 0, CREATE = 1 };

enum { NO_WAIT = 0, WAIT = 1 };

enum { NO_FORCE = 0, FORCE = 1 };

#define GFS2_FAST_NAME_SIZE 8
29
30#endif /* __GFS2_DOT_H__ */
31
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
new file mode 100644
index 000000000000..0603a6de52c9
--- /dev/null
+++ b/fs/gfs2/glock.c
@@ -0,0 +1,2340 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/sort.h>
17#include <linux/jhash.h>
18#include <linux/kref.h>
19#include <linux/kallsyms.h>
20#include <linux/gfs2_ondisk.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "lm_interface.h"
25#include "incore.h"
26#include "glock.h"
27#include "glops.h"
28#include "inode.h"
29#include "lm.h"
30#include "lops.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "super.h"
34#include "util.h"
35
36/* Must be kept in sync with the beginning of struct gfs2_glock */
/*
 * Minimal stand-in carrying only a list linkage and a flags word.
 * search_bucket() skips any bucket entry whose gl_flags has GLF_PLUG set.
 * NOTE(review): presumably these are placed on hash bucket lists as
 * position markers -- the code doing so is not in this part of the file.
 */
37struct glock_plug {
38 struct list_head gl_list;
39 unsigned long gl_flags;
40};
41
/*
 * A holder bundled with a work item.  rq_greedy() recovers the enclosing
 * structure from its gr_gh member with container_of() and frees it.
 */
42struct greedy {
43 struct gfs2_holder gr_gh;
44 struct work_struct gr_work;
45};
46
/* Callback type: a function that is handed a single glock. */
47typedef void (*glock_examiner) (struct gfs2_glock * gl);
48
/* Forward declarations; dump_glock() is used by gfs2_glock_nq(). */
49static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
50static int dump_glock(struct gfs2_glock *gl);
51
52/**
53 * relaxed_state_ok - is a requested lock compatible with the current lock mode?
54 * @actual: the current state of the lock
55 * @requested: the lock state that was requested by the caller
56 * @flags: the modifier flags passed in by the caller
57 *
58 * Returns: 1 if the locks are compatible, 0 otherwise
59 */
60
61static inline int relaxed_state_ok(unsigned int actual, unsigned requested,
62 int flags)
63{
64 if (actual == requested)
65 return 1;
66
67 if (flags & GL_EXACT)
68 return 0;
69
70 if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
71 return 1;
72
73 if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
74 return 1;
75
76 return 0;
77}
78
79/**
80 * gl_hash() - Turn glock number into hash bucket number
81 * @lock: The glock number
82 *
83 * Returns: The number of the corresponding hash bucket
84 */
85
86static unsigned int gl_hash(struct lm_lockname *name)
87{
88 unsigned int h;
89
90 h = jhash(&name->ln_number, sizeof(uint64_t), 0);
91 h = jhash(&name->ln_type, sizeof(unsigned int), h);
92 h &= GFS2_GL_HASH_MASK;
93
94 return h;
95}
96
97/**
98 * glock_free() - Perform a few checks and then release struct gfs2_glock
99 * @gl: The glock to release
100 *
101 * Also calls lock module to release its internal structure for this glock.
102 *
103 */
104
105static void glock_free(struct gfs2_glock *gl)
106{
107 struct gfs2_sbd *sdp = gl->gl_sbd;
108 struct inode *aspace = gl->gl_aspace;
109
110 gfs2_lm_put_lock(sdp, gl->gl_lock);
111
112 if (aspace)
113 gfs2_aspace_put(aspace);
114
115 kmem_cache_free(gfs2_glock_cachep, gl);
116}
117
118/**
119 * gfs2_glock_hold() - increment reference count on glock
120 * @gl: The glock to hold
121 *
122 */
123
124void gfs2_glock_hold(struct gfs2_glock *gl)
125{
126 kref_get(&gl->gl_ref);
127}
128
129/* All work is done after the return from kref_put() so we
130 can release the write_lock before the free. */
131
132static void kill_glock(struct kref *kref)
133{
134 struct gfs2_glock *gl = container_of(kref, struct gfs2_glock, gl_ref);
135 struct gfs2_sbd *sdp = gl->gl_sbd;
136
137 gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED);
138 gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
139 gfs2_assert(sdp, list_empty(&gl->gl_holders));
140 gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
141 gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
142 gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
143}
144
145/**
146 * gfs2_glock_put() - Decrement reference count on glock
147 * @gl: The glock to put
148 *
149 */
150
151int gfs2_glock_put(struct gfs2_glock *gl)
152{
153 struct gfs2_sbd *sdp = gl->gl_sbd;
154 struct gfs2_gl_hash_bucket *bucket = gl->gl_bucket;
155 int rv = 0;
156
157 mutex_lock(&sdp->sd_invalidate_inodes_mutex);
158
159 write_lock(&bucket->hb_lock);
160 if (kref_put(&gl->gl_ref, kill_glock)) {
161 list_del_init(&gl->gl_list);
162 write_unlock(&bucket->hb_lock);
163 BUG_ON(spin_is_locked(&gl->gl_spin));
164 glock_free(gl);
165 rv = 1;
166 goto out;
167 }
168 write_unlock(&bucket->hb_lock);
169 out:
170 mutex_unlock(&sdp->sd_invalidate_inodes_mutex);
171 return rv;
172}
173
174/**
175 * queue_empty - check to see if a glock's queue is empty
176 * @gl: the glock
177 * @head: the head of the queue to check
178 *
179 * This function protects the list in the event that a process already
180 * has a holder on the list and is adding a second holder for itself.
181 * The glmutex lock is what generally prevents processes from working
182 * on the same glock at once, but the special case of adding a second
183 * holder for yourself ("recursive" locking) doesn't involve locking
184 * glmutex, making the spin lock necessary.
185 *
186 * Returns: 1 if the queue is empty
187 */
188
189static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
190{
191 int empty;
192 spin_lock(&gl->gl_spin);
193 empty = list_empty(head);
194 spin_unlock(&gl->gl_spin);
195 return empty;
196}
197
198/**
199 * search_bucket() - Find struct gfs2_glock by lock number
200 * @bucket: the bucket to search
201 * @name: The lock name
202 *
203 * Returns: NULL, or the struct gfs2_glock with the requested number
204 */
205
206static struct gfs2_glock *search_bucket(struct gfs2_gl_hash_bucket *bucket,
207 struct lm_lockname *name)
208{
209 struct gfs2_glock *gl;
210
211 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
212 if (test_bit(GLF_PLUG, &gl->gl_flags))
213 continue;
214 if (!lm_name_equal(&gl->gl_name, name))
215 continue;
216
217 kref_get(&gl->gl_ref);
218
219 return gl;
220 }
221
222 return NULL;
223}
224
225/**
226 * gfs2_glock_find() - Find glock by lock number
227 * @sdp: The GFS2 superblock
228 * @name: The lock name
229 *
230 * Returns: NULL, or the struct gfs2_glock with the requested number
231 */
232
233static struct gfs2_glock *gfs2_glock_find(struct gfs2_sbd *sdp,
234 struct lm_lockname *name)
235{
236 struct gfs2_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
237 struct gfs2_glock *gl;
238
239 read_lock(&bucket->hb_lock);
240 gl = search_bucket(bucket, name);
241 read_unlock(&bucket->hb_lock);
242
243 return gl;
244}
245
246/**
247 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
248 * @sdp: The GFS2 superblock
249 * @number: the lock number
250 * @glops: The glock_operations to use
251 * @create: If 0, don't create the glock if it doesn't exist
252 * @glp: the glock is returned here
253 *
254 * This does not lock a glock, just finds/creates structures for one.
255 *
256 * Returns: errno
257 */
258
259int gfs2_glock_get(struct gfs2_sbd *sdp, uint64_t number,
260 struct gfs2_glock_operations *glops, int create,
261 struct gfs2_glock **glp)
262{
263 struct lm_lockname name;
264 struct gfs2_glock *gl, *tmp;
265 struct gfs2_gl_hash_bucket *bucket;
266 int error;
267
268 name.ln_number = number;
269 name.ln_type = glops->go_type;
270 bucket = &sdp->sd_gl_hash[gl_hash(&name)];
271
272 read_lock(&bucket->hb_lock);
273 gl = search_bucket(bucket, &name);
274 read_unlock(&bucket->hb_lock);
275
276 if (gl || !create) {
277 *glp = gl;
278 return 0;
279 }
280
281 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
282 if (!gl)
283 return -ENOMEM;
284
285 memset(gl, 0, sizeof(struct gfs2_glock));
286
287 INIT_LIST_HEAD(&gl->gl_list);
288 gl->gl_name = name;
289 kref_init(&gl->gl_ref);
290
291 spin_lock_init(&gl->gl_spin);
292
293 gl->gl_state = LM_ST_UNLOCKED;
294 gl->gl_owner = NULL;
295 gl->gl_ip = 0;
296 INIT_LIST_HEAD(&gl->gl_holders);
297 INIT_LIST_HEAD(&gl->gl_waiters1);
298 INIT_LIST_HEAD(&gl->gl_waiters2);
299 INIT_LIST_HEAD(&gl->gl_waiters3);
300
301 gl->gl_ops = glops;
302
303 gl->gl_bucket = bucket;
304 INIT_LIST_HEAD(&gl->gl_reclaim);
305
306 gl->gl_sbd = sdp;
307
308 lops_init_le(&gl->gl_le, &gfs2_glock_lops);
309 INIT_LIST_HEAD(&gl->gl_ail_list);
310
311 /* If this glock protects actual on-disk data or metadata blocks,
312 create a VFS inode to manage the pages/buffers holding them. */
313 if (glops == &gfs2_inode_glops ||
314 glops == &gfs2_rgrp_glops ||
315 glops == &gfs2_meta_glops) {
316 gl->gl_aspace = gfs2_aspace_get(sdp);
317 if (!gl->gl_aspace) {
318 error = -ENOMEM;
319 goto fail;
320 }
321 }
322
323 error = gfs2_lm_get_lock(sdp, &name, &gl->gl_lock);
324 if (error)
325 goto fail_aspace;
326
327 write_lock(&bucket->hb_lock);
328 tmp = search_bucket(bucket, &name);
329 if (tmp) {
330 write_unlock(&bucket->hb_lock);
331 glock_free(gl);
332 gl = tmp;
333 } else {
334 list_add_tail(&gl->gl_list, &bucket->hb_list);
335 write_unlock(&bucket->hb_lock);
336 }
337
338 *glp = gl;
339
340 return 0;
341
342 fail_aspace:
343 if (gl->gl_aspace)
344 gfs2_aspace_put(gl->gl_aspace);
345
346 fail:
347 kmem_cache_free(gfs2_glock_cachep, gl);
348
349 return error;
350}
351
352/**
353 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
354 * @gl: the glock
355 * @state: the state we're requesting
356 * @flags: the modifier flags
357 * @gh: the holder structure
358 *
359 */
360
361void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
362 struct gfs2_holder *gh)
363{
364 INIT_LIST_HEAD(&gh->gh_list);
365 gh->gh_gl = gl;
366 gh->gh_ip = (unsigned long)__builtin_return_address(0);
367 gh->gh_owner = current;
368 gh->gh_state = state;
369 gh->gh_flags = flags;
370 gh->gh_error = 0;
371 gh->gh_iflags = 0;
372 init_completion(&gh->gh_wait);
373
374 if (gh->gh_state == LM_ST_EXCLUSIVE)
375 gh->gh_flags |= GL_LOCAL_EXCL;
376
377 gfs2_glock_hold(gl);
378}
379
380/**
381 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
382 * @state: the state we're requesting
383 * @flags: the modifier flags
384 * @gh: the holder structure
385 *
386 * Don't mess with the glock.
387 *
388 */
389
390void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *gh)
391{
392 gh->gh_state = state;
393 gh->gh_flags = flags;
394 if (gh->gh_state == LM_ST_EXCLUSIVE)
395 gh->gh_flags |= GL_LOCAL_EXCL;
396
397 gh->gh_iflags &= 1 << HIF_ALLOCED;
398 gh->gh_ip = (unsigned long)__builtin_return_address(0);
399}
400
401/**
402 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
403 * @gh: the holder structure
404 *
405 */
406
407void gfs2_holder_uninit(struct gfs2_holder *gh)
408{
409 gfs2_glock_put(gh->gh_gl);
410 gh->gh_gl = NULL;
411 gh->gh_ip = 0;
412}
413
414/**
415 * gfs2_holder_get - get a struct gfs2_holder structure
416 * @gl: the glock
417 * @state: the state we're requesting
418 * @flags: the modifier flags
419 * @gfp_flags: __GFP_NOFAIL
420 *
421 * Figure out how big an impact this function has. Either:
422 * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
423 * 2) Leave it like it is
424 *
425 * Returns: the holder structure, NULL on ENOMEM
426 */
427
428static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl,
429 unsigned int state,
430 int flags, gfp_t gfp_flags)
431{
432 struct gfs2_holder *gh;
433
434 gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
435 if (!gh)
436 return NULL;
437
438 gfs2_holder_init(gl, state, flags, gh);
439 set_bit(HIF_ALLOCED, &gh->gh_iflags);
440 gh->gh_ip = (unsigned long)__builtin_return_address(0);
441 return gh;
442}
443
/**
 * gfs2_holder_put - release a holder obtained from gfs2_holder_get()
 * @gh: the holder structure
 *
 * Uninitialises the holder (dropping its glock reference) and then frees
 * its memory.
 *
 */

static void gfs2_holder_put(struct gfs2_holder *gh)
{
	struct gfs2_holder *doomed = gh;

	gfs2_holder_uninit(doomed);
	kfree(doomed);
}
455
456/**
457 * rq_mutex - process a mutex request in the queue
458 * @gh: the glock holder
459 *
460 * Returns: 1 if the queue is blocked
461 */
462
463static int rq_mutex(struct gfs2_holder *gh)
464{
465 struct gfs2_glock *gl = gh->gh_gl;
466
467 list_del_init(&gh->gh_list);
468 /* gh->gh_error never examined. */
469 set_bit(GLF_LOCK, &gl->gl_flags);
470 complete(&gh->gh_wait);
471
472 return 1;
473}
474
475/**
476 * rq_promote - process a promote request in the queue
477 * @gh: the glock holder
478 *
479 * Acquire a new inter-node lock, or change a lock state to more restrictive.
480 *
481 * Returns: 1 if the queue is blocked
482 */
483
/*
 * Service one promote request.  run_queue() calls this with gl->gl_spin
 * held; the lock is dropped and retaken around the reclaim and
 * go_xmote_th() calls below.
 */
484static int rq_promote(struct gfs2_holder *gh)
485{
486 struct gfs2_glock *gl = gh->gh_gl;
487 struct gfs2_sbd *sdp = gl->gl_sbd;
488 struct gfs2_glock_operations *glops = gl->gl_ops;
489
/* The current state does not satisfy the request: a state change is needed. */
490 if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
/* The change is only started when nobody holds the glock. */
491 if (list_empty(&gl->gl_holders)) {
492 gl->gl_req_gh = gh;
493 set_bit(GLF_LOCK, &gl->gl_flags);
494 spin_unlock(&gl->gl_spin);
495
/*
 * Over the reclaim limit and not a priority request: reclaim twice
 * before going to the lock module.
 */
496 if (atomic_read(&sdp->sd_reclaim_count) >
497 gfs2_tune_get(sdp, gt_reclaim_limit) &&
498 !(gh->gh_flags & LM_FLAG_PRIORITY)) {
499 gfs2_reclaim_glock(sdp);
500 gfs2_reclaim_glock(sdp);
501 }
502
503 glops->go_xmote_th(gl, gh->gh_state,
504 gh->gh_flags);
505
506 spin_lock(&gl->gl_spin);
507 }
/* Either way the queue is blocked behind this request. */
508 return 1;
509 }
510
/* The state is already compatible; decide whether the holder may be granted. */
511 if (list_empty(&gl->gl_holders)) {
/* First holder: flag it HIF_FIRST and keep the glock locked for it. */
512 set_bit(HIF_FIRST, &gh->gh_iflags);
513 set_bit(GLF_LOCK, &gl->gl_flags);
514 } else {
/*
 * There are holders already: block if this request, or the first
 * existing holder, is locally exclusive.
 */
515 struct gfs2_holder *next_gh;
516 if (gh->gh_flags & GL_LOCAL_EXCL)
517 return 1;
518 next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
519 gh_list);
520 if (next_gh->gh_flags & GL_LOCAL_EXCL)
521 return 1;
522 }
523
/* Grant: move the request onto the holders list and wake its waiter. */
524 list_move_tail(&gh->gh_list, &gl->gl_holders);
525 gh->gh_error = 0;
526 set_bit(HIF_HOLDER, &gh->gh_iflags);
527
528 complete(&gh->gh_wait);
529
530 return 0;
531}
532
533/**
534 * rq_demote - process a demote request in the queue
535 * @gh: the glock holder
536 *
537 * Returns: 1 if the queue is blocked
538 */
539
/*
 * Service one demote request.  run_queue() calls this with gl->gl_spin
 * held; the lock is dropped and retaken around the calls that complete
 * or free the holder and around the go_drop_th()/go_xmote_th() calls.
 */
540static int rq_demote(struct gfs2_holder *gh)
541{
542 struct gfs2_glock *gl = gh->gh_gl;
543 struct gfs2_glock_operations *glops = gl->gl_ops;
544
/* A demote has to wait until the glock has no holders. */
545 if (!list_empty(&gl->gl_holders))
546 return 1;
547
/*
 * Already in the requested state, or unlocked: nothing to change, so
 * just retire the request.
 */
548 if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
549 list_del_init(&gh->gh_list);
550 gh->gh_error = 0;
551 spin_unlock(&gl->gl_spin);
/* HIF_DEALLOC requests are freed here; others have a waiter to wake. */
552 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
553 gfs2_holder_put(gh);
554 else
555 complete(&gh->gh_wait);
556 spin_lock(&gl->gl_spin);
557 } else {
/* Start the state change with the glock marked GLF_LOCK. */
558 gl->gl_req_gh = gh;
559 set_bit(GLF_LOCK, &gl->gl_flags);
560 spin_unlock(&gl->gl_spin);
561
/*
 * Drop the lock when the target is unlocked or the current state is
 * not exclusive; otherwise convert it with go_xmote_th().
 */
562 if (gh->gh_state == LM_ST_UNLOCKED ||
563 gl->gl_state != LM_ST_EXCLUSIVE)
564 glops->go_drop_th(gl);
565 else
566 glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
567
568 spin_lock(&gl->gl_spin);
569 }
570
571 return 0;
572}
573
574/**
575 * rq_greedy - process a queued request to drop greedy status
576 * @gh: the glock holder
577 *
578 * Returns: 1 if the queue is blocked
579 */
580
581static int rq_greedy(struct gfs2_holder *gh)
582{
583 struct gfs2_glock *gl = gh->gh_gl;
584
585 list_del_init(&gh->gh_list);
586 /* gh->gh_error never examined. */
587 clear_bit(GLF_GREEDY, &gl->gl_flags);
588 spin_unlock(&gl->gl_spin);
589
590 gfs2_holder_uninit(gh);
591 kfree(container_of(gh, struct greedy, gr_gh));
592
593 spin_lock(&gl->gl_spin);
594
595 return 0;
596}
597
598/**
599 * run_queue - process holder structures on a glock
600 * @gl: the glock
601 *
602 */
/*
 * Services queued requests until the glock becomes GLF_LOCK'd, a handler
 * reports that the queue is blocked, or there is nothing left to run.
 * Every caller in this file holds gl->gl_spin.
 */
603static void run_queue(struct gfs2_glock *gl)
604{
605 struct gfs2_holder *gh;
/*
 * Starts at 1, and keeps its previous value when the head entry has an
 * unexpected type, so such an entry stops the loop instead of spinning.
 */
606 int blocked = 1;
607
608 for (;;) {
/* Someone owns the glock's local mutex: nothing may run. */
609 if (test_bit(GLF_LOCK, &gl->gl_flags))
610 break;
611
/* Priority 1: waiters for the local mutex. */
612 if (!list_empty(&gl->gl_waiters1)) {
613 gh = list_entry(gl->gl_waiters1.next,
614 struct gfs2_holder, gh_list);
615
616 if (test_bit(HIF_MUTEX, &gh->gh_iflags))
617 blocked = rq_mutex(gh);
618 else
619 gfs2_assert_warn(gl->gl_sbd, 0);
620
/* Priority 2: demote/greedy requests, unless GLF_SKIP_WAITERS2 is set. */
621 } else if (!list_empty(&gl->gl_waiters2) &&
622 !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
623 gh = list_entry(gl->gl_waiters2.next,
624 struct gfs2_holder, gh_list);
625
626 if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
627 blocked = rq_demote(gh);
628 else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
629 blocked = rq_greedy(gh);
630 else
631 gfs2_assert_warn(gl->gl_sbd, 0);
632
/* Priority 3: promote requests. */
633 } else if (!list_empty(&gl->gl_waiters3)) {
634 gh = list_entry(gl->gl_waiters3.next,
635 struct gfs2_holder, gh_list);
636
637 if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
638 blocked = rq_promote(gh);
639 else
640 gfs2_assert_warn(gl->gl_sbd, 0);
641
642 } else
643 break;
644
645 if (blocked)
646 break;
647 }
648}
649
650/**
651 * gfs2_glmutex_lock - acquire a local lock on a glock
652 * @gl: the glock
653 *
654 * Gives caller exclusive access to manipulate a glock structure.
655 */
656
657void gfs2_glmutex_lock(struct gfs2_glock *gl)
658{
659 struct gfs2_holder gh;
660
661 gfs2_holder_init(gl, 0, 0, &gh);
662 set_bit(HIF_MUTEX, &gh.gh_iflags);
663
664 spin_lock(&gl->gl_spin);
665 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
666 list_add_tail(&gh.gh_list, &gl->gl_waiters1);
667 else {
668 gl->gl_owner = current;
669 gl->gl_ip = (unsigned long)__builtin_return_address(0);
670 complete(&gh.gh_wait);
671 }
672 spin_unlock(&gl->gl_spin);
673
674 wait_for_completion(&gh.gh_wait);
675 gfs2_holder_uninit(&gh);
676}
677
678/**
679 * gfs2_glmutex_trylock - try to acquire a local lock on a glock
680 * @gl: the glock
681 *
682 * Returns: 1 if the glock is acquired
683 */
684
685static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
686{
687 int acquired = 1;
688
689 spin_lock(&gl->gl_spin);
690 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
691 acquired = 0;
692 else {
693 gl->gl_owner = current;
694 gl->gl_ip = (unsigned long)__builtin_return_address(0);
695 }
696 spin_unlock(&gl->gl_spin);
697
698 return acquired;
699}
700
701/**
702 * gfs2_glmutex_unlock - release a local lock on a glock
703 * @gl: the glock
704 *
705 */
706
707void gfs2_glmutex_unlock(struct gfs2_glock *gl)
708{
709 spin_lock(&gl->gl_spin);
710 clear_bit(GLF_LOCK, &gl->gl_flags);
711 gl->gl_owner = NULL;
712 gl->gl_ip = 0;
713 run_queue(gl);
714 BUG_ON(!spin_is_locked(&gl->gl_spin));
715 spin_unlock(&gl->gl_spin);
716}
717
718/**
719 * handle_callback - add a demote request to a lock's queue
720 * @gl: the glock
721 * @state: the state the caller wants us to change to
722 *
723 */
724
725static void handle_callback(struct gfs2_glock *gl, unsigned int state)
726{
727 struct gfs2_holder *gh, *new_gh = NULL;
728
729 restart:
730 spin_lock(&gl->gl_spin);
731
732 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
733 if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
734 gl->gl_req_gh != gh) {
735 if (gh->gh_state != state)
736 gh->gh_state = LM_ST_UNLOCKED;
737 goto out;
738 }
739 }
740
741 if (new_gh) {
742 list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
743 new_gh = NULL;
744 } else {
745 spin_unlock(&gl->gl_spin);
746
747 new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY,
748 GFP_KERNEL | __GFP_NOFAIL),
749 set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
750 set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
751
752 goto restart;
753 }
754
755 out:
756 spin_unlock(&gl->gl_spin);
757
758 if (new_gh)
759 gfs2_holder_put(new_gh);
760}
761
762/**
763 * state_change - record that the glock is now in a different state
764 * @gl: the glock
765 * @new_state the new state
766 *
767 */
768
769static void state_change(struct gfs2_glock *gl, unsigned int new_state)
770{
771 int held1, held2;
772
773 held1 = (gl->gl_state != LM_ST_UNLOCKED);
774 held2 = (new_state != LM_ST_UNLOCKED);
775
776 if (held1 != held2) {
777 if (held2)
778 gfs2_glock_hold(gl);
779 else
780 gfs2_glock_put(gl);
781 }
782
783 gl->gl_state = new_state;
784}
785
786/**
787 * xmote_bh - Called after the lock module is done acquiring a lock
788 * @gl: The glock in question
789 * @ret: the int returned from the lock module
790 *
791 */
792
/*
 * Bottom half of a lock state change.  Records the new state, invalidates
 * cached data where needed, settles the requesting holder (gl_req_gh)
 * according to the outcome, and drops the glock reference taken by
 * gfs2_glock_xmote_th().
 */
793static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
794{
795 struct gfs2_sbd *sdp = gl->gl_sbd;
796 struct gfs2_glock_operations *glops = gl->gl_ops;
797 struct gfs2_holder *gh = gl->gl_req_gh;
798 int prev_state = gl->gl_state;
/* Set to 0 only when the holder is granted; GLF_LOCK then stays set. */
799 int op_done = 1;
800
801 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
802 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
803 gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
804
/* The state now held is carried in the LM_OUT_ST_MASK bits of ret. */
805 state_change(gl, ret & LM_OUT_ST_MASK);
806
/*
 * Previously locked and the result is not cacheable: invalidate both
 * metadata and data.  Otherwise, when now deferred, invalidate data only.
 */
807 if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
808 if (glops->go_inval)
809 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
810 } else if (gl->gl_state == LM_ST_DEFERRED) {
811 /* We might not want to do this here.
812 Look at moving to the inode glops. */
813 if (glops->go_inval)
814 glops->go_inval(gl, DIO_DATA);
815 }
816
817 /* Deal with each possible exit condition */
818
/* No holder is attached to this request: just timestamp the glock. */
819 if (!gh)
820 gl->gl_stamp = jiffies;
821
/* Filesystem is shut down: fail the request with -EIO. */
822 else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
823 spin_lock(&gl->gl_spin);
824 list_del_init(&gh->gh_list);
825 gh->gh_error = -EIO;
826 spin_unlock(&gl->gl_spin);
827
/*
 * Demote request: success if the target state was reached or the glock
 * ended up unlocked; otherwise it is reported as a failed try.
 */
828 } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
829 spin_lock(&gl->gl_spin);
830 list_del_init(&gh->gh_list);
831 if (gl->gl_state == gh->gh_state ||
832 gl->gl_state == LM_ST_UNLOCKED)
833 gh->gh_error = 0;
834 else {
835 if (gfs2_assert_warn(sdp, gh->gh_flags &
836 (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
837 fs_warn(sdp, "ret = 0x%.8X\n", ret);
838 gh->gh_error = GLR_TRYFAILED;
839 }
840 spin_unlock(&gl->gl_spin);
841
/* A cancelled demote is re-queued as a demote to unlocked. */
842 if (ret & LM_OUT_CANCELED)
843 handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
844
/* Cancelled promote: gfs2_glock_nq()/gfs2_glock_wait() retry on GLR_CANCELED. */
845 } else if (ret & LM_OUT_CANCELED) {
846 spin_lock(&gl->gl_spin);
847 list_del_init(&gh->gh_list);
848 gh->gh_error = GLR_CANCELED;
849 spin_unlock(&gl->gl_spin);
850
/*
 * The new state satisfies the request: the holder becomes the first
 * holder.  GLF_LOCK is kept; glock_wait_internal() releases it for a
 * HIF_FIRST holder.
 */
851 } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
852 spin_lock(&gl->gl_spin);
853 list_move_tail(&gh->gh_list, &gl->gl_holders);
854 gh->gh_error = 0;
855 set_bit(HIF_HOLDER, &gh->gh_iflags);
856 spin_unlock(&gl->gl_spin);
857
858 set_bit(HIF_FIRST, &gh->gh_iflags);
859
860 op_done = 0;
861
/* A "try" request that did not get a usable state. */
862 } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
863 spin_lock(&gl->gl_spin);
864 list_del_init(&gh->gh_list);
865 gh->gh_error = GLR_TRYFAILED;
866 spin_unlock(&gl->gl_spin);
867
/* None of the above applies: treated as a fatal inconsistency. */
868 } else {
869 if (gfs2_assert_withdraw(sdp, 0) == -1)
870 fs_err(sdp, "ret = 0x%.8X\n", ret);
871 }
872
873 if (glops->go_xmote_bh)
874 glops->go_xmote_bh(gl);
875
/* Request finished: release the glock's local lock and run the queue. */
876 if (op_done) {
877 spin_lock(&gl->gl_spin);
878 gl->gl_req_gh = NULL;
879 gl->gl_req_bh = NULL;
880 clear_bit(GLF_LOCK, &gl->gl_flags);
881 run_queue(gl);
882 spin_unlock(&gl->gl_spin);
883 }
884
/* Drops the reference taken in gfs2_glock_xmote_th(). */
885 gfs2_glock_put(gl);
886
/* HIF_DEALLOC holders are freed here; any other holder's waiter is woken. */
887 if (gh) {
888 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
889 gfs2_holder_put(gh);
890 else
891 complete(&gh->gh_wait);
892 }
893}
894
895/**
896 * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock
897 * @gl: The glock in question
898 * @state: the requested state
899 * @flags: modifier flags to the lock call
900 *
901 */
902
903void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
904{
905 struct gfs2_sbd *sdp = gl->gl_sbd;
906 struct gfs2_glock_operations *glops = gl->gl_ops;
907 int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
908 LM_FLAG_NOEXP | LM_FLAG_ANY |
909 LM_FLAG_PRIORITY);
910 unsigned int lck_ret;
911
912 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
913 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
914 gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
915 gfs2_assert_warn(sdp, state != gl->gl_state);
916
917 if (gl->gl_state == LM_ST_EXCLUSIVE) {
918 if (glops->go_sync)
919 glops->go_sync(gl,
920 DIO_METADATA | DIO_DATA | DIO_RELEASE);
921 }
922
923 gfs2_glock_hold(gl);
924 gl->gl_req_bh = xmote_bh;
925
926 lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state,
927 lck_flags);
928
929 if (gfs2_assert_withdraw(sdp, !(lck_ret & LM_OUT_ERROR)))
930 return;
931
932 if (lck_ret & LM_OUT_ASYNC)
933 gfs2_assert_warn(sdp, lck_ret == LM_OUT_ASYNC);
934 else
935 xmote_bh(gl, lck_ret);
936}
937
938/**
939 * drop_bh - Called after a lock module unlock completes
940 * @gl: the glock
941 * @ret: the return status
942 *
943 * Doesn't wake up the process waiting on the struct gfs2_holder (if any)
944 * Doesn't drop the reference on the glock the top half took out
945 *
946 */
947
/*
 * Bottom half of an unlock: marks the glock unlocked, invalidates its
 * cached data, retires the requesting holder (if any), releases the
 * glock's local lock and runs the queue.
 */
948static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
949{
950 struct gfs2_sbd *sdp = gl->gl_sbd;
951 struct gfs2_glock_operations *glops = gl->gl_ops;
952 struct gfs2_holder *gh = gl->gl_req_gh;
953
954 clear_bit(GLF_PREFETCH, &gl->gl_flags);
955
956 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
957 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
958 gfs2_assert_warn(sdp, !ret);
959
960 state_change(gl, LM_ST_UNLOCKED);
961
/* Nothing may stay cached once the lock is gone. */
962 if (glops->go_inval)
963 glops->go_inval(gl, DIO_METADATA | DIO_DATA);
964
/* Take the requesting holder off its queue and mark it successful. */
965 if (gh) {
966 spin_lock(&gl->gl_spin);
967 list_del_init(&gh->gh_list);
968 gh->gh_error = 0;
969 spin_unlock(&gl->gl_spin);
970 }
971
972 if (glops->go_drop_bh)
973 glops->go_drop_bh(gl);
974
/* The request is finished: release GLF_LOCK and service the queue. */
975 spin_lock(&gl->gl_spin);
976 gl->gl_req_gh = NULL;
977 gl->gl_req_bh = NULL;
978 clear_bit(GLF_LOCK, &gl->gl_flags);
979 run_queue(gl);
980 spin_unlock(&gl->gl_spin);
981
/*
 * NOTE(review): the header comment says this function neither drops the
 * top half's glock reference nor wakes the waiting process, yet the code
 * below does both -- the header looks stale; confirm and update it.
 */
982 gfs2_glock_put(gl);
983
984 if (gh) {
985 if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
986 gfs2_holder_put(gh);
987 else
988 complete(&gh->gh_wait);
989 }
990}
991
992/**
993 * gfs2_glock_drop_th - call into the lock module to unlock a lock
994 * @gl: the glock
995 *
996 */
997
998void gfs2_glock_drop_th(struct gfs2_glock *gl)
999{
1000 struct gfs2_sbd *sdp = gl->gl_sbd;
1001 struct gfs2_glock_operations *glops = gl->gl_ops;
1002 unsigned int ret;
1003
1004 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1005 gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
1006 gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
1007
1008 if (gl->gl_state == LM_ST_EXCLUSIVE) {
1009 if (glops->go_sync)
1010 glops->go_sync(gl,
1011 DIO_METADATA | DIO_DATA | DIO_RELEASE);
1012 }
1013
1014 gfs2_glock_hold(gl);
1015 gl->gl_req_bh = drop_bh;
1016
1017 ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state);
1018
1019 if (gfs2_assert_withdraw(sdp, !(ret & LM_OUT_ERROR)))
1020 return;
1021
1022 if (!ret)
1023 drop_bh(gl, ret);
1024 else
1025 gfs2_assert_warn(sdp, ret == LM_OUT_ASYNC);
1026}
1027
1028/**
1029 * do_cancels - cancel requests for locks stuck waiting on an expire flag
1030 * @gh: the LM_FLAG_PRIORITY holder waiting to acquire the lock
1031 *
1032 * Don't cancel GL_NOCANCEL requests.
1033 */
1034
1035static void do_cancels(struct gfs2_holder *gh)
1036{
1037 struct gfs2_glock *gl = gh->gh_gl;
1038
1039 spin_lock(&gl->gl_spin);
1040
1041 while (gl->gl_req_gh != gh &&
1042 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1043 !list_empty(&gh->gh_list)) {
1044 if (gl->gl_req_bh &&
1045 !(gl->gl_req_gh &&
1046 (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) {
1047 spin_unlock(&gl->gl_spin);
1048 gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock);
1049 msleep(100);
1050 spin_lock(&gl->gl_spin);
1051 } else {
1052 spin_unlock(&gl->gl_spin);
1053 msleep(100);
1054 spin_lock(&gl->gl_spin);
1055 }
1056 }
1057
1058 spin_unlock(&gl->gl_spin);
1059}
1060
1061/**
1062 * glock_wait_internal - wait on a glock acquisition
1063 * @gh: the glock holder
1064 *
1065 * Returns: 0 on success
1066 */
1067
/*
 * Waits for a queued holder to be settled and, for the first holder of
 * the glock, runs the go_lock hook and releases the glock's local lock.
 */
1068static int glock_wait_internal(struct gfs2_holder *gh)
1069{
1070 struct gfs2_glock *gl = gh->gh_gl;
1071 struct gfs2_sbd *sdp = gl->gl_sbd;
1072 struct gfs2_glock_operations *glops = gl->gl_ops;
1073
/* gfs2_glock_nq() sets HIF_ABORTED when the filesystem is shut down. */
1074 if (test_bit(HIF_ABORTED, &gh->gh_iflags))
1075 return -EIO;
1076
/*
 * "Try" requests do not wait: if the holder is still queued, has not
 * been granted and is not the request in flight, dequeue it and fail.
 */
1077 if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1078 spin_lock(&gl->gl_spin);
1079 if (gl->gl_req_gh != gh &&
1080 !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
1081 !list_empty(&gh->gh_list)) {
1082 list_del_init(&gh->gh_list);
1083 gh->gh_error = GLR_TRYFAILED;
1084 run_queue(gl);
1085 spin_unlock(&gl->gl_spin);
1086 return gh->gh_error;
1087 }
1088 spin_unlock(&gl->gl_spin);
1089 }
1090
/* Priority requests cancel whatever lock request is in their way. */
1091 if (gh->gh_flags & LM_FLAG_PRIORITY)
1092 do_cancels(gh);
1093
1094 wait_for_completion(&gh->gh_wait);
1095
1096 if (gh->gh_error)
1097 return gh->gh_error;
1098
/* A request completed without error must have been granted. */
1099 gfs2_assert_withdraw(sdp, test_bit(HIF_HOLDER, &gh->gh_iflags));
1100 gfs2_assert_withdraw(sdp, relaxed_state_ok(gl->gl_state,
1101 gh->gh_state,
1102 gh->gh_flags));
1103
/*
 * The first holder was granted with GLF_LOCK left set (see rq_promote()
 * and xmote_bh()), so it runs go_lock and then releases the local lock.
 */
1104 if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
1105 gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
1106
1107 if (glops->go_lock) {
1108 gh->gh_error = glops->go_lock(gh);
/* go_lock failed: take the holder back off the holders list. */
1109 if (gh->gh_error) {
1110 spin_lock(&gl->gl_spin);
1111 list_del_init(&gh->gh_list);
1112 spin_unlock(&gl->gl_spin);
1113 }
1114 }
1115
1116 spin_lock(&gl->gl_spin);
1117 gl->gl_req_gh = NULL;
1118 gl->gl_req_bh = NULL;
1119 clear_bit(GLF_LOCK, &gl->gl_flags);
1120 run_queue(gl);
1121 spin_unlock(&gl->gl_spin);
1122 }
1123
1124 return gh->gh_error;
1125}
1126
1127static inline struct gfs2_holder *
1128find_holder_by_owner(struct list_head *head, struct task_struct *owner)
1129{
1130 struct gfs2_holder *gh;
1131
1132 list_for_each_entry(gh, head, gh_list) {
1133 if (gh->gh_owner == owner)
1134 return gh;
1135 }
1136
1137 return NULL;
1138}
1139
1140/**
1141 * add_to_queue - Add a holder to the wait queue (but look for recursion)
1142 * @gh: the holder structure to add
1143 *
1144 */
1145
1146static void add_to_queue(struct gfs2_holder *gh)
1147{
1148 struct gfs2_glock *gl = gh->gh_gl;
1149 struct gfs2_holder *existing;
1150
1151 BUG_ON(!gh->gh_owner);
1152
1153 existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
1154 if (existing) {
1155 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1156 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1157 BUG();
1158 }
1159
1160 existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
1161 if (existing) {
1162 print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
1163 print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
1164 BUG();
1165 }
1166
1167 if (gh->gh_flags & LM_FLAG_PRIORITY)
1168 list_add(&gh->gh_list, &gl->gl_waiters3);
1169 else
1170 list_add_tail(&gh->gh_list, &gl->gl_waiters3);
1171}
1172
1173/**
1174 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1175 * @gh: the holder structure
1176 *
1177 * if (gh->gh_flags & GL_ASYNC), this never returns an error
1178 *
1179 * Returns: 0, GLR_TRYFAILED, or errno on failure
1180 */
1181
1182int gfs2_glock_nq(struct gfs2_holder *gh)
1183{
1184 struct gfs2_glock *gl = gh->gh_gl;
1185 struct gfs2_sbd *sdp = gl->gl_sbd;
1186 int error = 0;
1187
1188restart:
1189 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
1190 set_bit(HIF_ABORTED, &gh->gh_iflags);
1191 return -EIO;
1192 }
1193
1194 set_bit(HIF_PROMOTE, &gh->gh_iflags);
1195
1196 spin_lock(&gl->gl_spin);
1197 add_to_queue(gh);
1198 run_queue(gl);
1199 spin_unlock(&gl->gl_spin);
1200
1201 if (!(gh->gh_flags & GL_ASYNC)) {
1202 error = glock_wait_internal(gh);
1203 if (error == GLR_CANCELED) {
1204 msleep(100);
1205 goto restart;
1206 }
1207 }
1208
1209 clear_bit(GLF_PREFETCH, &gl->gl_flags);
1210
1211 if (error == GLR_TRYFAILED && (gh->gh_flags & GL_DUMP))
1212 dump_glock(gl);
1213
1214 return error;
1215}
1216
1217/**
1218 * gfs2_glock_poll - poll to see if an async request has been completed
1219 * @gh: the holder
1220 *
1221 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1222 */
1223
1224int gfs2_glock_poll(struct gfs2_holder *gh)
1225{
1226 struct gfs2_glock *gl = gh->gh_gl;
1227 int ready = 0;
1228
1229 spin_lock(&gl->gl_spin);
1230
1231 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1232 ready = 1;
1233 else if (list_empty(&gh->gh_list)) {
1234 if (gh->gh_error == GLR_CANCELED) {
1235 spin_unlock(&gl->gl_spin);
1236 msleep(100);
1237 if (gfs2_glock_nq(gh))
1238 return 1;
1239 return 0;
1240 } else
1241 ready = 1;
1242 }
1243
1244 spin_unlock(&gl->gl_spin);
1245
1246 return ready;
1247}
1248
1249/**
1250 * gfs2_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
1251 * @gh: the holder structure
1252 *
1253 * Returns: 0, GLR_TRYFAILED, or errno on failure
1254 */
1255
1256int gfs2_glock_wait(struct gfs2_holder *gh)
1257{
1258 int error;
1259
1260 error = glock_wait_internal(gh);
1261 if (error == GLR_CANCELED) {
1262 msleep(100);
1263 gh->gh_flags &= ~GL_ASYNC;
1264 error = gfs2_glock_nq(gh);
1265 }
1266
1267 return error;
1268}
1269
1270/**
1271 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1272 * @gh: the glock holder
1273 *
1274 */
1275
1276void gfs2_glock_dq(struct gfs2_holder *gh)
1277{
1278 struct gfs2_glock *gl = gh->gh_gl;
1279 struct gfs2_glock_operations *glops = gl->gl_ops;
1280
1281 if (gh->gh_flags & GL_SYNC)
1282 set_bit(GLF_SYNC, &gl->gl_flags);
1283
1284 if (gh->gh_flags & GL_NOCACHE)
1285 handle_callback(gl, LM_ST_UNLOCKED);
1286
1287 gfs2_glmutex_lock(gl);
1288
1289 spin_lock(&gl->gl_spin);
1290 list_del_init(&gh->gh_list);
1291
1292 if (list_empty(&gl->gl_holders)) {
1293 spin_unlock(&gl->gl_spin);
1294
1295 if (glops->go_unlock)
1296 glops->go_unlock(gh);
1297
1298 if (test_bit(GLF_SYNC, &gl->gl_flags)) {
1299 if (glops->go_sync)
1300 glops->go_sync(gl, DIO_METADATA | DIO_DATA);
1301 }
1302
1303 gl->gl_stamp = jiffies;
1304
1305 spin_lock(&gl->gl_spin);
1306 }
1307
1308 clear_bit(GLF_LOCK, &gl->gl_flags);
1309 run_queue(gl);
1310 spin_unlock(&gl->gl_spin);
1311}
1312
1313/**
1314 * gfs2_glock_prefetch - Try to prefetch a glock
1315 * @gl: the glock
1316 * @state: the state to prefetch in
1317 * @flags: flags passed to go_xmote_th()
1318 *
1319 */
1320
1321static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
1322 int flags)
1323{
1324 struct gfs2_glock_operations *glops = gl->gl_ops;
1325
1326 spin_lock(&gl->gl_spin);
1327
1328 if (test_bit(GLF_LOCK, &gl->gl_flags) ||
1329 !list_empty(&gl->gl_holders) ||
1330 !list_empty(&gl->gl_waiters1) ||
1331 !list_empty(&gl->gl_waiters2) ||
1332 !list_empty(&gl->gl_waiters3) ||
1333 relaxed_state_ok(gl->gl_state, state, flags)) {
1334 spin_unlock(&gl->gl_spin);
1335 return;
1336 }
1337
1338 set_bit(GLF_PREFETCH, &gl->gl_flags);
1339 set_bit(GLF_LOCK, &gl->gl_flags);
1340 spin_unlock(&gl->gl_spin);
1341
1342 glops->go_xmote_th(gl, state, flags);
1343}
1344
1345static void greedy_work(void *data)
1346{
1347 struct greedy *gr = data;
1348 struct gfs2_holder *gh = &gr->gr_gh;
1349 struct gfs2_glock *gl = gh->gh_gl;
1350 struct gfs2_glock_operations *glops = gl->gl_ops;
1351
1352 clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1353
1354 if (glops->go_greedy)
1355 glops->go_greedy(gl);
1356
1357 spin_lock(&gl->gl_spin);
1358
1359 if (list_empty(&gl->gl_waiters2)) {
1360 clear_bit(GLF_GREEDY, &gl->gl_flags);
1361 spin_unlock(&gl->gl_spin);
1362 gfs2_holder_uninit(gh);
1363 kfree(gr);
1364 } else {
1365 gfs2_glock_hold(gl);
1366 list_add_tail(&gh->gh_list, &gl->gl_waiters2);
1367 run_queue(gl);
1368 spin_unlock(&gl->gl_spin);
1369 gfs2_glock_put(gl);
1370 }
1371}
1372
1373/**
1374 * gfs2_glock_be_greedy -
1375 * @gl:
1376 * @time:
1377 *
1378 * Returns: 0 if go_greedy will be called, 1 otherwise
1379 */
1380
1381int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
1382{
1383 struct greedy *gr;
1384 struct gfs2_holder *gh;
1385
1386 if (!time ||
1387 gl->gl_sbd->sd_args.ar_localcaching ||
1388 test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
1389 return 1;
1390
1391 gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
1392 if (!gr) {
1393 clear_bit(GLF_GREEDY, &gl->gl_flags);
1394 return 1;
1395 }
1396 gh = &gr->gr_gh;
1397
1398 gfs2_holder_init(gl, 0, 0, gh);
1399 set_bit(HIF_GREEDY, &gh->gh_iflags);
1400 INIT_WORK(&gr->gr_work, greedy_work, gr);
1401
1402 set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
1403 schedule_delayed_work(&gr->gr_work, time);
1404
1405 return 0;
1406}
1407
1408/**
1409 * gfs2_glock_dq_uninit - dequeue a holder from a glock and uninitialize it
1410 * @gh: the holder structure
1411 *
 * Calls gfs2_glock_dq() and then gfs2_holder_uninit() on @gh.
1412 */
1413
1414void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1415{
1416 gfs2_glock_dq(gh);
1417 gfs2_holder_uninit(gh);
1418}
1419
1420/**
1421 * gfs2_glock_nq_num - acquire a glock based on lock number
1422 * @sdp: the filesystem
1423 * @number: the lock number
1424 * @glops: the glock operations for the type of glock
1425 * @state: the state to acquire the glock in
1426 * @flags: modifier flags for the aquisition
1427 * @gh: the struct gfs2_holder
1428 *
1429 * Returns: errno
1430 */
1431
1432int gfs2_glock_nq_num(struct gfs2_sbd *sdp, uint64_t number,
1433 struct gfs2_glock_operations *glops, unsigned int state,
1434 int flags, struct gfs2_holder *gh)
1435{
1436 struct gfs2_glock *gl;
1437 int error;
1438
1439 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1440 if (!error) {
1441 error = gfs2_glock_nq_init(gl, state, flags, gh);
1442 gfs2_glock_put(gl);
1443 }
1444
1445 return error;
1446}
1447
1448/**
1449 * glock_compare - Compare two struct gfs2_glock structures for sorting
1450 * @arg_a: the first structure
1451 * @arg_b: the second structure
1452 *
1453 */
1454
1455static int glock_compare(const void *arg_a, const void *arg_b)
1456{
1457 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1458 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1459 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1460 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1461 int ret = 0;
1462
1463 if (a->ln_number > b->ln_number)
1464 ret = 1;
1465 else if (a->ln_number < b->ln_number)
1466 ret = -1;
1467 else {
1468 if (gh_a->gh_state == LM_ST_SHARED &&
1469 gh_b->gh_state == LM_ST_EXCLUSIVE)
1470 ret = 1;
1471 else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
1472 (gh_b->gh_flags & GL_LOCAL_EXCL))
1473 ret = 1;
1474 }
1475
1476 return ret;
1477}
1478
1479/**
1480 * nq_m_sync - synchonously acquire more than one glock in deadlock free order
1481 * @num_gh: the number of structures
1482 * @ghs: an array of struct gfs2_holder structures
1483 *
1484 * Returns: 0 on success (all glocks acquired),
1485 * errno on failure (no glocks acquired)
1486 */
1487
1488static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1489 struct gfs2_holder **p)
1490{
1491 unsigned int x;
1492 int error = 0;
1493
1494 for (x = 0; x < num_gh; x++)
1495 p[x] = &ghs[x];
1496
1497 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1498
1499 for (x = 0; x < num_gh; x++) {
1500 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1501
1502 error = gfs2_glock_nq(p[x]);
1503 if (error) {
1504 while (x--)
1505 gfs2_glock_dq(p[x]);
1506 break;
1507 }
1508 }
1509
1510 return error;
1511}
1512
1513/**
1514 * gfs2_glock_nq_m - acquire multiple glocks
1515 * @num_gh: the number of structures
1516 * @ghs: an array of struct gfs2_holder structures
1517 *
1518 * Figure out how big an impact this function has. Either:
1519 * 1) Replace this code with code that calls gfs2_glock_prefetch()
1520 * 2) Forget async stuff and just call nq_m_sync()
1521 * 3) Leave it like it is
1522 *
1523 * Returns: 0 on success (all glocks acquired),
1524 * errno on failure (no glocks acquired)
1525 */
1526
1527int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1528{
1529 int *e;
1530 unsigned int x;
1531 int borked = 0, serious = 0;
1532 int error = 0;
1533
1534 if (!num_gh)
1535 return 0;
1536
1537 if (num_gh == 1) {
1538 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1539 return gfs2_glock_nq(ghs);
1540 }
1541
1542 e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1543 if (!e)
1544 return -ENOMEM;
1545
1546 for (x = 0; x < num_gh; x++) {
1547 ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
1548 error = gfs2_glock_nq(&ghs[x]);
1549 if (error) {
1550 borked = 1;
1551 serious = error;
1552 num_gh = x;
1553 break;
1554 }
1555 }
1556
1557 for (x = 0; x < num_gh; x++) {
1558 error = e[x] = glock_wait_internal(&ghs[x]);
1559 if (error) {
1560 borked = 1;
1561 if (error != GLR_TRYFAILED && error != GLR_CANCELED)
1562 serious = error;
1563 }
1564 }
1565
1566 if (!borked) {
1567 kfree(e);
1568 return 0;
1569 }
1570
1571 for (x = 0; x < num_gh; x++)
1572 if (!e[x])
1573 gfs2_glock_dq(&ghs[x]);
1574
1575 if (serious)
1576 error = serious;
1577 else {
1578 for (x = 0; x < num_gh; x++)
1579 gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
1580 &ghs[x]);
1581 error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
1582 }
1583
1584 kfree(e);
1585
1586 return error;
1587}
1588
1589/**
1590 * gfs2_glock_dq_m - release multiple glocks
1591 * @num_gh: the number of structures
1592 * @ghs: an array of struct gfs2_holder structures
1593 *
1594 */
1595
1596void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1597{
1598 unsigned int x;
1599
1600 for (x = 0; x < num_gh; x++)
1601 gfs2_glock_dq(&ghs[x]);
1602}
1603
1604/**
1605 * gfs2_glock_dq_uninit_m - release multiple glocks
1606 * @num_gh: the number of structures
1607 * @ghs: an array of struct gfs2_holder structures
1608 *
1609 */
1610
1611void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1612{
1613 unsigned int x;
1614
1615 for (x = 0; x < num_gh; x++)
1616 gfs2_glock_dq_uninit(&ghs[x]);
1617}
1618
1619/**
1620 * gfs2_glock_prefetch_num - prefetch a glock based on lock number
1621 * @sdp: the filesystem
1622 * @number: the lock number
1623 * @glops: the glock operations for the type of glock
1624 * @state: the state to acquire the glock in
1625 * @flags: modifier flags for the aquisition
1626 *
1627 * Returns: errno
1628 */
1629
1630void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
1631 struct gfs2_glock_operations *glops,
1632 unsigned int state, int flags)
1633{
1634 struct gfs2_glock *gl;
1635 int error;
1636
1637 if (atomic_read(&sdp->sd_reclaim_count) <
1638 gfs2_tune_get(sdp, gt_reclaim_limit)) {
1639 error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1640 if (!error) {
1641 gfs2_glock_prefetch(gl, state, flags);
1642 gfs2_glock_put(gl);
1643 }
1644 }
1645}
1646
1647/**
1648 * gfs2_lvb_hold - attach a LVB from a glock
1649 * @gl: The glock in question
1650 *
1651 */
1652
1653int gfs2_lvb_hold(struct gfs2_glock *gl)
1654{
1655 int error;
1656
1657 gfs2_glmutex_lock(gl);
1658
1659 if (!atomic_read(&gl->gl_lvb_count)) {
1660 error = gfs2_lm_hold_lvb(gl->gl_sbd, gl->gl_lock, &gl->gl_lvb);
1661 if (error) {
1662 gfs2_glmutex_unlock(gl);
1663 return error;
1664 }
1665 gfs2_glock_hold(gl);
1666 }
1667 atomic_inc(&gl->gl_lvb_count);
1668
1669 gfs2_glmutex_unlock(gl);
1670
1671 return 0;
1672}
1673
1674/**
1675 * gfs2_lvb_unhold - detach a LVB from a glock
1676 * @gl: The glock in question
1677 *
1678 */
1679
1680void gfs2_lvb_unhold(struct gfs2_glock *gl)
1681{
1682 gfs2_glock_hold(gl);
1683 gfs2_glmutex_lock(gl);
1684
1685 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0);
1686 if (atomic_dec_and_test(&gl->gl_lvb_count)) {
1687 gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1688 gl->gl_lvb = NULL;
1689 gfs2_glock_put(gl);
1690 }
1691
1692 gfs2_glmutex_unlock(gl);
1693 gfs2_glock_put(gl);
1694}
1695
1696#if 0
/*
 * gfs2_lvb_sync - push a glock's LVB through gfs2_lm_sync_lvb()
 * @gl: the glock
 *
 * Compiled out by the surrounding "#if 0".  Under the glmutex it asserts
 * that the LVB is held and only syncs when the glock is held exclusively.
 */
1697void gfs2_lvb_sync(struct gfs2_glock *gl)
1698{
1699 gfs2_glmutex_lock(gl);
1700
1701 gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count));
1702 if (!gfs2_assert_warn(gl->gl_sbd, gfs2_glock_is_held_excl(gl)))
1703 gfs2_lm_sync_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb);
1704
1705 gfs2_glmutex_unlock(gl);
1706}
1707#endif /* 0 */
1708
1709static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
1710 unsigned int state)
1711{
1712 struct gfs2_glock *gl;
1713
1714 gl = gfs2_glock_find(sdp, name);
1715 if (!gl)
1716 return;
1717
1718 if (gl->gl_ops->go_callback)
1719 gl->gl_ops->go_callback(gl, state);
1720 handle_callback(gl, state);
1721
1722 spin_lock(&gl->gl_spin);
1723 run_queue(gl);
1724 spin_unlock(&gl->gl_spin);
1725
1726 gfs2_glock_put(gl);
1727}
1728
1729/**
1730 * gfs2_glock_cb - Callback used by locking module
1731 * @fsdata: Pointer to the superblock
1732 * @type: Type of callback
1733 * @data: Type dependent data pointer
1734 *
1735 * Called by the locking module when it wants to tell us something.
1736 * Either we need to drop a lock, one of our ASYNC requests completed, or
1737 * a journal from another client needs to be recovered.
1738 */
1739
1740void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data)
1741{
1742 struct gfs2_sbd *sdp = (struct gfs2_sbd *)fsdata;
1743
1744 switch (type) {
1745 case LM_CB_NEED_E:
1746 blocking_cb(sdp, data, LM_ST_UNLOCKED);
1747 return;
1748
1749 case LM_CB_NEED_D:
1750 blocking_cb(sdp, data, LM_ST_DEFERRED);
1751 return;
1752
1753 case LM_CB_NEED_S:
1754 blocking_cb(sdp, data, LM_ST_SHARED);
1755 return;
1756
1757 case LM_CB_ASYNC: {
1758 struct lm_async_cb *async = data;
1759 struct gfs2_glock *gl;
1760
1761 gl = gfs2_glock_find(sdp, &async->lc_name);
1762 if (gfs2_assert_warn(sdp, gl))
1763 return;
1764 if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
1765 gl->gl_req_bh(gl, async->lc_ret);
1766 gfs2_glock_put(gl);
1767 return;
1768 }
1769
1770 case LM_CB_NEED_RECOVERY:
1771 gfs2_jdesc_make_dirty(sdp, *(unsigned int *)data);
1772 if (sdp->sd_recoverd_process)
1773 wake_up_process(sdp->sd_recoverd_process);
1774 return;
1775
1776 case LM_CB_DROPLOCKS:
1777 gfs2_gl_hash_clear(sdp, NO_WAIT);
1778 gfs2_quota_scan(sdp);
1779 return;
1780
1781 default:
1782 gfs2_assert_warn(sdp, 0);
1783 return;
1784 }
1785}
1786
1787/**
1788 * gfs2_try_toss_inode - try to remove a particular inode struct from cache
1789 * sdp: the filesystem
1790 * inum: the inode number
1791 *
1792 */
1793
1794void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum)
1795{
1796 struct gfs2_glock *gl;
1797 struct gfs2_inode *ip;
1798 int error;
1799
1800 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops,
1801 NO_CREATE, &gl);
1802 if (error || !gl)
1803 return;
1804
1805 if (!gfs2_glmutex_trylock(gl))
1806 goto out;
1807
1808 ip = gl->gl_object;
1809 if (!ip)
1810 goto out_unlock;
1811
1812 if (atomic_read(&ip->i_count))
1813 goto out_unlock;
1814
1815 gfs2_inode_destroy(ip, 1);
1816
1817 out_unlock:
1818 gfs2_glmutex_unlock(gl);
1819
1820 out:
1821 gfs2_glock_put(gl);
1822}
1823
1824/**
1825 * gfs2_iopen_go_callback - Try to kick the inode/vnode associated with an
1826 * iopen glock from memory
1827 * @io_gl: the iopen glock
1828 * @state: the state into which the glock should be put
1829 *
1830 */
1831
1832void gfs2_iopen_go_callback(struct gfs2_glock *io_gl, unsigned int state)
1833{
1834 struct gfs2_glock *i_gl;
1835
1836 if (state != LM_ST_UNLOCKED)
1837 return;
1838
1839 spin_lock(&io_gl->gl_spin);
1840 i_gl = io_gl->gl_object;
1841 if (i_gl) {
1842 gfs2_glock_hold(i_gl);
1843 spin_unlock(&io_gl->gl_spin);
1844 } else {
1845 spin_unlock(&io_gl->gl_spin);
1846 return;
1847 }
1848
1849 if (gfs2_glmutex_trylock(i_gl)) {
1850 struct gfs2_inode *ip = i_gl->gl_object;
1851 if (ip) {
1852 gfs2_try_toss_vnode(ip);
1853 gfs2_glmutex_unlock(i_gl);
1854 gfs2_glock_schedule_for_reclaim(i_gl);
1855 goto out;
1856 }
1857 gfs2_glmutex_unlock(i_gl);
1858 }
1859
1860 out:
1861 gfs2_glock_put(i_gl);
1862}
1863
1864/**
1865 * demote_ok - Check to see if it's ok to unlock a glock
1866 * @gl: the glock
1867 *
1868 * Returns: 1 if it's ok
1869 */
1870
1871static int demote_ok(struct gfs2_glock *gl)
1872{
1873 struct gfs2_sbd *sdp = gl->gl_sbd;
1874 struct gfs2_glock_operations *glops = gl->gl_ops;
1875 int demote = 1;
1876
1877 if (test_bit(GLF_STICKY, &gl->gl_flags))
1878 demote = 0;
1879 else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
1880 demote = time_after_eq(jiffies,
1881 gl->gl_stamp +
1882 gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
1883 else if (glops->go_demote_ok)
1884 demote = glops->go_demote_ok(gl);
1885
1886 return demote;
1887}
1888
1889/**
1890 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
1891 * @gl: the glock
1892 *
 * A glock that is already on the reclaim list is left where it is.  Waiters
 * on sd_reclaim_wq are woken in either case.
1893 */
1894
1895void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
1896{
1897 struct gfs2_sbd *sdp = gl->gl_sbd;
1898
1899 spin_lock(&sdp->sd_reclaim_lock);
1900 if (list_empty(&gl->gl_reclaim)) {
 /* The list entry carries its own reference on the glock. */
1901 gfs2_glock_hold(gl);
1902 list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
1903 atomic_inc(&sdp->sd_reclaim_count);
1904 }
1905 spin_unlock(&sdp->sd_reclaim_lock);
1906
1907 wake_up(&sdp->sd_reclaim_wq);
1908}
1909
1910/**
1911 * gfs2_reclaim_glock - process the next glock on the filesystem's reclaim list
1912 * @sdp: the filesystem
1913 *
1914 * Called from gfs2_glockd() glock reclaim daemon, or when promoting a
1915 * different glock and we notice that there are a lot of glocks in the
1916 * reclaim list.
1917 *
1918 */
1919
1920void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
1921{
1922 struct gfs2_glock *gl;
1923
1924 spin_lock(&sdp->sd_reclaim_lock);
1925 if (list_empty(&sdp->sd_reclaim_list)) {
1926 spin_unlock(&sdp->sd_reclaim_lock);
1927 return;
1928 }
1929 gl = list_entry(sdp->sd_reclaim_list.next,
1930 struct gfs2_glock, gl_reclaim);
1931 list_del_init(&gl->gl_reclaim);
1932 spin_unlock(&sdp->sd_reclaim_lock);
1933
1934 atomic_dec(&sdp->sd_reclaim_count);
1935 atomic_inc(&sdp->sd_reclaimed);
1936
1937 if (gfs2_glmutex_trylock(gl)) {
1938 if (gl->gl_ops == &gfs2_inode_glops) {
1939 struct gfs2_inode *ip = gl->gl_object;
1940 if (ip && !atomic_read(&ip->i_count))
1941 gfs2_inode_destroy(ip, 1);
1942 }
1943 if (queue_empty(gl, &gl->gl_holders) &&
1944 gl->gl_state != LM_ST_UNLOCKED &&
1945 demote_ok(gl))
1946 handle_callback(gl, LM_ST_UNLOCKED);
1947 gfs2_glmutex_unlock(gl);
1948 }
1949
1950 gfs2_glock_put(gl);
1951}
1952
1953/**
1954 * examine_bucket - Call a function for glock in a hash bucket
1955 * @examiner: the function
1956 * @sdp: the filesystem
1957 * @bucket: the bucket
1958 *
1959 * Returns: 1 if the bucket has entries
1960 */
1961
1962static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
1963 struct gfs2_gl_hash_bucket *bucket)
1964{
1965 struct glock_plug plug;
1966 struct list_head *tmp;
1967 struct gfs2_glock *gl;
1968 int entries;
1969
1970 /* Add "plug" to end of bucket list, work back up list from there */
1971 memset(&plug.gl_flags, 0, sizeof(unsigned long));
1972 set_bit(GLF_PLUG, &plug.gl_flags);
1973
1974 write_lock(&bucket->hb_lock);
1975 list_add(&plug.gl_list, &bucket->hb_list);
1976 write_unlock(&bucket->hb_lock);
1977
1978 for (;;) {
1979 write_lock(&bucket->hb_lock);
1980
1981 for (;;) {
1982 tmp = plug.gl_list.next;
1983
1984 if (tmp == &bucket->hb_list) {
1985 list_del(&plug.gl_list);
1986 entries = !list_empty(&bucket->hb_list);
1987 write_unlock(&bucket->hb_lock);
1988 return entries;
1989 }
1990 gl = list_entry(tmp, struct gfs2_glock, gl_list);
1991
1992 /* Move plug up list */
1993 list_move(&plug.gl_list, &gl->gl_list);
1994
1995 if (test_bit(GLF_PLUG, &gl->gl_flags))
1996 continue;
1997
1998 /* examiner() must glock_put() */
1999 gfs2_glock_hold(gl);
2000
2001 break;
2002 }
2003
2004 write_unlock(&bucket->hb_lock);
2005
2006 examiner(gl);
2007 }
2008}
2009
2010/**
2011 * scan_glock - look at a glock and see if we can reclaim it
2012 * @gl: the glock to look at
2013 *
2014 */
2015
2016static void scan_glock(struct gfs2_glock *gl)
2017{
2018 if (gfs2_glmutex_trylock(gl)) {
2019 if (gl->gl_ops == &gfs2_inode_glops) {
2020 struct gfs2_inode *ip = gl->gl_object;
2021 if (ip && !atomic_read(&ip->i_count))
2022 goto out_schedule;
2023 }
2024 if (queue_empty(gl, &gl->gl_holders) &&
2025 gl->gl_state != LM_ST_UNLOCKED &&
2026 demote_ok(gl))
2027 goto out_schedule;
2028
2029 gfs2_glmutex_unlock(gl);
2030 }
2031
2032 gfs2_glock_put(gl);
2033
2034 return;
2035
2036 out_schedule:
2037 gfs2_glmutex_unlock(gl);
2038 gfs2_glock_schedule_for_reclaim(gl);
2039 gfs2_glock_put(gl);
2040}
2041
2042/**
2043 * gfs2_scand_internal - Look for glocks and inodes to toss from memory
2044 * @sdp: the filesystem
2045 *
 * Runs scan_glock() over every bucket of the glock hash table, offering to
 * reschedule after each bucket.
2046 */
2047
2048void gfs2_scand_internal(struct gfs2_sbd *sdp)
2049{
2050 unsigned int x;
2051
2052 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2053 examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
2054 cond_resched();
2055 }
2056}
2057
2058/**
2059 * clear_glock - look at a glock and see if we can free it from glock cache
2060 * @gl: the glock to look at
2061 *
2062 */
2063
2064static void clear_glock(struct gfs2_glock *gl)
2065{
2066 struct gfs2_sbd *sdp = gl->gl_sbd;
2067 int released;
2068
2069 spin_lock(&sdp->sd_reclaim_lock);
2070 if (!list_empty(&gl->gl_reclaim)) {
2071 list_del_init(&gl->gl_reclaim);
2072 atomic_dec(&sdp->sd_reclaim_count);
2073 spin_unlock(&sdp->sd_reclaim_lock);
2074 released = gfs2_glock_put(gl);
2075 gfs2_assert(sdp, !released);
2076 } else {
2077 spin_unlock(&sdp->sd_reclaim_lock);
2078 }
2079
2080 if (gfs2_glmutex_trylock(gl)) {
2081 if (gl->gl_ops == &gfs2_inode_glops) {
2082 struct gfs2_inode *ip = gl->gl_object;
2083 if (ip && !atomic_read(&ip->i_count))
2084 gfs2_inode_destroy(ip, 1);
2085 }
2086 if (queue_empty(gl, &gl->gl_holders) &&
2087 gl->gl_state != LM_ST_UNLOCKED)
2088 handle_callback(gl, LM_ST_UNLOCKED);
2089
2090 gfs2_glmutex_unlock(gl);
2091 }
2092
2093 gfs2_glock_put(gl);
2094}
2095
2096/**
2097 * gfs2_gl_hash_clear - Empty out the glock hash table
2098 * @sdp: the filesystem
2099 * @wait: wait until it's all gone
2100 *
2101 * Called when unmounting the filesystem, or when inter-node lock manager
2102 * requests DROPLOCKS because it is running out of capacity.
2103 */
2104
2105void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
2106{
2107 unsigned long t;
2108 unsigned int x;
2109 int cont;
2110
2111 t = jiffies;
2112
2113 for (;;) {
2114 cont = 0;
2115
2116 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
2117 if (examine_bucket(clear_glock, sdp,
2118 &sdp->sd_gl_hash[x]))
2119 cont = 1;
2120
2121 if (!wait || !cont)
2122 break;
2123
2124 if (time_after_eq(jiffies,
2125 t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
2126 fs_warn(sdp, "Unmount seems to be stalled. "
2127 "Dumping lock state...\n");
2128 gfs2_dump_lockstate(sdp);
2129 t = jiffies;
2130 }
2131
2132 /* invalidate_inodes() requires that the sb inodes list
2133 not change, but an async completion callback for an
2134 unlock can occur which does glock_put() which
2135 can call iput() which will change the sb inodes list.
2136 invalidate_inodes_mutex prevents glock_put()'s during
2137 an invalidate_inodes() */
2138
2139 mutex_lock(&sdp->sd_invalidate_inodes_mutex);
2140 invalidate_inodes(sdp->sd_vfs);
2141 mutex_unlock(&sdp->sd_invalidate_inodes_mutex);
2142 msleep(10);
2143 }
2144}
2145
2146/*
2147 * Diagnostic routines to help debug distributed deadlock
2148 */
2149
2150/**
2151 * dump_holder - print information about a glock holder
2152 * @str: a string naming the type of holder
2153 * @gh: the glock holder
2154 *
2155 * Returns: 0 on success, -ENOBUFS when we run out of space
2156 */
2157
2158static int dump_holder(char *str, struct gfs2_holder *gh)
2159{
2160 unsigned int x;
2161 int error = -ENOBUFS;
2162
2163 printk(KERN_INFO " %s\n", str);
2164 printk(KERN_INFO " owner = %ld\n",
2165 (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
2166 printk(KERN_INFO " gh_state = %u\n", gh->gh_state);
2167 printk(KERN_INFO " gh_flags =");
2168 for (x = 0; x < 32; x++)
2169 if (gh->gh_flags & (1 << x))
2170 printk(" %u", x);
2171 printk(" \n");
2172 printk(KERN_INFO " error = %d\n", gh->gh_error);
2173 printk(KERN_INFO " gh_iflags =");
2174 for (x = 0; x < 32; x++)
2175 if (test_bit(x, &gh->gh_iflags))
2176 printk(" %u", x);
2177 printk(" \n");
2178 print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip);
2179
2180 error = 0;
2181
2182 return error;
2183}
2184
2185/**
2186 * dump_inode - print information about an inode
2187 * @ip: the inode
2188 *
2189 * Returns: 0 on success, -ENOBUFS when we run out of space
2190 */
2191
2192static int dump_inode(struct gfs2_inode *ip)
2193{
2194 unsigned int x;
2195 int error = -ENOBUFS;
2196
2197 printk(KERN_INFO " Inode:\n");
2198 printk(KERN_INFO " num = %llu %llu\n",
2199 (unsigned long long)ip->i_num.no_formal_ino,
2200 (unsigned long long)ip->i_num.no_addr);
2201 printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode));
2202 printk(KERN_INFO " i_count = %d\n", atomic_read(&ip->i_count));
2203 printk(KERN_INFO " i_flags =");
2204 for (x = 0; x < 32; x++)
2205 if (test_bit(x, &ip->i_flags))
2206 printk(" %u", x);
2207 printk(" \n");
2208 printk(KERN_INFO " vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
2209
2210 error = 0;
2211
2212 return error;
2213}
2214
2215/**
2216 * dump_glock - print information about a glock
2217 * @gl: the glock
2218 * @count: where we are in the buffer
2219 *
2220 * Returns: 0 on success, -ENOBUFS when we run out of space
2221 */
2222
2223static int dump_glock(struct gfs2_glock *gl)
2224{
2225 struct gfs2_holder *gh;
2226 unsigned int x;
2227 int error = -ENOBUFS;
2228
2229 spin_lock(&gl->gl_spin);
2230
2231 printk(KERN_INFO "Glock (%u, %llu)\n", gl->gl_name.ln_type,
2232 (unsigned long long)gl->gl_name.ln_number);
2233 printk(KERN_INFO " gl_flags =");
2234 for (x = 0; x < 32; x++)
2235 if (test_bit(x, &gl->gl_flags))
2236 printk(" %u", x);
2237 printk(" \n");
2238 printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref.refcount));
2239 printk(KERN_INFO " gl_state = %u\n", gl->gl_state);
2240 printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm);
2241 print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip);
2242 printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
2243 printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
2244 printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
2245 printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no");
2246 printk(KERN_INFO " le = %s\n",
2247 (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
2248 printk(KERN_INFO " reclaim = %s\n",
2249 (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
2250 if (gl->gl_aspace)
2251 printk(KERN_INFO " aspace = %lu\n",
2252 gl->gl_aspace->i_mapping->nrpages);
2253 else
2254 printk(KERN_INFO " aspace = no\n");
2255 printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count));
2256 if (gl->gl_req_gh) {
2257 error = dump_holder("Request", gl->gl_req_gh);
2258 if (error)
2259 goto out;
2260 }
2261 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
2262 error = dump_holder("Holder", gh);
2263 if (error)
2264 goto out;
2265 }
2266 list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
2267 error = dump_holder("Waiter1", gh);
2268 if (error)
2269 goto out;
2270 }
2271 list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
2272 error = dump_holder("Waiter2", gh);
2273 if (error)
2274 goto out;
2275 }
2276 list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
2277 error = dump_holder("Waiter3", gh);
2278 if (error)
2279 goto out;
2280 }
2281 if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
2282 if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
2283 list_empty(&gl->gl_holders)) {
2284 error = dump_inode(gl->gl_object);
2285 if (error)
2286 goto out;
2287 } else {
2288 error = -ENOBUFS;
2289 printk(KERN_INFO " Inode: busy\n");
2290 }
2291 }
2292
2293 error = 0;
2294
2295 out:
2296 spin_unlock(&gl->gl_spin);
2297
2298 return error;
2299}
2300
2301/**
2302 * gfs2_dump_lockstate - print out the current lockstate
2303 * @sdp: the filesystem
2304 * @ub: the buffer to copy the information into
2305 *
2306 * If @ub is NULL, dump the lockstate to the console.
2307 *
2308 */
2309
2310static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
2311{
2312 struct gfs2_gl_hash_bucket *bucket;
2313 struct gfs2_glock *gl;
2314 unsigned int x;
2315 int error = 0;
2316
2317 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
2318 bucket = &sdp->sd_gl_hash[x];
2319
2320 read_lock(&bucket->hb_lock);
2321
2322 list_for_each_entry(gl, &bucket->hb_list, gl_list) {
2323 if (test_bit(GLF_PLUG, &gl->gl_flags))
2324 continue;
2325
2326 error = dump_glock(gl);
2327 if (error)
2328 break;
2329 }
2330
2331 read_unlock(&bucket->hb_lock);
2332
2333 if (error)
2334 break;
2335 }
2336
2337
2338 return error;
2339}
2340
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
new file mode 100644
index 000000000000..2e0a2ba92aa0
--- /dev/null
+++ b/fs/gfs2/glock.h
@@ -0,0 +1,155 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__
12
13/* Flags for lock requests; used in gfs2_holder gh_flag field.
14 From lm_interface.h:
15#define LM_FLAG_TRY 0x00000001
16#define LM_FLAG_TRY_1CB 0x00000002
17#define LM_FLAG_NOEXP 0x00000004
18#define LM_FLAG_ANY 0x00000008
19#define LM_FLAG_PRIORITY 0x00000010 */
20
21#define GL_LOCAL_EXCL 0x00000020
22#define GL_ASYNC 0x00000040
23#define GL_EXACT 0x00000080
24#define GL_SKIP 0x00000100
25#define GL_ATIME 0x00000200
26#define GL_NOCACHE 0x00000400
27#define GL_SYNC 0x00000800
28#define GL_NOCANCEL 0x00001000
29#define GL_AOP 0x00004000
30#define GL_DUMP 0x00008000
31
32#define GLR_TRYFAILED 13
33#define GLR_CANCELED 14
34
35static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
36{
37 struct gfs2_holder *gh;
38 int locked = 0;
39
40 /* Look in glock's list of holders for one with current task as owner */
41 spin_lock(&gl->gl_spin);
42 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
43 if (gh->gh_owner == current) {
44 locked = 1;
45 break;
46 }
47 }
48 spin_unlock(&gl->gl_spin);
49
50 return locked;
51}
52
/* Returns non-zero if the glock's current state is LM_ST_EXCLUSIVE. */
53static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
54{
55 return (gl->gl_state == LM_ST_EXCLUSIVE);
56}
57
/* Returns non-zero if the glock's current state is LM_ST_DEFERRED. */
58static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
59{
60 return (gl->gl_state == LM_ST_DEFERRED);
61}
62
/* Returns non-zero if the glock's current state is LM_ST_SHARED. */
63static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
64{
65 return (gl->gl_state == LM_ST_SHARED);
66}
67
68static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
69{
70 int ret;
71 spin_lock(&gl->gl_spin);
72 ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
73 spin_unlock(&gl->gl_spin);
74 return ret;
75}
76
77int gfs2_glock_get(struct gfs2_sbd *sdp,
78 uint64_t number, struct gfs2_glock_operations *glops,
79 int create, struct gfs2_glock **glp);
80void gfs2_glock_hold(struct gfs2_glock *gl);
81int gfs2_glock_put(struct gfs2_glock *gl);
82void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
83 struct gfs2_holder *gh);
84void gfs2_holder_reinit(unsigned int state, unsigned flags,
85 struct gfs2_holder *gh);
86void gfs2_holder_uninit(struct gfs2_holder *gh);
87
88void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
89void gfs2_glock_drop_th(struct gfs2_glock *gl);
90
91void gfs2_glmutex_lock(struct gfs2_glock *gl);
92void gfs2_glmutex_unlock(struct gfs2_glock *gl);
93
94int gfs2_glock_nq(struct gfs2_holder *gh);
95int gfs2_glock_poll(struct gfs2_holder *gh);
96int gfs2_glock_wait(struct gfs2_holder *gh);
97void gfs2_glock_dq(struct gfs2_holder *gh);
98
99int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
100
101void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
102int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
103 uint64_t number, struct gfs2_glock_operations *glops,
104 unsigned int state, int flags, struct gfs2_holder *gh);
105
106int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
107void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
108void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
109
110void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, uint64_t number,
111 struct gfs2_glock_operations *glops,
112 unsigned int state, int flags);
113
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * If enqueueing fails, the holder is uninitialized again before returning.
 *
 * Returns: 0, GLR_*, or errno
 */

static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
				     unsigned int state, int flags,
				     struct gfs2_holder *gh)
{
	int ret;

	gfs2_holder_init(gl, state, flags, gh);

	ret = gfs2_glock_nq(gh);
	if (!ret)
		return 0;

	gfs2_holder_uninit(gh);
	return ret;
}
138
139/* Lock Value Block functions */
140
141int gfs2_lvb_hold(struct gfs2_glock *gl);
142void gfs2_lvb_unhold(struct gfs2_glock *gl);
143
144void gfs2_glock_cb(lm_fsdata_t *fsdata, unsigned int type, void *data);
145
146void gfs2_try_toss_inode(struct gfs2_sbd *sdp, struct gfs2_inum *inum);
147void gfs2_iopen_go_callback(struct gfs2_glock *gl, unsigned int state);
148
149void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
150void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
151
152void gfs2_scand_internal(struct gfs2_sbd *sdp);
153void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
154
155#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
new file mode 100644
index 000000000000..e262f22f744e
--- /dev/null
+++ b/fs/gfs2/glops.c
@@ -0,0 +1,491 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16
17#include "gfs2.h"
18#include "lm_interface.h"
19#include "incore.h"
20#include "bmap.h"
21#include "glock.h"
22#include "glops.h"
23#include "inode.h"
24#include "log.h"
25#include "meta_io.h"
26#include "page.h"
27#include "recovery.h"
28#include "rgrp.h"
29#include "util.h"
30
31/**
32 * meta_go_sync - sync out the metadata for this glock
33 * @gl: the glock
34 * @flags: DIO_*
35 *
36 * Called when demoting or unlocking an EX glock. We must flush
37 * to disk all dirty buffers/pages relating to this glock, and must not
38 * not return to caller to demote/unlock the glock until I/O is complete.
39 */
40
41static void meta_go_sync(struct gfs2_glock *gl, int flags)
42{
43 if (!(flags & DIO_METADATA))
44 return;
45
46 if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
47 gfs2_log_flush(gl->gl_sbd, gl);
48 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
49 if (flags & DIO_RELEASE)
50 gfs2_ail_empty_gl(gl);
51 }
52
53 clear_bit(GLF_SYNC, &gl->gl_flags);
54}
55
56/**
57 * meta_go_inval - invalidate the metadata for this glock
58 * @gl: the glock
59 * @flags:
60 *
61 */
62
63static void meta_go_inval(struct gfs2_glock *gl, int flags)
64{
65 if (!(flags & DIO_METADATA))
66 return;
67
68 gfs2_meta_inval(gl);
69 gl->gl_vn++;
70}
71
72/**
73 * meta_go_demote_ok - Check to see if it's ok to unlock a glock
74 * @gl: the glock
75 *
76 * Returns: 1 if we have no cached data; ok to demote meta glock
77 */
78
79static int meta_go_demote_ok(struct gfs2_glock *gl)
80{
81 return !gl->gl_aspace->i_mapping->nrpages;
82}
83
84/**
85 * inode_go_xmote_th - promote/demote a glock
86 * @gl: the glock
87 * @state: the requested state
88 * @flags:
89 *
90 */
91
92static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
93 int flags)
94{
95 if (gl->gl_state != LM_ST_UNLOCKED)
96 gfs2_pte_inval(gl);
97 gfs2_glock_xmote_th(gl, state, flags);
98}
99
100/**
101 * inode_go_xmote_bh - After promoting/demoting a glock
102 * @gl: the glock
103 *
104 */
105
106static void inode_go_xmote_bh(struct gfs2_glock *gl)
107{
108 struct gfs2_holder *gh = gl->gl_req_gh;
109 struct buffer_head *bh;
110 int error;
111
112 if (gl->gl_state != LM_ST_UNLOCKED &&
113 (!gh || !(gh->gh_flags & GL_SKIP))) {
114 error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START,
115 &bh);
116 if (!error)
117 brelse(bh);
118 }
119}
120
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too
 * long) is being purged from our node's glock cache; we're dropping lock.
 */

static void inode_go_drop_th(struct gfs2_glock *gl)
{
	/* gfs2_pte_inval() must run before the lock is actually dropped. */
	gfs2_pte_inval(gl);

	gfs2_glock_drop_th(gl);
}
135
136/**
137 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
138 * @gl: the glock protecting the inode
139 * @flags:
140 *
141 */
142
143static void inode_go_sync(struct gfs2_glock *gl, int flags)
144{
145 int meta = (flags & DIO_METADATA);
146 int data = (flags & DIO_DATA);
147
148 if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
149 if (meta && data) {
150 gfs2_page_sync(gl, flags | DIO_START);
151 gfs2_log_flush(gl->gl_sbd, gl);
152 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
153 gfs2_page_sync(gl, flags | DIO_WAIT);
154 clear_bit(GLF_DIRTY, &gl->gl_flags);
155 } else if (meta) {
156 gfs2_log_flush(gl->gl_sbd, gl);
157 gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT);
158 } else if (data)
159 gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT);
160 if (flags & DIO_RELEASE)
161 gfs2_ail_empty_gl(gl);
162 }
163
164 clear_bit(GLF_SYNC, &gl->gl_flags);
165}
166
167/**
168 * inode_go_inval - prepare a inode glock to be released
169 * @gl: the glock
170 * @flags:
171 *
172 */
173
174static void inode_go_inval(struct gfs2_glock *gl, int flags)
175{
176 int meta = (flags & DIO_METADATA);
177 int data = (flags & DIO_DATA);
178
179 if (meta) {
180 gfs2_meta_inval(gl);
181 gl->gl_vn++;
182 }
183 if (data)
184 gfs2_page_inval(gl);
185}
186
187/**
188 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
189 * @gl: the glock
190 *
191 * Returns: 1 if it's ok
192 */
193
194static int inode_go_demote_ok(struct gfs2_glock *gl)
195{
196 struct gfs2_sbd *sdp = gl->gl_sbd;
197 int demote = 0;
198
199 if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
200 demote = 1;
201 else if (!sdp->sd_args.ar_localcaching &&
202 time_after_eq(jiffies, gl->gl_stamp +
203 gfs2_tune_get(sdp, gt_demote_secs) * HZ))
204 demote = 1;
205
206 return demote;
207}
208
209/**
210 * inode_go_lock - operation done after an inode lock is locked by a process
211 * @gl: the glock
212 * @flags:
213 *
214 * Returns: errno
215 */
216
217static int inode_go_lock(struct gfs2_holder *gh)
218{
219 struct gfs2_glock *gl = gh->gh_gl;
220 struct gfs2_inode *ip = gl->gl_object;
221 int error = 0;
222
223 if (!ip)
224 return 0;
225
226 if (ip->i_vn != gl->gl_vn) {
227 error = gfs2_inode_refresh(ip);
228 if (error)
229 return error;
230 gfs2_inode_attr_in(ip);
231 }
232
233 if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
234 (gl->gl_state == LM_ST_EXCLUSIVE) &&
235 (gh->gh_flags & GL_LOCAL_EXCL))
236 error = gfs2_truncatei_resume(ip);
237
238 return error;
239}
240
241/**
242 * inode_go_unlock - operation done before an inode lock is unlocked by a
243 * process
244 * @gl: the glock
245 * @flags:
246 *
247 */
248
249static void inode_go_unlock(struct gfs2_holder *gh)
250{
251 struct gfs2_glock *gl = gh->gh_gl;
252 struct gfs2_inode *ip = gl->gl_object;
253
254 if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
255 gfs2_inode_attr_in(ip);
256
257 if (ip)
258 gfs2_meta_cache_flush(ip);
259}
260
261/**
262 * inode_greedy -
263 * @gl: the glock
264 *
265 */
266
267static void inode_greedy(struct gfs2_glock *gl)
268{
269 struct gfs2_sbd *sdp = gl->gl_sbd;
270 struct gfs2_inode *ip = gl->gl_object;
271 unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
272 unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
273 unsigned int new_time;
274
275 spin_lock(&ip->i_spin);
276
277 if (time_after(ip->i_last_pfault + quantum, jiffies)) {
278 new_time = ip->i_greedy + quantum;
279 if (new_time > max)
280 new_time = max;
281 } else {
282 new_time = ip->i_greedy - quantum;
283 if (!new_time || new_time > max)
284 new_time = 1;
285 }
286
287 ip->i_greedy = new_time;
288
289 spin_unlock(&ip->i_spin);
290
291 gfs2_inode_put(ip);
292}
293
294/**
295 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
296 * @gl: the glock
297 *
298 * Returns: 1 if it's ok
299 */
300
301static int rgrp_go_demote_ok(struct gfs2_glock *gl)
302{
303 return !gl->gl_aspace->i_mapping->nrpages;
304}
305
306/**
307 * rgrp_go_lock - operation done after an rgrp lock is locked by
308 * a first holder on this node.
309 * @gl: the glock
310 * @flags:
311 *
312 * Returns: errno
313 */
314
315static int rgrp_go_lock(struct gfs2_holder *gh)
316{
317 return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
318}
319
320/**
321 * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
322 * a last holder on this node.
323 * @gl: the glock
324 * @flags:
325 *
326 */
327
328static void rgrp_go_unlock(struct gfs2_holder *gh)
329{
330 gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
331}
332
333/**
334 * trans_go_xmote_th - promote/demote the transaction glock
335 * @gl: the glock
336 * @state: the requested state
337 * @flags:
338 *
339 */
340
341static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
342 int flags)
343{
344 struct gfs2_sbd *sdp = gl->gl_sbd;
345
346 if (gl->gl_state != LM_ST_UNLOCKED &&
347 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
348 gfs2_meta_syncfs(sdp);
349 gfs2_log_shutdown(sdp);
350 }
351
352 gfs2_glock_xmote_th(gl, state, flags);
353}
354
355/**
356 * trans_go_xmote_bh - After promoting/demoting the transaction glock
357 * @gl: the glock
358 *
359 */
360
361static void trans_go_xmote_bh(struct gfs2_glock *gl)
362{
363 struct gfs2_sbd *sdp = gl->gl_sbd;
364 struct gfs2_inode *ip = sdp->sd_jdesc->jd_inode->u.generic_ip;
365 struct gfs2_glock *j_gl = ip->i_gl;
366 struct gfs2_log_header head;
367 int error;
368
369 if (gl->gl_state != LM_ST_UNLOCKED &&
370 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
371 gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode->u.generic_ip);
372 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
373
374 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
375 if (error)
376 gfs2_consist(sdp);
377 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
378 gfs2_consist(sdp);
379
380 /* Initialize some head of the log stuff */
381 if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
382 sdp->sd_log_sequence = head.lh_sequence + 1;
383 gfs2_log_pointers_init(sdp, head.lh_blkno);
384 }
385 }
386}
387
388/**
389 * trans_go_drop_th - unlock the transaction glock
390 * @gl: the glock
391 *
392 * We want to sync the device even with localcaching. Remember
393 * that localcaching journal replay only marks buffers dirty.
394 */
395
396static void trans_go_drop_th(struct gfs2_glock *gl)
397{
398 struct gfs2_sbd *sdp = gl->gl_sbd;
399
400 if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
401 gfs2_meta_syncfs(sdp);
402 gfs2_log_shutdown(sdp);
403 }
404
405 gfs2_glock_drop_th(gl);
406}
407
408/**
409 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
410 * @gl: the glock
411 *
412 * Returns: 1 if it's ok
413 */
414
415static int quota_go_demote_ok(struct gfs2_glock *gl)
416{
417 return !atomic_read(&gl->gl_lvb_count);
418}
419
420struct gfs2_glock_operations gfs2_meta_glops = {
421 .go_xmote_th = gfs2_glock_xmote_th,
422 .go_drop_th = gfs2_glock_drop_th,
423 .go_sync = meta_go_sync,
424 .go_inval = meta_go_inval,
425 .go_demote_ok = meta_go_demote_ok,
426 .go_type = LM_TYPE_META
427};
428
429struct gfs2_glock_operations gfs2_inode_glops = {
430 .go_xmote_th = inode_go_xmote_th,
431 .go_xmote_bh = inode_go_xmote_bh,
432 .go_drop_th = inode_go_drop_th,
433 .go_sync = inode_go_sync,
434 .go_inval = inode_go_inval,
435 .go_demote_ok = inode_go_demote_ok,
436 .go_lock = inode_go_lock,
437 .go_unlock = inode_go_unlock,
438 .go_greedy = inode_greedy,
439 .go_type = LM_TYPE_INODE
440};
441
442struct gfs2_glock_operations gfs2_rgrp_glops = {
443 .go_xmote_th = gfs2_glock_xmote_th,
444 .go_drop_th = gfs2_glock_drop_th,
445 .go_sync = meta_go_sync,
446 .go_inval = meta_go_inval,
447 .go_demote_ok = rgrp_go_demote_ok,
448 .go_lock = rgrp_go_lock,
449 .go_unlock = rgrp_go_unlock,
450 .go_type = LM_TYPE_RGRP
451};
452
453struct gfs2_glock_operations gfs2_trans_glops = {
454 .go_xmote_th = trans_go_xmote_th,
455 .go_xmote_bh = trans_go_xmote_bh,
456 .go_drop_th = trans_go_drop_th,
457 .go_type = LM_TYPE_NONDISK
458};
459
460struct gfs2_glock_operations gfs2_iopen_glops = {
461 .go_xmote_th = gfs2_glock_xmote_th,
462 .go_drop_th = gfs2_glock_drop_th,
463 .go_callback = gfs2_iopen_go_callback,
464 .go_type = LM_TYPE_IOPEN
465};
466
467struct gfs2_glock_operations gfs2_flock_glops = {
468 .go_xmote_th = gfs2_glock_xmote_th,
469 .go_drop_th = gfs2_glock_drop_th,
470 .go_type = LM_TYPE_FLOCK
471};
472
473struct gfs2_glock_operations gfs2_nondisk_glops = {
474 .go_xmote_th = gfs2_glock_xmote_th,
475 .go_drop_th = gfs2_glock_drop_th,
476 .go_type = LM_TYPE_NONDISK
477};
478
479struct gfs2_glock_operations gfs2_quota_glops = {
480 .go_xmote_th = gfs2_glock_xmote_th,
481 .go_drop_th = gfs2_glock_drop_th,
482 .go_demote_ok = quota_go_demote_ok,
483 .go_type = LM_TYPE_QUOTA
484};
485
486struct gfs2_glock_operations gfs2_journal_glops = {
487 .go_xmote_th = gfs2_glock_xmote_th,
488 .go_drop_th = gfs2_glock_drop_th,
489 .go_type = LM_TYPE_JOURNAL
490};
491
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
new file mode 100644
index 000000000000..5c1e9491024f
--- /dev/null
+++ b/fs/gfs2/glops.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __GLOPS_DOT_H__
11#define __GLOPS_DOT_H__
12
13extern struct gfs2_glock_operations gfs2_meta_glops;
14extern struct gfs2_glock_operations gfs2_inode_glops;
15extern struct gfs2_glock_operations gfs2_rgrp_glops;
16extern struct gfs2_glock_operations gfs2_trans_glops;
17extern struct gfs2_glock_operations gfs2_iopen_glops;
18extern struct gfs2_glock_operations gfs2_flock_glops;
19extern struct gfs2_glock_operations gfs2_nondisk_glops;
20extern struct gfs2_glock_operations gfs2_quota_glops;
21extern struct gfs2_glock_operations gfs2_journal_glops;
22
23#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
new file mode 100644
index 000000000000..92091d006a02
--- /dev/null
+++ b/fs/gfs2/incore.h
@@ -0,0 +1,687 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INCORE_DOT_H__
11#define __INCORE_DOT_H__
12
13#define DIO_FORCE 0x00000001
14#define DIO_CLEAN 0x00000002
15#define DIO_DIRTY 0x00000004
16#define DIO_START 0x00000008
17#define DIO_WAIT 0x00000010
18#define DIO_METADATA 0x00000020
19#define DIO_DATA 0x00000040
20#define DIO_RELEASE 0x00000080
21#define DIO_ALL 0x00000100
22
23struct gfs2_log_operations;
24struct gfs2_log_element;
25struct gfs2_bitmap;
26struct gfs2_rgrpd;
27struct gfs2_bufdata;
28struct gfs2_glock_operations;
29struct gfs2_holder;
30struct gfs2_glock;
31struct gfs2_alloc;
32struct gfs2_inode;
33struct gfs2_file;
34struct gfs2_revoke;
35struct gfs2_revoke_replay;
36struct gfs2_unlinked;
37struct gfs2_quota_data;
38struct gfs2_log_buf;
39struct gfs2_trans;
40struct gfs2_ail;
41struct gfs2_jdesc;
42struct gfs2_args;
43struct gfs2_tune;
44struct gfs2_gl_hash_bucket;
45struct gfs2_sbd;
46
47typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
48
49/*
50 * Structure of operations that are associated with each
51 * type of element in the log.
52 */
53
54struct gfs2_log_operations {
55 void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
56 void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
57 void (*lo_before_commit) (struct gfs2_sbd *sdp);
58 void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
59 void (*lo_before_scan) (struct gfs2_jdesc *jd,
60 struct gfs2_log_header *head, int pass);
61 int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
62 struct gfs2_log_descriptor *ld, __be64 *ptr,
63 int pass);
64 void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
65 const char *lo_name;
66};
67
68struct gfs2_log_element {
69 struct list_head le_list;
70 const struct gfs2_log_operations *le_ops;
71};
72
73struct gfs2_bitmap {
74 struct buffer_head *bi_bh;
75 char *bi_clone;
76 uint32_t bi_offset;
77 uint32_t bi_start;
78 uint32_t bi_len;
79};
80
81struct gfs2_rgrpd {
82 struct list_head rd_list; /* Link with superblock */
83 struct list_head rd_list_mru;
84 struct list_head rd_recent; /* Recently used rgrps */
85 struct gfs2_glock *rd_gl; /* Glock for this rgrp */
86 struct gfs2_rindex rd_ri;
87 struct gfs2_rgrp rd_rg;
88 uint64_t rd_rg_vn;
89 struct gfs2_bitmap *rd_bits;
90 unsigned int rd_bh_count;
91 struct mutex rd_mutex;
92 uint32_t rd_free_clone;
93 struct gfs2_log_element rd_le;
94 uint32_t rd_last_alloc_data;
95 uint32_t rd_last_alloc_meta;
96 struct gfs2_sbd *rd_sbd;
97};
98
99enum gfs2_state_bits {
100 BH_Pinned = BH_PrivateStart,
101 BH_Escaped = BH_PrivateStart + 1,
102};
103
104BUFFER_FNS(Pinned, pinned)
105TAS_BUFFER_FNS(Pinned, pinned)
106BUFFER_FNS(Escaped, escaped)
107TAS_BUFFER_FNS(Escaped, escaped)
108
109struct gfs2_bufdata {
110 struct buffer_head *bd_bh;
111 struct gfs2_glock *bd_gl;
112
113 struct list_head bd_list_tr;
114 struct gfs2_log_element bd_le;
115
116 struct gfs2_ail *bd_ail;
117 struct list_head bd_ail_st_list;
118 struct list_head bd_ail_gl_list;
119};
120
/*
 * Per-glock-type operations.  A table may leave hooks unset; see the
 * gfs2_*_glops tables in glops.c for the combinations in use.
 */
struct gfs2_glock_operations {
	/* State change: "top half" (th) and "bottom half" (bh) hooks */
	void (*go_xmote_th)(struct gfs2_glock *gl, unsigned int state,
			    int flags);
	void (*go_xmote_bh)(struct gfs2_glock *gl);
	void (*go_drop_th)(struct gfs2_glock *gl);
	void (*go_drop_bh)(struct gfs2_glock *gl);

	/* Cache maintenance; flags are DIO_* */
	void (*go_sync)(struct gfs2_glock *gl, int flags);
	void (*go_inval)(struct gfs2_glock *gl, int flags);
	int (*go_demote_ok)(struct gfs2_glock *gl);

	/* Per-holder hooks */
	int (*go_lock)(struct gfs2_holder *gh);
	void (*go_unlock)(struct gfs2_holder *gh);

	void (*go_callback)(struct gfs2_glock *gl, unsigned int state);
	void (*go_greedy)(struct gfs2_glock *gl);

	int go_type;	/* LM_TYPE_* */
};
136
137enum {
138 /* Actions */
139 HIF_MUTEX = 0,
140 HIF_PROMOTE = 1,
141 HIF_DEMOTE = 2,
142 HIF_GREEDY = 3,
143
144 /* States */
145 HIF_ALLOCED = 4,
146 HIF_DEALLOC = 5,
147 HIF_HOLDER = 6,
148 HIF_FIRST = 7,
149 HIF_ABORTED = 9,
150};
151
152struct gfs2_holder {
153 struct list_head gh_list;
154
155 struct gfs2_glock *gh_gl;
156 struct task_struct *gh_owner;
157 unsigned int gh_state;
158 unsigned gh_flags;
159
160 int gh_error;
161 unsigned long gh_iflags;
162 struct completion gh_wait;
163 unsigned long gh_ip;
164};
165
166enum {
167 GLF_PLUG = 0,
168 GLF_LOCK = 1,
169 GLF_STICKY = 2,
170 GLF_PREFETCH = 3,
171 GLF_SYNC = 4,
172 GLF_DIRTY = 5,
173 GLF_SKIP_WAITERS2 = 6,
174 GLF_GREEDY = 7,
175};
176
177struct gfs2_glock {
178 struct list_head gl_list;
179 unsigned long gl_flags; /* GLF_... */
180 struct lm_lockname gl_name;
181 struct kref gl_ref;
182
183 spinlock_t gl_spin;
184
185 unsigned int gl_state;
186 struct task_struct *gl_owner;
187 unsigned long gl_ip;
188 struct list_head gl_holders;
189 struct list_head gl_waiters1; /* HIF_MUTEX */
190 struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
191 struct list_head gl_waiters3; /* HIF_PROMOTE */
192
193 struct gfs2_glock_operations *gl_ops;
194
195 struct gfs2_holder *gl_req_gh;
196 gfs2_glop_bh_t gl_req_bh;
197
198 lm_lock_t *gl_lock;
199 char *gl_lvb;
200 atomic_t gl_lvb_count;
201
202 uint64_t gl_vn;
203 unsigned long gl_stamp;
204 void *gl_object;
205
206 struct gfs2_gl_hash_bucket *gl_bucket;
207 struct list_head gl_reclaim;
208
209 struct gfs2_sbd *gl_sbd;
210
211 struct inode *gl_aspace;
212 struct gfs2_log_element gl_le;
213 struct list_head gl_ail_list;
214 atomic_t gl_ail_count;
215};
216
217struct gfs2_alloc {
218 /* Quota stuff */
219
220 unsigned int al_qd_num;
221 struct gfs2_quota_data *al_qd[4];
222 struct gfs2_holder al_qd_ghs[4];
223
224 /* Filled in by the caller to gfs2_inplace_reserve() */
225
226 uint32_t al_requested;
227
228 /* Filled in by gfs2_inplace_reserve() */
229
230 char *al_file;
231 unsigned int al_line;
232 struct gfs2_holder al_ri_gh;
233 struct gfs2_holder al_rgd_gh;
234 struct gfs2_rgrpd *al_rgd;
235
236 /* Filled in by gfs2_alloc_*() */
237
238 uint32_t al_alloced;
239};
240
241enum {
242 GIF_MIN_INIT = 0,
243 GIF_QD_LOCKED = 1,
244 GIF_PAGED = 2,
245 GIF_SW_PAGED = 3,
246};
247
248struct gfs2_inode {
249 struct inode i_inode;
250 struct gfs2_inum i_num;
251
252 atomic_t i_count;
253 unsigned long i_flags; /* GIF_... */
254
255 uint64_t i_vn;
256 struct gfs2_dinode i_di;
257
258 struct gfs2_glock *i_gl;
259 struct gfs2_sbd *i_sbd;
260 struct inode *i_vnode;
261
262 struct gfs2_holder i_iopen_gh;
263 struct gfs2_holder i_gh; /* for prepare/commit_write only */
264 struct gfs2_alloc i_alloc;
265 uint64_t i_last_rg_alloc;
266
267 spinlock_t i_spin;
268 struct rw_semaphore i_rw_mutex;
269
270 unsigned int i_greedy;
271 unsigned long i_last_pfault;
272
273 struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
274};
275
276static inline struct gfs2_inode *GFS2_I(struct inode *inode)
277{
278 return container_of(inode, struct gfs2_inode, i_inode);
279}
280
281enum {
282 GFF_DID_DIRECT_ALLOC = 0,
283};
284
285struct gfs2_file {
286 unsigned long f_flags; /* GFF_... */
287 struct mutex f_fl_mutex;
288 struct gfs2_holder f_fl_gh;
289};
290
291struct gfs2_revoke {
292 struct gfs2_log_element rv_le;
293 uint64_t rv_blkno;
294};
295
296struct gfs2_revoke_replay {
297 struct list_head rr_list;
298 uint64_t rr_blkno;
299 unsigned int rr_where;
300};
301
302enum {
303 ULF_LOCKED = 0,
304};
305
306struct gfs2_unlinked {
307 struct list_head ul_list;
308 unsigned int ul_count;
309 struct gfs2_unlinked_tag ul_ut;
310 unsigned long ul_flags; /* ULF_... */
311 unsigned int ul_slot;
312};
313
/* QDF_* bit numbers for qd_flags in struct gfs2_quota_data */
enum {
	QDF_USER	= 0,
	QDF_CHANGE	= 1,
	QDF_LOCKED	= 2,
};
319
320struct gfs2_quota_lvb {
321 uint32_t qb_magic;
322 uint32_t __pad;
323 uint64_t qb_limit; /* Hard limit of # blocks to alloc */
324 uint64_t qb_warn; /* Warn user when alloc is above this # */
325 int64_t qb_value; /* Current # blocks allocated */
326};
327
328struct gfs2_quota_data {
329 struct list_head qd_list;
330 unsigned int qd_count;
331
332 uint32_t qd_id;
333 unsigned long qd_flags; /* QDF_... */
334
335 int64_t qd_change;
336 int64_t qd_change_sync;
337
338 unsigned int qd_slot;
339 unsigned int qd_slot_count;
340
341 struct buffer_head *qd_bh;
342 struct gfs2_quota_change *qd_bh_qc;
343 unsigned int qd_bh_count;
344
345 struct gfs2_glock *qd_gl;
346 struct gfs2_quota_lvb qd_qb;
347
348 uint64_t qd_sync_gen;
349 unsigned long qd_last_warn;
350 unsigned long qd_last_touched;
351};
352
353struct gfs2_log_buf {
354 struct list_head lb_list;
355 struct buffer_head *lb_bh;
356 struct buffer_head *lb_real;
357};
358
359struct gfs2_trans {
360 unsigned long tr_ip;
361
362 unsigned int tr_blocks;
363 unsigned int tr_revokes;
364 unsigned int tr_reserved;
365
366 struct gfs2_holder tr_t_gh;
367
368 int tr_touched;
369
370 unsigned int tr_num_buf;
371 unsigned int tr_num_buf_new;
372 unsigned int tr_num_buf_rm;
373 struct list_head tr_list_buf;
374
375 unsigned int tr_num_revoke;
376 unsigned int tr_num_revoke_rm;
377};
378
379struct gfs2_ail {
380 struct list_head ai_list;
381
382 unsigned int ai_first;
383 struct list_head ai_ail1_list;
384 struct list_head ai_ail2_list;
385
386 uint64_t ai_sync_gen;
387};
388
389struct gfs2_jdesc {
390 struct list_head jd_list;
391
392 struct inode *jd_inode;
393 unsigned int jd_jid;
394 int jd_dirty;
395
396 unsigned int jd_blocks;
397};
398
399#define GFS2_GLOCKD_DEFAULT 1
400#define GFS2_GLOCKD_MAX 16
401
402#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF
403#define GFS2_QUOTA_OFF 0
404#define GFS2_QUOTA_ACCOUNT 1
405#define GFS2_QUOTA_ON 2
406
407#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
408#define GFS2_DATA_WRITEBACK 1
409#define GFS2_DATA_ORDERED 2
410
411struct gfs2_args {
412 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
413 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
414 char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
415 int ar_spectator; /* Don't get a journal because we're always RO */
416 int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
417 int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
418 int ar_localcaching; /* Local-style caching (dangerous on multihost) */
419 int ar_debug; /* Oops on errors instead of trying to be graceful */
420 int ar_upgrade; /* Upgrade ondisk/multihost format */
421 unsigned int ar_num_glockd; /* Number of glockd threads */
422 int ar_posix_acl; /* Enable posix acls */
423 int ar_quota; /* off/account/on */
424 int ar_suiddir; /* suiddir support */
425 int ar_data; /* ordered/writeback */
426};
427
428struct gfs2_tune {
429 spinlock_t gt_spin;
430
431 unsigned int gt_ilimit;
432 unsigned int gt_ilimit_tries;
433 unsigned int gt_ilimit_min;
434 unsigned int gt_demote_secs; /* Cache retention for unheld glock */
435 unsigned int gt_incore_log_blocks;
436 unsigned int gt_log_flush_secs;
437 unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
438
439 unsigned int gt_scand_secs;
440 unsigned int gt_recoverd_secs;
441 unsigned int gt_logd_secs;
442 unsigned int gt_quotad_secs;
443 unsigned int gt_inoded_secs;
444
445 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
446 unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
447 unsigned int gt_quota_scale_num; /* Numerator */
448 unsigned int gt_quota_scale_den; /* Denominator */
449 unsigned int gt_quota_cache_secs;
450 unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
451 unsigned int gt_atime_quantum; /* Min secs between atime updates */
452 unsigned int gt_new_files_jdata;
453 unsigned int gt_new_files_directio;
454 unsigned int gt_max_atomic_write; /* Split big writes into this size */
455 unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
456 unsigned int gt_lockdump_size;
457 unsigned int gt_stall_secs; /* Detects trouble! */
458 unsigned int gt_complain_secs;
459 unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
460 unsigned int gt_entries_per_readdir;
461 unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
462 unsigned int gt_greedy_default;
463 unsigned int gt_greedy_quantum;
464 unsigned int gt_greedy_max;
465 unsigned int gt_statfs_quantum;
466 unsigned int gt_statfs_slow;
467};
468
469struct gfs2_gl_hash_bucket {
470 rwlock_t hb_lock;
471 struct list_head hb_list;
472};
473
/* SDF_* bit numbers for sd_flags in struct gfs2_sbd */
enum {
	SDF_JOURNAL_CHECKED	= 0,
	SDF_JOURNAL_LIVE	= 1,
	SDF_SHUTDOWN		= 2,
	SDF_NOATIME		= 3,
};
480
481#define GFS2_GL_HASH_SHIFT 13
482#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
483#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
484#define GFS2_FSNAME_LEN 256
485
486struct gfs2_sbd {
487 struct super_block *sd_vfs;
488 struct kobject sd_kobj;
489 unsigned long sd_flags; /* SDF_... */
490 struct gfs2_sb sd_sb;
491
492 /* Constants computed on mount */
493
494 uint32_t sd_fsb2bb;
495 uint32_t sd_fsb2bb_shift;
496 uint32_t sd_diptrs; /* Number of pointers in a dinode */
497 uint32_t sd_inptrs; /* Number of pointers in a indirect block */
498 uint32_t sd_jbsize; /* Size of a journaled data block */
499 uint32_t sd_hash_bsize; /* sizeof(exhash block) */
500 uint32_t sd_hash_bsize_shift;
501 uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */
502 uint32_t sd_ut_per_block;
503 uint32_t sd_qc_per_block;
504 uint32_t sd_max_dirres; /* Max blocks needed to add a directory entry */
505 uint32_t sd_max_height; /* Max height of a file's metadata tree */
506 uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT];
507 uint32_t sd_max_jheight; /* Max height of journaled file's meta tree */
508 uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT];
509
510 struct gfs2_args sd_args; /* Mount arguments */
511 struct gfs2_tune sd_tune; /* Filesystem tuning structure */
512
513 /* Lock Stuff */
514
515 struct lm_lockstruct sd_lockstruct;
516 struct gfs2_gl_hash_bucket sd_gl_hash[GFS2_GL_HASH_SIZE];
517 struct list_head sd_reclaim_list;
518 spinlock_t sd_reclaim_lock;
519 wait_queue_head_t sd_reclaim_wq;
520 atomic_t sd_reclaim_count;
521 struct gfs2_holder sd_live_gh;
522 struct gfs2_glock *sd_rename_gl;
523 struct gfs2_glock *sd_trans_gl;
524 struct mutex sd_invalidate_inodes_mutex;
525
526 /* Inode Stuff */
527
528 struct inode *sd_master_dir;
529 struct inode *sd_jindex;
530 struct inode *sd_inum_inode;
531 struct inode *sd_statfs_inode;
532 struct inode *sd_ir_inode;
533 struct inode *sd_sc_inode;
534 struct inode *sd_ut_inode;
535 struct inode *sd_qc_inode;
536 struct inode *sd_rindex;
537 struct inode *sd_quota_inode;
538
539 /* Inum stuff */
540
541 struct mutex sd_inum_mutex;
542
543 /* StatFS stuff */
544
545 spinlock_t sd_statfs_spin;
546 struct mutex sd_statfs_mutex;
547 struct gfs2_statfs_change sd_statfs_master;
548 struct gfs2_statfs_change sd_statfs_local;
549 unsigned long sd_statfs_sync_time;
550
551 /* Resource group stuff */
552
553 uint64_t sd_rindex_vn;
554 spinlock_t sd_rindex_spin;
555 struct mutex sd_rindex_mutex;
556 struct list_head sd_rindex_list;
557 struct list_head sd_rindex_mru_list;
558 struct list_head sd_rindex_recent_list;
559 struct gfs2_rgrpd *sd_rindex_forward;
560 unsigned int sd_rgrps;
561
562 /* Journal index stuff */
563
564 struct list_head sd_jindex_list;
565 spinlock_t sd_jindex_spin;
566 struct mutex sd_jindex_mutex;
567 unsigned int sd_journals;
568 unsigned long sd_jindex_refresh_time;
569
570 struct gfs2_jdesc *sd_jdesc;
571 struct gfs2_holder sd_journal_gh;
572 struct gfs2_holder sd_jinode_gh;
573
574 struct gfs2_holder sd_ir_gh;
575 struct gfs2_holder sd_sc_gh;
576 struct gfs2_holder sd_ut_gh;
577 struct gfs2_holder sd_qc_gh;
578
579 /* Daemon stuff */
580
581 struct task_struct *sd_scand_process;
582 struct task_struct *sd_recoverd_process;
583 struct task_struct *sd_logd_process;
584 struct task_struct *sd_quotad_process;
585 struct task_struct *sd_inoded_process;
586 struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
587 unsigned int sd_glockd_num;
588
589 /* Unlinked inode stuff */
590
591 struct list_head sd_unlinked_list;
592 atomic_t sd_unlinked_count;
593 spinlock_t sd_unlinked_spin;
594 struct mutex sd_unlinked_mutex;
595
596 unsigned int sd_unlinked_slots;
597 unsigned int sd_unlinked_chunks;
598 unsigned char **sd_unlinked_bitmap;
599
600 /* Quota stuff */
601
602 struct list_head sd_quota_list;
603 atomic_t sd_quota_count;
604 spinlock_t sd_quota_spin;
605 struct mutex sd_quota_mutex;
606
607 unsigned int sd_quota_slots;
608 unsigned int sd_quota_chunks;
609 unsigned char **sd_quota_bitmap;
610
611 uint64_t sd_quota_sync_gen;
612 unsigned long sd_quota_sync_time;
613
614 /* Log stuff */
615
616 spinlock_t sd_log_lock;
617
618 unsigned int sd_log_blks_reserved;
619 unsigned int sd_log_commited_buf;
620 unsigned int sd_log_commited_revoke;
621
622 unsigned int sd_log_num_gl;
623 unsigned int sd_log_num_buf;
624 unsigned int sd_log_num_revoke;
625 unsigned int sd_log_num_rg;
626 unsigned int sd_log_num_databuf;
627 unsigned int sd_log_num_jdata;
628 unsigned int sd_log_num_hdrs;
629
630 struct list_head sd_log_le_gl;
631 struct list_head sd_log_le_buf;
632 struct list_head sd_log_le_revoke;
633 struct list_head sd_log_le_rg;
634 struct list_head sd_log_le_databuf;
635
636 unsigned int sd_log_blks_free;
637 struct mutex sd_log_reserve_mutex;
638
639 uint64_t sd_log_sequence;
640 unsigned int sd_log_head;
641 unsigned int sd_log_tail;
642 int sd_log_idle;
643
644 unsigned long sd_log_flush_time;
645 struct rw_semaphore sd_log_flush_lock;
646 struct list_head sd_log_flush_list;
647
648 unsigned int sd_log_flush_head;
649 uint64_t sd_log_flush_wrapped;
650
651 struct list_head sd_ail1_list;
652 struct list_head sd_ail2_list;
653 uint64_t sd_ail_sync_gen;
654
655 /* Replay stuff */
656
657 struct list_head sd_revoke_list;
658 unsigned int sd_replay_tail;
659
660 unsigned int sd_found_blocks;
661 unsigned int sd_found_revokes;
662 unsigned int sd_replayed_blocks;
663
664 /* For quiescing the filesystem */
665
666 struct gfs2_holder sd_freeze_gh;
667 struct mutex sd_freeze_lock;
668 unsigned int sd_freeze_count;
669
670 /* Counters */
671
672 atomic_t sd_glock_count;
673 atomic_t sd_glock_held_count;
674 atomic_t sd_inode_count;
675 atomic_t sd_reclaimed;
676
677 char sd_fsname[GFS2_FSNAME_LEN];
678 char sd_table_name[GFS2_FSNAME_LEN];
679 char sd_proto_name[GFS2_FSNAME_LEN];
680
681 /* Debugging crud */
682
683 unsigned long sd_last_warning;
684};
685
686#endif /* __INCORE_DOT_H__ */
687
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
new file mode 100644
index 000000000000..c2c7d2b63a57
--- /dev/null
+++ b/fs/gfs2/inode.c
@@ -0,0 +1,1820 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "acl.h"
24#include "bmap.h"
25#include "dir.h"
26#include "eattr.h"
27#include "glock.h"
28#include "glops.h"
29#include "inode.h"
30#include "log.h"
31#include "meta_io.h"
32#include "ops_address.h"
33#include "ops_file.h"
34#include "ops_inode.h"
35#include "quota.h"
36#include "rgrp.h"
37#include "trans.h"
38#include "unlinked.h"
39#include "util.h"
40
41/**
42 * inode_attr_in - Copy attributes from the dinode into the VFS inode
43 * @ip: The GFS2 inode (with embedded disk inode data)
44 * @inode: The Linux VFS inode
45 *
46 */
47
48static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode)
49{
50 inode->i_ino = ip->i_num.no_formal_ino;
51
52 switch (ip->i_di.di_mode & S_IFMT) {
53 case S_IFBLK:
54 case S_IFCHR:
55 inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
56 break;
57 default:
58 inode->i_rdev = 0;
59 break;
60 };
61
62 inode->i_mode = ip->i_di.di_mode;
63 inode->i_nlink = ip->i_di.di_nlink;
64 inode->i_uid = ip->i_di.di_uid;
65 inode->i_gid = ip->i_di.di_gid;
66 i_size_write(inode, ip->i_di.di_size);
67 inode->i_atime.tv_sec = ip->i_di.di_atime;
68 inode->i_mtime.tv_sec = ip->i_di.di_mtime;
69 inode->i_ctime.tv_sec = ip->i_di.di_ctime;
70 inode->i_atime.tv_nsec = 0;
71 inode->i_mtime.tv_nsec = 0;
72 inode->i_ctime.tv_nsec = 0;
73 inode->i_blksize = PAGE_SIZE;
74 inode->i_blocks = ip->i_di.di_blocks <<
75 (ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
76
77 if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE)
78 inode->i_flags |= S_IMMUTABLE;
79 else
80 inode->i_flags &= ~S_IMMUTABLE;
81
82 if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY)
83 inode->i_flags |= S_APPEND;
84 else
85 inode->i_flags &= ~S_APPEND;
86}
87
/**
 * gfs2_inode_attr_in - Refresh the VFS inode, if any, from the dinode
 * @ip: The GFS2 inode (with embedded disk inode data)
 *
 * Does nothing when gfs2_ip2v_lookup() finds no VFS inode for @ip.
 * The reference obtained by the lookup is dropped before returning.
 */

void gfs2_inode_attr_in(struct gfs2_inode *ip)
{
	struct inode *vnode = gfs2_ip2v_lookup(ip);

	if (!vnode)
		return;

	inode_attr_in(ip, vnode);
	iput(vnode);
}
104
105/**
106 * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
107 * @ip: The GFS2 inode
108 *
109 * Only copy out the attributes that we want the VFS layer
110 * to be able to modify.
111 */
112
113void gfs2_inode_attr_out(struct gfs2_inode *ip)
114{
115 struct inode *inode = ip->i_vnode;
116
117 gfs2_assert_withdraw(ip->i_sbd,
118 (ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
119 ip->i_di.di_mode = inode->i_mode;
120 ip->i_di.di_uid = inode->i_uid;
121 ip->i_di.di_gid = inode->i_gid;
122 ip->i_di.di_atime = inode->i_atime.tv_sec;
123 ip->i_di.di_mtime = inode->i_mtime.tv_sec;
124 ip->i_di.di_ctime = inode->i_ctime.tv_sec;
125}
126
127/**
128 * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode
129 * @ip: the struct gfs2_inode to get the struct inode for
130 *
131 * Returns: A VFS inode, or NULL if none
132 */
133
134struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip)
135{
136 struct inode *inode = NULL;
137
138 gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags));
139
140 spin_lock(&ip->i_spin);
141 if (ip->i_vnode)
142 inode = igrab(ip->i_vnode);
143 spin_unlock(&ip->i_spin);
144
145 return inode;
146}
147
148/**
149 * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode
150 * @ip: the struct gfs2_inode to get the struct inode for
151 *
152 * Returns: A VFS inode, or NULL if no mem
153 */
154
155struct inode *gfs2_ip2v(struct gfs2_inode *ip)
156{
157 struct inode *inode, *tmp;
158
159 inode = gfs2_ip2v_lookup(ip);
160 if (inode)
161 return inode;
162
163 tmp = new_inode(ip->i_sbd->sd_vfs);
164 if (!tmp)
165 return NULL;
166
167 inode_attr_in(ip, tmp);
168
169 if (S_ISREG(ip->i_di.di_mode)) {
170 tmp->i_op = &gfs2_file_iops;
171 tmp->i_fop = &gfs2_file_fops;
172 tmp->i_mapping->a_ops = &gfs2_file_aops;
173 } else if (S_ISDIR(ip->i_di.di_mode)) {
174 tmp->i_op = &gfs2_dir_iops;
175 tmp->i_fop = &gfs2_dir_fops;
176 } else if (S_ISLNK(ip->i_di.di_mode)) {
177 tmp->i_op = &gfs2_symlink_iops;
178 } else {
179 tmp->i_op = &gfs2_dev_iops;
180 init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
181 }
182
183 tmp->u.generic_ip = NULL;
184
185 for (;;) {
186 spin_lock(&ip->i_spin);
187 if (!ip->i_vnode)
188 break;
189 inode = igrab(ip->i_vnode);
190 spin_unlock(&ip->i_spin);
191
192 if (inode) {
193 iput(tmp);
194 return inode;
195 }
196 yield();
197 }
198
199 inode = tmp;
200
201 gfs2_inode_hold(ip);
202 ip->i_vnode = inode;
203 inode->u.generic_ip = ip;
204
205 spin_unlock(&ip->i_spin);
206
207 insert_inode_hash(inode);
208
209 return inode;
210}
211
212static int iget_test(struct inode *inode, void *opaque)
213{
214 struct gfs2_inode *ip = inode->u.generic_ip;
215 struct gfs2_inum *inum = (struct gfs2_inum *)opaque;
216
217 if (ip && ip->i_num.no_addr == inum->no_addr)
218 return 1;
219
220 return 0;
221}
222
223struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
224{
225 return ilookup5(sb, (unsigned long)inum->no_formal_ino,
226 iget_test, inum);
227}
228
229void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type)
230{
231 if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) {
232 ip->i_di.di_nlink = 1;
233 ip->i_di.di_mode = DT2IF(type);
234 }
235}
236
237/**
238 * gfs2_inode_refresh - Refresh the incore copy of the dinode
239 * @ip: The GFS2 inode
240 *
241 * Returns: errno
242 */
243
244int gfs2_inode_refresh(struct gfs2_inode *ip)
245{
246 struct buffer_head *dibh;
247 int error;
248
249 error = gfs2_meta_inode_buffer(ip, &dibh);
250 if (error)
251 return error;
252
253 if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) {
254 brelse(dibh);
255 return -EIO;
256 }
257
258 gfs2_dinode_in(&ip->i_di, dibh->b_data);
259 set_bit(GIF_MIN_INIT, &ip->i_flags);
260
261 brelse(dibh);
262
263 if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
264 if (gfs2_consist_inode(ip))
265 gfs2_dinode_print(&ip->i_di);
266 return -EIO;
267 }
268 if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
269 return -ESTALE;
270
271 ip->i_vn = ip->i_gl->gl_vn;
272
273 return 0;
274}
275
276/**
277 * inode_create - create a struct gfs2_inode
278 * @i_gl: The glock covering the inode
279 * @inum: The inode number
280 * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode)
281 * @io_state: the state the iopen glock should be acquired in
282 * @ipp: pointer to put the returned inode in
283 *
284 * Returns: errno
285 */
286
287static int inode_create(struct gfs2_glock *i_gl, const struct gfs2_inum *inum,
288 struct gfs2_glock *io_gl, unsigned int io_state,
289 struct gfs2_inode **ipp, int need_lock)
290{
291 struct gfs2_sbd *sdp = i_gl->gl_sbd;
292 struct gfs2_inode *ip;
293 int error = 0;
294
295 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
296 if (!ip)
297 return -ENOMEM;
298 memset(ip, 0, sizeof(struct gfs2_inode));
299 ip->i_num = *inum;
300 atomic_set(&ip->i_count, 1);
301 ip->i_vn = i_gl->gl_vn - 1;
302 ip->i_gl = i_gl;
303 ip->i_sbd = sdp;
304 spin_lock_init(&ip->i_spin);
305 init_rwsem(&ip->i_rw_mutex);
306 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
307
308 if (need_lock) {
309 error = gfs2_glock_nq_init(io_gl,
310 io_state, GL_LOCAL_EXCL | GL_EXACT,
311 &ip->i_iopen_gh);
312 if (error)
313 goto fail;
314
315 spin_lock(&io_gl->gl_spin);
316 gfs2_glock_hold(i_gl);
317 io_gl->gl_object = i_gl;
318 spin_unlock(&io_gl->gl_spin);
319 }
320
321 gfs2_glock_hold(i_gl);
322 i_gl->gl_object = ip;
323 atomic_inc(&sdp->sd_inode_count);
324 *ipp = ip;
325 return 0;
326
327fail:
328 gfs2_meta_cache_flush(ip);
329 kmem_cache_free(gfs2_inode_cachep, ip);
330 *ipp = NULL;
331 return error;
332}
333
334/**
335 * gfs2_inode_get - Create or get a reference on an inode
336 * @i_gl: The glock covering the inode
337 * @inum: The inode number
338 * @create:
339 * @ipp: pointer to put the returned inode in
340 *
341 * Returns: errno
342 */
343
344int gfs2_inode_get(struct gfs2_glock *i_gl, const struct gfs2_inum *inum,
345 int create, struct gfs2_inode **ipp)
346{
347 struct gfs2_sbd *sdp = i_gl->gl_sbd;
348 struct gfs2_glock *io_gl;
349 int error = 0;
350
351 gfs2_glmutex_lock(i_gl);
352
353 *ipp = i_gl->gl_object;
354 if (*ipp) {
355 error = -ESTALE;
356 if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino)
357 goto out;
358 atomic_inc(&(*ipp)->i_count);
359 error = 0;
360 goto out;
361 }
362
363 if (!create)
364 goto out;
365
366 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops,
367 CREATE, &io_gl);
368 if (!error) {
369 error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp, 1);
370 gfs2_glock_put(io_gl);
371 }
372
373 out:
374 gfs2_glmutex_unlock(i_gl);
375
376 return error;
377}
378
379void gfs2_inode_hold(struct gfs2_inode *ip)
380{
381 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
382 atomic_inc(&ip->i_count);
383}
384
385void gfs2_inode_put(struct gfs2_inode *ip)
386{
387 gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
388 atomic_dec(&ip->i_count);
389}
390
391void gfs2_inode_destroy(struct gfs2_inode *ip, int unlock)
392{
393 struct gfs2_sbd *sdp = ip->i_sbd;
394 struct gfs2_glock *i_gl = ip->i_gl;
395
396 gfs2_assert_warn(sdp, !atomic_read(&ip->i_count));
397 if (unlock) {
398 struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
399 gfs2_assert(sdp, io_gl->gl_object == i_gl);
400
401 spin_lock(&io_gl->gl_spin);
402 io_gl->gl_object = NULL;
403 spin_unlock(&io_gl->gl_spin);
404 gfs2_glock_put(i_gl);
405
406 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
407 }
408
409 gfs2_meta_cache_flush(ip);
410 kmem_cache_free(gfs2_inode_cachep, ip);
411
412 i_gl->gl_object = NULL;
413 gfs2_glock_put(i_gl);
414
415 atomic_dec(&sdp->sd_inode_count);
416}
417
418static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul)
419{
420 struct gfs2_sbd *sdp = ip->i_sbd;
421 struct gfs2_alloc *al;
422 struct gfs2_rgrpd *rgd;
423 int error;
424
425 if (ip->i_di.di_blocks != 1) {
426 if (gfs2_consist_inode(ip))
427 gfs2_dinode_print(&ip->i_di);
428 return -EIO;
429 }
430
431 al = gfs2_alloc_get(ip);
432
433 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
434 if (error)
435 goto out;
436
437 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
438 if (error)
439 goto out_qs;
440
441 rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
442 if (!rgd) {
443 gfs2_consist_inode(ip);
444 error = -EIO;
445 goto out_rindex_relse;
446 }
447
448 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
449 &al->al_rgd_gh);
450 if (error)
451 goto out_rindex_relse;
452
453 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
454 RES_STATFS + RES_QUOTA, 1);
455 if (error)
456 goto out_rg_gunlock;
457
458 gfs2_trans_add_gl(ip->i_gl);
459
460 gfs2_free_di(rgd, ip);
461
462 error = gfs2_unlinked_ondisk_rm(sdp, ul);
463
464 gfs2_trans_end(sdp);
465 clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
466
467 out_rg_gunlock:
468 gfs2_glock_dq_uninit(&al->al_rgd_gh);
469
470 out_rindex_relse:
471 gfs2_glock_dq_uninit(&al->al_ri_gh);
472
473 out_qs:
474 gfs2_quota_unhold(ip);
475
476 out:
477 gfs2_alloc_put(ip);
478
479 return error;
480}
481
482/**
483 * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode)
484 * @sdp: the filesystem
485 * @inum: the inode number to deallocate
486 * @io_gh: a holder for the iopen glock for this inode
487 *
488 * N.B. When we enter this we already hold the iopen glock and getting
489 * the glock for the inode means that we are grabbing the locks in the
490 * "wrong" order so we must only so a try lock operation and fail if we
491 * don't get the lock. Thats ok, since if we fail it means someone else
492 * is using the inode still and thus we shouldn't be deallocating it
493 * anyway.
494 *
495 * Returns: errno
496 */
497
498static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul,
499 struct gfs2_holder *io_gh)
500{
501 struct gfs2_inode *ip;
502 struct gfs2_holder i_gh;
503 int error;
504
505 error = gfs2_glock_nq_num(sdp, ul->ul_ut.ut_inum.no_addr,
506 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
507 LM_FLAG_TRY_1CB|GL_DUMP, &i_gh);
508 switch(error) {
509 case 0:
510 break;
511 case GLR_TRYFAILED:
512 return 1; /* or back off and relock in different order? */
513 default:
514 return error;
515 }
516
517 gfs2_assert_warn(sdp, !i_gh.gh_gl->gl_object);
518 error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl,
519 LM_ST_EXCLUSIVE, &ip, 0);
520
521 if (error)
522 goto out;
523
524 error = gfs2_inode_refresh(ip);
525 if (error)
526 goto out_iput;
527
528 if (ip->i_di.di_nlink) {
529 if (gfs2_consist_inode(ip))
530 gfs2_dinode_print(&ip->i_di);
531 error = -EIO;
532 goto out_iput;
533 }
534
535 if (S_ISDIR(ip->i_di.di_mode) &&
536 (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
537 error = gfs2_dir_exhash_dealloc(ip);
538 if (error)
539 goto out_iput;
540 }
541
542 if (ip->i_di.di_eattr) {
543 error = gfs2_ea_dealloc(ip);
544 if (error)
545 goto out_iput;
546 }
547
548 if (!gfs2_is_stuffed(ip)) {
549 error = gfs2_file_dealloc(ip);
550 if (error)
551 goto out_iput;
552 }
553
554 error = dinode_dealloc(ip, ul);
555 if (error)
556 goto out_iput;
557
558out_iput:
559 gfs2_glmutex_lock(i_gh.gh_gl);
560 gfs2_inode_put(ip);
561 gfs2_inode_destroy(ip, 0);
562 gfs2_glmutex_unlock(i_gh.gh_gl);
563
564out:
565 gfs2_glock_dq_uninit(&i_gh);
566
567 return error;
568}
569
570/**
571 * try_inode_dealloc - Try to deallocate an inode and all its blocks
572 * @sdp: the filesystem
573 *
574 * Returns: 0 on success, -errno on error, 1 on busy (inode open)
575 */
576
577static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
578{
579 int error = 0;
580 struct gfs2_holder iogh;
581
582 gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum);
583 error = gfs2_glock_nq_num(sdp, ul->ul_ut.ut_inum.no_addr,
584 &gfs2_iopen_glops, LM_ST_EXCLUSIVE,
585 LM_FLAG_TRY_1CB, &iogh);
586 switch (error) {
587 case 0:
588 break;
589 case GLR_TRYFAILED:
590 return 1;
591 default:
592 return error;
593 }
594
595 error = inode_dealloc(sdp, ul, &iogh);
596 gfs2_glock_dq_uninit(&iogh);
597
598 return error;
599}
600
601static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
602{
603 struct gfs2_rgrpd *rgd;
604 struct gfs2_holder ri_gh, rgd_gh;
605 int error;
606
607 error = gfs2_rindex_hold(sdp, &ri_gh);
608 if (error)
609 return error;
610
611 rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr);
612 if (!rgd) {
613 gfs2_consist(sdp);
614 error = -EIO;
615 goto out;
616 }
617
618 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
619 if (error)
620 goto out;
621
622 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED + RES_STATFS, 0);
623 if (error)
624 goto out_gunlock;
625
626 gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr);
627 gfs2_unlinked_ondisk_rm(sdp, ul);
628
629 gfs2_trans_end(sdp);
630
631 out_gunlock:
632 gfs2_glock_dq_uninit(&rgd_gh);
633 out:
634 gfs2_glock_dq_uninit(&ri_gh);
635
636 return error;
637}
638
639int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
640{
641 if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT)
642 return inode_dealloc_uninit(sdp, ul);
643 else
644 return try_inode_dealloc(sdp, ul);
645}
646
647/**
648 * gfs2_change_nlink - Change nlink count on inode
649 * @ip: The GFS2 inode
650 * @diff: The change in the nlink count required
651 *
652 * Returns: errno
653 */
654
655int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
656{
657 struct buffer_head *dibh;
658 uint32_t nlink;
659 int error;
660
661 nlink = ip->i_di.di_nlink + diff;
662
663 /* If we are reducing the nlink count, but the new value ends up being
664 bigger than the old one, we must have underflowed. */
665 if (diff < 0 && nlink > ip->i_di.di_nlink) {
666 if (gfs2_consist_inode(ip))
667 gfs2_dinode_print(&ip->i_di);
668 return -EIO;
669 }
670
671 error = gfs2_meta_inode_buffer(ip, &dibh);
672 if (error)
673 return error;
674
675 ip->i_di.di_nlink = nlink;
676 ip->i_di.di_ctime = get_seconds();
677
678 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
679 gfs2_dinode_out(&ip->i_di, dibh->b_data);
680 brelse(dibh);
681
682 return 0;
683}
684
685struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
686{
687 struct qstr qstr;
688 gfs2_str2qstr(&qstr, name);
689 return gfs2_lookupi(dip, &qstr, 1, NULL);
690}
691
692
693/**
694 * gfs2_lookupi - Look up a filename in a directory and return its inode
695 * @d_gh: An initialized holder for the directory glock
696 * @name: The name of the inode to look for
697 * @is_root: If 1, ignore the caller's permissions
698 * @i_gh: An uninitialized holder for the new inode glock
699 *
700 * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
701 * @is_root is true.
702 *
703 * Returns: errno
704 */
705
706struct inode *gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
707 struct nameidata *nd)
708
709{
710 struct super_block *sb = dir->i_sb;
711 struct gfs2_inode *ipp;
712 struct gfs2_inode *dip = dir->u.generic_ip;
713 struct gfs2_sbd *sdp = dip->i_sbd;
714 struct gfs2_holder d_gh;
715 struct gfs2_inum inum;
716 unsigned int type;
717 struct gfs2_glock *gl;
718 int error = 0;
719 struct inode *inode = NULL;
720
721 if (!name->len || name->len > GFS2_FNAMESIZE)
722 return ERR_PTR(-ENAMETOOLONG);
723
724 if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
725 (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
726 dir == sb->s_root->d_inode)) {
727 igrab(dir);
728 return dir;
729 }
730
731 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
732 if (error)
733 return ERR_PTR(error);
734
735 if (!is_root) {
736 error = gfs2_repermission(dir, MAY_EXEC, NULL);
737 if (error)
738 goto out;
739 }
740
741 error = gfs2_dir_search(dir, name, &inum, &type);
742 if (error)
743 goto out;
744
745 error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops,
746 CREATE, &gl);
747 if (error)
748 goto out;
749
750 error = gfs2_inode_get(gl, &inum, CREATE, &ipp);
751 if (!error)
752 gfs2_inode_min_init(ipp, type);
753
754 gfs2_glock_put(gl);
755
756out:
757 gfs2_glock_dq_uninit(&d_gh);
758 if (error == -ENOENT)
759 return NULL;
760 if (error == 0) {
761 inode = gfs2_ip2v(ipp);
762 gfs2_inode_put(ipp);
763 if (!inode)
764 return ERR_PTR(-ENOMEM);
765 return inode;
766 }
767 return ERR_PTR(error);
768}
769
770static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino)
771{
772 struct gfs2_inode *ip = sdp->sd_ir_inode->u.generic_ip;
773 struct buffer_head *bh;
774 struct gfs2_inum_range ir;
775 int error;
776
777 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
778 if (error)
779 return error;
780 mutex_lock(&sdp->sd_inum_mutex);
781
782 error = gfs2_meta_inode_buffer(ip, &bh);
783 if (error) {
784 mutex_unlock(&sdp->sd_inum_mutex);
785 gfs2_trans_end(sdp);
786 return error;
787 }
788
789 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
790
791 if (ir.ir_length) {
792 *formal_ino = ir.ir_start++;
793 ir.ir_length--;
794 gfs2_trans_add_bh(ip->i_gl, bh, 1);
795 gfs2_inum_range_out(&ir,
796 bh->b_data + sizeof(struct gfs2_dinode));
797 brelse(bh);
798 mutex_unlock(&sdp->sd_inum_mutex);
799 gfs2_trans_end(sdp);
800 return 0;
801 }
802
803 brelse(bh);
804
805 mutex_unlock(&sdp->sd_inum_mutex);
806 gfs2_trans_end(sdp);
807
808 return 1;
809}
810
811static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino)
812{
813 struct gfs2_inode *ip = sdp->sd_ir_inode->u.generic_ip;
814 struct gfs2_inode *m_ip = sdp->sd_inum_inode->u.generic_ip;
815 struct gfs2_holder gh;
816 struct buffer_head *bh;
817 struct gfs2_inum_range ir;
818 int error;
819
820 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
821 if (error)
822 return error;
823
824 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
825 if (error)
826 goto out;
827 mutex_lock(&sdp->sd_inum_mutex);
828
829 error = gfs2_meta_inode_buffer(ip, &bh);
830 if (error)
831 goto out_end_trans;
832
833 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
834
835 if (!ir.ir_length) {
836 struct buffer_head *m_bh;
837 uint64_t x, y;
838
839 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
840 if (error)
841 goto out_brelse;
842
843 x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode));
844 x = y = be64_to_cpu(x);
845 ir.ir_start = x;
846 ir.ir_length = GFS2_INUM_QUANTUM;
847 x += GFS2_INUM_QUANTUM;
848 if (x < y)
849 gfs2_consist_inode(m_ip);
850 x = cpu_to_be64(x);
851 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
852 *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
853
854 brelse(m_bh);
855 }
856
857 *formal_ino = ir.ir_start++;
858 ir.ir_length--;
859
860 gfs2_trans_add_bh(ip->i_gl, bh, 1);
861 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
862
863 out_brelse:
864 brelse(bh);
865
866 out_end_trans:
867 mutex_unlock(&sdp->sd_inum_mutex);
868 gfs2_trans_end(sdp);
869
870 out:
871 gfs2_glock_dq_uninit(&gh);
872
873 return error;
874}
875
876static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum)
877{
878 int error;
879
880 error = pick_formal_ino_1(sdp, inum);
881 if (error <= 0)
882 return error;
883
884 error = pick_formal_ino_2(sdp, inum);
885
886 return error;
887}
888
889/**
890 * create_ok - OK to create a new on-disk inode here?
891 * @dip: Directory in which dinode is to be created
892 * @name: Name of new dinode
893 * @mode:
894 *
895 * Returns: errno
896 */
897
898static int create_ok(struct gfs2_inode *dip, struct qstr *name,
899 unsigned int mode)
900{
901 int error;
902
903 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
904 if (error)
905 return error;
906
907 /* Don't create entries in an unlinked directory */
908 if (!dip->i_di.di_nlink)
909 return -EPERM;
910
911 error = gfs2_dir_search(dip->i_vnode, name, NULL, NULL);
912 switch (error) {
913 case -ENOENT:
914 error = 0;
915 break;
916 case 0:
917 return -EEXIST;
918 default:
919 return error;
920 }
921
922 if (dip->i_di.di_entries == (uint32_t)-1)
923 return -EFBIG;
924 if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1)
925 return -EMLINK;
926
927 return 0;
928}
929
930static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
931 unsigned int *uid, unsigned int *gid)
932{
933 if (dip->i_sbd->sd_args.ar_suiddir &&
934 (dip->i_di.di_mode & S_ISUID) &&
935 dip->i_di.di_uid) {
936 if (S_ISDIR(*mode))
937 *mode |= S_ISUID;
938 else if (dip->i_di.di_uid != current->fsuid)
939 *mode &= ~07111;
940 *uid = dip->i_di.di_uid;
941 } else
942 *uid = current->fsuid;
943
944 if (dip->i_di.di_mode & S_ISGID) {
945 if (S_ISDIR(*mode))
946 *mode |= S_ISGID;
947 *gid = dip->i_di.di_gid;
948 } else
949 *gid = current->fsgid;
950}
951
952static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul)
953{
954 struct gfs2_sbd *sdp = dip->i_sbd;
955 int error;
956
957 gfs2_alloc_get(dip);
958
959 dip->i_alloc.al_requested = RES_DINODE;
960 error = gfs2_inplace_reserve(dip);
961 if (error)
962 goto out;
963
964 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
965 RES_STATFS, 0);
966 if (error)
967 goto out_ipreserv;
968
969 ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip);
970
971 ul->ul_ut.ut_flags = GFS2_UTF_UNINIT;
972 error = gfs2_unlinked_ondisk_add(sdp, ul);
973
974 gfs2_trans_end(sdp);
975
976 out_ipreserv:
977 gfs2_inplace_release(dip);
978
979 out:
980 gfs2_alloc_put(dip);
981
982 return error;
983}
984
985/**
986 * init_dinode - Fill in a new dinode structure
987 * @dip: the directory this inode is being created in
988 * @gl: The glock covering the new inode
989 * @inum: the inode number
990 * @mode: the file permissions
991 * @uid:
992 * @gid:
993 *
994 */
995
996static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
997 struct gfs2_inum *inum, unsigned int mode,
998 unsigned int uid, unsigned int gid)
999{
1000 struct gfs2_sbd *sdp = dip->i_sbd;
1001 struct gfs2_dinode *di;
1002 struct buffer_head *dibh;
1003
1004 dibh = gfs2_meta_new(gl, inum->no_addr);
1005 gfs2_trans_add_bh(gl, dibh, 1);
1006 gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
1007 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1008 di = (struct gfs2_dinode *)dibh->b_data;
1009
1010 di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
1011 di->di_num.no_addr = cpu_to_be64(inum->no_addr);
1012 di->di_mode = cpu_to_be32(mode);
1013 di->di_uid = cpu_to_be32(uid);
1014 di->di_gid = cpu_to_be32(gid);
1015 di->di_nlink = cpu_to_be32(0);
1016 di->di_size = cpu_to_be64(0);
1017 di->di_blocks = cpu_to_be64(1);
1018 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
1019 di->di_major = di->di_minor = cpu_to_be32(0);
1020 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
1021 di->__pad[0] = di->__pad[1] = 0;
1022 di->di_flags = cpu_to_be32(0);
1023
1024 if (S_ISREG(mode)) {
1025 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
1026 gfs2_tune_get(sdp, gt_new_files_jdata))
1027 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
1028 if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
1029 gfs2_tune_get(sdp, gt_new_files_directio))
1030 di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
1031 } else if (S_ISDIR(mode)) {
1032 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
1033 GFS2_DIF_INHERIT_DIRECTIO);
1034 di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
1035 GFS2_DIF_INHERIT_JDATA);
1036 }
1037
1038 di->__pad1 = 0;
1039 di->di_height = cpu_to_be32(0);
1040 di->__pad2 = 0;
1041 di->__pad3 = 0;
1042 di->di_depth = cpu_to_be16(0);
1043 di->di_entries = cpu_to_be32(0);
1044 memset(&di->__pad4, 0, sizeof(di->__pad4));
1045 di->di_eattr = cpu_to_be64(0);
1046 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
1047
1048 brelse(dibh);
1049}
1050
1051static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
1052 unsigned int mode, struct gfs2_unlinked *ul)
1053{
1054 struct gfs2_sbd *sdp = dip->i_sbd;
1055 unsigned int uid, gid;
1056 int error;
1057
1058 munge_mode_uid_gid(dip, &mode, &uid, &gid);
1059 gfs2_alloc_get(dip);
1060
1061 error = gfs2_quota_lock(dip, uid, gid);
1062 if (error)
1063 goto out;
1064
1065 error = gfs2_quota_check(dip, uid, gid);
1066 if (error)
1067 goto out_quota;
1068
1069 error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED + RES_QUOTA, 0);
1070 if (error)
1071 goto out_quota;
1072
1073 ul->ul_ut.ut_flags = 0;
1074 error = gfs2_unlinked_ondisk_munge(sdp, ul);
1075 init_dinode(dip, gl, &ul->ul_ut.ut_inum, mode, uid, gid);
1076 gfs2_quota_change(dip, +1, uid, gid);
1077 gfs2_trans_end(sdp);
1078
1079 out_quota:
1080 gfs2_quota_unlock(dip);
1081
1082 out:
1083 gfs2_alloc_put(dip);
1084 return error;
1085}
1086
1087static int link_dinode(struct gfs2_inode *dip, struct qstr *name,
1088 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1089{
1090 struct gfs2_sbd *sdp = dip->i_sbd;
1091 struct gfs2_alloc *al;
1092 int alloc_required;
1093 struct buffer_head *dibh;
1094 int error;
1095
1096 al = gfs2_alloc_get(dip);
1097
1098 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1099 if (error)
1100 goto fail;
1101
1102 error = alloc_required = gfs2_diradd_alloc_required(dip->i_vnode, name);
1103 if (alloc_required < 0)
1104 goto fail;
1105 if (alloc_required) {
1106 error = gfs2_quota_check(dip, dip->i_di.di_uid,
1107 dip->i_di.di_gid);
1108 if (error)
1109 goto fail_quota_locks;
1110
1111 al->al_requested = sdp->sd_max_dirres;
1112
1113 error = gfs2_inplace_reserve(dip);
1114 if (error)
1115 goto fail_quota_locks;
1116
1117 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1118 al->al_rgd->rd_ri.ri_length +
1119 2 * RES_DINODE + RES_UNLINKED +
1120 RES_STATFS + RES_QUOTA, 0);
1121 if (error)
1122 goto fail_ipreserv;
1123 } else {
1124 error = gfs2_trans_begin(sdp,
1125 RES_LEAF +
1126 2 * RES_DINODE +
1127 RES_UNLINKED, 0);
1128 if (error)
1129 goto fail_quota_locks;
1130 }
1131
1132 error = gfs2_dir_add(dip->i_vnode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
1133 if (error)
1134 goto fail_end_trans;
1135
1136 error = gfs2_meta_inode_buffer(ip, &dibh);
1137 if (error)
1138 goto fail_end_trans;
1139 ip->i_di.di_nlink = 1;
1140 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1141 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1142 brelse(dibh);
1143
1144 error = gfs2_unlinked_ondisk_rm(sdp, ul);
1145 if (error)
1146 goto fail_end_trans;
1147
1148 return 0;
1149
1150fail_end_trans:
1151 gfs2_trans_end(sdp);
1152
1153fail_ipreserv:
1154 if (dip->i_alloc.al_rgd)
1155 gfs2_inplace_release(dip);
1156
1157fail_quota_locks:
1158 gfs2_quota_unlock(dip);
1159
1160fail:
1161 gfs2_alloc_put(dip);
1162 return error;
1163}
1164
1165/**
1166 * gfs2_createi - Create a new inode
1167 * @ghs: An array of two holders
1168 * @name: The name of the new file
1169 * @mode: the permissions on the new inode
1170 *
1171 * @ghs[0] is an initialized holder for the directory
1172 * @ghs[1] is the holder for the inode lock
1173 *
1174 * If the return value is not NULL, the glocks on both the directory and the new
1175 * file are held. A transaction has been started and an inplace reservation
1176 * is held, as well.
1177 *
1178 * Returns: An inode
1179 */
1180
1181struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
1182 unsigned int mode)
1183{
1184 struct inode *inode;
1185 struct gfs2_inode *dip = ghs->gh_gl->gl_object;
1186 struct gfs2_sbd *sdp = dip->i_sbd;
1187 struct gfs2_unlinked *ul;
1188 struct gfs2_inode *ip;
1189 int error;
1190
1191 if (!name->len || name->len > GFS2_FNAMESIZE)
1192 return ERR_PTR(-ENAMETOOLONG);
1193
1194 error = gfs2_unlinked_get(sdp, &ul);
1195 if (error)
1196 return ERR_PTR(error);
1197
1198 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1199 error = gfs2_glock_nq(ghs);
1200 if (error)
1201 goto fail;
1202
1203 error = create_ok(dip, name, mode);
1204 if (error)
1205 goto fail_gunlock;
1206
1207 error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino);
1208 if (error)
1209 goto fail_gunlock;
1210
1211 error = alloc_dinode(dip, ul);
1212 if (error)
1213 goto fail_gunlock;
1214
1215 if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) {
1216 gfs2_glock_dq(ghs);
1217
1218 error = gfs2_glock_nq_num(sdp, ul->ul_ut.ut_inum.no_addr,
1219 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
1220 GL_SKIP, ghs + 1);
1221 if (error) {
1222 gfs2_unlinked_put(sdp, ul);
1223 return ERR_PTR(error);
1224 }
1225
1226 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1227 error = gfs2_glock_nq(ghs);
1228 if (error) {
1229 gfs2_glock_dq_uninit(ghs + 1);
1230 gfs2_unlinked_put(sdp, ul);
1231 return ERR_PTR(error);
1232 }
1233
1234 error = create_ok(dip, name, mode);
1235 if (error)
1236 goto fail_gunlock2;
1237 } else {
1238 error = gfs2_glock_nq_num(sdp, ul->ul_ut.ut_inum.no_addr,
1239 &gfs2_inode_glops, LM_ST_EXCLUSIVE,
1240 GL_SKIP, ghs + 1);
1241 if (error)
1242 goto fail_gunlock;
1243 }
1244
1245 error = make_dinode(dip, ghs[1].gh_gl, mode, ul);
1246 if (error)
1247 goto fail_gunlock2;
1248
1249 error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip);
1250 if (error)
1251 goto fail_gunlock2;
1252
1253 error = gfs2_inode_refresh(ip);
1254 if (error)
1255 goto fail_iput;
1256
1257 error = gfs2_acl_create(dip, ip);
1258 if (error)
1259 goto fail_iput;
1260
1261 error = link_dinode(dip, name, ip, ul);
1262 if (error)
1263 goto fail_iput;
1264
1265 gfs2_unlinked_put(sdp, ul);
1266
1267 inode = gfs2_ip2v(ip);
1268 gfs2_inode_put(ip);
1269 if (!inode)
1270 return ERR_PTR(-ENOMEM);
1271 return inode;
1272
1273fail_iput:
1274 gfs2_inode_put(ip);
1275
1276fail_gunlock2:
1277 gfs2_glock_dq_uninit(ghs + 1);
1278
1279fail_gunlock:
1280 gfs2_glock_dq(ghs);
1281
1282fail:
1283 gfs2_unlinked_put(sdp, ul);
1284 return ERR_PTR(error);
1285}
1286
1287/**
1288 * gfs2_unlinki - Unlink a file
1289 * @dip: The inode of the directory
1290 * @name: The name of the file to be unlinked
1291 * @ip: The inode of the file to be removed
1292 *
1293 * Assumes Glocks on both dip and ip are held.
1294 *
1295 * Returns: errno
1296 */
1297
1298int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
1299 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1300{
1301 struct gfs2_sbd *sdp = dip->i_sbd;
1302 int error;
1303
1304 error = gfs2_dir_del(dip, name);
1305 if (error)
1306 return error;
1307
1308 error = gfs2_change_nlink(ip, -1);
1309 if (error)
1310 return error;
1311
1312 /* If this inode is being unlinked from the directory structure,
1313 we need to mark that in the log so that it isn't lost during
1314 a crash. */
1315
1316 if (!ip->i_di.di_nlink) {
1317 ul->ul_ut.ut_inum = ip->i_num;
1318 error = gfs2_unlinked_ondisk_add(sdp, ul);
1319 if (!error)
1320 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1321 }
1322
1323 return error;
1324}
1325
1326/**
1327 * gfs2_rmdiri - Remove a directory
1328 * @dip: The parent directory of the directory to be removed
1329 * @name: The name of the directory to be removed
1330 * @ip: The GFS2 inode of the directory to be removed
1331 *
1332 * Assumes Glocks on dip and ip are held
1333 *
1334 * Returns: errno
1335 */
1336
1337int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
1338 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1339{
1340 struct gfs2_sbd *sdp = dip->i_sbd;
1341 struct qstr dotname;
1342 int error;
1343
1344 if (ip->i_di.di_entries != 2) {
1345 if (gfs2_consist_inode(ip))
1346 gfs2_dinode_print(&ip->i_di);
1347 return -EIO;
1348 }
1349
1350 error = gfs2_dir_del(dip, name);
1351 if (error)
1352 return error;
1353
1354 error = gfs2_change_nlink(dip, -1);
1355 if (error)
1356 return error;
1357
1358 gfs2_str2qstr(&dotname, ".");
1359 error = gfs2_dir_del(ip, &dotname);
1360 if (error)
1361 return error;
1362
1363 dotname.len = 2;
1364 dotname.name = "..";
1365 dotname.hash = gfs2_disk_hash(dotname.name, dotname.len);
1366 error = gfs2_dir_del(ip, &dotname);
1367 if (error)
1368 return error;
1369
1370 error = gfs2_change_nlink(ip, -2);
1371 if (error)
1372 return error;
1373
1374 /* This inode is being unlinked from the directory structure and
1375 we need to mark that in the log so that it isn't lost during
1376 a crash. */
1377
1378 ul->ul_ut.ut_inum = ip->i_num;
1379 error = gfs2_unlinked_ondisk_add(sdp, ul);
1380 if (!error)
1381 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1382
1383 return error;
1384}
1385
1386/*
1387 * gfs2_unlink_ok - check to see that a inode is still in a directory
1388 * @dip: the directory
1389 * @name: the name of the file
1390 * @ip: the inode
1391 *
1392 * Assumes that the lock on (at least) @dip is held.
1393 *
1394 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
1395 */
1396
1397int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
1398 struct gfs2_inode *ip)
1399{
1400 struct gfs2_inum inum;
1401 unsigned int type;
1402 int error;
1403
1404 if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1405 return -EPERM;
1406
1407 if ((dip->i_di.di_mode & S_ISVTX) &&
1408 dip->i_di.di_uid != current->fsuid &&
1409 ip->i_di.di_uid != current->fsuid &&
1410 !capable(CAP_FOWNER))
1411 return -EPERM;
1412
1413 if (IS_APPEND(dip->i_vnode))
1414 return -EPERM;
1415
1416 error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
1417 if (error)
1418 return error;
1419
1420 error = gfs2_dir_search(dip->i_vnode, name, &inum, &type);
1421 if (error)
1422 return error;
1423
1424 if (!gfs2_inum_equal(&inum, &ip->i_num))
1425 return -ENOENT;
1426
1427 if (IF2DT(ip->i_di.di_mode) != type) {
1428 gfs2_consist_inode(dip);
1429 return -EIO;
1430 }
1431
1432 return 0;
1433}
1434
1435/*
1436 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1437 * @this: move this
1438 * @to: to here
1439 *
1440 * Follow @to back to the root and make sure we don't encounter @this
1441 * Assumes we already hold the rename lock.
1442 *
1443 * Returns: errno
1444 */
1445
1446int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1447{
1448 struct inode *dir = to->i_vnode;
1449 struct super_block *sb = dir->i_sb;
1450 struct inode *tmp;
1451 struct qstr dotdot;
1452 int error = 0;
1453
1454 gfs2_str2qstr(&dotdot, "..");
1455
1456 igrab(dir);
1457
1458 for (;;) {
1459 if (dir == this->i_vnode) {
1460 error = -EINVAL;
1461 break;
1462 }
1463 if (dir == sb->s_root->d_inode) {
1464 error = 0;
1465 break;
1466 }
1467
1468 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
1469 if (IS_ERR(tmp)) {
1470 error = PTR_ERR(tmp);
1471 break;
1472 }
1473
1474 iput(dir);
1475 dir = tmp;
1476 }
1477
1478 iput(dir);
1479
1480 return error;
1481}
1482
1483/**
1484 * gfs2_readlinki - return the contents of a symlink
1485 * @ip: the symlink's inode
1486 * @buf: a pointer to the buffer to be filled
1487 * @len: a pointer to the length of @buf
1488 *
1489 * If @buf is too small, a piece of memory is kmalloc()ed and needs
1490 * to be freed by the caller.
1491 *
1492 * Returns: errno
1493 */
1494
1495int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1496{
1497 struct gfs2_holder i_gh;
1498 struct buffer_head *dibh;
1499 unsigned int x;
1500 int error;
1501
1502 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1503 error = gfs2_glock_nq_atime(&i_gh);
1504 if (error) {
1505 gfs2_holder_uninit(&i_gh);
1506 return error;
1507 }
1508
1509 if (!ip->i_di.di_size) {
1510 gfs2_consist_inode(ip);
1511 error = -EIO;
1512 goto out;
1513 }
1514
1515 error = gfs2_meta_inode_buffer(ip, &dibh);
1516 if (error)
1517 goto out;
1518
1519 x = ip->i_di.di_size + 1;
1520 if (x > *len) {
1521 *buf = kmalloc(x, GFP_KERNEL);
1522 if (!*buf) {
1523 error = -ENOMEM;
1524 goto out_brelse;
1525 }
1526 }
1527
1528 memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1529 *len = x;
1530
1531 out_brelse:
1532 brelse(dibh);
1533
1534 out:
1535 gfs2_glock_dq_uninit(&i_gh);
1536
1537 return error;
1538}
1539
1540/**
1541 * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1542 * conditionally update the inode's atime
1543 * @gh: the holder to acquire
1544 *
1545 * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1546 * Update if the difference between the current time and the inode's current
1547 * atime is greater than an interval specified at mount.
1548 *
1549 * Returns: errno
1550 */
1551
1552int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1553{
1554 struct gfs2_glock *gl = gh->gh_gl;
1555 struct gfs2_sbd *sdp = gl->gl_sbd;
1556 struct gfs2_inode *ip = gl->gl_object;
1557 int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1558 unsigned int state;
1559 int flags;
1560 int error;
1561
1562 if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1563 gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1564 gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1565 return -EINVAL;
1566
1567 state = gh->gh_state;
1568 flags = gh->gh_flags;
1569
1570 error = gfs2_glock_nq(gh);
1571 if (error)
1572 return error;
1573
1574 if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1575 (sdp->sd_vfs->s_flags & MS_RDONLY))
1576 return 0;
1577
1578 curtime = get_seconds();
1579 if (curtime - ip->i_di.di_atime >= quantum) {
1580 gfs2_glock_dq(gh);
1581 gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
1582 gh);
1583 error = gfs2_glock_nq(gh);
1584 if (error)
1585 return error;
1586
1587 /* Verify that atime hasn't been updated while we were
1588 trying to get exclusive lock. */
1589
1590 curtime = get_seconds();
1591 if (curtime - ip->i_di.di_atime >= quantum) {
1592 struct buffer_head *dibh;
1593
1594 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1595 if (error == -EROFS)
1596 return 0;
1597 if (error)
1598 goto fail;
1599
1600 error = gfs2_meta_inode_buffer(ip, &dibh);
1601 if (error)
1602 goto fail_end_trans;
1603
1604 ip->i_di.di_atime = curtime;
1605
1606 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1607 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1608 brelse(dibh);
1609
1610 gfs2_trans_end(sdp);
1611 }
1612
1613 /* If someone else has asked for the glock,
1614 unlock and let them have it. Then reacquire
1615 in the original state. */
1616 if (gfs2_glock_is_blocking(gl)) {
1617 gfs2_glock_dq(gh);
1618 gfs2_holder_reinit(state, flags, gh);
1619 return gfs2_glock_nq(gh);
1620 }
1621 }
1622
1623 return 0;
1624
1625 fail_end_trans:
1626 gfs2_trans_end(sdp);
1627
1628 fail:
1629 gfs2_glock_dq(gh);
1630
1631 return error;
1632}
1633
1634/**
1635 * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1636 * @arg_a: the first structure
1637 * @arg_b: the second structure
1638 *
1639 * Returns: 1 if A > B
1640 * -1 if A < B
1641 * 0 if A = B
1642 */
1643
1644static int glock_compare_atime(const void *arg_a, const void *arg_b)
1645{
1646 struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1647 struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1648 struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1649 struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1650 int ret = 0;
1651
1652 if (a->ln_number > b->ln_number)
1653 ret = 1;
1654 else if (a->ln_number < b->ln_number)
1655 ret = -1;
1656 else {
1657 if (gh_a->gh_state == LM_ST_SHARED &&
1658 gh_b->gh_state == LM_ST_EXCLUSIVE)
1659 ret = 1;
1660 else if (gh_a->gh_state == LM_ST_SHARED &&
1661 (gh_b->gh_flags & GL_ATIME))
1662 ret = 1;
1663 }
1664
1665 return ret;
1666}
1667
1668/**
1669 * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
1670 * atime update
1671 * @num_gh: the number of structures
1672 * @ghs: an array of struct gfs2_holder structures
1673 *
1674 * Returns: 0 on success (all glocks acquired),
1675 * errno on failure (no glocks acquired)
1676 */
1677
1678int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
1679{
1680 struct gfs2_holder **p;
1681 unsigned int x;
1682 int error = 0;
1683
1684 if (!num_gh)
1685 return 0;
1686
1687 if (num_gh == 1) {
1688 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1689 if (ghs->gh_flags & GL_ATIME)
1690 error = gfs2_glock_nq_atime(ghs);
1691 else
1692 error = gfs2_glock_nq(ghs);
1693 return error;
1694 }
1695
1696 p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1697 if (!p)
1698 return -ENOMEM;
1699
1700 for (x = 0; x < num_gh; x++)
1701 p[x] = &ghs[x];
1702
1703 sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
1704
1705 for (x = 0; x < num_gh; x++) {
1706 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1707
1708 if (p[x]->gh_flags & GL_ATIME)
1709 error = gfs2_glock_nq_atime(p[x]);
1710 else
1711 error = gfs2_glock_nq(p[x]);
1712
1713 if (error) {
1714 while (x--)
1715 gfs2_glock_dq(p[x]);
1716 break;
1717 }
1718 }
1719
1720 kfree(p);
1721
1722 return error;
1723}
1724
1725/**
1726 * gfs2_try_toss_vnode - See if we can toss a vnode from memory
1727 * @ip: the inode
1728 *
1729 * Returns: 1 if the vnode was tossed
1730 */
1731
1732void gfs2_try_toss_vnode(struct gfs2_inode *ip)
1733{
1734 struct inode *inode;
1735
1736 inode = gfs2_ip2v_lookup(ip);
1737 if (!inode)
1738 return;
1739
1740 d_prune_aliases(inode);
1741
1742 if (S_ISDIR(ip->i_di.di_mode)) {
1743 struct list_head *head = &inode->i_dentry;
1744 struct dentry *d = NULL;
1745
1746 spin_lock(&dcache_lock);
1747 if (list_empty(head))
1748 spin_unlock(&dcache_lock);
1749 else {
1750 d = list_entry(head->next, struct dentry, d_alias);
1751 dget_locked(d);
1752 spin_unlock(&dcache_lock);
1753
1754 if (have_submounts(d))
1755 dput(d);
1756 else {
1757 shrink_dcache_parent(d);
1758 dput(d);
1759 d_prune_aliases(inode);
1760 }
1761 }
1762 }
1763
1764 inode->i_nlink = 0;
1765 iput(inode);
1766}
1767
1768
1769static int
1770__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1771{
1772 struct buffer_head *dibh;
1773 int error;
1774
1775 error = gfs2_meta_inode_buffer(ip, &dibh);
1776 if (!error) {
1777 error = inode_setattr(ip->i_vnode, attr);
1778 gfs2_assert_warn(ip->i_sbd, !error);
1779 gfs2_inode_attr_out(ip);
1780
1781 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1782 gfs2_dinode_out(&ip->i_di, dibh->b_data);
1783 brelse(dibh);
1784 }
1785 return error;
1786}
1787
1788/**
1789 * gfs2_setattr_simple -
1790 * @ip:
1791 * @attr:
1792 *
1793 * Called with a reference on the vnode.
1794 *
1795 * Returns: errno
1796 */
1797
1798int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1799{
1800 int error;
1801
1802 if (current->journal_info)
1803 return __gfs2_setattr_simple(ip, attr);
1804
1805 error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
1806 if (error)
1807 return error;
1808
1809 error = __gfs2_setattr_simple(ip, attr);
1810
1811 gfs2_trans_end(ip->i_sbd);
1812
1813 return error;
1814}
1815
/* Thin wrapper: hands its arguments to permission() and returns the
   result unchanged. */
int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd)
{
	int error = permission(inode, mask, nd);

	return error;
}
1820
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
new file mode 100644
index 000000000000..5ef21317b2f6
--- /dev/null
+++ b/fs/gfs2/inode.h
@@ -0,0 +1,72 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __INODE_DOT_H__
11#define __INODE_DOT_H__
12
13static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
14{
15 return !ip->i_di.di_height;
16}
17
18static inline int gfs2_is_jdata(struct gfs2_inode *ip)
19{
20 return ip->i_di.di_flags & GFS2_DIF_JDATA;
21}
22
23static inline int gfs2_is_dir(struct gfs2_inode *ip)
24{
25 return S_ISDIR(ip->i_di.di_mode);
26}
27
28void gfs2_inode_attr_in(struct gfs2_inode *ip);
29void gfs2_inode_attr_out(struct gfs2_inode *ip);
30struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
31struct inode *gfs2_ip2v(struct gfs2_inode *ip);
32struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum);
33
34void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type);
35int gfs2_inode_refresh(struct gfs2_inode *ip);
36
37int gfs2_inode_get(struct gfs2_glock *i_gl,
38 const struct gfs2_inum *inum, int create,
39 struct gfs2_inode **ipp);
40void gfs2_inode_hold(struct gfs2_inode *ip);
41void gfs2_inode_put(struct gfs2_inode *ip);
42void gfs2_inode_destroy(struct gfs2_inode *ip, int unlock);
43
44int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
45
46int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
47struct inode *gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
48 struct nameidata *nd);
49struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
50 unsigned int mode);
51int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
52 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
53int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
54 struct gfs2_inode *ip, struct gfs2_unlinked *ul);
55int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
56 struct gfs2_inode *ip);
57int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
58int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
59
60int gfs2_glock_nq_atime(struct gfs2_holder *gh);
61int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs);
62
63void gfs2_try_toss_vnode(struct gfs2_inode *ip);
64
65int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
66
67int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd);
68
69struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
70
71#endif /* __INODE_DOT_H__ */
72
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
new file mode 100644
index 000000000000..f45c0ffd1c35
--- /dev/null
+++ b/fs/gfs2/lm.c
@@ -0,0 +1,244 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/delay.h>
16#include <linux/gfs2_ondisk.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "glock.h"
22#include "lm.h"
23#include "super.h"
24#include "util.h"
25#include "lvb.h"
26
27/**
28 * gfs2_lm_mount - mount a locking protocol
29 * @sdp: the filesystem
30 * @args: mount arguements
31 * @silent: if 1, don't complain if the FS isn't a GFS2 fs
32 *
33 * Returns: errno
34 */
35
36int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
37{
38 char *proto = sdp->sd_proto_name;
39 char *table = sdp->sd_table_name;
40 int flags = 0;
41 int error;
42
43 if (sdp->sd_args.ar_spectator)
44 flags |= LM_MFLAG_SPECTATOR;
45
46 fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
47
48 error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
49 gfs2_glock_cb, sdp,
50 GFS2_MIN_LVB_SIZE, flags,
51 &sdp->sd_lockstruct, &sdp->sd_kobj);
52 if (error) {
53 fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
54 proto, table, sdp->sd_args.ar_hostdata);
55 goto out;
56 }
57
58 if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
59 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
60 gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
61 GFS2_MIN_LVB_SIZE)) {
62 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
63 goto out;
64 }
65
66 if (sdp->sd_args.ar_spectator)
67 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
68 else
69 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
70 sdp->sd_lockstruct.ls_jid);
71
72 fs_info(sdp, "Joined cluster. Now mounting FS...\n");
73
74 if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
75 !sdp->sd_args.ar_ignore_local_fs) {
76 sdp->sd_args.ar_localflocks = 1;
77 sdp->sd_args.ar_localcaching = 1;
78 }
79
80 out:
81 return error;
82}
83
84void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
85{
86 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
87 sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
88 sdp->sd_lockstruct.ls_lockspace);
89}
90
91void gfs2_lm_unmount(struct gfs2_sbd *sdp)
92{
93 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
94 gfs2_unmount_lockproto(&sdp->sd_lockstruct);
95}
96
97int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
98{
99 va_list args;
100
101 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
102 return 0;
103
104 va_start(args, fmt);
105 vprintk(fmt, args);
106 va_end(args);
107
108 fs_err(sdp, "about to withdraw from the cluster\n");
109 BUG_ON(sdp->sd_args.ar_debug);
110
111
112 fs_err(sdp, "waiting for outstanding I/O\n");
113
114 /* FIXME: suspend dm device so oustanding bio's complete
115 and all further io requests fail */
116
117 fs_err(sdp, "telling LM to withdraw\n");
118 gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
119 fs_err(sdp, "withdrawn\n");
120 dump_stack();
121
122 return -1;
123}
124
125int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
126 lm_lock_t **lockp)
127{
128 int error;
129 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
130 error = -EIO;
131 else
132 error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
133 sdp->sd_lockstruct.ls_lockspace, name, lockp);
134 return error;
135}
136
137void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock)
138{
139 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
140 sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
141}
142
143unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
144 unsigned int cur_state, unsigned int req_state,
145 unsigned int flags)
146{
147 int ret;
148 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
149 ret = 0;
150 else
151 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
152 cur_state,
153 req_state, flags);
154 return ret;
155}
156
157unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
158 unsigned int cur_state)
159{
160 int ret;
161 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
162 ret = 0;
163 else
164 ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
165 return ret;
166}
167
168void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock)
169{
170 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
171 sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
172}
173
174int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp)
175{
176 int error;
177 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
178 error = -EIO;
179 else
180 error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
181 return error;
182}
183
184void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
185{
186 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
187 sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
188}
189
#if 0
/* Compiled out.  Forward to the lock module's lm_sync_lvb operation;
   skipped once SDF_SHUTDOWN is set. */
void gfs2_lm_sync_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb)
{
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return;

	sdp->sd_lockstruct.ls_ops->lm_sync_lvb(lock, lvb);
}
#endif  /*  0  */
197
198int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
199 struct file *file, struct file_lock *fl)
200{
201 int error;
202 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
203 error = -EIO;
204 else
205 error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
206 sdp->sd_lockstruct.ls_lockspace,
207 name, file, fl);
208 return error;
209}
210
211int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
212 struct file *file, int cmd, struct file_lock *fl)
213{
214 int error;
215 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
216 error = -EIO;
217 else
218 error = sdp->sd_lockstruct.ls_ops->lm_plock(
219 sdp->sd_lockstruct.ls_lockspace,
220 name, file, cmd, fl);
221 return error;
222}
223
224int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
225 struct file *file, struct file_lock *fl)
226{
227 int error;
228 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
229 error = -EIO;
230 else
231 error = sdp->sd_lockstruct.ls_ops->lm_punlock(
232 sdp->sd_lockstruct.ls_lockspace,
233 name, file, fl);
234 return error;
235}
236
237void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
238 unsigned int message)
239{
240 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
241 sdp->sd_lockstruct.ls_ops->lm_recovery_done(
242 sdp->sd_lockstruct.ls_lockspace, jid, message);
243}
244
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h
new file mode 100644
index 000000000000..e821101d19c0
--- /dev/null
+++ b/fs/gfs2/lm.h
@@ -0,0 +1,41 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_DOT_H__
11#define __LM_DOT_H__
12
13int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
14void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
15void gfs2_lm_unmount(struct gfs2_sbd *sdp);
16int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
17__attribute__ ((format(printf, 2, 3)));
18int gfs2_lm_get_lock(struct gfs2_sbd *sdp,
19 struct lm_lockname *name, lm_lock_t **lockp);
20void gfs2_lm_put_lock(struct gfs2_sbd *sdp, lm_lock_t *lock);
21unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, lm_lock_t *lock,
22 unsigned int cur_state, unsigned int req_state,
23 unsigned int flags);
24unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, lm_lock_t *lock,
25 unsigned int cur_state);
26void gfs2_lm_cancel(struct gfs2_sbd *sdp, lm_lock_t *lock);
27int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char **lvbp);
28void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, lm_lock_t *lock, char *lvb);
29int gfs2_lm_plock_get(struct gfs2_sbd *sdp,
30 struct lm_lockname *name,
31 struct file *file, struct file_lock *fl);
32int gfs2_lm_plock(struct gfs2_sbd *sdp,
33 struct lm_lockname *name,
34 struct file *file, int cmd, struct file_lock *fl);
35int gfs2_lm_punlock(struct gfs2_sbd *sdp,
36 struct lm_lockname *name,
37 struct file *file, struct file_lock *fl);
38void gfs2_lm_recovery_done(struct gfs2_sbd *sdp,
39 unsigned int jid, unsigned int message);
40
41#endif /* __LM_DOT_H__ */
diff --git a/fs/gfs2/lm_interface.h b/fs/gfs2/lm_interface.h
new file mode 100644
index 000000000000..9d34bf3df103
--- /dev/null
+++ b/fs/gfs2/lm_interface.h
@@ -0,0 +1,295 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LM_INTERFACE_DOT_H__
11#define __LM_INTERFACE_DOT_H__
12
13/*
14 * Opaque handles represent the lock module's lockspace structure, the lock
15 * module's lock structures, and GFS's file system (superblock) structure.
16 */
17
18typedef void lm_lockspace_t;
19typedef void lm_lock_t;
20typedef void lm_fsdata_t;
21
22typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
23 void *data);
24
25/*
26 * lm_mount() flags
27 *
28 * LM_MFLAG_SPECTATOR
29 * GFS is asking to join the filesystem's lockspace, but it doesn't want to
30 * modify the filesystem. The lock module shouldn't assign a journal to the FS
31 * mount. It shouldn't send recovery callbacks to the FS mount. If the node
32 * dies or withdraws, all locks can be wiped immediately.
33 */
34
35#define LM_MFLAG_SPECTATOR 0x00000001
36
37/*
38 * lm_lockstruct flags
39 *
40 * LM_LSFLAG_LOCAL
41 * The lock_nolock module returns LM_LSFLAG_LOCAL to GFS, indicating that GFS
42 * can make single-node optimizations.
43 */
44
45#define LM_LSFLAG_LOCAL 0x00000001
46
47/*
48 * lm_lockname types
49 */
50
51#define LM_TYPE_RESERVED 0x00
52#define LM_TYPE_NONDISK 0x01
53#define LM_TYPE_INODE 0x02
54#define LM_TYPE_RGRP 0x03
55#define LM_TYPE_META 0x04
56#define LM_TYPE_IOPEN 0x05
57#define LM_TYPE_FLOCK 0x06
58#define LM_TYPE_PLOCK 0x07
59#define LM_TYPE_QUOTA 0x08
60#define LM_TYPE_JOURNAL 0x09
61
62/*
63 * lm_lock() states
64 *
65 * SHARED is compatible with SHARED, not with DEFERRED or EX.
66 * DEFERRED is compatible with DEFERRED, not with SHARED or EX.
67 */
68
69#define LM_ST_UNLOCKED 0
70#define LM_ST_EXCLUSIVE 1
71#define LM_ST_DEFERRED 2
72#define LM_ST_SHARED 3
73
74/*
75 * lm_lock() flags
76 *
77 * LM_FLAG_TRY
78 * Don't wait to acquire the lock if it can't be granted immediately.
79 *
80 * LM_FLAG_TRY_1CB
81 * Send one blocking callback if TRY is set and the lock is not granted.
82 *
83 * LM_FLAG_NOEXP
84 * GFS sets this flag on lock requests it makes while doing journal recovery.
85 * These special requests should not be blocked due to the recovery like
86 * ordinary locks would be.
87 *
88 * LM_FLAG_ANY
89 * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may
90 * also be granted in SHARED. The preferred state is whichever is compatible
91 * with other granted locks, or the specified state if no other locks exist.
92 *
93 * LM_FLAG_PRIORITY
94 * Override fairness considerations. Suppose a lock is held in a shared state
95 * and there is a pending request for the deferred state. A shared lock
96 * request with the priority flag would be allowed to bypass the deferred
97 * request and directly join the other shared lock. A shared lock request
98 * without the priority flag might be forced to wait until the deferred
99 * request had acquired and released the lock.
100 */
101
102#define LM_FLAG_TRY 0x00000001
103#define LM_FLAG_TRY_1CB 0x00000002
104#define LM_FLAG_NOEXP 0x00000004
105#define LM_FLAG_ANY 0x00000008
106#define LM_FLAG_PRIORITY 0x00000010
107
108/*
109 * lm_lock() and lm_async_cb return flags
110 *
111 * LM_OUT_ST_MASK
112 * Masks the lower two bits of lock state in the returned value.
113 *
114 * LM_OUT_CACHEABLE
115 * The lock hasn't been released so GFS can continue to cache data for it.
116 *
117 * LM_OUT_CANCELED
118 * The lock request was canceled.
119 *
120 * LM_OUT_ASYNC
121 * The result of the request will be returned in an LM_CB_ASYNC callback.
122 */
123
124#define LM_OUT_ST_MASK 0x00000003
125#define LM_OUT_CACHEABLE 0x00000004
126#define LM_OUT_CANCELED 0x00000008
127#define LM_OUT_ASYNC 0x00000080
128#define LM_OUT_ERROR 0x00000100
129
130/*
131 * lm_callback_t types
132 *
133 * LM_CB_NEED_E LM_CB_NEED_D LM_CB_NEED_S
134 * Blocking callback, a remote node is requesting the given lock in
135 * EXCLUSIVE, DEFERRED, or SHARED.
136 *
137 * LM_CB_NEED_RECOVERY
138 * The given journal needs to be recovered.
139 *
140 * LM_CB_DROPLOCKS
141 * Reduce the number of cached locks.
142 *
143 * LM_CB_ASYNC
144 * The given lock has been granted.
145 */
146
147#define LM_CB_NEED_E 257
148#define LM_CB_NEED_D 258
149#define LM_CB_NEED_S 259
150#define LM_CB_NEED_RECOVERY 260
151#define LM_CB_DROPLOCKS 261
152#define LM_CB_ASYNC 262
153
154/*
155 * lm_recovery_done() messages
156 */
157
158#define LM_RD_GAVEUP 308
159#define LM_RD_SUCCESS 309
160
161
/* Name of a lock: a 64-bit number plus a type (see the LM_TYPE_* values). */
struct lm_lockname {
	uint64_t ln_number;	/* lock number */
	unsigned int ln_type;	/* lock type */
};
166
/* True when both lock names carry the same number and the same type.
   Both arguments are pointers; each is evaluated twice. */
#define lm_name_equal(name1, name2) \
	(((name1)->ln_number == (name2)->ln_number) && \
	 ((name1)->ln_type == (name2)->ln_type))

171struct lm_async_cb {
172 struct lm_lockname lc_name;
173 int lc_ret;
174};
175
176struct lm_lockstruct;
177
178struct lm_lockops {
179 char lm_proto_name[256];
180
181 /*
182 * Mount/Unmount
183 */
184
185 int (*lm_mount) (char *table_name, char *host_data,
186 lm_callback_t cb, lm_fsdata_t *fsdata,
187 unsigned int min_lvb_size, int flags,
188 struct lm_lockstruct *lockstruct,
189 struct kobject *fskobj);
190
191 void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
192
193 void (*lm_unmount) (lm_lockspace_t *lockspace);
194
195 void (*lm_withdraw) (lm_lockspace_t *lockspace);
196
197 /*
198 * Lock oriented operations
199 */
200
201 int (*lm_get_lock) (lm_lockspace_t *lockspace,
202 struct lm_lockname *name, lm_lock_t **lockp);
203
204 void (*lm_put_lock) (lm_lock_t *lock);
205
206 unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
207 unsigned int req_state, unsigned int flags);
208
209 unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
210
211 void (*lm_cancel) (lm_lock_t *lock);
212
213 int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
214 void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
215 void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
216
217 /*
218 * Posix Lock oriented operations
219 */
220
221 int (*lm_plock_get) (lm_lockspace_t *lockspace,
222 struct lm_lockname *name,
223 struct file *file, struct file_lock *fl);
224
225 int (*lm_plock) (lm_lockspace_t *lockspace,
226 struct lm_lockname *name,
227 struct file *file, int cmd, struct file_lock *fl);
228
229 int (*lm_punlock) (lm_lockspace_t *lockspace,
230 struct lm_lockname *name,
231 struct file *file, struct file_lock *fl);
232
233 /*
234 * Client oriented operations
235 */
236
237 void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
238 unsigned int message);
239
240 struct module *lm_owner;
241};
242
243/*
244 * lm_mount() return values
245 *
246 * ls_jid - the journal ID this node should use
247 * ls_first - this node is the first to mount the file system
248 * ls_lvb_size - size in bytes of lock value blocks
249 * ls_lockspace - lock module's context for this file system
250 * ls_ops - lock module's functions
251 * ls_flags - lock module features
252 */
253
254struct lm_lockstruct {
255 unsigned int ls_jid;
256 unsigned int ls_first;
257 unsigned int ls_lvb_size;
258 lm_lockspace_t *ls_lockspace;
259 struct lm_lockops *ls_ops;
260 int ls_flags;
261};
262
263void __init gfs2_init_lmh(void);
264
265/*
266 * Lock module bottom interface. A lock module makes itself available to GFS
267 * with these functions.
268 *
269 * For the time being, we copy the gfs1 lock module bottom interface so the
270 * same lock modules can be used with both gfs1 and gfs2 (it won't be possible
271 * to load both gfs1 and gfs2 at once.) Eventually the lock modules will fork
272 * for gfs1/gfs2 and this API can change to the gfs2_ prefix.
273 */
274
275int gfs_register_lockproto(struct lm_lockops *proto);
276
277void gfs_unregister_lockproto(struct lm_lockops *proto);
278
279/*
280 * Lock module top interface. GFS calls these functions when mounting or
281 * unmounting a file system.
282 */
283
284int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
285 lm_callback_t cb, lm_fsdata_t *fsdata,
286 unsigned int min_lvb_size, int flags,
287 struct lm_lockstruct *lockstruct,
288 struct kobject *fskobj);
289
290void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct);
291
292void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct);
293
294#endif /* __LM_INTERFACE_DOT_H__ */
295
diff --git a/fs/gfs2/locking.c b/fs/gfs2/locking.c
new file mode 100644
index 000000000000..183192836e98
--- /dev/null
+++ b/fs/gfs2/locking.c
@@ -0,0 +1,191 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/string.h>
13#include <linux/slab.h>
14#include <linux/wait.h>
15#include <linux/sched.h>
16#include <linux/kmod.h>
17#include <linux/fs.h>
18#include <linux/delay.h>
19
20#include "lm_interface.h"
21
22struct lmh_wrapper {
23 struct list_head lw_list;
24 struct lm_lockops *lw_ops;
25};
26
27/* List of registered low-level locking protocols. A file system selects one
28 of them by name at mount time, e.g. lock_nolock, lock_dlm. */
29
30static struct list_head lmh_list;
31static struct mutex lmh_lock;
32
33/**
34 * gfs_register_lockproto - Register a low-level locking protocol
35 * @proto: the protocol definition
36 *
37 * Returns: 0 on success, -EXXX on failure
38 */
39
40int gfs_register_lockproto(struct lm_lockops *proto)
41{
42 struct lmh_wrapper *lw;
43
44 mutex_lock(&lmh_lock);
45
46 list_for_each_entry(lw, &lmh_list, lw_list) {
47 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
48 mutex_unlock(&lmh_lock);
49 printk(KERN_INFO "GFS2: protocol %s already exists\n",
50 proto->lm_proto_name);
51 return -EEXIST;
52 }
53 }
54
55 lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
56 if (!lw) {
57 mutex_unlock(&lmh_lock);
58 return -ENOMEM;
59 }
60
61 lw->lw_ops = proto;
62 list_add(&lw->lw_list, &lmh_list);
63
64 mutex_unlock(&lmh_lock);
65
66 return 0;
67}
68
69/**
70 * gfs_unregister_lockproto - Unregister a low-level locking protocol
71 * @proto: the protocol definition
72 *
73 */
74
75void gfs_unregister_lockproto(struct lm_lockops *proto)
76{
77 struct lmh_wrapper *lw;
78
79 mutex_lock(&lmh_lock);
80
81 list_for_each_entry(lw, &lmh_list, lw_list) {
82 if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
83 list_del(&lw->lw_list);
84 mutex_unlock(&lmh_lock);
85 kfree(lw);
86 return;
87 }
88 }
89
90 mutex_unlock(&lmh_lock);
91
92 printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
93 proto->lm_proto_name);
94}
95
96/**
97 * gfs2_mount_lockproto - Mount a lock protocol
98 * @proto_name - the name of the protocol
99 * @table_name - the name of the lock space
100 * @host_data - data specific to this host
101 * @cb - the callback to the code using the lock module
102 * @fsdata - data to pass back with the callback
103 * @min_lvb_size - the mininum LVB size that the caller can deal with
104 * @flags - LM_MFLAG_*
105 * @lockstruct - a structure returned describing the mount
106 *
107 * Returns: 0 on success, -EXXX on failure
108 */
109
110int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
111 lm_callback_t cb, lm_fsdata_t *fsdata,
112 unsigned int min_lvb_size, int flags,
113 struct lm_lockstruct *lockstruct,
114 struct kobject *fskobj)
115{
116 struct lmh_wrapper *lw = NULL;
117 int try = 0;
118 int error, found;
119
120 retry:
121 mutex_lock(&lmh_lock);
122
123 found = 0;
124 list_for_each_entry(lw, &lmh_list, lw_list) {
125 if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
126 found = 1;
127 break;
128 }
129 }
130
131 if (!found) {
132 if (!try && capable(CAP_SYS_MODULE)) {
133 try = 1;
134 mutex_unlock(&lmh_lock);
135 request_module(proto_name);
136 goto retry;
137 }
138 printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
139 error = -ENOENT;
140 goto out;
141 }
142
143 if (!try_module_get(lw->lw_ops->lm_owner)) {
144 try = 0;
145 mutex_unlock(&lmh_lock);
146 msleep(1000);
147 goto retry;
148 }
149
150 error = lw->lw_ops->lm_mount(table_name, host_data, cb, fsdata,
151 min_lvb_size, flags, lockstruct, fskobj);
152 if (error)
153 module_put(lw->lw_ops->lm_owner);
154 out:
155 mutex_unlock(&lmh_lock);
156 return error;
157}
158
159void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
160{
161 mutex_lock(&lmh_lock);
162 lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
163 if (lockstruct->ls_ops->lm_owner)
164 module_put(lockstruct->ls_ops->lm_owner);
165 mutex_unlock(&lmh_lock);
166}
167
168/**
169 * gfs2_withdraw_lockproto - abnormally unmount a lock module
170 * @lockstruct: the lockstruct passed into mount
171 *
172 */
173
174void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
175{
176 mutex_lock(&lmh_lock);
177 lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);
178 if (lockstruct->ls_ops->lm_owner)
179 module_put(lockstruct->ls_ops->lm_owner);
180 mutex_unlock(&lmh_lock);
181}
182
183void __init gfs2_init_lmh(void)
184{
185 mutex_init(&lmh_lock);
186 INIT_LIST_HEAD(&lmh_list);
187}
188
189EXPORT_SYMBOL_GPL(gfs_register_lockproto);
190EXPORT_SYMBOL_GPL(gfs_unregister_lockproto);
191
diff --git a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
new file mode 100644
index 000000000000..a9733ff80371
--- /dev/null
+++ b/fs/gfs2/locking/dlm/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS) += lock_dlm.o
2lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
3
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
new file mode 100644
index 000000000000..e74f1215672f
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -0,0 +1,541 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12static char junk_lvb[GDLM_LVB_SIZE];
13
14static void queue_complete(struct gdlm_lock *lp)
15{
16 struct gdlm_ls *ls = lp->ls;
17
18 clear_bit(LFL_ACTIVE, &lp->flags);
19
20 spin_lock(&ls->async_lock);
21 list_add_tail(&lp->clist, &ls->complete);
22 spin_unlock(&ls->async_lock);
23 wake_up(&ls->thread_wait);
24}
25
/* Completion callback passed to dlm_lock(); astarg is the gdlm_lock. */
static inline void gdlm_ast(void *astarg)
{
	struct gdlm_lock *lp = astarg;

	queue_complete(lp);
}
30
31static inline void gdlm_bast(void *astarg, int mode)
32{
33 struct gdlm_lock *lp = astarg;
34 struct gdlm_ls *ls = lp->ls;
35
36 if (!mode) {
37 printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
38 lp->lockname.ln_type,
39 (unsigned long long)lp->lockname.ln_number);
40 return;
41 }
42
43 spin_lock(&ls->async_lock);
44 if (!lp->bast_mode) {
45 list_add_tail(&lp->blist, &ls->blocking);
46 lp->bast_mode = mode;
47 } else if (lp->bast_mode < mode)
48 lp->bast_mode = mode;
49 spin_unlock(&ls->async_lock);
50 wake_up(&ls->thread_wait);
51}
52
53void gdlm_queue_delayed(struct gdlm_lock *lp)
54{
55 struct gdlm_ls *ls = lp->ls;
56
57 spin_lock(&ls->async_lock);
58 list_add_tail(&lp->delay_list, &ls->delayed);
59 spin_unlock(&ls->async_lock);
60}
61
62/* convert gfs lock-state to dlm lock-mode */
63
64static int16_t make_mode(int16_t lmstate)
65{
66 switch (lmstate) {
67 case LM_ST_UNLOCKED:
68 return DLM_LOCK_NL;
69 case LM_ST_EXCLUSIVE:
70 return DLM_LOCK_EX;
71 case LM_ST_DEFERRED:
72 return DLM_LOCK_CW;
73 case LM_ST_SHARED:
74 return DLM_LOCK_PR;
75 }
76 gdlm_assert(0, "unknown LM state %d", lmstate);
77 return -1;
78}
79
80/* convert dlm lock-mode to gfs lock-state */
81
82int16_t gdlm_make_lmstate(int16_t dlmmode)
83{
84 switch (dlmmode) {
85 case DLM_LOCK_IV:
86 case DLM_LOCK_NL:
87 return LM_ST_UNLOCKED;
88 case DLM_LOCK_EX:
89 return LM_ST_EXCLUSIVE;
90 case DLM_LOCK_CW:
91 return LM_ST_DEFERRED;
92 case DLM_LOCK_PR:
93 return LM_ST_SHARED;
94 }
95 gdlm_assert(0, "unknown DLM mode %d", dlmmode);
96 return -1;
97}
98
99/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
100 DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS. */
101
102static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
103{
104 int16_t cur = make_mode(cur_state);
105 if (lp->cur != DLM_LOCK_IV)
106 gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
107}
108
109static inline unsigned int make_flags(struct gdlm_lock *lp,
110 unsigned int gfs_flags,
111 int16_t cur, int16_t req)
112{
113 unsigned int lkf = 0;
114
115 if (gfs_flags & LM_FLAG_TRY)
116 lkf |= DLM_LKF_NOQUEUE;
117
118 if (gfs_flags & LM_FLAG_TRY_1CB) {
119 lkf |= DLM_LKF_NOQUEUE;
120 lkf |= DLM_LKF_NOQUEUEBAST;
121 }
122
123 if (gfs_flags & LM_FLAG_PRIORITY) {
124 lkf |= DLM_LKF_NOORDER;
125 lkf |= DLM_LKF_HEADQUE;
126 }
127
128 if (gfs_flags & LM_FLAG_ANY) {
129 if (req == DLM_LOCK_PR)
130 lkf |= DLM_LKF_ALTCW;
131 else if (req == DLM_LOCK_CW)
132 lkf |= DLM_LKF_ALTPR;
133 }
134
135 if (lp->lksb.sb_lkid != 0) {
136 lkf |= DLM_LKF_CONVERT;
137
138 /* Conversion deadlock avoidance by DLM */
139
140 if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
141 !(lkf & DLM_LKF_NOQUEUE) &&
142 cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
143 lkf |= DLM_LKF_CONVDEADLK;
144 }
145
146 if (lp->lvb)
147 lkf |= DLM_LKF_VALBLK;
148
149 return lkf;
150}
151
152/* make_strname - convert GFS lock numbers to a string */
153
154static inline void make_strname(struct lm_lockname *lockname,
155 struct gdlm_strname *str)
156{
157 sprintf(str->name, "%8x%16llx", lockname->ln_type,
158 (unsigned long long)lockname->ln_number);
159 str->namelen = GDLM_STRNAME_BYTES;
160}
161
162static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
163 struct gdlm_lock **lpp)
164{
165 struct gdlm_lock *lp;
166
167 lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);
168 if (!lp)
169 return -ENOMEM;
170
171 lp->lockname = *name;
172 lp->ls = ls;
173 lp->cur = DLM_LOCK_IV;
174 lp->lvb = NULL;
175 lp->hold_null = NULL;
176 init_completion(&lp->ast_wait);
177 INIT_LIST_HEAD(&lp->clist);
178 INIT_LIST_HEAD(&lp->blist);
179 INIT_LIST_HEAD(&lp->delay_list);
180
181 spin_lock(&ls->async_lock);
182 list_add(&lp->all_list, &ls->all_locks);
183 ls->all_locks_count++;
184 spin_unlock(&ls->async_lock);
185
186 *lpp = lp;
187 return 0;
188}
189
190void gdlm_delete_lp(struct gdlm_lock *lp)
191{
192 struct gdlm_ls *ls = lp->ls;
193
194 spin_lock(&ls->async_lock);
195 if (!list_empty(&lp->clist))
196 list_del_init(&lp->clist);
197 if (!list_empty(&lp->blist))
198 list_del_init(&lp->blist);
199 if (!list_empty(&lp->delay_list))
200 list_del_init(&lp->delay_list);
201 gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
202 (unsigned long long)lp->lockname.ln_number);
203 list_del_init(&lp->all_list);
204 ls->all_locks_count--;
205 spin_unlock(&ls->async_lock);
206
207 kfree(lp);
208}
209
210int gdlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
211 lm_lock_t **lockp)
212{
213 struct gdlm_lock *lp;
214 int error;
215
216 error = gdlm_create_lp((struct gdlm_ls *) lockspace, name, &lp);
217
218 *lockp = (lm_lock_t *) lp;
219 return error;
220}
221
222void gdlm_put_lock(lm_lock_t *lock)
223{
224 gdlm_delete_lp((struct gdlm_lock *) lock);
225}
226
227unsigned int gdlm_do_lock(struct gdlm_lock *lp)
228{
229 struct gdlm_ls *ls = lp->ls;
230 struct gdlm_strname str;
231 int error, bast = 1;
232
233 /*
234 * When recovery is in progress, delay lock requests for submission
235 * once recovery is done. Requests for recovery (NOEXP) and unlocks
236 * can pass.
237 */
238
239 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
240 !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
241 gdlm_queue_delayed(lp);
242 return LM_OUT_ASYNC;
243 }
244
245 /*
246 * Submit the actual lock request.
247 */
248
249 if (test_bit(LFL_NOBAST, &lp->flags))
250 bast = 0;
251
252 make_strname(&lp->lockname, &str);
253
254 set_bit(LFL_ACTIVE, &lp->flags);
255
256 log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
257 (unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
258 lp->cur, lp->req, lp->lkf);
259
260 error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
261 str.name, str.namelen, 0, gdlm_ast, lp,
262 bast ? gdlm_bast : NULL);
263
264 if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
265 lp->lksb.sb_status = -EAGAIN;
266 queue_complete(lp);
267 error = 0;
268 }
269
270 if (error) {
271 log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
272 "flags=%lx", ls->fsname, lp->lockname.ln_type,
273 (unsigned long long)lp->lockname.ln_number, error,
274 lp->cur, lp->req, lp->lkf, lp->flags);
275 return LM_OUT_ERROR;
276 }
277 return LM_OUT_ASYNC;
278}
279
280static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
281{
282 struct gdlm_ls *ls = lp->ls;
283 unsigned int lkf = 0;
284 int error;
285
286 set_bit(LFL_DLM_UNLOCK, &lp->flags);
287 set_bit(LFL_ACTIVE, &lp->flags);
288
289 if (lp->lvb)
290 lkf = DLM_LKF_VALBLK;
291
292 log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
293 (unsigned long long)lp->lockname.ln_number,
294 lp->lksb.sb_lkid, lp->cur, lkf);
295
296 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
297
298 if (error) {
299 log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
300 "flags=%lx", ls->fsname, lp->lockname.ln_type,
301 (unsigned long long)lp->lockname.ln_number, error,
302 lp->cur, lp->req, lp->lkf, lp->flags);
303 return LM_OUT_ERROR;
304 }
305 return LM_OUT_ASYNC;
306}
307
308unsigned int gdlm_lock(lm_lock_t *lock, unsigned int cur_state,
309 unsigned int req_state, unsigned int flags)
310{
311 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
312
313 clear_bit(LFL_DLM_CANCEL, &lp->flags);
314 if (flags & LM_FLAG_NOEXP)
315 set_bit(LFL_NOBLOCK, &lp->flags);
316
317 check_cur_state(lp, cur_state);
318 lp->req = make_mode(req_state);
319 lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
320
321 return gdlm_do_lock(lp);
322}
323
324unsigned int gdlm_unlock(lm_lock_t *lock, unsigned int cur_state)
325{
326 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
327
328 clear_bit(LFL_DLM_CANCEL, &lp->flags);
329 if (lp->cur == DLM_LOCK_IV)
330 return 0;
331 return gdlm_do_unlock(lp);
332}
333
334void gdlm_cancel(lm_lock_t *lock)
335{
336 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
337 struct gdlm_ls *ls = lp->ls;
338 int error, delay_list = 0;
339
340 if (test_bit(LFL_DLM_CANCEL, &lp->flags))
341 return;
342
343 log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
344 (unsigned long long)lp->lockname.ln_number, lp->flags);
345
346 spin_lock(&ls->async_lock);
347 if (!list_empty(&lp->delay_list)) {
348 list_del_init(&lp->delay_list);
349 delay_list = 1;
350 }
351 spin_unlock(&ls->async_lock);
352
353 if (delay_list) {
354 set_bit(LFL_CANCEL, &lp->flags);
355 set_bit(LFL_ACTIVE, &lp->flags);
356 queue_complete(lp);
357 return;
358 }
359
360 if (!test_bit(LFL_ACTIVE, &lp->flags) ||
361 test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
362 log_info("gdlm_cancel skip %x,%llx flags %lx",
363 lp->lockname.ln_type,
364 (unsigned long long)lp->lockname.ln_number, lp->flags);
365 return;
366 }
367
368 /* the lock is blocked in the dlm */
369
370 set_bit(LFL_DLM_CANCEL, &lp->flags);
371 set_bit(LFL_ACTIVE, &lp->flags);
372
373 error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
374 NULL, lp);
375
376 log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
377 lp->lockname.ln_type,
378 (unsigned long long)lp->lockname.ln_number, lp->flags);
379
380 if (error == -EBUSY)
381 clear_bit(LFL_DLM_CANCEL, &lp->flags);
382}
383
384static int gdlm_add_lvb(struct gdlm_lock *lp)
385{
386 char *lvb;
387
388 lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
389 if (!lvb)
390 return -ENOMEM;
391
392 lp->lksb.sb_lvbptr = lvb;
393 lp->lvb = lvb;
394 return 0;
395}
396
397static void gdlm_del_lvb(struct gdlm_lock *lp)
398{
399 kfree(lp->lvb);
400 lp->lvb = NULL;
401 lp->lksb.sb_lvbptr = NULL;
402}
403
404/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
405 the completion) because gfs won't call hold_lvb() during a callback (from
406 the context of a lock_dlm thread). */
407
408static int hold_null_lock(struct gdlm_lock *lp)
409{
410 struct gdlm_lock *lpn = NULL;
411 int error;
412
413 if (lp->hold_null) {
414 printk(KERN_INFO "lock_dlm: lvb already held\n");
415 return 0;
416 }
417
418 error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
419 if (error)
420 goto out;
421
422 lpn->lksb.sb_lvbptr = junk_lvb;
423 lpn->lvb = junk_lvb;
424
425 lpn->req = DLM_LOCK_NL;
426 lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
427 set_bit(LFL_NOBAST, &lpn->flags);
428 set_bit(LFL_INLOCK, &lpn->flags);
429
430 init_completion(&lpn->ast_wait);
431 gdlm_do_lock(lpn);
432 wait_for_completion(&lpn->ast_wait);
433 error = lp->lksb.sb_status;
434 if (error) {
435 printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
436 error);
437 gdlm_delete_lp(lpn);
438 lpn = NULL;
439 }
440 out:
441 lp->hold_null = lpn;
442 return error;
443}
444
445/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
446 the completion) because gfs may call unhold_lvb() during a callback (from
447 the context of a lock_dlm thread) which could cause a deadlock since the
448 other lock_dlm thread could be engaged in recovery. */
449
450static void unhold_null_lock(struct gdlm_lock *lp)
451{
452 struct gdlm_lock *lpn = lp->hold_null;
453
454 gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
455 (unsigned long long)lp->lockname.ln_number);
456 lpn->lksb.sb_lvbptr = NULL;
457 lpn->lvb = NULL;
458 set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
459 gdlm_do_unlock(lpn);
460 lp->hold_null = NULL;
461}
462
463/* Acquire a NL lock because gfs requires the value block to remain
464 intact on the resource while the lvb is "held" even if it's holding no locks
465 on the resource. */
466
467int gdlm_hold_lvb(lm_lock_t *lock, char **lvbp)
468{
469 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
470 int error;
471
472 error = gdlm_add_lvb(lp);
473 if (error)
474 return error;
475
476 *lvbp = lp->lvb;
477
478 error = hold_null_lock(lp);
479 if (error)
480 gdlm_del_lvb(lp);
481
482 return error;
483}
484
485void gdlm_unhold_lvb(lm_lock_t *lock, char *lvb)
486{
487 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
488
489 unhold_null_lock(lp);
490 gdlm_del_lvb(lp);
491}
492
493void gdlm_sync_lvb(lm_lock_t *lock, char *lvb)
494{
495 struct gdlm_lock *lp = (struct gdlm_lock *) lock;
496
497 if (lp->cur != DLM_LOCK_EX)
498 return;
499
500 init_completion(&lp->ast_wait);
501 set_bit(LFL_SYNC_LVB, &lp->flags);
502
503 lp->req = DLM_LOCK_EX;
504 lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
505
506 gdlm_do_lock(lp);
507 wait_for_completion(&lp->ast_wait);
508}
509
510void gdlm_submit_delayed(struct gdlm_ls *ls)
511{
512 struct gdlm_lock *lp, *safe;
513
514 spin_lock(&ls->async_lock);
515 list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
516 list_del_init(&lp->delay_list);
517 list_add_tail(&lp->delay_list, &ls->submit);
518 }
519 spin_unlock(&ls->async_lock);
520 wake_up(&ls->thread_wait);
521}
522
523int gdlm_release_all_locks(struct gdlm_ls *ls)
524{
525 struct gdlm_lock *lp, *safe;
526 int count = 0;
527
528 spin_lock(&ls->async_lock);
529 list_for_each_entry_safe(lp, safe, &ls->all_locks, all_list) {
530 list_del_init(&lp->all_list);
531
532 if (lp->lvb && lp->lvb != junk_lvb)
533 kfree(lp->lvb);
534 kfree(lp);
535 count++;
536 }
537 spin_unlock(&ls->async_lock);
538
539 return count;
540}
541
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
new file mode 100644
index 000000000000..530c2f542584
--- /dev/null
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -0,0 +1,188 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef LOCK_DLM_DOT_H
11#define LOCK_DLM_DOT_H
12
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/spinlock.h>
16#include <linux/module.h>
17#include <linux/types.h>
18#include <linux/string.h>
19#include <linux/list.h>
20#include <linux/socket.h>
21#include <linux/delay.h>
22#include <linux/kthread.h>
23#include <linux/kobject.h>
24#include <linux/fcntl.h>
25#include <linux/wait.h>
26#include <net/sock.h>
27
28#include <linux/dlm.h>
29#include "../../lm_interface.h"
30
31/*
32 * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
33 * prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
34 * as "lock_dlm".
35 */
36
37#define GDLM_STRNAME_BYTES 24
38#define GDLM_LVB_SIZE 32
39#define GDLM_DROP_COUNT 50000
40#define GDLM_DROP_PERIOD 60
41#define GDLM_NAME_LEN 128
42
43/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
44 We sprintf these numbers into a 24 byte string of hex values to make them
45 human-readable (to make debugging simpler.) */
46
47struct gdlm_strname {
48 unsigned char name[GDLM_STRNAME_BYTES];
49 unsigned short namelen;
50};
51
52enum {
53 DFL_BLOCK_LOCKS = 0,
54 DFL_SPECTATOR = 1,
55 DFL_WITHDRAW = 2,
56};
57
58struct gdlm_ls {
59 uint32_t id;
60 int jid;
61 int first;
62 int first_done;
63 unsigned long flags;
64 struct kobject kobj;
65 char clustername[GDLM_NAME_LEN];
66 char fsname[GDLM_NAME_LEN];
67 int fsflags;
68 dlm_lockspace_t *dlm_lockspace;
69 lm_callback_t fscb;
70 lm_fsdata_t *fsdata;
71 int recover_jid;
72 int recover_jid_done;
73 int recover_jid_status;
74 spinlock_t async_lock;
75 struct list_head complete;
76 struct list_head blocking;
77 struct list_head delayed;
78 struct list_head submit;
79 struct list_head all_locks;
80 uint32_t all_locks_count;
81 wait_queue_head_t wait_control;
82 struct task_struct *thread1;
83 struct task_struct *thread2;
84 wait_queue_head_t thread_wait;
85 unsigned long drop_time;
86 int drop_locks_count;
87 int drop_locks_period;
88};
89
90enum {
91 LFL_NOBLOCK = 0,
92 LFL_NOCACHE = 1,
93 LFL_DLM_UNLOCK = 2,
94 LFL_DLM_CANCEL = 3,
95 LFL_SYNC_LVB = 4,
96 LFL_FORCE_PROMOTE = 5,
97 LFL_REREQUEST = 6,
98 LFL_ACTIVE = 7,
99 LFL_INLOCK = 8,
100 LFL_CANCEL = 9,
101 LFL_NOBAST = 10,
102 LFL_HEADQUE = 11,
103 LFL_UNLOCK_DELETE = 12,
104};
105
106struct gdlm_lock {
107 struct gdlm_ls *ls;
108 struct lm_lockname lockname;
109 char *lvb;
110 struct dlm_lksb lksb;
111
112 int16_t cur;
113 int16_t req;
114 int16_t prev_req;
115 uint32_t lkf; /* dlm flags DLM_LKF_ */
116 unsigned long flags; /* lock_dlm flags LFL_ */
117
118 int bast_mode; /* protected by async_lock */
119 struct completion ast_wait;
120
121 struct list_head clist; /* complete */
122 struct list_head blist; /* blocking */
123 struct list_head delay_list; /* delayed */
124 struct list_head all_list; /* all locks for the fs */
125 struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
126};
127
128#define gdlm_assert(assertion, fmt, args...) \
129do { \
130 if (unlikely(!(assertion))) { \
131 printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
132 "lock_dlm: " fmt "\n", \
133 #assertion, ##args); \
134 BUG(); \
135 } \
136} while (0)
137
138#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
139#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
140#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
141#ifdef LOCK_DLM_LOG_DEBUG
142#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
143#else
144#define log_debug(fmt, arg...)
145#endif
146
147/* sysfs.c */
148
149int gdlm_sysfs_init(void);
150void gdlm_sysfs_exit(void);
151int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
152void gdlm_kobject_release(struct gdlm_ls *);
153
154/* thread.c */
155
156int gdlm_init_threads(struct gdlm_ls *);
157void gdlm_release_threads(struct gdlm_ls *);
158
159/* lock.c */
160
161int16_t gdlm_make_lmstate(int16_t);
162void gdlm_queue_delayed(struct gdlm_lock *);
163void gdlm_submit_delayed(struct gdlm_ls *);
164int gdlm_release_all_locks(struct gdlm_ls *);
165void gdlm_delete_lp(struct gdlm_lock *);
166unsigned int gdlm_do_lock(struct gdlm_lock *);
167
168int gdlm_get_lock(lm_lockspace_t *, struct lm_lockname *, lm_lock_t **);
169void gdlm_put_lock(lm_lock_t *);
170unsigned int gdlm_lock(lm_lock_t *, unsigned int, unsigned int, unsigned int);
171unsigned int gdlm_unlock(lm_lock_t *, unsigned int);
172void gdlm_cancel(lm_lock_t *);
173int gdlm_hold_lvb(lm_lock_t *, char **);
174void gdlm_unhold_lvb(lm_lock_t *, char *);
175void gdlm_sync_lvb(lm_lock_t *, char *);
176
177/* plock.c */
178
179int gdlm_plock_init(void);
180void gdlm_plock_exit(void);
181int gdlm_plock(lm_lockspace_t *, struct lm_lockname *, struct file *, int,
182 struct file_lock *);
183int gdlm_plock_get(lm_lockspace_t *, struct lm_lockname *, struct file *,
184 struct file_lock *);
185int gdlm_punlock(lm_lockspace_t *, struct lm_lockname *, struct file *,
186 struct file_lock *);
187#endif
188
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
new file mode 100644
index 000000000000..89728c91665f
--- /dev/null
+++ b/fs/gfs2/locking/dlm/main.c
@@ -0,0 +1,64 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/init.h>
11
12#include "lock_dlm.h"
13
14extern int gdlm_drop_count;
15extern int gdlm_drop_period;
16
17extern struct lm_lockops gdlm_ops;
18
19static int __init init_lock_dlm(void)
20{
21 int error;
22
23 error = gfs_register_lockproto(&gdlm_ops);
24 if (error) {
25 printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
26 error);
27 return error;
28 }
29
30 error = gdlm_sysfs_init();
31 if (error) {
32 gfs_unregister_lockproto(&gdlm_ops);
33 return error;
34 }
35
36 error = gdlm_plock_init();
37 if (error) {
38 gdlm_sysfs_exit();
39 gfs_unregister_lockproto(&gdlm_ops);
40 return error;
41 }
42
43 gdlm_drop_count = GDLM_DROP_COUNT;
44 gdlm_drop_period = GDLM_DROP_PERIOD;
45
46 printk(KERN_INFO
47 "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
48 return 0;
49}
50
51static void __exit exit_lock_dlm(void)
52{
53 gdlm_plock_exit();
54 gdlm_sysfs_exit();
55 gfs_unregister_lockproto(&gdlm_ops);
56}
57
58module_init(init_lock_dlm);
59module_exit(exit_lock_dlm);
60
61MODULE_DESCRIPTION("GFS DLM Locking Module");
62MODULE_AUTHOR("Red Hat, Inc.");
63MODULE_LICENSE("GPL");
64
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
new file mode 100644
index 000000000000..3caeafc02a1b
--- /dev/null
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -0,0 +1,256 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12int gdlm_drop_count;
13int gdlm_drop_period;
14struct lm_lockops gdlm_ops;
15
16
17static struct gdlm_ls *init_gdlm(lm_callback_t cb, lm_fsdata_t *fsdata,
18 int flags, char *table_name)
19{
20 struct gdlm_ls *ls;
21 char buf[256], *p;
22
23 ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
24 if (!ls)
25 return NULL;
26
27 ls->drop_locks_count = gdlm_drop_count;
28 ls->drop_locks_period = gdlm_drop_period;
29 ls->fscb = cb;
30 ls->fsdata = fsdata;
31 ls->fsflags = flags;
32 spin_lock_init(&ls->async_lock);
33 INIT_LIST_HEAD(&ls->complete);
34 INIT_LIST_HEAD(&ls->blocking);
35 INIT_LIST_HEAD(&ls->delayed);
36 INIT_LIST_HEAD(&ls->submit);
37 INIT_LIST_HEAD(&ls->all_locks);
38 init_waitqueue_head(&ls->thread_wait);
39 init_waitqueue_head(&ls->wait_control);
40 ls->thread1 = NULL;
41 ls->thread2 = NULL;
42 ls->drop_time = jiffies;
43 ls->jid = -1;
44
45 strncpy(buf, table_name, 256);
46 buf[255] = '\0';
47
48 p = strstr(buf, ":");
49 if (!p) {
50 log_info("invalid table_name \"%s\"", table_name);
51 kfree(ls);
52 return NULL;
53 }
54 *p = '\0';
55 p++;
56
57 strncpy(ls->clustername, buf, GDLM_NAME_LEN);
58 strncpy(ls->fsname, p, GDLM_NAME_LEN);
59
60 return ls;
61}
62
63static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
64{
65 char data[256];
66 char *options, *x, *y;
67 int error = 0;
68
69 memset(data, 0, 256);
70 strncpy(data, data_arg, 255);
71
72 for (options = data; (x = strsep(&options, ":")); ) {
73 if (!*x)
74 continue;
75
76 y = strchr(x, '=');
77 if (y)
78 *y++ = 0;
79
80 if (!strcmp(x, "jid")) {
81 if (!y) {
82 log_error("need argument to jid");
83 error = -EINVAL;
84 break;
85 }
86 sscanf(y, "%u", &ls->jid);
87
88 } else if (!strcmp(x, "first")) {
89 if (!y) {
90 log_error("need argument to first");
91 error = -EINVAL;
92 break;
93 }
94 sscanf(y, "%u", &ls->first);
95
96 } else if (!strcmp(x, "id")) {
97 if (!y) {
98 log_error("need argument to id");
99 error = -EINVAL;
100 break;
101 }
102 sscanf(y, "%u", &ls->id);
103
104 } else if (!strcmp(x, "nodir")) {
105 if (!y) {
106 log_error("need argument to nodir");
107 error = -EINVAL;
108 break;
109 }
110 sscanf(y, "%u", nodir);
111
112 } else {
113 log_error("unkonwn option: %s", x);
114 error = -EINVAL;
115 break;
116 }
117 }
118
119 return error;
120}
121
122static int gdlm_mount(char *table_name, char *host_data,
123 lm_callback_t cb, lm_fsdata_t *fsdata,
124 unsigned int min_lvb_size, int flags,
125 struct lm_lockstruct *lockstruct,
126 struct kobject *fskobj)
127{
128 struct gdlm_ls *ls;
129 int error = -ENOMEM, nodir = 0;
130
131 if (min_lvb_size > GDLM_LVB_SIZE)
132 goto out;
133
134 ls = init_gdlm(cb, fsdata, flags, table_name);
135 if (!ls)
136 goto out;
137
138 error = make_args(ls, host_data, &nodir);
139 if (error)
140 goto out;
141
142 error = gdlm_init_threads(ls);
143 if (error)
144 goto out_free;
145
146 error = gdlm_kobject_setup(ls, fskobj);
147 if (error)
148 goto out_thread;
149
150 error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
151 &ls->dlm_lockspace,
152 nodir ? DLM_LSFL_NODIR : 0,
153 GDLM_LVB_SIZE);
154 if (error) {
155 log_error("dlm_new_lockspace error %d", error);
156 goto out_kobj;
157 }
158
159 lockstruct->ls_jid = ls->jid;
160 lockstruct->ls_first = ls->first;
161 lockstruct->ls_lockspace = ls;
162 lockstruct->ls_ops = &gdlm_ops;
163 lockstruct->ls_flags = 0;
164 lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
165 return 0;
166
167 out_kobj:
168 gdlm_kobject_release(ls);
169 out_thread:
170 gdlm_release_threads(ls);
171 out_free:
172 kfree(ls);
173 out:
174 return error;
175}
176
177static void gdlm_unmount(lm_lockspace_t *lockspace)
178{
179 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
180 int rv;
181
182 log_debug("unmount flags %lx", ls->flags);
183
184 /* FIXME: serialize unmount and withdraw in case they
185 happen at once. Also, if unmount follows withdraw,
186 wait for withdraw to finish. */
187
188 if (test_bit(DFL_WITHDRAW, &ls->flags))
189 goto out;
190
191 gdlm_kobject_release(ls);
192 dlm_release_lockspace(ls->dlm_lockspace, 2);
193 gdlm_release_threads(ls);
194 rv = gdlm_release_all_locks(ls);
195 if (rv)
196 log_info("gdlm_unmount: %d stray locks freed", rv);
197 out:
198 kfree(ls);
199}
200
201static void gdlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
202 unsigned int message)
203{
204 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
205 ls->recover_jid_done = jid;
206 ls->recover_jid_status = message;
207 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
208}
209
210static void gdlm_others_may_mount(lm_lockspace_t *lockspace)
211{
212 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
213 ls->first_done = 1;
214 kobject_uevent(&ls->kobj, KOBJ_CHANGE);
215}
216
217/* Userspace gets the offline uevent, blocks new gfs locks on
218 other mounters, and lets us know (sets WITHDRAW flag). Then,
219 userspace leaves the mount group while we leave the lockspace. */
220
221static void gdlm_withdraw(lm_lockspace_t *lockspace)
222{
223 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
224
225 kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
226
227 wait_event_interruptible(ls->wait_control,
228 test_bit(DFL_WITHDRAW, &ls->flags));
229
230 dlm_release_lockspace(ls->dlm_lockspace, 2);
231 gdlm_release_threads(ls);
232 gdlm_release_all_locks(ls);
233 gdlm_kobject_release(ls);
234}
235
236struct lm_lockops gdlm_ops = {
237 .lm_proto_name = "lock_dlm",
238 .lm_mount = gdlm_mount,
239 .lm_others_may_mount = gdlm_others_may_mount,
240 .lm_unmount = gdlm_unmount,
241 .lm_withdraw = gdlm_withdraw,
242 .lm_get_lock = gdlm_get_lock,
243 .lm_put_lock = gdlm_put_lock,
244 .lm_lock = gdlm_lock,
245 .lm_unlock = gdlm_unlock,
246 .lm_plock = gdlm_plock,
247 .lm_punlock = gdlm_punlock,
248 .lm_plock_get = gdlm_plock_get,
249 .lm_cancel = gdlm_cancel,
250 .lm_hold_lvb = gdlm_hold_lvb,
251 .lm_unhold_lvb = gdlm_unhold_lvb,
252 .lm_sync_lvb = gdlm_sync_lvb,
253 .lm_recovery_done = gdlm_recovery_done,
254 .lm_owner = THIS_MODULE,
255};
256
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
new file mode 100644
index 000000000000..6adfb2d4fd8c
--- /dev/null
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -0,0 +1,299 @@
1/*
2 * Copyright (C) 2005 Red Hat, Inc. All rights reserved.
3 *
4 * This copyrighted material is made available to anyone wishing to use,
5 * modify, copy, or redistribute it subject to the terms and conditions
6 * of the GNU General Public License v.2.
7 */
8
9#include <linux/miscdevice.h>
10#include <linux/lock_dlm_plock.h>
11
12#include "lock_dlm.h"
13
14
15static spinlock_t ops_lock;
16static struct list_head send_list;
17static struct list_head recv_list;
18static wait_queue_head_t send_wq;
19static wait_queue_head_t recv_wq;
20
21struct plock_op {
22 struct list_head list;
23 int done;
24 struct gdlm_plock_info info;
25};
26
27static inline void set_version(struct gdlm_plock_info *info)
28{
29 info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
30 info->version[1] = GDLM_PLOCK_VERSION_MINOR;
31 info->version[2] = GDLM_PLOCK_VERSION_PATCH;
32}
33
34static int check_version(struct gdlm_plock_info *info)
35{
36 if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
37 (GDLM_PLOCK_VERSION_MINOR < info->version[1])) {
38 log_error("plock device version mismatch: "
39 "kernel (%u.%u.%u), user (%u.%u.%u)",
40 GDLM_PLOCK_VERSION_MAJOR,
41 GDLM_PLOCK_VERSION_MINOR,
42 GDLM_PLOCK_VERSION_PATCH,
43 info->version[0],
44 info->version[1],
45 info->version[2]);
46 return -EINVAL;
47 }
48 return 0;
49}
50
51static void send_op(struct plock_op *op)
52{
53 set_version(&op->info);
54 INIT_LIST_HEAD(&op->list);
55 spin_lock(&ops_lock);
56 list_add_tail(&op->list, &send_list);
57 spin_unlock(&ops_lock);
58 wake_up(&send_wq);
59}
60
61int gdlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
62 struct file *file, int cmd, struct file_lock *fl)
63{
64 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
65 struct plock_op *op;
66 int rv;
67
68 op = kzalloc(sizeof(*op), GFP_KERNEL);
69 if (!op)
70 return -ENOMEM;
71
72 op->info.optype = GDLM_PLOCK_OP_LOCK;
73 op->info.pid = fl->fl_pid;
74 op->info.ex = (fl->fl_type == F_WRLCK);
75 op->info.wait = IS_SETLKW(cmd);
76 op->info.fsid = ls->id;
77 op->info.number = name->ln_number;
78 op->info.start = fl->fl_start;
79 op->info.end = fl->fl_end;
80
81 send_op(op);
82 wait_event(recv_wq, (op->done != 0));
83
84 spin_lock(&ops_lock);
85 if (!list_empty(&op->list)) {
86 printk(KERN_INFO "plock op on list\n");
87 list_del(&op->list);
88 }
89 spin_unlock(&ops_lock);
90
91 rv = op->info.rv;
92
93 if (!rv) {
94 if (posix_lock_file_wait(file, fl) < 0)
95 log_error("gdlm_plock: vfs lock error %x,%llx",
96 name->ln_type,
97 (unsigned long long)name->ln_number);
98 }
99
100 kfree(op);
101 return rv;
102}
103
104int gdlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
105 struct file *file, struct file_lock *fl)
106{
107 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
108 struct plock_op *op;
109 int rv;
110
111 op = kzalloc(sizeof(*op), GFP_KERNEL);
112 if (!op)
113 return -ENOMEM;
114
115 if (posix_lock_file_wait(file, fl) < 0)
116 log_error("gdlm_punlock: vfs unlock error %x,%llx",
117 name->ln_type, (unsigned long long)name->ln_number);
118
119 op->info.optype = GDLM_PLOCK_OP_UNLOCK;
120 op->info.pid = fl->fl_pid;
121 op->info.fsid = ls->id;
122 op->info.number = name->ln_number;
123 op->info.start = fl->fl_start;
124 op->info.end = fl->fl_end;
125
126 send_op(op);
127 wait_event(recv_wq, (op->done != 0));
128
129 spin_lock(&ops_lock);
130 if (!list_empty(&op->list)) {
131 printk(KERN_INFO "punlock op on list\n");
132 list_del(&op->list);
133 }
134 spin_unlock(&ops_lock);
135
136 rv = op->info.rv;
137
138 kfree(op);
139 return rv;
140}
141
142int gdlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
143 struct file *file, struct file_lock *fl)
144{
145 struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
146 struct plock_op *op;
147 int rv;
148
149 op = kzalloc(sizeof(*op), GFP_KERNEL);
150 if (!op)
151 return -ENOMEM;
152
153 op->info.optype = GDLM_PLOCK_OP_GET;
154 op->info.pid = fl->fl_pid;
155 op->info.ex = (fl->fl_type == F_WRLCK);
156 op->info.fsid = ls->id;
157 op->info.number = name->ln_number;
158 op->info.start = fl->fl_start;
159 op->info.end = fl->fl_end;
160
161 send_op(op);
162 wait_event(recv_wq, (op->done != 0));
163
164 spin_lock(&ops_lock);
165 if (!list_empty(&op->list)) {
166 printk(KERN_INFO "plock_get op on list\n");
167 list_del(&op->list);
168 }
169 spin_unlock(&ops_lock);
170
171 rv = op->info.rv;
172
173 if (rv == 0)
174 fl->fl_type = F_UNLCK;
175 else if (rv > 0) {
176 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
177 fl->fl_pid = op->info.pid;
178 fl->fl_start = op->info.start;
179 fl->fl_end = op->info.end;
180 }
181
182 kfree(op);
183 return rv;
184}
185
186/* a read copies out one plock request from the send list */
187static ssize_t dev_read(struct file *file, char __user *u, size_t count,
188 loff_t *ppos)
189{
190 struct gdlm_plock_info info;
191 struct plock_op *op = NULL;
192
193 if (count < sizeof(info))
194 return -EINVAL;
195
196 spin_lock(&ops_lock);
197 if (!list_empty(&send_list)) {
198 op = list_entry(send_list.next, struct plock_op, list);
199 list_move(&op->list, &recv_list);
200 memcpy(&info, &op->info, sizeof(info));
201 }
202 spin_unlock(&ops_lock);
203
204 if (!op)
205 return -EAGAIN;
206
207 if (copy_to_user(u, &info, sizeof(info)))
208 return -EFAULT;
209 return sizeof(info);
210}
211
212/* a write copies in one plock result that should match a plock_op
213 on the recv list */
214static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
215 loff_t *ppos)
216{
217 struct gdlm_plock_info info;
218 struct plock_op *op;
219 int found = 0;
220
221 if (count != sizeof(info))
222 return -EINVAL;
223
224 if (copy_from_user(&info, u, sizeof(info)))
225 return -EFAULT;
226
227 if (check_version(&info))
228 return -EINVAL;
229
230 spin_lock(&ops_lock);
231 list_for_each_entry(op, &recv_list, list) {
232 if (op->info.fsid == info.fsid &&
233 op->info.number == info.number) {
234 list_del_init(&op->list);
235 found = 1;
236 op->done = 1;
237 memcpy(&op->info, &info, sizeof(info));
238 break;
239 }
240 }
241 spin_unlock(&ops_lock);
242
243 if (found)
244 wake_up(&recv_wq);
245 else
246 printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
247 (unsigned long long)info.number);
248 return count;
249}
250
251static unsigned int dev_poll(struct file *file, poll_table *wait)
252{
253 poll_wait(file, &send_wq, wait);
254
255 spin_lock(&ops_lock);
256 if (!list_empty(&send_list)) {
257 spin_unlock(&ops_lock);
258 return POLLIN | POLLRDNORM;
259 }
260 spin_unlock(&ops_lock);
261 return 0;
262}
263
264static struct file_operations dev_fops = {
265 .read = dev_read,
266 .write = dev_write,
267 .poll = dev_poll,
268 .owner = THIS_MODULE
269};
270
271static struct miscdevice plock_dev_misc = {
272 .minor = MISC_DYNAMIC_MINOR,
273 .name = GDLM_PLOCK_MISC_NAME,
274 .fops = &dev_fops
275};
276
277int gdlm_plock_init(void)
278{
279 int rv;
280
281 spin_lock_init(&ops_lock);
282 INIT_LIST_HEAD(&send_list);
283 INIT_LIST_HEAD(&recv_list);
284 init_waitqueue_head(&send_wq);
285 init_waitqueue_head(&recv_wq);
286
287 rv = misc_register(&plock_dev_misc);
288 if (rv)
289 printk(KERN_INFO "gdlm_plock_init: misc_register failed %d",
290 rv);
291 return rv;
292}
293
294void gdlm_plock_exit(void)
295{
296 if (misc_deregister(&plock_dev_misc) < 0)
297 printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed");
298}
299
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
new file mode 100644
index 000000000000..0d8bd0806dba
--- /dev/null
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -0,0 +1,225 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/ctype.h>
11#include <linux/stat.h>
12
13#include "lock_dlm.h"
14
15extern struct lm_lockops gdlm_ops;
16
17static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
18{
19 return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
20}
21
22static ssize_t block_show(struct gdlm_ls *ls, char *buf)
23{
24 ssize_t ret;
25 int val = 0;
26
27 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags))
28 val = 1;
29 ret = sprintf(buf, "%d\n", val);
30 return ret;
31}
32
33static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
34{
35 ssize_t ret = len;
36 int val;
37
38 val = simple_strtol(buf, NULL, 0);
39
40 if (val == 1)
41 set_bit(DFL_BLOCK_LOCKS, &ls->flags);
42 else if (val == 0) {
43 clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
44 gdlm_submit_delayed(ls);
45 } else
46 ret = -EINVAL;
47 return ret;
48}
49
50static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
51{
52 ssize_t ret;
53 int val = 0;
54
55 if (test_bit(DFL_WITHDRAW, &ls->flags))
56 val = 1;
57 ret = sprintf(buf, "%d\n", val);
58 return ret;
59}
60
61static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
62{
63 ssize_t ret = len;
64 int val;
65
66 val = simple_strtol(buf, NULL, 0);
67
68 if (val == 1)
69 set_bit(DFL_WITHDRAW, &ls->flags);
70 else
71 ret = -EINVAL;
72 wake_up(&ls->wait_control);
73 return ret;
74}
75
76static ssize_t id_show(struct gdlm_ls *ls, char *buf)
77{
78 return sprintf(buf, "%u\n", ls->id);
79}
80
81static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
82{
83 return sprintf(buf, "%d\n", ls->jid);
84}
85
86static ssize_t first_show(struct gdlm_ls *ls, char *buf)
87{
88 return sprintf(buf, "%d\n", ls->first);
89}
90
91static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
92{
93 return sprintf(buf, "%d\n", ls->first_done);
94}
95
96static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
97{
98 return sprintf(buf, "%d\n", ls->recover_jid);
99}
100
101static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
102{
103 ls->recover_jid = simple_strtol(buf, NULL, 0);
104 ls->fscb(ls->fsdata, LM_CB_NEED_RECOVERY, &ls->recover_jid);
105 return len;
106}
107
108static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
109{
110 return sprintf(buf, "%d\n", ls->recover_jid_done);
111}
112
113static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
114{
115 return sprintf(buf, "%d\n", ls->recover_jid_status);
116}
117
118struct gdlm_attr {
119 struct attribute attr;
120 ssize_t (*show)(struct gdlm_ls *, char *);
121 ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
122};
123
124#define GDLM_ATTR(_name,_mode,_show,_store) \
125static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
126
127GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
128GDLM_ATTR(block, 0644, block_show, block_store);
129GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
130GDLM_ATTR(id, 0444, id_show, NULL);
131GDLM_ATTR(jid, 0444, jid_show, NULL);
132GDLM_ATTR(first, 0444, first_show, NULL);
133GDLM_ATTR(first_done, 0444, first_done_show, NULL);
134GDLM_ATTR(recover, 0644, recover_show, recover_store);
135GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
136GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
137
138static struct attribute *gdlm_attrs[] = {
139 &gdlm_attr_proto_name.attr,
140 &gdlm_attr_block.attr,
141 &gdlm_attr_withdraw.attr,
142 &gdlm_attr_id.attr,
143 &gdlm_attr_jid.attr,
144 &gdlm_attr_first.attr,
145 &gdlm_attr_first_done.attr,
146 &gdlm_attr_recover.attr,
147 &gdlm_attr_recover_done.attr,
148 &gdlm_attr_recover_status.attr,
149 NULL,
150};
151
152static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
153 char *buf)
154{
155 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
156 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
157 return a->show ? a->show(ls, buf) : 0;
158}
159
160static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
161 const char *buf, size_t len)
162{
163 struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
164 struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
165 return a->store ? a->store(ls, buf, len) : len;
166}
167
168static struct sysfs_ops gdlm_attr_ops = {
169 .show = gdlm_attr_show,
170 .store = gdlm_attr_store,
171};
172
173static struct kobj_type gdlm_ktype = {
174 .default_attrs = gdlm_attrs,
175 .sysfs_ops = &gdlm_attr_ops,
176};
177
178static struct kset gdlm_kset = {
179 .subsys = &kernel_subsys,
180 .kobj = {.name = "lock_dlm",},
181 .ktype = &gdlm_ktype,
182};
183
184int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
185{
186 int error;
187
188 error = kobject_set_name(&ls->kobj, "%s", "lock_module");
189 if (error) {
190 log_error("can't set kobj name %d", error);
191 return error;
192 }
193
194 ls->kobj.kset = &gdlm_kset;
195 ls->kobj.ktype = &gdlm_ktype;
196 ls->kobj.parent = fskobj;
197
198 error = kobject_register(&ls->kobj);
199 if (error)
200 log_error("can't register kobj %d", error);
201
202 return error;
203}
204
205void gdlm_kobject_release(struct gdlm_ls *ls)
206{
207 kobject_unregister(&ls->kobj);
208}
209
210int gdlm_sysfs_init(void)
211{
212 int error;
213
214 error = kset_register(&gdlm_kset);
215 if (error)
216 printk("lock_dlm: cannot register kset %d\n", error);
217
218 return error;
219}
220
221void gdlm_sysfs_exit(void)
222{
223 kset_unregister(&gdlm_kset);
224}
225
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
new file mode 100644
index 000000000000..3e2edcc2dbf6
--- /dev/null
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -0,0 +1,352 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include "lock_dlm.h"
11
12/* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
13 thread gets to it. */
14
15static void queue_submit(struct gdlm_lock *lp)
16{
17 struct gdlm_ls *ls = lp->ls;
18
19 spin_lock(&ls->async_lock);
20 list_add_tail(&lp->delay_list, &ls->submit);
21 spin_unlock(&ls->async_lock);
22 wake_up(&ls->thread_wait);
23}
24
25static void process_blocking(struct gdlm_lock *lp, int bast_mode)
26{
27 struct gdlm_ls *ls = lp->ls;
28 unsigned int cb = 0;
29
30 switch (gdlm_make_lmstate(bast_mode)) {
31 case LM_ST_EXCLUSIVE:
32 cb = LM_CB_NEED_E;
33 break;
34 case LM_ST_DEFERRED:
35 cb = LM_CB_NEED_D;
36 break;
37 case LM_ST_SHARED:
38 cb = LM_CB_NEED_S;
39 break;
40 default:
41 gdlm_assert(0, "unknown bast mode %u", lp->bast_mode);
42 }
43
44 ls->fscb(ls->fsdata, cb, &lp->lockname);
45}
46
47static void process_complete(struct gdlm_lock *lp)
48{
49 struct gdlm_ls *ls = lp->ls;
50 struct lm_async_cb acb;
51 int16_t prev_mode = lp->cur;
52
53 memset(&acb, 0, sizeof(acb));
54
55 if (lp->lksb.sb_status == -DLM_ECANCEL) {
56 log_info("complete dlm cancel %x,%llx flags %lx",
57 lp->lockname.ln_type, lp->lockname.ln_number,
58 lp->flags);
59
60 lp->req = lp->cur;
61 acb.lc_ret |= LM_OUT_CANCELED;
62 if (lp->cur == DLM_LOCK_IV)
63 lp->lksb.sb_lkid = 0;
64 goto out;
65 }
66
67 if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
68 if (lp->lksb.sb_status != -DLM_EUNLOCK) {
69 log_info("unlock sb_status %d %x,%llx flags %lx",
70 lp->lksb.sb_status, lp->lockname.ln_type,
71 lp->lockname.ln_number, lp->flags);
72 return;
73 }
74
75 lp->cur = DLM_LOCK_IV;
76 lp->req = DLM_LOCK_IV;
77 lp->lksb.sb_lkid = 0;
78
79 if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
80 gdlm_delete_lp(lp);
81 return;
82 }
83 goto out;
84 }
85
86 if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
87 memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
88
89 if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
90 if (lp->req == DLM_LOCK_PR)
91 lp->req = DLM_LOCK_CW;
92 else if (lp->req == DLM_LOCK_CW)
93 lp->req = DLM_LOCK_PR;
94 }
95
96 /*
97 * A canceled lock request. The lock was just taken off the delayed
98 * list and was never even submitted to dlm.
99 */
100
101 if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
102 log_info("complete internal cancel %x,%llx",
103 lp->lockname.ln_type, lp->lockname.ln_number);
104 lp->req = lp->cur;
105 acb.lc_ret |= LM_OUT_CANCELED;
106 goto out;
107 }
108
109 /*
110 * An error occured.
111 */
112
113 if (lp->lksb.sb_status) {
114 /* a "normal" error */
115 if ((lp->lksb.sb_status == -EAGAIN) &&
116 (lp->lkf & DLM_LKF_NOQUEUE)) {
117 lp->req = lp->cur;
118 if (lp->cur == DLM_LOCK_IV)
119 lp->lksb.sb_lkid = 0;
120 goto out;
121 }
122
123 /* this could only happen with cancels I think */
124 log_info("ast sb_status %d %x,%llx flags %lx",
125 lp->lksb.sb_status, lp->lockname.ln_type,
126 lp->lockname.ln_number, lp->flags);
127 return;
128 }
129
130 /*
131 * This is an AST for an EX->EX conversion for sync_lvb from GFS.
132 */
133
134 if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
135 complete(&lp->ast_wait);
136 return;
137 }
138
139 /*
140 * A lock has been demoted to NL because it initially completed during
141 * BLOCK_LOCKS. Now it must be requested in the originally requested
142 * mode.
143 */
144
145 if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
146 gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
147 lp->lockname.ln_type, lp->lockname.ln_number);
148 gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
149 lp->lockname.ln_type, lp->lockname.ln_number);
150
151 lp->cur = DLM_LOCK_NL;
152 lp->req = lp->prev_req;
153 lp->prev_req = DLM_LOCK_IV;
154 lp->lkf &= ~DLM_LKF_CONVDEADLK;
155
156 set_bit(LFL_NOCACHE, &lp->flags);
157
158 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
159 !test_bit(LFL_NOBLOCK, &lp->flags))
160 gdlm_queue_delayed(lp);
161 else
162 queue_submit(lp);
163 return;
164 }
165
166 /*
167 * A request is granted during dlm recovery. It may be granted
168 * because the locks of a failed node were cleared. In that case,
169 * there may be inconsistent data beneath this lock and we must wait
170 * for recovery to complete to use it. When gfs recovery is done this
171 * granted lock will be converted to NL and then reacquired in this
172 * granted state.
173 */
174
175 if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
176 !test_bit(LFL_NOBLOCK, &lp->flags) &&
177 lp->req != DLM_LOCK_NL) {
178
179 lp->cur = lp->req;
180 lp->prev_req = lp->req;
181 lp->req = DLM_LOCK_NL;
182 lp->lkf |= DLM_LKF_CONVERT;
183 lp->lkf &= ~DLM_LKF_CONVDEADLK;
184
185 log_debug("rereq %x,%llx id %x %d,%d",
186 lp->lockname.ln_type, lp->lockname.ln_number,
187 lp->lksb.sb_lkid, lp->cur, lp->req);
188
189 set_bit(LFL_REREQUEST, &lp->flags);
190 queue_submit(lp);
191 return;
192 }
193
194 /*
195 * DLM demoted the lock to NL before it was granted so GFS must be
196 * told it cannot cache data for this lock.
197 */
198
199 if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
200 set_bit(LFL_NOCACHE, &lp->flags);
201
202 out:
203 /*
204 * This is an internal lock_dlm lock
205 */
206
207 if (test_bit(LFL_INLOCK, &lp->flags)) {
208 clear_bit(LFL_NOBLOCK, &lp->flags);
209 lp->cur = lp->req;
210 complete(&lp->ast_wait);
211 return;
212 }
213
214 /*
215 * Normal completion of a lock request. Tell GFS it now has the lock.
216 */
217
218 clear_bit(LFL_NOBLOCK, &lp->flags);
219 lp->cur = lp->req;
220
221 acb.lc_name = lp->lockname;
222 acb.lc_ret |= gdlm_make_lmstate(lp->cur);
223
224 if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
225 (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
226 acb.lc_ret |= LM_OUT_CACHEABLE;
227
228 ls->fscb(ls->fsdata, LM_CB_ASYNC, &acb);
229}
230
231static inline int no_work(struct gdlm_ls *ls, int blocking)
232{
233 int ret;
234
235 spin_lock(&ls->async_lock);
236 ret = list_empty(&ls->complete) && list_empty(&ls->submit);
237 if (ret && blocking)
238 ret = list_empty(&ls->blocking);
239 spin_unlock(&ls->async_lock);
240
241 return ret;
242}
243
244static inline int check_drop(struct gdlm_ls *ls)
245{
246 if (!ls->drop_locks_count)
247 return 0;
248
249 if (time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ)) {
250 ls->drop_time = jiffies;
251 if (ls->all_locks_count >= ls->drop_locks_count)
252 return 1;
253 }
254 return 0;
255}
256
257static int gdlm_thread(void *data)
258{
259 struct gdlm_ls *ls = (struct gdlm_ls *) data;
260 struct gdlm_lock *lp = NULL;
261 int blist = 0;
262 uint8_t complete, blocking, submit, drop;
263 DECLARE_WAITQUEUE(wait, current);
264
265 /* Only thread1 is allowed to do blocking callbacks since gfs
266 may wait for a completion callback within a blocking cb. */
267
268 if (current == ls->thread1)
269 blist = 1;
270
271 while (!kthread_should_stop()) {
272 set_current_state(TASK_INTERRUPTIBLE);
273 add_wait_queue(&ls->thread_wait, &wait);
274 if (no_work(ls, blist))
275 schedule();
276 remove_wait_queue(&ls->thread_wait, &wait);
277 set_current_state(TASK_RUNNING);
278
279 complete = blocking = submit = drop = 0;
280
281 spin_lock(&ls->async_lock);
282
283 if (blist && !list_empty(&ls->blocking)) {
284 lp = list_entry(ls->blocking.next, struct gdlm_lock,
285 blist);
286 list_del_init(&lp->blist);
287 blocking = lp->bast_mode;
288 lp->bast_mode = 0;
289 } else if (!list_empty(&ls->complete)) {
290 lp = list_entry(ls->complete.next, struct gdlm_lock,
291 clist);
292 list_del_init(&lp->clist);
293 complete = 1;
294 } else if (!list_empty(&ls->submit)) {
295 lp = list_entry(ls->submit.next, struct gdlm_lock,
296 delay_list);
297 list_del_init(&lp->delay_list);
298 submit = 1;
299 }
300
301 drop = check_drop(ls);
302 spin_unlock(&ls->async_lock);
303
304 if (complete)
305 process_complete(lp);
306
307 else if (blocking)
308 process_blocking(lp, blocking);
309
310 else if (submit)
311 gdlm_do_lock(lp);
312
313 if (drop)
314 ls->fscb(ls->fsdata, LM_CB_DROPLOCKS, NULL);
315
316 schedule();
317 }
318
319 return 0;
320}
321
322int gdlm_init_threads(struct gdlm_ls *ls)
323{
324 struct task_struct *p;
325 int error;
326
327 p = kthread_run(gdlm_thread, ls, "lock_dlm1");
328 error = IS_ERR(p);
329 if (error) {
330 log_error("can't start lock_dlm1 thread %d", error);
331 return error;
332 }
333 ls->thread1 = p;
334
335 p = kthread_run(gdlm_thread, ls, "lock_dlm2");
336 error = IS_ERR(p);
337 if (error) {
338 log_error("can't start lock_dlm2 thread %d", error);
339 kthread_stop(ls->thread1);
340 return error;
341 }
342 ls->thread2 = p;
343
344 return 0;
345}
346
347void gdlm_release_threads(struct gdlm_ls *ls)
348{
349 kthread_stop(ls->thread1);
350 kthread_stop(ls->thread2);
351}
352
diff --git a/fs/gfs2/locking/nolock/Makefile b/fs/gfs2/locking/nolock/Makefile
new file mode 100644
index 000000000000..cdadf956c831
--- /dev/null
+++ b/fs/gfs2/locking/nolock/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_GFS2_FS) += lock_nolock.o
2lock_nolock-y := main.o
3
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
new file mode 100644
index 000000000000..97ffac5cdefb
--- /dev/null
+++ b/fs/gfs2/locking/nolock/main.c
@@ -0,0 +1,259 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/module.h>
11#include <linux/slab.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/types.h>
15#include <linux/fs.h>
16#include <linux/smp_lock.h>
17
18#include "../../lm_interface.h"
19
/* Per-mount state of the nolock protocol. */
struct nolock_lockspace {
	unsigned int nl_lvb_size;	/* bytes allocated by nolock_hold_lvb() */
};
23
24static struct lm_lockops nolock_ops;
25
26static int nolock_mount(char *table_name, char *host_data,
27 lm_callback_t cb, lm_fsdata_t *fsdata,
28 unsigned int min_lvb_size, int flags,
29 struct lm_lockstruct *lockstruct,
30 struct kobject *fskobj)
31{
32 char *c;
33 unsigned int jid;
34 struct nolock_lockspace *nl;
35
36 c = strstr(host_data, "jid=");
37 if (!c)
38 jid = 0;
39 else {
40 c += 4;
41 sscanf(c, "%u", &jid);
42 }
43
44 nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
45 if (!nl)
46 return -ENOMEM;
47
48 nl->nl_lvb_size = min_lvb_size;
49
50 lockstruct->ls_jid = jid;
51 lockstruct->ls_first = 1;
52 lockstruct->ls_lvb_size = min_lvb_size;
53 lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
54 lockstruct->ls_ops = &nolock_ops;
55 lockstruct->ls_flags = LM_LSFLAG_LOCAL;
56
57 return 0;
58}
59
60static void nolock_others_may_mount(lm_lockspace_t *lockspace)
61{
62}
63
64static void nolock_unmount(lm_lockspace_t *lockspace)
65{
66 struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
67 kfree(nl);
68}
69
70static void nolock_withdraw(lm_lockspace_t *lockspace)
71{
72}
73
74/**
75 * nolock_get_lock - get a lm_lock_t given a descripton of the lock
76 * @lockspace: the lockspace the lock lives in
77 * @name: the name of the lock
78 * @lockp: return the lm_lock_t here
79 *
80 * Returns: 0 on success, -EXXX on failure
81 */
82
83static int nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
84 lm_lock_t **lockp)
85{
86 *lockp = (lm_lock_t *)lockspace;
87 return 0;
88}
89
90/**
91 * nolock_put_lock - get rid of a lock structure
92 * @lock: the lock to throw away
93 *
94 */
95
96static void nolock_put_lock(lm_lock_t *lock)
97{
98}
99
100/**
101 * nolock_lock - acquire a lock
102 * @lock: the lock to manipulate
103 * @cur_state: the current state
104 * @req_state: the requested state
105 * @flags: modifier flags
106 *
107 * Returns: A bitmap of LM_OUT_*
108 */
109
110static unsigned int nolock_lock(lm_lock_t *lock, unsigned int cur_state,
111 unsigned int req_state, unsigned int flags)
112{
113 return req_state | LM_OUT_CACHEABLE;
114}
115
116/**
117 * nolock_unlock - unlock a lock
118 * @lock: the lock to manipulate
119 * @cur_state: the current state
120 *
121 * Returns: 0
122 */
123
124static unsigned int nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
125{
126 return 0;
127}
128
129static void nolock_cancel(lm_lock_t *lock)
130{
131}
132
133/**
134 * nolock_hold_lvb - hold on to a lock value block
135 * @lock: the lock the LVB is associated with
136 * @lvbp: return the lm_lvb_t here
137 *
138 * Returns: 0 on success, -EXXX on failure
139 */
140
141static int nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
142{
143 struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
144 int error = 0;
145
146 *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL);
147 if (!*lvbp)
148 error = -ENOMEM;
149
150 return error;
151}
152
153/**
154 * nolock_unhold_lvb - release a LVB
155 * @lock: the lock the LVB is associated with
156 * @lvb: the lock value block
157 *
158 */
159
160static void nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
161{
162 kfree(lvb);
163}
164
165/**
166 * nolock_sync_lvb - sync out the value of a lvb
167 * @lock: the lock the LVB is associated with
168 * @lvb: the lock value block
169 *
170 */
171
172static void nolock_sync_lvb(lm_lock_t *lock, char *lvb)
173{
174}
175
176static int nolock_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
177 struct file *file, struct file_lock *fl)
178{
179 struct file_lock tmp;
180 int ret;
181
182 ret = posix_test_lock(file, fl, &tmp);
183 fl->fl_type = F_UNLCK;
184 if (ret)
185 memcpy(fl, &tmp, sizeof(struct file_lock));
186
187 return 0;
188}
189
190static int nolock_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
191 struct file *file, int cmd, struct file_lock *fl)
192{
193 int error;
194 error = posix_lock_file_wait(file, fl);
195 return error;
196}
197
198static int nolock_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
199 struct file *file, struct file_lock *fl)
200{
201 int error;
202 error = posix_lock_file_wait(file, fl);
203 return error;
204}
205
206static void nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
207 unsigned int message)
208{
209}
210
211static struct lm_lockops nolock_ops = {
212 .lm_proto_name = "lock_nolock",
213 .lm_mount = nolock_mount,
214 .lm_others_may_mount = nolock_others_may_mount,
215 .lm_unmount = nolock_unmount,
216 .lm_withdraw = nolock_withdraw,
217 .lm_get_lock = nolock_get_lock,
218 .lm_put_lock = nolock_put_lock,
219 .lm_lock = nolock_lock,
220 .lm_unlock = nolock_unlock,
221 .lm_cancel = nolock_cancel,
222 .lm_hold_lvb = nolock_hold_lvb,
223 .lm_unhold_lvb = nolock_unhold_lvb,
224 .lm_sync_lvb = nolock_sync_lvb,
225 .lm_plock_get = nolock_plock_get,
226 .lm_plock = nolock_plock,
227 .lm_punlock = nolock_punlock,
228 .lm_recovery_done = nolock_recovery_done,
229 .lm_owner = THIS_MODULE,
230};
231
232static int __init init_nolock(void)
233{
234 int error;
235
236 error = gfs_register_lockproto(&nolock_ops);
237 if (error) {
238 printk(KERN_WARNING
239 "lock_nolock: can't register protocol: %d\n", error);
240 return error;
241 }
242
243 printk(KERN_INFO
244 "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
245 return 0;
246}
247
248static void __exit exit_nolock(void)
249{
250 gfs_unregister_lockproto(&nolock_ops);
251}
252
253module_init(init_nolock);
254module_exit(exit_nolock);
255
256MODULE_DESCRIPTION("GFS Nolock Locking Module");
257MODULE_AUTHOR("Red Hat, Inc.");
258MODULE_LICENSE("GPL");
259
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
new file mode 100644
index 000000000000..2a8b4b71dd1f
--- /dev/null
+++ b/fs/gfs2/log.c
@@ -0,0 +1,598 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "log.h"
24#include "lops.h"
25#include "meta_io.h"
26#include "util.h"
27#include "dir.h"
28
29#define PULL 1
30
31/**
32 * gfs2_struct2blk - compute stuff
33 * @sdp: the filesystem
34 * @nstruct: the number of structures
35 * @ssize: the size of the structures
36 *
37 * Compute the number of log descriptor blocks needed to hold a certain number
38 * of structures of a certain size.
39 *
40 * Returns: the number of blocks needed (minimum is always 1)
41 */
42
43unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
44 unsigned int ssize)
45{
46 unsigned int blks;
47 unsigned int first, second;
48
49 blks = 1;
50 first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) /
51 ssize;
52
53 if (nstruct > first) {
54 second = (sdp->sd_sb.sb_bsize -
55 sizeof(struct gfs2_meta_header)) / ssize;
56 blks += DIV_ROUND_UP(nstruct - first, second);
57 }
58
59 return blks;
60}
61
62void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
63{
64 struct list_head *head = &sdp->sd_ail1_list;
65 uint64_t sync_gen;
66 struct list_head *first, *tmp;
67 struct gfs2_ail *first_ai, *ai;
68
69 gfs2_log_lock(sdp);
70 if (list_empty(head)) {
71 gfs2_log_unlock(sdp);
72 return;
73 }
74 sync_gen = sdp->sd_ail_sync_gen++;
75
76 first = head->prev;
77 first_ai = list_entry(first, struct gfs2_ail, ai_list);
78 first_ai->ai_sync_gen = sync_gen;
79 gfs2_ail1_start_one(sdp, first_ai);
80
81 if (flags & DIO_ALL)
82 first = NULL;
83
84 for (;;) {
85 if (first && (head->prev != first ||
86 gfs2_ail1_empty_one(sdp, first_ai, 0)))
87 break;
88
89 for (tmp = head->prev; tmp != head; tmp = tmp->prev) {
90 ai = list_entry(tmp, struct gfs2_ail, ai_list);
91 if (ai->ai_sync_gen >= sync_gen)
92 continue;
93 ai->ai_sync_gen = sync_gen;
94 gfs2_ail1_start_one(sdp, ai);
95 break;
96 }
97
98 if (tmp == head)
99 break;
100 }
101
102 gfs2_log_unlock(sdp);
103}
104
105int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
106{
107 struct gfs2_ail *ai, *s;
108 int ret;
109
110 gfs2_log_lock(sdp);
111
112 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
113 if (gfs2_ail1_empty_one(sdp, ai, flags))
114 list_move(&ai->ai_list, &sdp->sd_ail2_list);
115 else if (!(flags & DIO_ALL))
116 break;
117 }
118
119 ret = list_empty(&sdp->sd_ail1_list);
120
121 gfs2_log_unlock(sdp);
122
123 return ret;
124}
125
126static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
127{
128 struct gfs2_ail *ai, *safe;
129 unsigned int old_tail = sdp->sd_log_tail;
130 int wrap = (new_tail < old_tail);
131 int a, b, rm;
132
133 gfs2_log_lock(sdp);
134
135 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
136 a = (old_tail <= ai->ai_first);
137 b = (ai->ai_first < new_tail);
138 rm = (wrap) ? (a || b) : (a && b);
139 if (!rm)
140 continue;
141
142 gfs2_ail2_empty_one(sdp, ai);
143 list_del(&ai->ai_list);
144 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
145 gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
146 kfree(ai);
147 }
148
149 gfs2_log_unlock(sdp);
150}
151
152/**
153 * gfs2_log_reserve - Make a log reservation
154 * @sdp: The GFS2 superblock
155 * @blks: The number of blocks to reserve
156 *
157 * Returns: errno
158 */
159
160int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
161{
162 unsigned int try = 0;
163
164 if (gfs2_assert_warn(sdp, blks) ||
165 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
166 return -EINVAL;
167
168 mutex_lock(&sdp->sd_log_reserve_mutex);
169 gfs2_log_lock(sdp);
170 while(sdp->sd_log_blks_free <= blks) {
171 gfs2_log_unlock(sdp);
172 gfs2_ail1_empty(sdp, 0);
173 gfs2_log_flush(sdp, NULL);
174
175 if (try++)
176 gfs2_ail1_start(sdp, 0);
177 gfs2_log_lock(sdp);
178 }
179 sdp->sd_log_blks_free -= blks;
180 /* printk(KERN_INFO "reserved %u blocks (%u left)\n", blks, sdp->sd_log_blks_free); */
181 gfs2_log_unlock(sdp);
182 mutex_unlock(&sdp->sd_log_reserve_mutex);
183
184 down_read(&sdp->sd_log_flush_lock);
185
186 return 0;
187}
188
189/**
190 * gfs2_log_release - Release a given number of log blocks
191 * @sdp: The GFS2 superblock
192 * @blks: The number of blocks
193 *
194 */
195
196void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
197{
198
199 gfs2_log_lock(sdp);
200 sdp->sd_log_blks_free += blks;
201 /* printk(KERN_INFO "released %u blocks (%u left)\n", blks, sdp->sd_log_blks_free); */
202 gfs2_assert_withdraw(sdp,
203 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
204 gfs2_log_unlock(sdp);
205 up_read(&sdp->sd_log_flush_lock);
206}
207
208static uint64_t log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
209{
210 int new = 0;
211 uint64_t dbn;
212 int error;
213 int bdy;
214
215 error = gfs2_block_map(sdp->sd_jdesc->jd_inode, lbn, &new, &dbn, &bdy);
216 gfs2_assert_withdraw(sdp, !error && dbn);
217
218 return dbn;
219}
220
221/**
222 * log_distance - Compute distance between two journal blocks
223 * @sdp: The GFS2 superblock
224 * @newer: The most recent journal block of the pair
225 * @older: The older journal block of the pair
226 *
227 * Compute the distance (in the journal direction) between two
228 * blocks in the journal
229 *
230 * Returns: the distance in blocks
231 */
232
233static inline unsigned int log_distance(struct gfs2_sbd *sdp,
234 unsigned int newer,
235 unsigned int older)
236{
237 int dist;
238
239 dist = newer - older;
240 if (dist < 0)
241 dist += sdp->sd_jdesc->jd_blocks;
242
243 return dist;
244}
245
246static unsigned int current_tail(struct gfs2_sbd *sdp)
247{
248 struct gfs2_ail *ai;
249 unsigned int tail;
250
251 gfs2_log_lock(sdp);
252
253 if (list_empty(&sdp->sd_ail1_list))
254 tail = sdp->sd_log_head;
255 else {
256 ai = list_entry(sdp->sd_ail1_list.prev,
257 struct gfs2_ail, ai_list);
258 tail = ai->ai_first;
259 }
260
261 gfs2_log_unlock(sdp);
262
263 return tail;
264}
265
266static inline void log_incr_head(struct gfs2_sbd *sdp)
267{
268 if (sdp->sd_log_flush_head == sdp->sd_log_tail)
269 gfs2_assert_withdraw(sdp,
270 sdp->sd_log_flush_head == sdp->sd_log_head);
271
272 if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
273 sdp->sd_log_flush_head = 0;
274 sdp->sd_log_flush_wrapped = 1;
275 }
276}
277
278/**
279 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
280 * @sdp: The GFS2 superblock
281 *
282 * Returns: the buffer_head
283 */
284
285struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
286{
287 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
288 struct gfs2_log_buf *lb;
289 struct buffer_head *bh;
290
291 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
292 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
293
294 bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno);
295 lock_buffer(bh);
296 memset(bh->b_data, 0, bh->b_size);
297 set_buffer_uptodate(bh);
298 clear_buffer_dirty(bh);
299 unlock_buffer(bh);
300
301 log_incr_head(sdp);
302
303 return bh;
304}
305
306/**
307 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
308 * @sdp: the filesystem
309 * @data: the data the buffer_head should point to
310 *
311 * Returns: the log buffer descriptor
312 */
313
314struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
315 struct buffer_head *real)
316{
317 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
318 struct gfs2_log_buf *lb;
319 struct buffer_head *bh;
320
321 lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
322 list_add(&lb->lb_list, &sdp->sd_log_flush_list);
323 lb->lb_real = real;
324
325 bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
326 atomic_set(&bh->b_count, 1);
327 bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
328 set_bh_page(bh, real->b_page, bh_offset(real));
329 bh->b_blocknr = blkno;
330 bh->b_size = sdp->sd_sb.sb_bsize;
331 bh->b_bdev = sdp->sd_vfs->s_bdev;
332
333 log_incr_head(sdp);
334
335 return bh;
336}
337
338static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
339{
340 unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
341
342 ail2_empty(sdp, new_tail);
343
344 gfs2_log_lock(sdp);
345 sdp->sd_log_blks_free += dist - ((pull) ? 1 : 0);
346 /* printk(KERN_INFO "pull tail refunding %u blocks (%u left) pull=%d\n", dist - ((pull) ? 1 : 0), sdp->sd_log_blks_free, pull); */
347 gfs2_assert_withdraw(sdp,
348 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
349 gfs2_log_unlock(sdp);
350
351 sdp->sd_log_tail = new_tail;
352}
353
354/**
355 * log_write_header - Get and initialize a journal header buffer
356 * @sdp: The GFS2 superblock
357 *
358 * Returns: the initialized log buffer descriptor
359 */
360
361static void log_write_header(struct gfs2_sbd *sdp, uint32_t flags, int pull)
362{
363 uint64_t blkno = log_bmap(sdp, sdp->sd_log_flush_head);
364 struct buffer_head *bh;
365 struct gfs2_log_header *lh;
366 unsigned int tail;
367 uint32_t hash;
368
369 /* printk(KERN_INFO "log write header start (flags=%08x, pull=%d)\n", flags, pull); */
370
371 bh = sb_getblk(sdp->sd_vfs, blkno);
372 lock_buffer(bh);
373 memset(bh->b_data, 0, bh->b_size);
374 set_buffer_uptodate(bh);
375 clear_buffer_dirty(bh);
376 unlock_buffer(bh);
377
378 gfs2_ail1_empty(sdp, 0);
379 tail = current_tail(sdp);
380
381 lh = (struct gfs2_log_header *)bh->b_data;
382 memset(lh, 0, sizeof(struct gfs2_log_header));
383 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
384 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
385 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
386 lh->lh_sequence = be64_to_cpu(sdp->sd_log_sequence++);
387 lh->lh_flags = be32_to_cpu(flags);
388 lh->lh_tail = be32_to_cpu(tail);
389 lh->lh_blkno = be32_to_cpu(sdp->sd_log_flush_head);
390 hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
391 lh->lh_hash = cpu_to_be32(hash);
392
393 set_buffer_dirty(bh);
394 if (sync_dirty_buffer(bh))
395 gfs2_io_error_bh(sdp, bh);
396 brelse(bh);
397
398 if (sdp->sd_log_tail != tail)
399 log_pull_tail(sdp, tail, pull);
400 else
401 gfs2_assert_withdraw(sdp, !pull);
402
403 sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
404 log_incr_head(sdp);
405
406 /* printk(KERN_INFO "log write header out\n"); */
407}
408
409static void log_flush_commit(struct gfs2_sbd *sdp)
410{
411 struct list_head *head = &sdp->sd_log_flush_list;
412 struct gfs2_log_buf *lb;
413 struct buffer_head *bh;
414#if 0
415 unsigned int d;
416
417 d = log_distance(sdp, sdp->sd_log_flush_head, sdp->sd_log_head);
418
419 gfs2_assert_withdraw(sdp, d + 1 == sdp->sd_log_blks_reserved);
420#endif
421
422 while (!list_empty(head)) {
423 lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
424 list_del(&lb->lb_list);
425 bh = lb->lb_bh;
426
427 wait_on_buffer(bh);
428 if (!buffer_uptodate(bh))
429 gfs2_io_error_bh(sdp, bh);
430 if (lb->lb_real) {
431 while (atomic_read(&bh->b_count) != 1) /* Grrrr... */
432 schedule();
433 free_buffer_head(bh);
434 } else
435 brelse(bh);
436 kfree(lb);
437 }
438
439 log_write_header(sdp, 0, 0);
440}
441
442/**
443 * gfs2_log_flush - flush incore transaction(s)
444 * @sdp: the filesystem
445 * @gl: The glock structure to flush. If NULL, flush the whole incore log
446 *
447 */
448
449void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
450{
451 struct gfs2_ail *ai;
452
453 down_write(&sdp->sd_log_flush_lock);
454
455 if (gl) {
456 gfs2_log_lock(sdp);
457 if (list_empty(&gl->gl_le.le_list)) {
458 gfs2_log_unlock(sdp);
459 up_write(&sdp->sd_log_flush_lock);
460 return;
461 }
462 gfs2_log_unlock(sdp);
463 }
464
465 ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
466 INIT_LIST_HEAD(&ai->ai_ail1_list);
467 INIT_LIST_HEAD(&ai->ai_ail2_list);
468
469 gfs2_assert_withdraw(sdp,
470 sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
471 gfs2_assert_withdraw(sdp,
472 sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
473
474 sdp->sd_log_flush_head = sdp->sd_log_head;
475 sdp->sd_log_flush_wrapped = 0;
476 ai->ai_first = sdp->sd_log_flush_head;
477
478 lops_before_commit(sdp);
479 if (!list_empty(&sdp->sd_log_flush_list))
480 log_flush_commit(sdp);
481 else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
482 log_write_header(sdp, 0, PULL);
483 lops_after_commit(sdp, ai);
484 sdp->sd_log_head = sdp->sd_log_flush_head;
485
486 /* printk(KERN_INFO "sd_log_num_hdrs %u\n", sdp->sd_log_num_hdrs); */
487 sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
488
489 sdp->sd_log_blks_reserved =
490 sdp->sd_log_commited_buf =
491 sdp->sd_log_num_hdrs =
492 sdp->sd_log_commited_revoke = 0;
493
494 gfs2_log_lock(sdp);
495 if (!list_empty(&ai->ai_ail1_list)) {
496 list_add(&ai->ai_list, &sdp->sd_ail1_list);
497 ai = NULL;
498 }
499 gfs2_log_unlock(sdp);
500
501 sdp->sd_vfs->s_dirt = 0;
502 up_write(&sdp->sd_log_flush_lock);
503
504 kfree(ai);
505}
506
507static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
508{
509 unsigned int reserved = 1;
510 unsigned int old;
511
512 gfs2_log_lock(sdp);
513
514 sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
515 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
516 sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
517 gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
518
519 if (sdp->sd_log_commited_buf)
520 reserved += sdp->sd_log_commited_buf;
521 if (sdp->sd_log_commited_revoke)
522 reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
523 sizeof(uint64_t));
524
525 old = sdp->sd_log_blks_free;
526 sdp->sd_log_blks_free += tr->tr_reserved -
527 (reserved - sdp->sd_log_blks_reserved);
528
529 gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
530 gfs2_assert_withdraw(sdp,
531 sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
532 sdp->sd_log_num_hdrs);
533
534 sdp->sd_log_blks_reserved = reserved;
535
536 gfs2_log_unlock(sdp);
537}
538
539/**
540 * gfs2_log_commit - Commit a transaction to the log
541 * @sdp: the filesystem
542 * @tr: the transaction
543 *
544 * Returns: errno
545 */
546
547void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
548{
549 log_refund(sdp, tr);
550 lops_incore_commit(sdp, tr);
551
552 sdp->sd_vfs->s_dirt = 1;
553 up_read(&sdp->sd_log_flush_lock);
554
555 gfs2_log_lock(sdp);
556 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
557 gfs2_log_unlock(sdp);
558 gfs2_log_flush(sdp, NULL);
559 } else
560 gfs2_log_unlock(sdp);
561}
562
563/**
564 * gfs2_log_shutdown - write a shutdown header into a journal
565 * @sdp: the filesystem
566 *
567 */
568
569void gfs2_log_shutdown(struct gfs2_sbd *sdp)
570{
571 down_write(&sdp->sd_log_flush_lock);
572
573 gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
574 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
575 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
576 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
577 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
578 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
579 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
580 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
581 gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
582
583 sdp->sd_log_flush_head = sdp->sd_log_head;
584 sdp->sd_log_flush_wrapped = 0;
585
586 log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
587
588 /* printk(KERN_INFO "sd_log_blks_free %u, sd_jdesc->jd_blocks %u\n", sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); */
589 gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
590 gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
591 gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
592
593 sdp->sd_log_head = sdp->sd_log_flush_head;
594 sdp->sd_log_tail = sdp->sd_log_head;
595
596 up_write(&sdp->sd_log_flush_lock);
597}
598
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
new file mode 100644
index 000000000000..8cfd0f1d29f8
--- /dev/null
+++ b/fs/gfs2/log.h
@@ -0,0 +1,61 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOG_DOT_H__
11#define __LOG_DOT_H__
12
13/**
14 * gfs2_log_lock - acquire the right to mess with the log manager
15 * @sdp: the filesystem
16 *
17 */
18
19static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
20{
21 spin_lock(&sdp->sd_log_lock);
22}
23
24/**
25 * gfs2_log_unlock - release the right to mess with the log manager
26 * @sdp: the filesystem
27 *
28 */
29
30static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
31{
32 spin_unlock(&sdp->sd_log_lock);
33}
34
35static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
36 unsigned int value)
37{
38 if (++value == sdp->sd_jdesc->jd_blocks) {
39 value = 0;
40 }
41 sdp->sd_log_head = sdp->sd_log_tail = value;
42}
43
44unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
45 unsigned int ssize);
46
47void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags);
48int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
49
50int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
51void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
52
53struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
54struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
55 struct buffer_head *real);
56void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
57void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
58
59void gfs2_log_shutdown(struct gfs2_sbd *sdp);
60
61#endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
new file mode 100644
index 000000000000..e4c75a74df5b
--- /dev/null
+++ b/fs/gfs2/lops.c
@@ -0,0 +1,804 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16
17#include "gfs2.h"
18#include "lm_interface.h"
19#include "incore.h"
20#include "glock.h"
21#include "log.h"
22#include "lops.h"
23#include "meta_io.h"
24#include "recovery.h"
25#include "rgrp.h"
26#include "trans.h"
27#include "util.h"
28
29static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
30{
31 struct gfs2_glock *gl;
32 struct gfs2_trans *tr = current->journal_info;
33
34 tr->tr_touched = 1;
35
36 if (!list_empty(&le->le_list))
37 return;
38
39 gl = container_of(le, struct gfs2_glock, gl_le);
40 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
41 return;
42 gfs2_glock_hold(gl);
43 set_bit(GLF_DIRTY, &gl->gl_flags);
44
45 gfs2_log_lock(sdp);
46 sdp->sd_log_num_gl++;
47 list_add(&le->le_list, &sdp->sd_log_le_gl);
48 gfs2_log_unlock(sdp);
49}
50
51static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 struct list_head *head = &sdp->sd_log_le_gl;
54 struct gfs2_glock *gl;
55
56 while (!list_empty(head)) {
57 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list);
58 list_del_init(&gl->gl_le.le_list);
59 sdp->sd_log_num_gl--;
60
61 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
62 gfs2_glock_put(gl);
63 }
64 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
65}
66
67static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
68{
69 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
70 struct gfs2_trans *tr;
71
72 if (!list_empty(&bd->bd_list_tr))
73 return;
74
75 tr = current->journal_info;
76 tr->tr_touched = 1;
77 tr->tr_num_buf++;
78 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
79
80 if (!list_empty(&le->le_list))
81 return;
82
83 gfs2_trans_add_gl(bd->bd_gl);
84
85 gfs2_meta_check(sdp, bd->bd_bh);
86 gfs2_pin(sdp, bd->bd_bh);
87
88 gfs2_log_lock(sdp);
89 sdp->sd_log_num_buf++;
90 list_add(&le->le_list, &sdp->sd_log_le_buf);
91 gfs2_log_unlock(sdp);
92
93 tr->tr_num_buf_new++;
94}
95
96static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
97{
98 struct list_head *head = &tr->tr_list_buf;
99 struct gfs2_bufdata *bd;
100
101 while (!list_empty(head)) {
102 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
103 list_del_init(&bd->bd_list_tr);
104 tr->tr_num_buf--;
105 }
106 gfs2_assert_warn(sdp, !tr->tr_num_buf);
107}
108
109static void buf_lo_before_commit(struct gfs2_sbd *sdp)
110{
111 struct buffer_head *bh;
112 struct gfs2_log_descriptor *ld;
113 struct gfs2_bufdata *bd1 = NULL, *bd2;
114 unsigned int total = sdp->sd_log_num_buf;
115 unsigned int offset = sizeof(struct gfs2_log_descriptor);
116 unsigned int limit;
117 unsigned int num;
118 unsigned n;
119 __be64 *ptr;
120
121 offset += (sizeof(__be64) - 1);
122 offset &= ~(sizeof(__be64) - 1);
123 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
124 /* for 4k blocks, limit = 503 */
125
126 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
127 while(total) {
128 num = total;
129 if (total > limit)
130 num = limit;
131 bh = gfs2_log_get_buf(sdp);
132 sdp->sd_log_num_hdrs++;
133 ld = (struct gfs2_log_descriptor *)bh->b_data;
134 ptr = (__be64 *)(bh->b_data + offset);
135 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
136 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
137 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
138 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
139 ld->ld_length = cpu_to_be32(num + 1);
140 ld->ld_data1 = cpu_to_be32(num);
141 ld->ld_data2 = cpu_to_be32(0);
142 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
143
144 n = 0;
145 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
146 bd_le.le_list) {
147 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
148 if (++n >= num)
149 break;
150 }
151
152 set_buffer_dirty(bh);
153 ll_rw_block(WRITE, 1, &bh);
154
155 n = 0;
156 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
157 bd_le.le_list) {
158 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
159 set_buffer_dirty(bh);
160 ll_rw_block(WRITE, 1, &bh);
161 if (++n >= num)
162 break;
163 }
164
165 total -= num;
166 }
167}
168
169static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
170{
171 struct list_head *head = &sdp->sd_log_le_buf;
172 struct gfs2_bufdata *bd;
173
174 while (!list_empty(head)) {
175 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
176 list_del_init(&bd->bd_le.le_list);
177 sdp->sd_log_num_buf--;
178
179 gfs2_unpin(sdp, bd->bd_bh, ai);
180 }
181 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
182}
183
184static void buf_lo_before_scan(struct gfs2_jdesc *jd,
185 struct gfs2_log_header *head, int pass)
186{
187 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
188 struct gfs2_sbd *sdp = ip->i_sbd;
189
190 if (pass != 0)
191 return;
192
193 sdp->sd_found_blocks = 0;
194 sdp->sd_replayed_blocks = 0;
195}
196
197static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
198 struct gfs2_log_descriptor *ld, __be64 *ptr,
199 int pass)
200{
201 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
202 struct gfs2_sbd *sdp = ip->i_sbd;
203 struct gfs2_glock *gl = ip->i_gl;
204 unsigned int blks = be32_to_cpu(ld->ld_data1);
205 struct buffer_head *bh_log, *bh_ip;
206 uint64_t blkno;
207 int error = 0;
208
209 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
210 return 0;
211
212 gfs2_replay_incr_blk(sdp, &start);
213
214 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
215 blkno = be64_to_cpu(*ptr++);
216
217 sdp->sd_found_blocks++;
218
219 if (gfs2_revoke_check(sdp, blkno, start))
220 continue;
221
222 error = gfs2_replay_read_block(jd, start, &bh_log);
223 if (error)
224 return error;
225
226 bh_ip = gfs2_meta_new(gl, blkno);
227 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
228
229 if (gfs2_meta_check(sdp, bh_ip))
230 error = -EIO;
231 else
232 mark_buffer_dirty(bh_ip);
233
234 brelse(bh_log);
235 brelse(bh_ip);
236
237 if (error)
238 break;
239
240 sdp->sd_replayed_blocks++;
241 }
242
243 return error;
244}
245
246static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
247{
248 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
249 struct gfs2_sbd *sdp = ip->i_sbd;
250
251 if (error) {
252 gfs2_meta_sync(ip->i_gl,
253 DIO_START | DIO_WAIT);
254 return;
255 }
256 if (pass != 1)
257 return;
258
259 gfs2_meta_sync(ip->i_gl, DIO_START | DIO_WAIT);
260
261 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
262 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
263}
264
265static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
266{
267 struct gfs2_trans *tr;
268
269 tr = current->journal_info;
270 tr->tr_touched = 1;
271 tr->tr_num_revoke++;
272
273 gfs2_log_lock(sdp);
274 sdp->sd_log_num_revoke++;
275 list_add(&le->le_list, &sdp->sd_log_le_revoke);
276 gfs2_log_unlock(sdp);
277}
278
279static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
280{
281 struct gfs2_log_descriptor *ld;
282 struct gfs2_meta_header *mh;
283 struct buffer_head *bh;
284 unsigned int offset;
285 struct list_head *head = &sdp->sd_log_le_revoke;
286 struct gfs2_revoke *rv;
287
288 if (!sdp->sd_log_num_revoke)
289 return;
290
291 bh = gfs2_log_get_buf(sdp);
292 ld = (struct gfs2_log_descriptor *)bh->b_data;
293 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
294 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
295 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
296 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
297 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
298 sizeof(uint64_t)));
299 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
300 ld->ld_data2 = cpu_to_be32(0);
301 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
302 offset = sizeof(struct gfs2_log_descriptor);
303
304 while (!list_empty(head)) {
305 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
306 list_del_init(&rv->rv_le.le_list);
307 sdp->sd_log_num_revoke--;
308
309 if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) {
310 set_buffer_dirty(bh);
311 ll_rw_block(WRITE, 1, &bh);
312
313 bh = gfs2_log_get_buf(sdp);
314 mh = (struct gfs2_meta_header *)bh->b_data;
315 mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
316 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
317 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
318 offset = sizeof(struct gfs2_meta_header);
319 }
320
321 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
322 kfree(rv);
323
324 offset += sizeof(uint64_t);
325 }
326 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
327
328 set_buffer_dirty(bh);
329 ll_rw_block(WRITE, 1, &bh);
330}
331
332static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
333 struct gfs2_log_header *head, int pass)
334{
335 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
336 struct gfs2_sbd *sdp = ip->i_sbd;
337
338 if (pass != 0)
339 return;
340
341 sdp->sd_found_revokes = 0;
342 sdp->sd_replay_tail = head->lh_tail;
343}
344
345static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
346 struct gfs2_log_descriptor *ld, __be64 *ptr,
347 int pass)
348{
349 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
350 struct gfs2_sbd *sdp = ip->i_sbd;
351 unsigned int blks = be32_to_cpu(ld->ld_length);
352 unsigned int revokes = be32_to_cpu(ld->ld_data1);
353 struct buffer_head *bh;
354 unsigned int offset;
355 uint64_t blkno;
356 int first = 1;
357 int error;
358
359 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
360 return 0;
361
362 offset = sizeof(struct gfs2_log_descriptor);
363
364 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
365 error = gfs2_replay_read_block(jd, start, &bh);
366 if (error)
367 return error;
368
369 if (!first)
370 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);
371
372 while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
373 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
374
375 error = gfs2_revoke_add(sdp, blkno, start);
376 if (error < 0)
377 return error;
378 else if (error)
379 sdp->sd_found_revokes++;
380
381 if (!--revokes)
382 break;
383 offset += sizeof(uint64_t);
384 }
385
386 brelse(bh);
387 offset = sizeof(struct gfs2_meta_header);
388 first = 0;
389 }
390
391 return 0;
392}
393
394static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
395{
396 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
397 struct gfs2_sbd *sdp = ip->i_sbd;
398
399 if (error) {
400 gfs2_revoke_clean(sdp);
401 return;
402 }
403 if (pass != 1)
404 return;
405
406 fs_info(sdp, "jid=%u: Found %u revoke tags\n",
407 jd->jd_jid, sdp->sd_found_revokes);
408
409 gfs2_revoke_clean(sdp);
410}
411
412static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
413{
414 struct gfs2_rgrpd *rgd;
415 struct gfs2_trans *tr = current->journal_info;
416
417 tr->tr_touched = 1;
418
419 if (!list_empty(&le->le_list))
420 return;
421
422 rgd = container_of(le, struct gfs2_rgrpd, rd_le);
423 gfs2_rgrp_bh_hold(rgd);
424
425 gfs2_log_lock(sdp);
426 sdp->sd_log_num_rg++;
427 list_add(&le->le_list, &sdp->sd_log_le_rg);
428 gfs2_log_unlock(sdp);
429}
430
431static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
432{
433 struct list_head *head = &sdp->sd_log_le_rg;
434 struct gfs2_rgrpd *rgd;
435
436 while (!list_empty(head)) {
437 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
438 list_del_init(&rgd->rd_le.le_list);
439 sdp->sd_log_num_rg--;
440
441 gfs2_rgrp_repolish_clones(rgd);
442 gfs2_rgrp_bh_put(rgd);
443 }
444 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
445}
446
447/**
448 * databuf_lo_add - Add a databuf to the transaction.
449 *
450 * This is used in two distinct cases:
451 * i) In ordered write mode
452 * We put the data buffer on a list so that we can ensure that its
453 * synced to disk at the right time
454 * ii) In journaled data mode
455 * We need to journal the data block in the same way as metadata in
456 * the functions above. The difference is that here we have a tag
457 * which is two __be64's being the block number (as per meta data)
458 * and a flag which says whether the data block needs escaping or
459 * not. This means we need a new log entry for each 251 or so data
460 * blocks, which isn't an enormous overhead but twice as much as
461 * for normal metadata blocks.
462 */
463static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
464{
465 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
466 struct gfs2_trans *tr = current->journal_info;
467 struct address_space *mapping = bd->bd_bh->b_page->mapping;
468 struct gfs2_inode *ip = mapping->host->u.generic_ip;
469
470 tr->tr_touched = 1;
471 if (!list_empty(&bd->bd_list_tr) &&
472 (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
473 tr->tr_num_buf++;
474 gfs2_trans_add_gl(bd->bd_gl);
475 list_add(&bd->bd_list_tr, &tr->tr_list_buf);
476 gfs2_pin(sdp, bd->bd_bh);
477 tr->tr_num_buf_new++;
478 }
479 gfs2_log_lock(sdp);
480 if (!list_empty(&le->le_list)) {
481 if (ip->i_di.di_flags & GFS2_DIF_JDATA)
482 sdp->sd_log_num_jdata++;
483 sdp->sd_log_num_databuf++;
484 list_add(&le->le_list, &sdp->sd_log_le_databuf);
485 }
486 gfs2_log_unlock(sdp);
487}
488
489static int gfs2_check_magic(struct buffer_head *bh)
490{
491 struct page *page = bh->b_page;
492 void *kaddr;
493 __be32 *ptr;
494 int rv = 0;
495
496 kaddr = kmap_atomic(page, KM_USER0);
497 ptr = kaddr + bh_offset(bh);
498 if (*ptr == cpu_to_be32(GFS2_MAGIC))
499 rv = 1;
500 kunmap_atomic(page, KM_USER0);
501
502 return rv;
503}
504
505/**
506 * databuf_lo_before_commit - Scan the data buffers, writing as we go
507 *
508 * Here we scan through the lists of buffers and make the assumption
509 * that any buffer thats been pinned is being journaled, and that
510 * any unpinned buffer is an ordered write data buffer and therefore
511 * will be written back rather than journaled.
512 */
513static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
514{
515 LIST_HEAD(started);
516 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
517 struct buffer_head *bh = NULL;
518 unsigned int offset = sizeof(struct gfs2_log_descriptor);
519 struct gfs2_log_descriptor *ld;
520 unsigned int limit;
521 unsigned int total_dbuf = sdp->sd_log_num_databuf;
522 unsigned int total_jdata = sdp->sd_log_num_jdata;
523 unsigned int num, n;
524 __be64 *ptr = NULL;
525
526 offset += (2*sizeof(__be64) - 1);
527 offset &= ~(2*sizeof(__be64) - 1);
528 limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
529
530 /*
531 * Start writing ordered buffers, write journaled buffers
532 * into the log along with a header
533 */
534 gfs2_log_lock(sdp);
535 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
536 bd_le.le_list);
537 while(total_dbuf) {
538 num = total_jdata;
539 if (num > limit)
540 num = limit;
541 n = 0;
542 list_for_each_entry_safe_continue(bd1, bdt,
543 &sdp->sd_log_le_databuf,
544 bd_le.le_list) {
545 /* An ordered write buffer */
546 if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
547 list_move(&bd1->bd_le.le_list, &started);
548 if (bd1 == bd2) {
549 bd2 = NULL;
550 bd2 = list_prepare_entry(bd2,
551 &sdp->sd_log_le_databuf,
552 bd_le.le_list);
553 }
554 total_dbuf--;
555 if (bd1->bd_bh) {
556 get_bh(bd1->bd_bh);
557 if (buffer_dirty(bd1->bd_bh)) {
558 gfs2_log_unlock(sdp);
559 wait_on_buffer(bd1->bd_bh);
560 ll_rw_block(WRITE, 1,
561 &bd1->bd_bh);
562 gfs2_log_lock(sdp);
563 }
564 brelse(bd1->bd_bh);
565 continue;
566 }
567 continue;
568 } else if (bd1->bd_bh) { /* A journaled buffer */
569 int magic;
570 gfs2_log_unlock(sdp);
571 if (!bh) {
572 bh = gfs2_log_get_buf(sdp);
573 sdp->sd_log_num_hdrs++;
574 ld = (struct gfs2_log_descriptor *)
575 bh->b_data;
576 ptr = (__be64 *)(bh->b_data + offset);
577 ld->ld_header.mh_magic =
578 cpu_to_be32(GFS2_MAGIC);
579 ld->ld_header.mh_type =
580 cpu_to_be32(GFS2_METATYPE_LD);
581 ld->ld_header.mh_format =
582 cpu_to_be32(GFS2_FORMAT_LD);
583 ld->ld_type =
584 cpu_to_be32(GFS2_LOG_DESC_JDATA);
585 ld->ld_length = cpu_to_be32(num + 1);
586 ld->ld_data1 = cpu_to_be32(num);
587 ld->ld_data2 = cpu_to_be32(0);
588 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
589 }
590 magic = gfs2_check_magic(bd1->bd_bh);
591 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
592 *ptr++ = cpu_to_be64((__u64)magic);
593 clear_buffer_escaped(bd1->bd_bh);
594 if (unlikely(magic != 0))
595 set_buffer_escaped(bd1->bd_bh);
596 gfs2_log_lock(sdp);
597 if (n++ > num)
598 break;
599 }
600 }
601 gfs2_log_unlock(sdp);
602 if (bh) {
603 set_buffer_dirty(bh);
604 ll_rw_block(WRITE, 1, &bh);
605 bh = NULL;
606 }
607 n = 0;
608 gfs2_log_lock(sdp);
609 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
610 bd_le.le_list) {
611 if (!bd2->bd_bh)
612 continue;
613 /* copy buffer if it needs escaping */
614 gfs2_log_unlock(sdp);
615 if (unlikely(buffer_escaped(bd2->bd_bh))) {
616 void *kaddr;
617 struct page *page = bd2->bd_bh->b_page;
618 bh = gfs2_log_get_buf(sdp);
619 kaddr = kmap_atomic(page, KM_USER0);
620 memcpy(bh->b_data,
621 kaddr + bh_offset(bd2->bd_bh),
622 sdp->sd_sb.sb_bsize);
623 kunmap_atomic(page, KM_USER0);
624 *(__be32 *)bh->b_data = 0;
625 } else {
626 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
627 }
628 set_buffer_dirty(bh);
629 ll_rw_block(WRITE, 1, &bh);
630 gfs2_log_lock(sdp);
631 if (++n >= num)
632 break;
633 }
634 bh = NULL;
635 total_dbuf -= num;
636 total_jdata -= num;
637 }
638 gfs2_log_unlock(sdp);
639
640 /* Wait on all ordered buffers */
641 while (!list_empty(&started)) {
642 gfs2_log_lock(sdp);
643 bd1 = list_entry(started.next, struct gfs2_bufdata,
644 bd_le.le_list);
645 list_del(&bd1->bd_le.le_list);
646 sdp->sd_log_num_databuf--;
647
648 bh = bd1->bd_bh;
649 if (bh) {
650 bh->b_private = NULL;
651 gfs2_log_unlock(sdp);
652 wait_on_buffer(bh);
653 brelse(bh);
654 } else
655 gfs2_log_unlock(sdp);
656
657 kfree(bd1);
658 }
659
660 /* We've removed all the ordered write bufs here, so only jdata left */
661 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
662}
663
664static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
665 struct gfs2_log_descriptor *ld,
666 __be64 *ptr, int pass)
667{
668 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
669 struct gfs2_sbd *sdp = ip->i_sbd;
670 struct gfs2_glock *gl = ip->i_gl;
671 unsigned int blks = be32_to_cpu(ld->ld_data1);
672 struct buffer_head *bh_log, *bh_ip;
673 uint64_t blkno;
674 uint64_t esc;
675 int error = 0;
676
677 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
678 return 0;
679
680 gfs2_replay_incr_blk(sdp, &start);
681 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
682 blkno = be64_to_cpu(*ptr++);
683 esc = be64_to_cpu(*ptr++);
684
685 sdp->sd_found_blocks++;
686
687 if (gfs2_revoke_check(sdp, blkno, start))
688 continue;
689
690 error = gfs2_replay_read_block(jd, start, &bh_log);
691 if (error)
692 return error;
693
694 bh_ip = gfs2_meta_new(gl, blkno);
695 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
696
697 /* Unescape */
698 if (esc) {
699 __be32 *eptr = (__be32 *)bh_ip->b_data;
700 *eptr = cpu_to_be32(GFS2_MAGIC);
701 }
702 mark_buffer_dirty(bh_ip);
703
704 brelse(bh_log);
705 brelse(bh_ip);
706 if (error)
707 break;
708
709 sdp->sd_replayed_blocks++;
710 }
711
712 return error;
713}
714
715/* FIXME: sort out accounting for log blocks etc. */
716
717static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
718{
719 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
720 struct gfs2_sbd *sdp = ip->i_sbd;
721
722 if (error) {
723 gfs2_meta_sync(ip->i_gl,
724 DIO_START | DIO_WAIT);
725 return;
726 }
727 if (pass != 1)
728 return;
729
730 /* data sync? */
731 gfs2_meta_sync(ip->i_gl, DIO_START | DIO_WAIT);
732
733 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
734 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
735}
736
737static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
738{
739 struct list_head *head = &sdp->sd_log_le_databuf;
740 struct gfs2_bufdata *bd;
741
742 while (!list_empty(head)) {
743 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
744 list_del(&bd->bd_le.le_list);
745 sdp->sd_log_num_databuf--;
746 sdp->sd_log_num_jdata--;
747 gfs2_unpin(sdp, bd->bd_bh, ai);
748 }
749 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
750 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
751}
752
753
/* Log operations for glocks: add and after-commit hooks only. */
const struct gfs2_log_operations gfs2_glock_lops = {
	.lo_add = glock_lo_add,
	.lo_after_commit = glock_lo_after_commit,
	.lo_name = "glock"
};

/* Log operations for metadata buffers: commit and recovery-scan hooks. */
const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf"
};

/* Log operations for revokes. */
const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke"
};

/* Log operations for resource groups: add and after-commit hooks only. */
const struct gfs2_log_operations gfs2_rg_lops = {
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
	.lo_name = "rg"
};

/*
 * Log operations for data buffers.  Shares buf_lo_incore_commit with the
 * metadata buffer ops; note that there is no lo_before_scan hook here,
 * only lo_scan_elements and lo_after_scan.
 */
const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf"
};

/*
 * NULL-terminated table of all log operation sets.  The lops_*() helpers
 * in lops.h walk it in this order.
 */
const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_glock_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	&gfs2_rg_lops,
	&gfs2_databuf_lops,
	NULL
};
804
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
new file mode 100644
index 000000000000..8a1029d3d389
--- /dev/null
+++ b/fs/gfs2/lops.h
@@ -0,0 +1,96 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LOPS_DOT_H__
11#define __LOPS_DOT_H__
12
13extern const struct gfs2_log_operations gfs2_glock_lops;
14extern const struct gfs2_log_operations gfs2_buf_lops;
15extern const struct gfs2_log_operations gfs2_revoke_lops;
16extern const struct gfs2_log_operations gfs2_rg_lops;
17extern const struct gfs2_log_operations gfs2_databuf_lops;
18
19extern const struct gfs2_log_operations *gfs2_log_ops[];
20
21static inline void lops_init_le(struct gfs2_log_element *le,
22 const struct gfs2_log_operations *lops)
23{
24 INIT_LIST_HEAD(&le->le_list);
25 le->le_ops = lops;
26}
27
28static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
29{
30 if (le->le_ops->lo_add)
31 le->le_ops->lo_add(sdp, le);
32}
33
34static inline void lops_incore_commit(struct gfs2_sbd *sdp,
35 struct gfs2_trans *tr)
36{
37 int x;
38 for (x = 0; gfs2_log_ops[x]; x++)
39 if (gfs2_log_ops[x]->lo_incore_commit)
40 gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
41}
42
43static inline void lops_before_commit(struct gfs2_sbd *sdp)
44{
45 int x;
46 for (x = 0; gfs2_log_ops[x]; x++)
47 if (gfs2_log_ops[x]->lo_before_commit)
48 gfs2_log_ops[x]->lo_before_commit(sdp);
49}
50
51static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
52{
53 int x;
54 for (x = 0; gfs2_log_ops[x]; x++)
55 if (gfs2_log_ops[x]->lo_after_commit)
56 gfs2_log_ops[x]->lo_after_commit(sdp, ai);
57}
58
59static inline void lops_before_scan(struct gfs2_jdesc *jd,
60 struct gfs2_log_header *head,
61 unsigned int pass)
62{
63 int x;
64 for (x = 0; gfs2_log_ops[x]; x++)
65 if (gfs2_log_ops[x]->lo_before_scan)
66 gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
67}
68
69static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
70 struct gfs2_log_descriptor *ld,
71 __be64 *ptr,
72 unsigned int pass)
73{
74 int x, error;
75 for (x = 0; gfs2_log_ops[x]; x++)
76 if (gfs2_log_ops[x]->lo_scan_elements) {
77 error = gfs2_log_ops[x]->lo_scan_elements(jd, start,
78 ld, ptr, pass);
79 if (error)
80 return error;
81 }
82
83 return 0;
84}
85
86static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
87 unsigned int pass)
88{
89 int x;
90 for (x = 0; gfs2_log_ops[x]; x++)
91 if (gfs2_log_ops[x]->lo_before_scan)
92 gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
93}
94
95#endif /* __LOPS_DOT_H__ */
96
diff --git a/fs/gfs2/lvb.c b/fs/gfs2/lvb.c
new file mode 100644
index 000000000000..e88e9cce14e7
--- /dev/null
+++ b/fs/gfs2/lvb.c
@@ -0,0 +1,45 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16
17#include "gfs2.h"
18#include "lm_interface.h"
19#include "incore.h"
20#include "lvb.h"
21
/*
 * pv - print one member of a structure pointer with the given format.
 * NOTE(review): the expansion already ends in ';', and nothing in the
 * functions below uses this macro - confirm whether it is still needed.
 */
#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
				       struct->member);
24
25void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb)
26{
27 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
28
29 qb->qb_magic = be32_to_cpu(str->qb_magic);
30 qb->qb_limit = be64_to_cpu(str->qb_limit);
31 qb->qb_warn = be64_to_cpu(str->qb_warn);
32 qb->qb_value = be64_to_cpu(str->qb_value);
33}
34
35void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb)
36{
37 struct gfs2_quota_lvb *str = (struct gfs2_quota_lvb *)lvb;
38
39 str->qb_magic = cpu_to_be32(qb->qb_magic);
40 str->qb_limit = cpu_to_be64(qb->qb_limit);
41 str->qb_warn = cpu_to_be64(qb->qb_warn);
42 str->qb_value = cpu_to_be64(qb->qb_value);
43}
44
45
diff --git a/fs/gfs2/lvb.h b/fs/gfs2/lvb.h
new file mode 100644
index 000000000000..1b1a8b75219a
--- /dev/null
+++ b/fs/gfs2/lvb.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __LVB_DOT_H__
11#define __LVB_DOT_H__
12
13#define GFS2_MIN_LVB_SIZE 32
14
15void gfs2_quota_lvb_in(struct gfs2_quota_lvb *qb, char *lvb);
16void gfs2_quota_lvb_out(struct gfs2_quota_lvb *qb, char *lvb);
17
18#endif /* __LVB_DOT_H__ */
19
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
new file mode 100644
index 000000000000..b24d0b40d965
--- /dev/null
+++ b/fs/gfs2/main.c
@@ -0,0 +1,129 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/init.h>
17#include <linux/gfs2_ondisk.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "ops_fstype.h"
23#include "sys.h"
24#include "util.h"
25
26static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
27{
28 struct gfs2_inode *ip = foo;
29 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
30 SLAB_CTOR_CONSTRUCTOR) {
31 inode_init_once(&ip->i_inode);
32 atomic_set(&ip->i_count, 0);
33 ip->i_vnode = &ip->i_inode;
34 spin_lock_init(&ip->i_spin);
35 init_rwsem(&ip->i_rw_mutex);
36 memset(ip->i_cache, 0, sizeof(ip->i_cache));
37 }
38}
39
40/**
41 * init_gfs2_fs - Register GFS2 as a filesystem
42 *
43 * Returns: 0 on success, error code on failure
44 */
45
46static int __init init_gfs2_fs(void)
47{
48 int error;
49
50 gfs2_init_lmh();
51
52 error = gfs2_sys_init();
53 if (error)
54 return error;
55
56 error = -ENOMEM;
57
58 gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
59 sizeof(struct gfs2_glock),
60 0, 0, NULL, NULL);
61 if (!gfs2_glock_cachep)
62 goto fail;
63
64 gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
65 sizeof(struct gfs2_inode),
66 0, (SLAB_RECLAIM_ACCOUNT|
67 SLAB_PANIC|SLAB_MEM_SPREAD),
68 gfs2_init_inode_once, NULL);
69 if (!gfs2_inode_cachep)
70 goto fail;
71
72 gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
73 sizeof(struct gfs2_bufdata),
74 0, 0, NULL, NULL);
75 if (!gfs2_bufdata_cachep)
76 goto fail;
77
78 error = register_filesystem(&gfs2_fs_type);
79 if (error)
80 goto fail;
81
82 error = register_filesystem(&gfs2meta_fs_type);
83 if (error)
84 goto fail_unregister;
85
86 printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
87
88 return 0;
89
90fail_unregister:
91 unregister_filesystem(&gfs2_fs_type);
92fail:
93 if (gfs2_bufdata_cachep)
94 kmem_cache_destroy(gfs2_bufdata_cachep);
95
96 if (gfs2_inode_cachep)
97 kmem_cache_destroy(gfs2_inode_cachep);
98
99 if (gfs2_glock_cachep)
100 kmem_cache_destroy(gfs2_glock_cachep);
101
102 gfs2_sys_uninit();
103 return error;
104}
105
/**
 * exit_gfs2_fs - Unregister the file system
 *
 * Unregisters both filesystem types, destroys the three slab caches
 * created by init_gfs2_fs() and tears down sysfs support.
 */

static void __exit exit_gfs2_fs(void)
{
	unregister_filesystem(&gfs2_fs_type);
	unregister_filesystem(&gfs2meta_fs_type);

	kmem_cache_destroy(gfs2_bufdata_cachep);
	kmem_cache_destroy(gfs2_inode_cachep);
	kmem_cache_destroy(gfs2_glock_cachep);

	gfs2_sys_uninit();
}
122
123MODULE_DESCRIPTION("Global File System");
124MODULE_AUTHOR("Red Hat, Inc.");
125MODULE_LICENSE("GPL");
126
127module_init(init_gfs2_fs);
128module_exit(exit_gfs2_fs);
129
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
new file mode 100644
index 000000000000..c78517225f61
--- /dev/null
+++ b/fs/gfs2/meta_io.c
@@ -0,0 +1,892 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/writeback.h>
18#include <linux/swap.h>
19#include <linux/delay.h>
20#include <linux/gfs2_ondisk.h>
21
22#include "gfs2.h"
23#include "lm_interface.h"
24#include "incore.h"
25#include "glock.h"
26#include "glops.h"
27#include "inode.h"
28#include "log.h"
29#include "lops.h"
30#include "meta_io.h"
31#include "rgrp.h"
32#include "trans.h"
33#include "util.h"
34
/*
 * buffer_busy(bh)  - non-zero if the buffer is dirty, locked or pinned
 * buffer_in_io(bh) - non-zero if the buffer is dirty or locked
 */
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
#define buffer_in_io(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
39
/*
 * aspace_get_block - get_block callback for the metadata address space
 *
 * Always fires a filesystem assertion warning and fails with
 * -EOPNOTSUPP; it is handed to block_write_full_page() by
 * gfs2_aspace_writepage().
 */
static int aspace_get_block(struct inode *inode, sector_t lblock,
			    struct buffer_head *bh_result, int create)
{
	gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
	return -EOPNOTSUPP;
}
46
/*
 * gfs2_aspace_writepage - write out a page of the metadata address space
 * @page: the page to write
 * @wbc: writeback control
 *
 * Returns: the result of block_write_full_page()
 */
static int gfs2_aspace_writepage(struct page *page,
				 struct writeback_control *wbc)
{
	return block_write_full_page(page, aspace_get_block, wbc);
}
52
53/**
54 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
55 * @bh: the buffer we're stuck on
56 *
57 */
58
59static void stuck_releasepage(struct buffer_head *bh)
60{
61 struct inode *inode = bh->b_page->mapping->host;
62 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
63 struct gfs2_bufdata *bd = bh->b_private;
64 struct gfs2_glock *gl;
65
66 fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
67 fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
68 (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
69 fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
70 fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
71
72 if (!bd)
73 return;
74
75 gl = bd->bd_gl;
76
77 fs_warn(sdp, "gl = (%u, %llu)\n",
78 gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
79
80 fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
81 (list_empty(&bd->bd_list_tr)) ? "no" : "yes",
82 (list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
83
84 if (gl->gl_ops == &gfs2_inode_glops) {
85 struct gfs2_inode *ip = gl->gl_object;
86 unsigned int x;
87
88 if (!ip)
89 return;
90
91 fs_warn(sdp, "ip = %llu %llu\n",
92 (unsigned long long)ip->i_num.no_formal_ino,
93 (unsigned long long)ip->i_num.no_addr);
94 fs_warn(sdp, "ip->i_count = %d, ip->i_vnode = %s\n",
95 atomic_read(&ip->i_count),
96 (ip->i_vnode) ? "!NULL" : "NULL");
97
98 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
99 fs_warn(sdp, "ip->i_cache[%u] = %s\n",
100 x, (ip->i_cache[x]) ? "!NULL" : "NULL");
101 }
102}
103
/**
 * gfs2_aspace_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS, ignored by us
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * For each buffer on the page: while it is still referenced, either
 * give up (aspace->i_writecount is zero) or yield and retry, printing
 * diagnostics via stuck_releasepage() every gt_stall_secs seconds.  Once
 * unreferenced, any attached gfs2_bufdata is freed.
 *
 * Returns: 0 if a buffer is still in use and we are not waiting for it,
 *          otherwise the result of try_to_free_buffers()
 */

static int gfs2_aspace_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct inode *aspace = page->mapping->host;
	struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
	struct buffer_head *bh, *head;
	struct gfs2_bufdata *bd;
	unsigned long t;

	if (!page_has_buffers(page))
		goto out;

	head = bh = page_buffers(page);
	do {
		t = jiffies;

		while (atomic_read(&bh->b_count)) {
			/* Only wait when i_writecount is raised (see
			   gfs2_meta_inval()) */
			if (atomic_read(&aspace->i_writecount)) {
				if (time_after_eq(jiffies, t +
				    gfs2_tune_get(sdp, gt_stall_secs) * HZ)) {
					stuck_releasepage(bh);
					t = jiffies;
				}

				yield();
				continue;
			}

			return 0;
		}

		gfs2_assert_warn(sdp, !buffer_pinned(bh));

		bd = bh->b_private;
		if (bd) {
			/* The bufdata must be idle: not on any transaction,
			   log or AIL list */
			gfs2_assert_warn(sdp, bd->bd_bh == bh);
			gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
			gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list));
			gfs2_assert_warn(sdp, !bd->bd_ail);
			kmem_cache_free(gfs2_bufdata_cachep, bd);
			bh->b_private = NULL;
		}

		bh = bh->b_this_page;
	}
	while (bh != head);

 out:
	return try_to_free_buffers(page);
}
164
/* Address space operations installed on every aspace inode by
   gfs2_aspace_get() */
static struct address_space_operations aspace_aops = {
	.writepage = gfs2_aspace_writepage,
	.releasepage = gfs2_aspace_releasepage,
};
169
170/**
171 * gfs2_aspace_get - Create and initialize a struct inode structure
172 * @sdp: the filesystem the aspace is in
173 *
174 * Right now a struct inode is just a struct inode. Maybe Linux
175 * will supply a more lightweight address space construct (that works)
176 * in the future.
177 *
178 * Make sure pages/buffers in this aspace aren't in high memory.
179 *
180 * Returns: the aspace
181 */
182
183struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
184{
185 struct inode *aspace;
186
187 aspace = new_inode(sdp->sd_vfs);
188 if (aspace) {
189 mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
190 aspace->i_mapping->a_ops = &aspace_aops;
191 aspace->i_size = ~0ULL;
192 aspace->u.generic_ip = NULL;
193 insert_inode_hash(aspace);
194 }
195 return aspace;
196}
197
/*
 * gfs2_aspace_put - release an aspace obtained from gfs2_aspace_get()
 * @aspace: the aspace inode
 *
 * Removes the inode from the inode hash and drops the reference.
 */
void gfs2_aspace_put(struct inode *aspace)
{
	remove_inode_hash(aspace);
	iput(aspace);
}
203
/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @ai: the part of the AIL
 *
 * Must be called with sd_log_lock held (checked with BUG_ON).  The lock
 * is dropped and retaken around I/O submission and error reporting, so
 * the list scan is restarted from scratch after each write is issued.
 */

void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int retry;

	BUG_ON(!spin_is_locked(&sdp->sd_log_lock));

	do {
		retry = 0;

		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
						 bd_ail_st_list) {
			bh = bd->bd_bh;

			gfs2_assert(sdp, bd->bd_ail == ai);

			/* Not dirty, locked or pinned: I/O is finished, so
			   move it on to the ail2 list */
			if (!buffer_busy(bh)) {
				if (!buffer_uptodate(bh)) {
					gfs2_log_unlock(sdp);
					gfs2_io_error_bh(sdp, bh);
					gfs2_log_lock(sdp);
				}
				list_move(&bd->bd_ail_st_list,
					  &ai->ai_ail2_list);
				continue;
			}

			/* Busy but not dirty (locked or pinned): nothing
			   to submit */
			if (!buffer_dirty(bh))
				continue;

			/* Move to the head so the reverse scan does not
			   pick the same buffer first after the restart */
			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);

			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);

			/* The list may have changed while unlocked */
			retry = 1;
			break;
		}
	} while (retry);
}
254
255/**
256 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
257 * @sdp: the filesystem
258 * @ai: the AIL entry
259 *
260 */
261
262int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
263{
264 struct gfs2_bufdata *bd, *s;
265 struct buffer_head *bh;
266
267 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
268 bd_ail_st_list) {
269 bh = bd->bd_bh;
270
271 gfs2_assert(sdp, bd->bd_ail == ai);
272
273 if (buffer_busy(bh)) {
274 if (flags & DIO_ALL)
275 continue;
276 else
277 break;
278 }
279
280 if (!buffer_uptodate(bh))
281 gfs2_io_error_bh(sdp, bh);
282
283 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
284 }
285
286 return list_empty(&ai->ai_ail1_list);
287}
288
289/**
290 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
291 * @sdp: the filesystem
292 * @ai: the AIL entry
293 *
294 */
295
296void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
297{
298 struct list_head *head = &ai->ai_ail2_list;
299 struct gfs2_bufdata *bd;
300
301 while (!list_empty(head)) {
302 bd = list_entry(head->prev, struct gfs2_bufdata,
303 bd_ail_st_list);
304 gfs2_assert(sdp, bd->bd_ail == ai);
305 bd->bd_ail = NULL;
306 list_del(&bd->bd_ail_st_list);
307 list_del(&bd->bd_ail_gl_list);
308 atomic_dec(&bd->bd_gl->gl_ail_count);
309 brelse(bd->bd_bh);
310 }
311}
312
/**
 * gfs2_ail_empty_gl - remove all buffers for a given lock from the AIL
 * @gl: the glock
 *
 * None of the buffers should be dirty, locked, or pinned.
 *
 * Runs inside a transaction sized for one revoke per buffer currently
 * counted in gl_ail_count; each buffer is taken off the AIL and a revoke
 * is added for its block.  The log is flushed afterwards.
 */

void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	unsigned int blocks;
	struct list_head *head = &gl->gl_ail_list;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;
	uint64_t blkno;
	int error;

	blocks = atomic_read(&gl->gl_ail_count);
	if (!blocks)
		return;

	error = gfs2_trans_begin(sdp, 0, blocks);
	if (gfs2_assert_withdraw(sdp, !error))
		return;

	gfs2_log_lock(sdp);
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata,
				bd_ail_gl_list);
		bh = bd->bd_bh;
		/* Save the block number before the reference is dropped */
		blkno = bh->b_blocknr;
		gfs2_assert_withdraw(sdp, !buffer_busy(bh));

		bd->bd_ail = NULL;
		list_del(&bd->bd_ail_st_list);
		list_del(&bd->bd_ail_gl_list);
		atomic_dec(&gl->gl_ail_count);
		brelse(bh);
		/* The revoke is added with the log lock dropped */
		gfs2_log_unlock(sdp);

		gfs2_trans_add_revoke(sdp, blkno);

		gfs2_log_lock(sdp);
	}
	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
	gfs2_log_unlock(sdp);

	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp, NULL);
}
363
/**
 * gfs2_meta_inval - Invalidate all buffers associated with a glock
 * @gl: the glock
 *
 * The glock must have no buffers left in the AIL.  i_writecount on the
 * aspace inode is raised for the duration of the truncate; that is the
 * counter gfs2_aspace_releasepage() reads to decide whether to wait for
 * a referenced buffer rather than give up.
 */

void gfs2_meta_inval(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct inode *aspace = gl->gl_aspace;
	struct address_space *mapping = gl->gl_aspace->i_mapping;

	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));

	atomic_inc(&aspace->i_writecount);
	truncate_inode_pages(mapping, 0);
	atomic_dec(&aspace->i_writecount);

	/* Every page should be gone now */
	gfs2_assert_withdraw(sdp, !mapping->nrpages);
}
384
385/**
386 * gfs2_meta_sync - Sync all buffers associated with a glock
387 * @gl: The glock
388 * @flags: DIO_START | DIO_WAIT
389 *
390 */
391
392void gfs2_meta_sync(struct gfs2_glock *gl, int flags)
393{
394 struct address_space *mapping = gl->gl_aspace->i_mapping;
395 int error = 0;
396
397 if (flags & DIO_START)
398 filemap_fdatawrite(mapping);
399 if (!error && (flags & DIO_WAIT))
400 error = filemap_fdatawait(mapping);
401
402 if (error)
403 gfs2_io_error(gl->gl_sbd);
404}
405
406/**
407 * getbuf - Get a buffer with a given address space
408 * @sdp: the filesystem
409 * @aspace: the address space
410 * @blkno: the block number (filesystem scope)
411 * @create: 1 if the buffer should be created
412 *
413 * Returns: the buffer
414 */
415
416static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace,
417 uint64_t blkno, int create)
418{
419 struct page *page;
420 struct buffer_head *bh;
421 unsigned int shift;
422 unsigned long index;
423 unsigned int bufnum;
424
425 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
426 index = blkno >> shift; /* convert block to page */
427 bufnum = blkno - (index << shift); /* block buf index within page */
428
429 if (create) {
430 for (;;) {
431 page = grab_cache_page(aspace->i_mapping, index);
432 if (page)
433 break;
434 yield();
435 }
436 } else {
437 page = find_lock_page(aspace->i_mapping, index);
438 if (!page)
439 return NULL;
440 }
441
442 if (!page_has_buffers(page))
443 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
444
445 /* Locate header for our buffer within our page */
446 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
447 /* Do nothing */;
448 get_bh(bh);
449
450 if (!buffer_mapped(bh))
451 map_bh(bh, sdp->sd_vfs, blkno);
452
453 unlock_page(page);
454 mark_page_accessed(page);
455 page_cache_release(page);
456
457 return bh;
458}
459
/*
 * meta_prep_new - prepare a buffer for a newly allocated metadata block
 * @bh: the buffer
 *
 * Under the buffer lock, marks the buffer clean and uptodate; then
 * stamps GFS2_MAGIC into the metadata header at the start of the block.
 */
static void meta_prep_new(struct buffer_head *bh)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}
471
/**
 * gfs2_meta_new - Get a block
 * @gl: The glock associated with this block
 * @blkno: The block number
 *
 * The buffer is looked up (or created) in the glock's address space
 * without reading from disk, and initialised by meta_prep_new().
 *
 * Returns: The buffer
 */

struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno)
{
	struct buffer_head *bh;
	bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
	meta_prep_new(bh);
	return bh;
}
487
488/**
489 * gfs2_meta_read - Read a block from disk
490 * @gl: The glock covering the block
491 * @blkno: The block number
492 * @flags: flags to gfs2_dreread()
493 * @bhp: the place where the buffer is returned (NULL on failure)
494 *
495 * Returns: errno
496 */
497
498int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags,
499 struct buffer_head **bhp)
500{
501 int error;
502
503 *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE);
504 error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags);
505 if (error)
506 brelse(*bhp);
507
508 return error;
509}
510
511/**
512 * gfs2_meta_reread - Reread a block from disk
513 * @sdp: the filesystem
514 * @bh: The block to read
515 * @flags: Flags that control the read
516 *
517 * Returns: errno
518 */
519
520int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags)
521{
522 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
523 return -EIO;
524
525 if (flags & DIO_FORCE)
526 clear_buffer_uptodate(bh);
527
528 if ((flags & DIO_START) && !buffer_uptodate(bh))
529 ll_rw_block(READ, 1, &bh);
530
531 if (flags & DIO_WAIT) {
532 wait_on_buffer(bh);
533
534 if (!buffer_uptodate(bh)) {
535 struct gfs2_trans *tr = current->journal_info;
536 if (tr && tr->tr_touched)
537 gfs2_io_error_bh(sdp, bh);
538 return -EIO;
539 }
540 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
541 return -EIO;
542 }
543
544 return 0;
545}
546
547/**
548 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
549 * @gl: the glock the buffer belongs to
550 * @bh: The buffer to be attached to
551 * @meta: Flag to indicate whether its metadata or not
552 */
553
554void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
555 int meta)
556{
557 struct gfs2_bufdata *bd;
558
559 if (meta)
560 lock_page(bh->b_page);
561
562 if (bh->b_private) {
563 if (meta)
564 unlock_page(bh->b_page);
565 return;
566 }
567
568 bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
569 memset(bd, 0, sizeof(struct gfs2_bufdata));
570
571 bd->bd_bh = bh;
572 bd->bd_gl = gl;
573
574 INIT_LIST_HEAD(&bd->bd_list_tr);
575 if (meta) {
576 lops_init_le(&bd->bd_le, &gfs2_buf_lops);
577 } else {
578 lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
579 get_bh(bh);
580 }
581 bh->b_private = bd;
582
583 if (meta)
584 unlock_page(bh->b_page);
585}
586
/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to be pinned
 *
 * The journal must be live and the buffer must not already be pinned
 * (both are checked with withdraw assertions).  On return the buffer is
 * pinned, clean, and holds an extra reference.
 */

void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd = bh->b_private;

	gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));

	/* Pinning twice is a bug */
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);

	wait_on_buffer(bh);

	/* If this buffer is in the AIL and it has already been written
	   to in-place disk block, remove it from the AIL. */

	gfs2_log_lock(sdp);
	if (bd->bd_ail && !buffer_in_io(bh))
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
	gfs2_log_unlock(sdp);

	/* Clear the dirty bit, then wait for any I/O that is still
	   running on the buffer */
	clear_buffer_dirty(bh);
	wait_on_buffer(bh);

	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);

	get_bh(bh);
}
621
622/**
623 * gfs2_unpin - Unpin a buffer
624 * @sdp: the filesystem the buffer belongs to
625 * @bh: The buffer to unpin
626 * @ai:
627 *
628 */
629
630void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
631 struct gfs2_ail *ai)
632{
633 struct gfs2_bufdata *bd = bh->b_private;
634
635 gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
636
637 if (!buffer_pinned(bh))
638 gfs2_assert_withdraw(sdp, 0);
639
640 mark_buffer_dirty(bh);
641 clear_buffer_pinned(bh);
642
643 gfs2_log_lock(sdp);
644 if (bd->bd_ail) {
645 list_del(&bd->bd_ail_st_list);
646 brelse(bh);
647 } else {
648 struct gfs2_glock *gl = bd->bd_gl;
649 list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
650 atomic_inc(&gl->gl_ail_count);
651 }
652 bd->bd_ail = ai;
653 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
654 gfs2_log_unlock(sdp);
655}
656
657/**
658 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
659 * @ip: the inode who owns the buffers
660 * @bstart: the first buffer in the run
661 * @blen: the number of buffers in the run
662 *
663 */
664
665void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
666{
667 struct gfs2_sbd *sdp = ip->i_sbd;
668 struct inode *aspace = ip->i_gl->gl_aspace;
669 struct buffer_head *bh;
670
671 while (blen) {
672 bh = getbuf(sdp, aspace, bstart, NO_CREATE);
673 if (bh) {
674 struct gfs2_bufdata *bd = bh->b_private;
675
676 if (test_clear_buffer_pinned(bh)) {
677 struct gfs2_trans *tr = current->journal_info;
678 gfs2_log_lock(sdp);
679 list_del_init(&bd->bd_le.le_list);
680 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
681 sdp->sd_log_num_buf--;
682 gfs2_log_unlock(sdp);
683 tr->tr_num_buf_rm++;
684 brelse(bh);
685 }
686 if (bd) {
687 gfs2_log_lock(sdp);
688 if (bd->bd_ail) {
689 uint64_t blkno = bh->b_blocknr;
690 bd->bd_ail = NULL;
691 list_del(&bd->bd_ail_st_list);
692 list_del(&bd->bd_ail_gl_list);
693 atomic_dec(&bd->bd_gl->gl_ail_count);
694 brelse(bh);
695 gfs2_log_unlock(sdp);
696 gfs2_trans_add_revoke(sdp, blkno);
697 } else
698 gfs2_log_unlock(sdp);
699 }
700
701 lock_buffer(bh);
702 clear_buffer_dirty(bh);
703 clear_buffer_uptodate(bh);
704 unlock_buffer(bh);
705
706 brelse(bh);
707 }
708
709 bstart++;
710 blen--;
711 }
712}
713
714/**
715 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
716 * @ip: The GFS2 inode
717 *
718 * This releases buffers that are in the most-recently-used array of
719 * blocks used for indirect block addressing for this inode.
720 */
721
722void gfs2_meta_cache_flush(struct gfs2_inode *ip)
723{
724 struct buffer_head **bh_slot;
725 unsigned int x;
726
727 spin_lock(&ip->i_spin);
728
729 for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) {
730 bh_slot = &ip->i_cache[x];
731 if (!*bh_slot)
732 break;
733 brelse(*bh_slot);
734 *bh_slot = NULL;
735 }
736
737 spin_unlock(&ip->i_spin);
738}
739
740/**
741 * gfs2_meta_indirect_buffer - Get a metadata buffer
742 * @ip: The GFS2 inode
743 * @height: The level of this buf in the metadata (indir addr) tree (if any)
744 * @num: The block number (device relative) of the buffer
745 * @new: Non-zero if we may create a new buffer
746 * @bhp: the buffer is returned here
747 *
748 * Try to use the gfs2_inode's MRU metadata tree cache.
749 *
750 * Returns: errno
751 */
752
753int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
754 int new, struct buffer_head **bhp)
755{
756 struct buffer_head *bh, **bh_slot = ip->i_cache + height;
757 int error;
758
759 spin_lock(&ip->i_spin);
760 bh = *bh_slot;
761 if (bh) {
762 if (bh->b_blocknr == num)
763 get_bh(bh);
764 else
765 bh = NULL;
766 }
767 spin_unlock(&ip->i_spin);
768
769 if (bh) {
770 if (new)
771 meta_prep_new(bh);
772 else {
773 error = gfs2_meta_reread(ip->i_sbd, bh,
774 DIO_START | DIO_WAIT);
775 if (error) {
776 brelse(bh);
777 return error;
778 }
779 }
780 } else {
781 if (new)
782 bh = gfs2_meta_new(ip->i_gl, num);
783 else {
784 error = gfs2_meta_read(ip->i_gl, num,
785 DIO_START | DIO_WAIT, &bh);
786 if (error)
787 return error;
788 }
789
790 spin_lock(&ip->i_spin);
791 if (*bh_slot != bh) {
792 brelse(*bh_slot);
793 *bh_slot = bh;
794 get_bh(bh);
795 }
796 spin_unlock(&ip->i_spin);
797 }
798
799 if (new) {
800 if (gfs2_assert_warn(ip->i_sbd, height)) {
801 brelse(bh);
802 return -EIO;
803 }
804 gfs2_trans_add_bh(ip->i_gl, bh, 1);
805 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
806 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
807
808 } else if (gfs2_metatype_check(ip->i_sbd, bh,
809 (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) {
810 brelse(bh);
811 return -EIO;
812 }
813
814 *bhp = bh;
815
816 return 0;
817}
818
819/**
820 * gfs2_meta_ra - start readahead on an extent of a file
821 * @gl: the glock the blocks belong to
822 * @dblock: the starting disk block
823 * @extlen: the number of blocks in the extent
824 *
825 */
826
827void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen)
828{
829 struct gfs2_sbd *sdp = gl->gl_sbd;
830 struct inode *aspace = gl->gl_aspace;
831 struct buffer_head *first_bh, *bh;
832 uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
833 sdp->sd_sb.sb_bsize_shift;
834 int error;
835
836 if (!extlen || !max_ra)
837 return;
838 if (extlen > max_ra)
839 extlen = max_ra;
840
841 first_bh = getbuf(sdp, aspace, dblock, CREATE);
842
843 if (buffer_uptodate(first_bh))
844 goto out;
845 if (!buffer_locked(first_bh)) {
846 error = gfs2_meta_reread(sdp, first_bh, DIO_START);
847 if (error)
848 goto out;
849 }
850
851 dblock++;
852 extlen--;
853
854 while (extlen) {
855 bh = getbuf(sdp, aspace, dblock, CREATE);
856
857 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
858 error = gfs2_meta_reread(sdp, bh, DIO_START);
859 brelse(bh);
860 if (error)
861 goto out;
862 } else
863 brelse(bh);
864
865 dblock++;
866 extlen--;
867
868 if (buffer_uptodate(first_bh))
869 break;
870 }
871
872 out:
873 brelse(first_bh);
874}
875
876/**
877 * gfs2_meta_syncfs - sync all the buffers in a filesystem
878 * @sdp: the filesystem
879 *
880 */
881
882void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
883{
884 gfs2_log_flush(sdp, NULL);
885 for (;;) {
886 gfs2_ail1_start(sdp, DIO_ALL);
887 if (gfs2_ail1_empty(sdp, DIO_ALL))
888 break;
889 msleep(10);
890 }
891}
892
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
new file mode 100644
index 000000000000..23c6a596fd9e
--- /dev/null
+++ b/fs/gfs2/meta_io.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __DIO_DOT_H__
11#define __DIO_DOT_H__
12
13static inline void gfs2_buffer_clear(struct buffer_head *bh)
14{
15 memset(bh->b_data, 0, bh->b_size);
16}
17
18static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
19{
20 memset(bh->b_data + head, 0, bh->b_size - head);
21}
22
23static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset,
24 int amount, int journaled)
25{
26 int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0;
27 int z_len1 = offset - z_off1;
28 int z_off2 = offset + amount;
29 int z_len2 = (bh)->b_size - z_off2;
30
31 if (z_len1)
32 memset(bh->b_data + z_off1, 0, z_len1);
33
34 if (z_len2)
35 memset(bh->b_data + z_off2, 0, z_len2);
36}
37
38static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
39 int to_head,
40 struct buffer_head *from_bh,
41 int from_head)
42{
43 memcpy(to_bh->b_data + to_head,
44 from_bh->b_data + from_head,
45 from_bh->b_size - from_head);
46 memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
47 0,
48 from_head - to_head);
49}
50
51struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
52void gfs2_aspace_put(struct inode *aspace);
53
54void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
55int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags);
56void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai);
57void gfs2_ail_empty_gl(struct gfs2_glock *gl);
58
59void gfs2_meta_inval(struct gfs2_glock *gl);
60void gfs2_meta_sync(struct gfs2_glock *gl, int flags);
61
62struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno);
63int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno,
64 int flags, struct buffer_head **bhp);
65int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags);
66
67void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
68 int meta);
69void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
70void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
71 struct gfs2_ail *ai);
72
73void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
74
75void gfs2_meta_cache_flush(struct gfs2_inode *ip);
76int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num,
77 int new, struct buffer_head **bhp);
78
79static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
80 struct buffer_head **bhp)
81{
82 return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
83}
84
85void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen);
86void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
87
88#endif /* __DIO_DOT_H__ */
89
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
new file mode 100644
index 000000000000..0d4b230785af
--- /dev/null
+++ b/fs/gfs2/mount.c
@@ -0,0 +1,214 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16
17#include "gfs2.h"
18#include "lm_interface.h"
19#include "incore.h"
20#include "mount.h"
21#include "sys.h"
22#include "util.h"
23
24/**
25 * gfs2_mount_args - Parse mount options
26 * @sdp:
27 * @data:
28 *
29 * Return: errno
30 */
31
32int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
33{
34 struct gfs2_args *args = &sdp->sd_args;
35 char *data = data_arg;
36 char *options, *o, *v;
37 int error = 0;
38
39 if (!remount) {
40 /* If someone preloaded options, use those instead */
41 spin_lock(&gfs2_sys_margs_lock);
42 if (gfs2_sys_margs) {
43 data = gfs2_sys_margs;
44 gfs2_sys_margs = NULL;
45 }
46 spin_unlock(&gfs2_sys_margs_lock);
47
48 /* Set some defaults */
49 args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
50 args->ar_quota = GFS2_QUOTA_DEFAULT;
51 args->ar_data = GFS2_DATA_DEFAULT;
52 }
53
54 /* Split the options into tokens with the "," character and
55 process them */
56
57 for (options = data; (o = strsep(&options, ",")); ) {
58 if (!*o)
59 continue;
60
61 v = strchr(o, '=');
62 if (v)
63 *v++ = 0;
64
65 if (!strcmp(o, "lockproto")) {
66 if (!v)
67 goto need_value;
68 if (remount && strcmp(v, args->ar_lockproto))
69 goto cant_remount;
70 strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
71 args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
72 }
73
74 else if (!strcmp(o, "locktable")) {
75 if (!v)
76 goto need_value;
77 if (remount && strcmp(v, args->ar_locktable))
78 goto cant_remount;
79 strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
80 args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
81 }
82
83 else if (!strcmp(o, "hostdata")) {
84 if (!v)
85 goto need_value;
86 if (remount && strcmp(v, args->ar_hostdata))
87 goto cant_remount;
88 strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
89 args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
90 }
91
92 else if (!strcmp(o, "spectator")) {
93 if (remount && !args->ar_spectator)
94 goto cant_remount;
95 args->ar_spectator = 1;
96 sdp->sd_vfs->s_flags |= MS_RDONLY;
97 }
98
99 else if (!strcmp(o, "ignore_local_fs")) {
100 if (remount && !args->ar_ignore_local_fs)
101 goto cant_remount;
102 args->ar_ignore_local_fs = 1;
103 }
104
105 else if (!strcmp(o, "localflocks")) {
106 if (remount && !args->ar_localflocks)
107 goto cant_remount;
108 args->ar_localflocks = 1;
109 }
110
111 else if (!strcmp(o, "localcaching")) {
112 if (remount && !args->ar_localcaching)
113 goto cant_remount;
114 args->ar_localcaching = 1;
115 }
116
117 else if (!strcmp(o, "debug"))
118 args->ar_debug = 1;
119
120 else if (!strcmp(o, "nodebug"))
121 args->ar_debug = 0;
122
123 else if (!strcmp(o, "upgrade")) {
124 if (remount && !args->ar_upgrade)
125 goto cant_remount;
126 args->ar_upgrade = 1;
127 }
128
129 else if (!strcmp(o, "num_glockd")) {
130 unsigned int x;
131 if (!v)
132 goto need_value;
133 sscanf(v, "%u", &x);
134 if (remount && x != args->ar_num_glockd)
135 goto cant_remount;
136 if (!x || x > GFS2_GLOCKD_MAX) {
137 fs_info(sdp, "0 < num_glockd <= %u (not %u)\n",
138 GFS2_GLOCKD_MAX, x);
139 error = -EINVAL;
140 break;
141 }
142 args->ar_num_glockd = x;
143 }
144
145 else if (!strcmp(o, "acl")) {
146 args->ar_posix_acl = 1;
147 sdp->sd_vfs->s_flags |= MS_POSIXACL;
148 }
149
150 else if (!strcmp(o, "noacl")) {
151 args->ar_posix_acl = 0;
152 sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
153 }
154
155 else if (!strcmp(o, "quota")) {
156 if (!v)
157 goto need_value;
158 if (!strcmp(v, "off"))
159 args->ar_quota = GFS2_QUOTA_OFF;
160 else if (!strcmp(v, "account"))
161 args->ar_quota = GFS2_QUOTA_ACCOUNT;
162 else if (!strcmp(v, "on"))
163 args->ar_quota = GFS2_QUOTA_ON;
164 else {
165 fs_info(sdp, "invalid value for quota\n");
166 error = -EINVAL;
167 break;
168 }
169 }
170
171 else if (!strcmp(o, "suiddir"))
172 args->ar_suiddir = 1;
173
174 else if (!strcmp(o, "nosuiddir"))
175 args->ar_suiddir = 0;
176
177 else if (!strcmp(o, "data")) {
178 if (!v)
179 goto need_value;
180 if (!strcmp(v, "writeback"))
181 args->ar_data = GFS2_DATA_WRITEBACK;
182 else if (!strcmp(v, "ordered"))
183 args->ar_data = GFS2_DATA_ORDERED;
184 else {
185 fs_info(sdp, "invalid value for data\n");
186 error = -EINVAL;
187 break;
188 }
189 }
190
191 else {
192 fs_info(sdp, "unknown option: %s\n", o);
193 error = -EINVAL;
194 break;
195 }
196 }
197
198 if (error)
199 fs_info(sdp, "invalid mount option(s)\n");
200
201 if (data != data_arg)
202 kfree(data);
203
204 return error;
205
206 need_value:
207 fs_info(sdp, "need value for option %s\n", o);
208 return -EINVAL;
209
210 cant_remount:
211 fs_info(sdp, "can't remount with option %s\n", o);
212 return -EINVAL;
213}
214
diff --git a/fs/gfs2/mount.h b/fs/gfs2/mount.h
new file mode 100644
index 000000000000..2eb14722144f
--- /dev/null
+++ b/fs/gfs2/mount.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __MOUNT_DOT_H__
11#define __MOUNT_DOT_H__
12
13int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
14
15#endif /* __MOUNT_DOT_H__ */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
new file mode 100644
index 000000000000..be5c86e5787e
--- /dev/null
+++ b/fs/gfs2/ondisk.c
@@ -0,0 +1,321 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15
16#include "gfs2.h"
17#include <linux/gfs2_ondisk.h>
18
/*
 * pv - print one member of a structure as " <member> = <value>"
 * @ptr: pointer to the structure
 * @member: member name; it is also used as the printed label
 * @fmt: printk conversion for the member's value
 *
 * Expands to a single expression with no trailing semicolon, so that
 * "pv(...);" is exactly one statement.
 */
#define pv(ptr, member, fmt) \
	printk(KERN_INFO " "#member" = "fmt"\n", (ptr)->member)
21
22/*
23 * gfs2_xxx_in - read in an xxx struct
24 * first arg: the cpu-order structure
25 * buf: the disk-order buffer
26 *
27 * gfs2_xxx_out - write out an xxx struct
28 * first arg: the cpu-order structure
29 * buf: the disk-order buffer
30 *
31 * gfs2_xxx_print - print out an xxx struct
32 * first arg: the cpu-order structure
33 */
34
35void gfs2_inum_in(struct gfs2_inum *no, char *buf)
36{
37 struct gfs2_inum *str = (struct gfs2_inum *)buf;
38
39 no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
40 no->no_addr = be64_to_cpu(str->no_addr);
41}
42
43void gfs2_inum_out(const struct gfs2_inum *no, char *buf)
44{
45 struct gfs2_inum *str = (struct gfs2_inum *)buf;
46
47 str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
48 str->no_addr = cpu_to_be64(no->no_addr);
49}
50
51static void gfs2_inum_print(struct gfs2_inum *no)
52{
53 printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
54 printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
55}
56
57static void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf)
58{
59 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
60
61 mh->mh_magic = be32_to_cpu(str->mh_magic);
62 mh->mh_type = be32_to_cpu(str->mh_type);
63 mh->mh_format = be32_to_cpu(str->mh_format);
64}
65
66static void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf)
67{
68 struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf;
69
70 str->mh_magic = cpu_to_be32(mh->mh_magic);
71 str->mh_type = cpu_to_be32(mh->mh_type);
72 str->mh_format = cpu_to_be32(mh->mh_format);
73}
74
75static void gfs2_meta_header_print(struct gfs2_meta_header *mh)
76{
77 pv(mh, mh_magic, "0x%.8X");
78 pv(mh, mh_type, "%u");
79 pv(mh, mh_format, "%u");
80}
81
82void gfs2_sb_in(struct gfs2_sb *sb, char *buf)
83{
84 struct gfs2_sb *str = (struct gfs2_sb *)buf;
85
86 gfs2_meta_header_in(&sb->sb_header, buf);
87
88 sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
89 sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
90 sb->sb_bsize = be32_to_cpu(str->sb_bsize);
91 sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
92
93 gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
94 gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
95
96 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
97 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
98}
99
100void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf)
101{
102 struct gfs2_rindex *str = (struct gfs2_rindex *)buf;
103
104 ri->ri_addr = be64_to_cpu(str->ri_addr);
105 ri->ri_length = be32_to_cpu(str->ri_length);
106 ri->ri_data0 = be64_to_cpu(str->ri_data0);
107 ri->ri_data = be32_to_cpu(str->ri_data);
108 ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
109
110}
111
112void gfs2_rindex_print(struct gfs2_rindex *ri)
113{
114 printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
115 pv(ri, ri_length, "%u");
116
117 printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
118 pv(ri, ri_data, "%u");
119
120 pv(ri, ri_bitbytes, "%u");
121}
122
123void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf)
124{
125 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
126
127 gfs2_meta_header_in(&rg->rg_header, buf);
128 rg->rg_flags = be32_to_cpu(str->rg_flags);
129 rg->rg_free = be32_to_cpu(str->rg_free);
130 rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
131}
132
133void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf)
134{
135 struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf;
136
137 gfs2_meta_header_out(&rg->rg_header, buf);
138 str->rg_flags = cpu_to_be32(rg->rg_flags);
139 str->rg_free = cpu_to_be32(rg->rg_free);
140 str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
141
142 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
143}
144
145void gfs2_quota_in(struct gfs2_quota *qu, char *buf)
146{
147 struct gfs2_quota *str = (struct gfs2_quota *)buf;
148
149 qu->qu_limit = be64_to_cpu(str->qu_limit);
150 qu->qu_warn = be64_to_cpu(str->qu_warn);
151 qu->qu_value = be64_to_cpu(str->qu_value);
152}
153
154void gfs2_dinode_in(struct gfs2_dinode *di, char *buf)
155{
156 struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
157
158 gfs2_meta_header_in(&di->di_header, buf);
159 gfs2_inum_in(&di->di_num, (char *)&str->di_num);
160
161 di->di_mode = be32_to_cpu(str->di_mode);
162 di->di_uid = be32_to_cpu(str->di_uid);
163 di->di_gid = be32_to_cpu(str->di_gid);
164 di->di_nlink = be32_to_cpu(str->di_nlink);
165 di->di_size = be64_to_cpu(str->di_size);
166 di->di_blocks = be64_to_cpu(str->di_blocks);
167 di->di_atime = be64_to_cpu(str->di_atime);
168 di->di_mtime = be64_to_cpu(str->di_mtime);
169 di->di_ctime = be64_to_cpu(str->di_ctime);
170 di->di_major = be32_to_cpu(str->di_major);
171 di->di_minor = be32_to_cpu(str->di_minor);
172
173 di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
174 di->di_goal_data = be64_to_cpu(str->di_goal_data);
175
176 di->di_flags = be32_to_cpu(str->di_flags);
177 di->di_payload_format = be32_to_cpu(str->di_payload_format);
178 di->di_height = be16_to_cpu(str->di_height);
179
180 di->di_depth = be16_to_cpu(str->di_depth);
181 di->di_entries = be32_to_cpu(str->di_entries);
182
183 di->di_eattr = be64_to_cpu(str->di_eattr);
184
185}
186
187void gfs2_dinode_out(struct gfs2_dinode *di, char *buf)
188{
189 struct gfs2_dinode *str = (struct gfs2_dinode *)buf;
190
191 gfs2_meta_header_out(&di->di_header, buf);
192 gfs2_inum_out(&di->di_num, (char *)&str->di_num);
193
194 str->di_mode = cpu_to_be32(di->di_mode);
195 str->di_uid = cpu_to_be32(di->di_uid);
196 str->di_gid = cpu_to_be32(di->di_gid);
197 str->di_nlink = cpu_to_be32(di->di_nlink);
198 str->di_size = cpu_to_be64(di->di_size);
199 str->di_blocks = cpu_to_be64(di->di_blocks);
200 str->di_atime = cpu_to_be64(di->di_atime);
201 str->di_mtime = cpu_to_be64(di->di_mtime);
202 str->di_ctime = cpu_to_be64(di->di_ctime);
203 str->di_major = cpu_to_be32(di->di_major);
204 str->di_minor = cpu_to_be32(di->di_minor);
205
206 str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
207 str->di_goal_data = cpu_to_be64(di->di_goal_data);
208
209 str->di_flags = cpu_to_be32(di->di_flags);
210 str->di_payload_format = cpu_to_be32(di->di_payload_format);
211 str->di_height = cpu_to_be16(di->di_height);
212
213 str->di_depth = cpu_to_be16(di->di_depth);
214 str->di_entries = cpu_to_be32(di->di_entries);
215
216 str->di_eattr = cpu_to_be64(di->di_eattr);
217
218}
219
220void gfs2_dinode_print(struct gfs2_dinode *di)
221{
222 gfs2_meta_header_print(&di->di_header);
223 gfs2_inum_print(&di->di_num);
224
225 pv(di, di_mode, "0%o");
226 pv(di, di_uid, "%u");
227 pv(di, di_gid, "%u");
228 pv(di, di_nlink, "%u");
229 printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
230 printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
231 printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime);
232 printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime);
233 printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime);
234 pv(di, di_major, "%u");
235 pv(di, di_minor, "%u");
236
237 printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
238 printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
239
240 pv(di, di_flags, "0x%.8X");
241 pv(di, di_payload_format, "%u");
242 pv(di, di_height, "%u");
243
244 pv(di, di_depth, "%u");
245 pv(di, di_entries, "%u");
246
247 printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
248}
249
250void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf)
251{
252 struct gfs2_log_header *str = (struct gfs2_log_header *)buf;
253
254 gfs2_meta_header_in(&lh->lh_header, buf);
255 lh->lh_sequence = be64_to_cpu(str->lh_sequence);
256 lh->lh_flags = be32_to_cpu(str->lh_flags);
257 lh->lh_tail = be32_to_cpu(str->lh_tail);
258 lh->lh_blkno = be32_to_cpu(str->lh_blkno);
259 lh->lh_hash = be32_to_cpu(str->lh_hash);
260}
261
262void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf)
263{
264 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
265
266 ir->ir_start = be64_to_cpu(str->ir_start);
267 ir->ir_length = be64_to_cpu(str->ir_length);
268}
269
270void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf)
271{
272 struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf;
273
274 str->ir_start = cpu_to_be64(ir->ir_start);
275 str->ir_length = cpu_to_be64(ir->ir_length);
276}
277
278void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf)
279{
280 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
281
282 sc->sc_total = be64_to_cpu(str->sc_total);
283 sc->sc_free = be64_to_cpu(str->sc_free);
284 sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
285}
286
287void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf)
288{
289 struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf;
290
291 str->sc_total = cpu_to_be64(sc->sc_total);
292 str->sc_free = cpu_to_be64(sc->sc_free);
293 str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
294}
295
296void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf)
297{
298 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
299
300 gfs2_inum_in(&ut->ut_inum, buf);
301 ut->ut_flags = be32_to_cpu(str->ut_flags);
302}
303
304void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf)
305{
306 struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf;
307
308 gfs2_inum_out(&ut->ut_inum, buf);
309 str->ut_flags = cpu_to_be32(ut->ut_flags);
310 str->__pad = 0;
311}
312
313void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf)
314{
315 struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf;
316
317 qc->qc_change = be64_to_cpu(str->qc_change);
318 qc->qc_flags = be32_to_cpu(str->qc_flags);
319 qc->qc_id = be32_to_cpu(str->qc_id);
320}
321
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
new file mode 100644
index 000000000000..16d3ebd32092
--- /dev/null
+++ b/fs/gfs2/ops_address.c
@@ -0,0 +1,670 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/pagevec.h>
17#include <linux/mpage.h>
18#include <linux/fs.h>
19#include <linux/gfs2_ondisk.h>
20
21#include "gfs2.h"
22#include "lm_interface.h"
23#include "incore.h"
24#include "bmap.h"
25#include "glock.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "ops_address.h"
30#include "page.h"
31#include "quota.h"
32#include "trans.h"
33#include "rgrp.h"
34#include "ops_file.h"
35#include "util.h"
36
37/**
38 * gfs2_get_block - Fills in a buffer head with details about a block
39 * @inode: The inode
40 * @lblock: The block number to look up
41 * @bh_result: The buffer head to return the result in
42 * @create: Non-zero if we may add block to the file
43 *
44 * Returns: errno
45 */
46
47int gfs2_get_block(struct inode *inode, sector_t lblock,
48 struct buffer_head *bh_result, int create)
49{
50 int new = create;
51 uint64_t dblock;
52 int error;
53 int boundary;
54
55 error = gfs2_block_map(inode, lblock, &new, &dblock, &boundary);
56 if (error)
57 return error;
58
59 if (!dblock)
60 return 0;
61
62 map_bh(bh_result, inode->i_sb, dblock);
63 if (new)
64 set_buffer_new(bh_result);
65 if (boundary)
66 set_buffer_boundary(bh_result);
67
68 return 0;
69}
70
71/**
72 * get_block_noalloc - Fills in a buffer head with details about a block
73 * @inode: The inode
74 * @lblock: The block number to look up
75 * @bh_result: The buffer head to return the result in
76 * @create: Non-zero if we may add block to the file
77 *
78 * Returns: errno
79 */
80
81static int get_block_noalloc(struct inode *inode, sector_t lblock,
82 struct buffer_head *bh_result, int create)
83{
84 struct gfs2_inode *ip = inode->u.generic_ip;
85 int new = 0;
86 uint64_t dblock;
87 int error;
88 int boundary;
89
90 error = gfs2_block_map(inode, lblock, &new, &dblock, &boundary);
91 if (error)
92 return error;
93
94 if (dblock)
95 map_bh(bh_result, inode->i_sb, dblock);
96 else if (gfs2_assert_withdraw(ip->i_sbd, !create))
97 error = -EIO;
98 if (boundary)
99 set_buffer_boundary(bh_result);
100
101 return error;
102}
103
104/**
105 * gfs2_writepage - Write complete page
106 * @page: Page to write
107 *
108 * Returns: errno
109 *
110 * Some of this is copied from block_write_full_page() although we still
111 * call it to do most of the work.
112 */
113
114static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
115{
116 struct inode *inode = page->mapping->host;
117 struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
118 struct gfs2_sbd *sdp = ip->i_sbd;
119 loff_t i_size = i_size_read(inode);
120 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
121 unsigned offset;
122 int error;
123 int done_trans = 0;
124
125 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
126 unlock_page(page);
127 return -EIO;
128 }
129 if (current->journal_info)
130 goto out_ignore;
131
132 /* Is the page fully outside i_size? (truncate in progress) */
133 offset = i_size & (PAGE_CACHE_SIZE-1);
134 if (page->index > end_index || (page->index == end_index && !offset)) {
135 page->mapping->a_ops->invalidatepage(page, 0);
136 unlock_page(page);
137 return 0; /* don't care */
138 }
139
140 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
141 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
142 if (error)
143 goto out_ignore;
144 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
145 done_trans = 1;
146 }
147 error = block_write_full_page(page, get_block_noalloc, wbc);
148 if (done_trans)
149 gfs2_trans_end(sdp);
150 gfs2_meta_cache_flush(ip);
151 return error;
152
153out_ignore:
154 redirty_page_for_writepage(wbc, page);
155 unlock_page(page);
156 return 0;
157}
158
159static int zero_readpage(struct page *page)
160{
161 void *kaddr;
162
163 kaddr = kmap_atomic(page, KM_USER0);
164 memset(kaddr, 0, PAGE_CACHE_SIZE);
165 kunmap_atomic(page, KM_USER0);
166
167 SetPageUptodate(page);
168
169 return 0;
170}
171
172/**
173 * stuffed_readpage - Fill in a Linux page with stuffed file data
174 * @ip: the inode
175 * @page: the page
176 *
177 * Returns: errno
178 */
179
180static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
181{
182 struct buffer_head *dibh;
183 void *kaddr;
184 int error;
185
186 /* Only the first page of a stuffed file might contain data */
187 if (unlikely(page->index))
188 return zero_readpage(page);
189
190 error = gfs2_meta_inode_buffer(ip, &dibh);
191 if (error)
192 return error;
193
194 kaddr = kmap_atomic(page, KM_USER0);
195 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
196 ip->i_di.di_size);
197 memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
198 kunmap_atomic(page, KM_USER0);
199
200 brelse(dibh);
201
202 SetPageUptodate(page);
203
204 return 0;
205}
206
207
208/**
209 * gfs2_readpage - readpage with locking
210 * @file: The file to read a page for. N.B. This may be NULL if we are
211 * reading an internal file.
212 * @page: The page to read
213 *
214 * Returns: errno
215 */
216
217static int gfs2_readpage(struct file *file, struct page *page)
218{
219 struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
220 struct gfs2_sbd *sdp = ip->i_sbd;
221 struct gfs2_holder gh;
222 int error;
223
224 if (likely(file != &gfs2_internal_file_sentinal)) {
225 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
226 error = gfs2_glock_nq_m_atime(1, &gh);
227 if (unlikely(error))
228 goto out_unlock;
229 }
230
231 if (gfs2_is_stuffed(ip)) {
232 error = stuffed_readpage(ip, page);
233 unlock_page(page);
234 } else
235 error = mpage_readpage(page, gfs2_get_block);
236
237 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
238 error = -EIO;
239
240 if (file != &gfs2_internal_file_sentinal) {
241 gfs2_glock_dq_m(1, &gh);
242 gfs2_holder_uninit(&gh);
243 }
244out:
245 return error;
246out_unlock:
247 unlock_page(page);
248 if (file != &gfs2_internal_file_sentinal)
249 gfs2_holder_uninit(&gh);
250 goto out;
251}
252
253#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
254
255/**
256 * gfs2_readpages - Read a bunch of pages at once
257 *
258 * Some notes:
259 * 1. This is only for readahead, so we can simply ignore any things
260 * which are slightly inconvenient (such as locking conflicts between
261 * the page lock and the glock) and return having done no I/O. Its
262 * obviously not something we'd want to do on too regular a basis.
263 * Any I/O we ignore at this time will be done via readpage later.
264 * 2. We have to handle stuffed files here too.
265 * 3. mpage_readpages() does most of the heavy lifting in the common case.
266 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
267 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
268 * well as read-ahead.
269 */
270static int gfs2_readpages(struct file *file, struct address_space *mapping,
271 struct list_head *pages, unsigned nr_pages)
272{
273 struct inode *inode = mapping->host;
274 struct gfs2_inode *ip = inode->u.generic_ip;
275 struct gfs2_sbd *sdp = ip->i_sbd;
276 struct gfs2_holder gh;
277 unsigned page_idx;
278 int ret;
279
280 if (likely(file != &gfs2_internal_file_sentinal)) {
281 gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
282 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
283 ret = gfs2_glock_nq_m_atime(1, &gh);
284 if (ret == GLR_TRYFAILED)
285 goto out_noerror;
286 if (unlikely(ret))
287 goto out_unlock;
288 }
289
290 if (gfs2_is_stuffed(ip)) {
291 struct pagevec lru_pvec;
292 pagevec_init(&lru_pvec, 0);
293 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
294 struct page *page = list_to_page(pages);
295 list_del(&page->lru);
296 if (!add_to_page_cache(page, mapping,
297 page->index, GFP_KERNEL)) {
298 ret = stuffed_readpage(ip, page);
299 unlock_page(page);
300 if (!pagevec_add(&lru_pvec, page))
301 __pagevec_lru_add(&lru_pvec);
302 }
303 page_cache_release(page);
304 }
305 pagevec_lru_add(&lru_pvec);
306 ret = 0;
307 } else {
308 /* What we really want to do .... */
309 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
310 }
311
312 if (likely(file != &gfs2_internal_file_sentinal)) {
313 gfs2_glock_dq_m(1, &gh);
314 gfs2_holder_uninit(&gh);
315 }
316out:
317 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
318 ret = -EIO;
319 return ret;
320out_noerror:
321 ret = 0;
322out_unlock:
323 /* unlock all pages, we can't do any I/O right now */
324 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
325 struct page *page = list_to_page(pages);
326 list_del(&page->lru);
327 unlock_page(page);
328 page_cache_release(page);
329 }
330 if (likely(file != &gfs2_internal_file_sentinal))
331 gfs2_holder_uninit(&gh);
332 goto out;
333}
334
335/**
336 * gfs2_prepare_write - Prepare to write a page to a file
337 * @file: The file to write to
338 * @page: The page which is to be prepared for writing
339 * @from: From (byte range within page)
340 * @to: To (byte range within page)
341 *
342 * Returns: errno
343 */
344
345static int gfs2_prepare_write(struct file *file, struct page *page,
346 unsigned from, unsigned to)
347{
348 struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
349 struct gfs2_sbd *sdp = ip->i_sbd;
350 unsigned int data_blocks, ind_blocks, rblocks;
351 int alloc_required;
352 int error = 0;
353 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
354 loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
355 struct gfs2_alloc *al;
356
357 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
358 error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
359 if (error)
360 goto out_uninit;
361
362 gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
363
364 error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
365 if (error)
366 goto out_unlock;
367
368
369 if (alloc_required) {
370 al = gfs2_alloc_get(ip);
371
372 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
373 if (error)
374 goto out_alloc_put;
375
376 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
377 if (error)
378 goto out_qunlock;
379
380 al->al_requested = data_blocks + ind_blocks;
381 error = gfs2_inplace_reserve(ip);
382 if (error)
383 goto out_qunlock;
384 }
385
386 rblocks = RES_DINODE + ind_blocks;
387 if (gfs2_is_jdata(ip))
388 rblocks += data_blocks ? data_blocks : 1;
389 if (ind_blocks || data_blocks)
390 rblocks += RES_STATFS + RES_QUOTA;
391
392 error = gfs2_trans_begin(sdp, rblocks, 0);
393 if (error)
394 goto out;
395
396 if (gfs2_is_stuffed(ip)) {
397 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
398 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
399 page);
400 if (error == 0)
401 goto prepare_write;
402 } else if (!PageUptodate(page))
403 error = stuffed_readpage(ip, page);
404 goto out;
405 }
406
407prepare_write:
408 error = block_prepare_write(page, from, to, gfs2_get_block);
409
410out:
411 if (error) {
412 gfs2_trans_end(sdp);
413 if (alloc_required) {
414 gfs2_inplace_release(ip);
415out_qunlock:
416 gfs2_quota_unlock(ip);
417out_alloc_put:
418 gfs2_alloc_put(ip);
419 }
420out_unlock:
421 gfs2_glock_dq_m(1, &ip->i_gh);
422out_uninit:
423 gfs2_holder_uninit(&ip->i_gh);
424 }
425
426 return error;
427}
428
429/**
430 * gfs2_commit_write - Commit write to a file
431 * @file: The file to write to
432 * @page: The page containing the data
433 * @from: From (byte range within page)
434 * @to: To (byte range within page)
435 *
436 * Returns: errno
437 */
438
439static int gfs2_commit_write(struct file *file, struct page *page,
440 unsigned from, unsigned to)
441{
442 struct inode *inode = page->mapping->host;
443 struct gfs2_inode *ip = inode->u.generic_ip;
444 struct gfs2_sbd *sdp = ip->i_sbd;
445 int error = -EOPNOTSUPP;
446 struct buffer_head *dibh;
447 struct gfs2_alloc *al = &ip->i_alloc;;
448
449 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
450 goto fail_nounlock;
451
452 error = gfs2_meta_inode_buffer(ip, &dibh);
453 if (error)
454 goto fail_endtrans;
455
456 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
457
458 if (gfs2_is_stuffed(ip)) {
459 uint64_t file_size;
460 void *kaddr;
461
462 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
463
464 kaddr = kmap_atomic(page, KM_USER0);
465 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
466 (char *)kaddr + from, to - from);
467 kunmap_atomic(page, KM_USER0);
468
469 SetPageUptodate(page);
470
471 if (inode->i_size < file_size)
472 i_size_write(inode, file_size);
473 } else {
474 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
475 gfs2_is_jdata(ip))
476 gfs2_page_add_databufs(ip, page, from, to);
477 error = generic_commit_write(file, page, from, to);
478 if (error)
479 goto fail;
480 }
481
482 if (ip->i_di.di_size < inode->i_size)
483 ip->i_di.di_size = inode->i_size;
484
485 gfs2_dinode_out(&ip->i_di, dibh->b_data);
486 brelse(dibh);
487 gfs2_trans_end(sdp);
488 if (al->al_requested) {
489 gfs2_inplace_release(ip);
490 gfs2_quota_unlock(ip);
491 gfs2_alloc_put(ip);
492 }
493 gfs2_glock_dq_m(1, &ip->i_gh);
494 gfs2_holder_uninit(&ip->i_gh);
495 return 0;
496
497fail:
498 brelse(dibh);
499fail_endtrans:
500 gfs2_trans_end(sdp);
501 if (al->al_requested) {
502 gfs2_inplace_release(ip);
503 gfs2_quota_unlock(ip);
504 gfs2_alloc_put(ip);
505 }
506 gfs2_glock_dq_m(1, &ip->i_gh);
507 gfs2_holder_uninit(&ip->i_gh);
508fail_nounlock:
509 ClearPageUptodate(page);
510 return error;
511}
512
513/**
514 * gfs2_bmap - Block map function
515 * @mapping: Address space info
516 * @lblock: The block to map
517 *
518 * Returns: The disk address for the block or 0 on hole or error
519 */
520
521static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
522{
523 struct gfs2_inode *ip = mapping->host->u.generic_ip;
524 struct gfs2_holder i_gh;
525 sector_t dblock = 0;
526 int error;
527
528 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
529 if (error)
530 return 0;
531
532 if (!gfs2_is_stuffed(ip))
533 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);
534
535 gfs2_glock_dq_uninit(&i_gh);
536
537 return dblock;
538}
539
540static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
541{
542 struct gfs2_bufdata *bd;
543
544 gfs2_log_lock(sdp);
545 bd = bh->b_private;
546 if (bd) {
547 bd->bd_bh = NULL;
548 bh->b_private = NULL;
549 gfs2_log_unlock(sdp);
550 brelse(bh);
551 } else
552 gfs2_log_unlock(sdp);
553
554 lock_buffer(bh);
555 clear_buffer_dirty(bh);
556 bh->b_bdev = NULL;
557 clear_buffer_mapped(bh);
558 clear_buffer_req(bh);
559 clear_buffer_new(bh);
560 clear_buffer_delay(bh);
561 unlock_buffer(bh);
562}
563
564static void gfs2_invalidatepage(struct page *page, unsigned long offset)
565{
566 struct gfs2_sbd *sdp = page->mapping->host->i_sb->s_fs_info;
567 struct buffer_head *head, *bh, *next;
568 unsigned int curr_off = 0;
569
570 BUG_ON(!PageLocked(page));
571 if (!page_has_buffers(page))
572 return;
573
574 bh = head = page_buffers(page);
575 do {
576 unsigned int next_off = curr_off + bh->b_size;
577 next = bh->b_this_page;
578
579 if (offset <= curr_off)
580 discard_buffer(sdp, bh);
581
582 curr_off = next_off;
583 bh = next;
584 } while (bh != head);
585
586 if (!offset)
587 try_to_release_page(page, 0);
588
589 return;
590}
591
592static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
593 loff_t offset, unsigned long nr_segs)
594{
595 struct file *file = iocb->ki_filp;
596 struct inode *inode = file->f_mapping->host;
597 struct gfs2_inode *ip = inode->u.generic_ip;
598 struct gfs2_holder gh;
599 int rv;
600
601 /*
602 * Shared lock, even though its write, since we do no allocation
603 * on this path. All we need change is atime.
604 */
605 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
606 rv = gfs2_glock_nq_m_atime(1, &gh);
607 if (rv)
608 goto out;
609
610 /*
611 * Should we return an error here? I can't see that O_DIRECT for
612 * a journaled file makes any sense. For now we'll silently fall
613 * back to buffered I/O, likewise we do the same for stuffed
614 * files since they are (a) small and (b) unaligned.
615 */
616 if (gfs2_is_jdata(ip))
617 goto out;
618
619 if (gfs2_is_stuffed(ip))
620 goto out;
621
622 rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
623 iov, offset, nr_segs, gfs2_get_block,
624 NULL, DIO_OWN_LOCKING);
625out:
626 gfs2_glock_dq_m(1, &gh);
627 gfs2_holder_uninit(&gh);
628
629 return rv;
630}
631
632/**
633 * gfs2_direct_IO
634 *
635 * This is called with a shared lock already held for the read path.
636 * Currently, no locks are held when the write path is called.
637 */
638static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
639 const struct iovec *iov, loff_t offset,
640 unsigned long nr_segs)
641{
642 struct file *file = iocb->ki_filp;
643 struct inode *inode = file->f_mapping->host;
644 struct gfs2_inode *ip = inode->u.generic_ip;
645 struct gfs2_sbd *sdp = ip->i_sbd;
646
647 if (rw == WRITE)
648 return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
649
650 if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
651 gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
652 return -EINVAL;
653
654 return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
655 offset, nr_segs, gfs2_get_block, NULL,
656 DIO_OWN_LOCKING);
657}
658
659struct address_space_operations gfs2_file_aops = {
660 .writepage = gfs2_writepage,
661 .readpage = gfs2_readpage,
662 .readpages = gfs2_readpages,
663 .sync_page = block_sync_page,
664 .prepare_write = gfs2_prepare_write,
665 .commit_write = gfs2_commit_write,
666 .bmap = gfs2_bmap,
667 .invalidatepage = gfs2_invalidatepage,
668 .direct_IO = gfs2_direct_IO,
669};
670
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
new file mode 100644
index 000000000000..b88adddaffb2
--- /dev/null
+++ b/fs/gfs2/ops_address.h
@@ -0,0 +1,17 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_ADDRESS_DOT_H__
11#define __OPS_ADDRESS_DOT_H__
12
13extern struct address_space_operations gfs2_file_aops;
14extern int gfs2_get_block(struct inode *inode, sector_t lblock,
15 struct buffer_head *bh_result, int create);
16
17#endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
new file mode 100644
index 000000000000..fef415e2068e
--- /dev/null
+++ b/fs/gfs2/ops_dentry.c
@@ -0,0 +1,123 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/smp_lock.h>
16#include <linux/gfs2_ondisk.h>
17#include <linux/crc32.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "dir.h"
23#include "glock.h"
24#include "ops_dentry.h"
25#include "util.h"
26
27/**
28 * gfs2_drevalidate - Check directory lookup consistency
29 * @dentry: the mapping to check
30 * @nd:
31 *
32 * Check to make sure the lookup necessary to arrive at this inode from its
33 * parent is still good.
34 *
35 * Returns: 1 if the dentry is ok, 0 if it isn't
36 */
37
38static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
39{
40 struct dentry *parent = dget_parent(dentry);
41 struct gfs2_sbd *sdp = parent->d_inode->i_sb->s_fs_info;
42 struct gfs2_inode *dip = parent->d_inode->u.generic_ip;
43 struct inode *inode = dentry->d_inode;
44 struct gfs2_holder d_gh;
45 struct gfs2_inode *ip;
46 struct gfs2_inum inum;
47 unsigned int type;
48 int error;
49
50 if (inode && is_bad_inode(inode))
51 goto invalid;
52
53 if (sdp->sd_args.ar_localcaching)
54 goto valid;
55
56 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
57 if (error)
58 goto fail;
59
60 error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
61 switch (error) {
62 case 0:
63 if (!inode)
64 goto invalid_gunlock;
65 break;
66 case -ENOENT:
67 if (!inode)
68 goto valid_gunlock;
69 goto invalid_gunlock;
70 default:
71 goto fail_gunlock;
72 }
73
74 ip = inode->u.generic_ip;
75
76 if (!gfs2_inum_equal(&ip->i_num, &inum))
77 goto invalid_gunlock;
78
79 if (IF2DT(ip->i_di.di_mode) != type) {
80 gfs2_consist_inode(dip);
81 goto fail_gunlock;
82 }
83
84 valid_gunlock:
85 gfs2_glock_dq_uninit(&d_gh);
86
87 valid:
88 dput(parent);
89 return 1;
90
91 invalid_gunlock:
92 gfs2_glock_dq_uninit(&d_gh);
93
94 invalid:
95 if (inode && S_ISDIR(inode->i_mode)) {
96 if (have_submounts(dentry))
97 goto valid;
98 shrink_dcache_parent(dentry);
99 }
100 d_drop(dentry);
101
102 dput(parent);
103 return 0;
104
105 fail_gunlock:
106 gfs2_glock_dq_uninit(&d_gh);
107
108 fail:
109 dput(parent);
110 return 0;
111}
112
113static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
114{
115 str->hash = gfs2_disk_hash(str->name, str->len);
116 return 0;
117}
118
119struct dentry_operations gfs2_dops = {
120 .d_revalidate = gfs2_drevalidate,
121 .d_hash = gfs2_dhash,
122};
123
diff --git a/fs/gfs2/ops_dentry.h b/fs/gfs2/ops_dentry.h
new file mode 100644
index 000000000000..1b6e75c0a4a7
--- /dev/null
+++ b/fs/gfs2/ops_dentry.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_DENTRY_DOT_H__
11#define __OPS_DENTRY_DOT_H__
12
13extern struct dentry_operations gfs2_dops;
14
15#endif /* __OPS_DENTRY_DOT_H__ */
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
new file mode 100644
index 000000000000..a376ead7d0cd
--- /dev/null
+++ b/fs/gfs2/ops_export.c
@@ -0,0 +1,297 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "dir.h"
22#include "glock.h"
23#include "glops.h"
24#include "inode.h"
25#include "ops_export.h"
26#include "rgrp.h"
27#include "util.h"
28
29static struct dentry *gfs2_decode_fh(struct super_block *sb,
30 __u32 *fh,
31 int fh_len,
32 int fh_type,
33 int (*acceptable)(void *context,
34 struct dentry *dentry),
35 void *context)
36{
37 struct gfs2_inum this, parent;
38
39 if (fh_type != fh_len)
40 return NULL;
41
42 memset(&parent, 0, sizeof(struct gfs2_inum));
43
44 switch (fh_type) {
45 case 8:
46 parent.no_formal_ino = ((uint64_t)be32_to_cpu(fh[4])) << 32;
47 parent.no_formal_ino |= be32_to_cpu(fh[5]);
48 parent.no_addr = ((uint64_t)be32_to_cpu(fh[6])) << 32;
49 parent.no_addr |= be32_to_cpu(fh[7]);
50 case 4:
51 this.no_formal_ino = ((uint64_t)be32_to_cpu(fh[0])) << 32;
52 this.no_formal_ino |= be32_to_cpu(fh[1]);
53 this.no_addr = ((uint64_t)be32_to_cpu(fh[2])) << 32;
54 this.no_addr |= be32_to_cpu(fh[3]);
55 break;
56 default:
57 return NULL;
58 }
59
60 return gfs2_export_ops.find_exported_dentry(sb, &this, &parent,
61 acceptable, context);
62}
63
64static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
65 int connectable)
66{
67 struct inode *inode = dentry->d_inode;
68 struct super_block *sb = inode->i_sb;
69 struct gfs2_inode *ip = inode->u.generic_ip;
70
71 if (*len < 4 || (connectable && *len < 8))
72 return 255;
73
74 fh[0] = ip->i_num.no_formal_ino >> 32;
75 fh[0] = cpu_to_be32(fh[0]);
76 fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
77 fh[1] = cpu_to_be32(fh[1]);
78 fh[2] = ip->i_num.no_addr >> 32;
79 fh[2] = cpu_to_be32(fh[2]);
80 fh[3] = ip->i_num.no_addr & 0xFFFFFFFF;
81 fh[3] = cpu_to_be32(fh[3]);
82 *len = 4;
83
84 if (!connectable || inode == sb->s_root->d_inode)
85 return *len;
86
87 spin_lock(&dentry->d_lock);
88 inode = dentry->d_parent->d_inode;
89 ip = inode->u.generic_ip;
90 gfs2_inode_hold(ip);
91 spin_unlock(&dentry->d_lock);
92
93 fh[4] = ip->i_num.no_formal_ino >> 32;
94 fh[4] = cpu_to_be32(fh[4]);
95 fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF;
96 fh[5] = cpu_to_be32(fh[5]);
97 fh[6] = ip->i_num.no_addr >> 32;
98 fh[6] = cpu_to_be32(fh[6]);
99 fh[7] = ip->i_num.no_addr & 0xFFFFFFFF;
100 fh[7] = cpu_to_be32(fh[7]);
101 *len = 8;
102
103 gfs2_inode_put(ip);
104
105 return *len;
106}
107
108struct get_name_filldir {
109 struct gfs2_inum inum;
110 char *name;
111};
112
113static int get_name_filldir(void *opaque, const char *name, unsigned int length,
114 uint64_t offset, struct gfs2_inum *inum,
115 unsigned int type)
116{
117 struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
118
119 if (!gfs2_inum_equal(inum, &gnfd->inum))
120 return 0;
121
122 memcpy(gnfd->name, name, length);
123 gnfd->name[length] = 0;
124
125 return 1;
126}
127
128static int gfs2_get_name(struct dentry *parent, char *name,
129 struct dentry *child)
130{
131 struct inode *dir = parent->d_inode;
132 struct inode *inode = child->d_inode;
133 struct gfs2_inode *dip, *ip;
134 struct get_name_filldir gnfd;
135 struct gfs2_holder gh;
136 uint64_t offset = 0;
137 int error;
138
139 if (!dir)
140 return -EINVAL;
141
142 if (!S_ISDIR(dir->i_mode) || !inode)
143 return -EINVAL;
144
145 dip = dir->u.generic_ip;
146 ip = inode->u.generic_ip;
147
148 *name = 0;
149 gnfd.inum = ip->i_num;
150 gnfd.name = name;
151
152 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
153 if (error)
154 return error;
155
156 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
157
158 gfs2_glock_dq_uninit(&gh);
159
160 if (!error && !*name)
161 error = -ENOENT;
162
163 return error;
164}
165
166static struct dentry *gfs2_get_parent(struct dentry *child)
167{
168 struct qstr dotdot;
169 struct inode *inode;
170 struct dentry *dentry;
171
172 gfs2_str2qstr(&dotdot, "..");
173 inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);
174
175 if (!inode)
176 return ERR_PTR(-ENOENT);
177 if (IS_ERR(inode))
178 return ERR_PTR(PTR_ERR(inode));
179
180 dentry = d_alloc_anon(inode);
181 if (!dentry) {
182 iput(inode);
183 return ERR_PTR(-ENOMEM);
184 }
185
186 return dentry;
187}
188
189static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p)
190{
191 struct gfs2_sbd *sdp = sb->s_fs_info;
192 struct gfs2_inum *inum = (struct gfs2_inum *)inum_p;
193 struct gfs2_holder i_gh, ri_gh, rgd_gh;
194 struct gfs2_rgrpd *rgd;
195 struct gfs2_inode *ip;
196 struct inode *inode;
197 struct dentry *dentry;
198 int error;
199
200 /* System files? */
201
202 inode = gfs2_iget(sb, inum);
203 if (inode) {
204 ip = inode->u.generic_ip;
205 if (ip->i_num.no_formal_ino != inum->no_formal_ino) {
206 iput(inode);
207 return ERR_PTR(-ESTALE);
208 }
209 goto out_inode;
210 }
211
212 error = gfs2_glock_nq_num(sdp,
213 inum->no_addr, &gfs2_inode_glops,
214 LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
215 &i_gh);
216 if (error)
217 return ERR_PTR(error);
218
219 error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip);
220 if (error)
221 goto fail;
222 if (ip)
223 goto out_ip;
224
225 error = gfs2_rindex_hold(sdp, &ri_gh);
226 if (error)
227 goto fail;
228
229 error = -EINVAL;
230 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
231 if (!rgd)
232 goto fail_rindex;
233
234 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
235 if (error)
236 goto fail_rindex;
237
238 error = -ESTALE;
239 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
240 goto fail_rgd;
241
242 gfs2_glock_dq_uninit(&rgd_gh);
243 gfs2_glock_dq_uninit(&ri_gh);
244
245 error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip);
246 if (error)
247 goto fail;
248
249 error = gfs2_inode_refresh(ip);
250 if (error) {
251 gfs2_inode_put(ip);
252 goto fail;
253 }
254
255 out_ip:
256 error = -EIO;
257 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) {
258 gfs2_inode_put(ip);
259 goto fail;
260 }
261
262 gfs2_glock_dq_uninit(&i_gh);
263
264 inode = gfs2_ip2v(ip);
265 gfs2_inode_put(ip);
266
267 if (!inode)
268 return ERR_PTR(-ENOMEM);
269
270 out_inode:
271 dentry = d_alloc_anon(inode);
272 if (!dentry) {
273 iput(inode);
274 return ERR_PTR(-ENOMEM);
275 }
276
277 return dentry;
278
279 fail_rgd:
280 gfs2_glock_dq_uninit(&rgd_gh);
281
282 fail_rindex:
283 gfs2_glock_dq_uninit(&ri_gh);
284
285 fail:
286 gfs2_glock_dq_uninit(&i_gh);
287 return ERR_PTR(error);
288}
289
290struct export_operations gfs2_export_ops = {
291 .decode_fh = gfs2_decode_fh,
292 .encode_fh = gfs2_encode_fh,
293 .get_name = gfs2_get_name,
294 .get_parent = gfs2_get_parent,
295 .get_dentry = gfs2_get_dentry,
296};
297
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
new file mode 100644
index 000000000000..88d58e57f518
--- /dev/null
+++ b/fs/gfs2/ops_export.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_EXPORT_DOT_H__
11#define __OPS_EXPORT_DOT_H__
12
13extern struct export_operations gfs2_export_ops;
14
15#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
new file mode 100644
index 000000000000..1e8f602c1e50
--- /dev/null
+++ b/fs/gfs2/ops_file.c
@@ -0,0 +1,1000 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/uio.h>
17#include <linux/blkdev.h>
18#include <linux/mm.h>
19#include <linux/smp_lock.h>
20#include <linux/fs.h>
21#include <linux/gfs2_ondisk.h>
22#include <linux/ext2_fs.h>
23#include <linux/crc32.h>
24#include <linux/iflags.h>
25#include <asm/uaccess.h>
26
27#include "gfs2.h"
28#include "lm_interface.h"
29#include "incore.h"
30#include "bmap.h"
31#include "dir.h"
32#include "glock.h"
33#include "glops.h"
34#include "inode.h"
35#include "lm.h"
36#include "log.h"
37#include "meta_io.h"
38#include "ops_file.h"
39#include "ops_vm.h"
40#include "quota.h"
41#include "rgrp.h"
42#include "trans.h"
43#include "util.h"
44#include "eaops.h"
45
46/* "bad" is for NFS support */
47struct filldir_bad_entry {
48 char *fbe_name;
49 unsigned int fbe_length;
50 uint64_t fbe_offset;
51 struct gfs2_inum fbe_inum;
52 unsigned int fbe_type;
53};
54
/*
 * Accumulator used by readdir_bad(): a header followed (in the same
 * allocation) by an array of entries and a flat buffer of name bytes.
 */
struct filldir_bad {
	struct gfs2_sbd *fdb_sbd;		/* superblock, used for prefetch */

	struct filldir_bad_entry *fdb_entry;	/* array of entry slots */
	unsigned int fdb_entry_num;		/* capacity of fdb_entry */
	unsigned int fdb_entry_off;		/* next free slot in fdb_entry */

	char *fdb_name;				/* buffer holding entry names */
	unsigned int fdb_name_size;		/* capacity of fdb_name in bytes */
	unsigned int fdb_name_off;		/* bytes of fdb_name already used */
};
66
67/* For regular, non-NFS */
68struct filldir_reg {
69 struct gfs2_sbd *fdr_sbd;
70 int fdr_prefetch;
71
72 filldir_t fdr_filldir;
73 void *fdr_opaque;
74};
75
76/*
77 * Most fields left uninitialised to catch anybody who tries to
78 * use them. f_flags set to prevent file_accessed() from touching
79 * any other part of this. Its use is purely as a flag so that we
80 * know (in readpage()) whether or not do to locking.
81 */
82struct file gfs2_internal_file_sentinal = {
83 .f_flags = O_NOATIME|O_RDONLY,
84};
85
86static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
87 unsigned long offset, unsigned long size)
88{
89 char *kaddr;
90 unsigned long count = desc->count;
91
92 if (size > count)
93 size = count;
94
95 kaddr = kmap(page);
96 memcpy(desc->arg.buf, kaddr + offset, size);
97 kunmap(page);
98
99 desc->count = count - size;
100 desc->written += size;
101 desc->arg.buf += size;
102 return size;
103}
104
105int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
106 char *buf, loff_t *pos, unsigned size)
107{
108 struct inode *inode = ip->i_vnode;
109 read_descriptor_t desc;
110 desc.written = 0;
111 desc.arg.buf = buf;
112 desc.count = size;
113 desc.error = 0;
114 do_generic_mapping_read(inode->i_mapping, ra_state,
115 &gfs2_internal_file_sentinal, pos, &desc,
116 gfs2_read_actor);
117 return desc.written ? desc.written : desc.error;
118}
119
120/**
121 * gfs2_llseek - seek to a location in a file
122 * @file: the file
123 * @offset: the offset
124 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
125 *
126 * SEEK_END requires the glock for the file because it references the
127 * file's size.
128 *
129 * Returns: The new offset, or errno
130 */
131
132static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
133{
134 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
135 struct gfs2_holder i_gh;
136 loff_t error;
137
138 if (origin == 2) {
139 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
140 &i_gh);
141 if (!error) {
142 error = remote_llseek(file, offset, origin);
143 gfs2_glock_dq_uninit(&i_gh);
144 }
145 } else
146 error = remote_llseek(file, offset, origin);
147
148 return error;
149}
150
151
152static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
153 loff_t offset, unsigned long nr_segs)
154{
155 struct file *file = iocb->ki_filp;
156 struct address_space *mapping = file->f_mapping;
157 ssize_t retval;
158
159 retval = filemap_write_and_wait(mapping);
160 if (retval == 0) {
161 retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
162 nr_segs);
163 }
164 return retval;
165}
166
167/**
168 * __gfs2_file_aio_read - The main GFS2 read function
169 *
170 * N.B. This is almost, but not quite the same as __generic_file_aio_read()
171 * the important subtle different being that inode->i_size isn't valid
172 * unless we are holding a lock, and we do this _only_ on the O_DIRECT
173 * path since otherwise locking is done entirely at the page cache
174 * layer.
175 */
176static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
177 const struct iovec *iov,
178 unsigned long nr_segs, loff_t *ppos)
179{
180 struct file *filp = iocb->ki_filp;
181 struct gfs2_inode *ip = filp->f_mapping->host->u.generic_ip;
182 struct gfs2_holder gh;
183 ssize_t retval;
184 unsigned long seg;
185 size_t count;
186
187 count = 0;
188 for (seg = 0; seg < nr_segs; seg++) {
189 const struct iovec *iv = &iov[seg];
190
191 /*
192 * If any segment has a negative length, or the cumulative
193 * length ever wraps negative then return -EINVAL.
194 */
195 count += iv->iov_len;
196 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
197 return -EINVAL;
198 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
199 continue;
200 if (seg == 0)
201 return -EFAULT;
202 nr_segs = seg;
203 count -= iv->iov_len; /* This segment is no good */
204 break;
205 }
206
207 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
208 if (filp->f_flags & O_DIRECT) {
209 loff_t pos = *ppos, size;
210 struct address_space *mapping;
211 struct inode *inode;
212
213 mapping = filp->f_mapping;
214 inode = mapping->host;
215 retval = 0;
216 if (!count)
217 goto out; /* skip atime */
218
219 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
220 retval = gfs2_glock_nq_m_atime(1, &gh);
221 if (retval)
222 goto out;
223 if (gfs2_is_stuffed(ip)) {
224 gfs2_glock_dq_m(1, &gh);
225 gfs2_holder_uninit(&gh);
226 goto fallback_to_normal;
227 }
228 size = i_size_read(inode);
229 if (pos < size) {
230 retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
231 if (retval > 0 && !is_sync_kiocb(iocb))
232 retval = -EIOCBQUEUED;
233 if (retval > 0)
234 *ppos = pos + retval;
235 }
236 file_accessed(filp);
237 gfs2_glock_dq_m(1, &gh);
238 gfs2_holder_uninit(&gh);
239 goto out;
240 }
241
242fallback_to_normal:
243 retval = 0;
244 if (count) {
245 for (seg = 0; seg < nr_segs; seg++) {
246 read_descriptor_t desc;
247
248 desc.written = 0;
249 desc.arg.buf = iov[seg].iov_base;
250 desc.count = iov[seg].iov_len;
251 if (desc.count == 0)
252 continue;
253 desc.error = 0;
254 do_generic_file_read(filp,ppos,&desc,file_read_actor);
255 retval += desc.written;
256 if (desc.error) {
257 retval = retval ?: desc.error;
258 break;
259 }
260 }
261 }
262out:
263 return retval;
264}
265
266/**
267 * gfs2_read - Read bytes from a file
268 * @file: The file to read from
269 * @buf: The buffer to copy into
270 * @size: The amount of data requested
271 * @offset: The current file offset
272 *
273 * Outputs: Offset - updated according to number of bytes read
274 *
275 * Returns: The number of bytes read, errno on failure
276 */
277
278static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
279 loff_t *offset)
280{
281 struct iovec local_iov = { .iov_base = buf, .iov_len = size };
282 struct kiocb kiocb;
283 ssize_t ret;
284
285 init_sync_kiocb(&kiocb, filp);
286 ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
287 if (-EIOCBQUEUED == ret)
288 ret = wait_on_sync_kiocb(&kiocb);
289 return ret;
290}
291
292static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
293 unsigned long nr_segs, loff_t *ppos)
294{
295 struct kiocb kiocb;
296 ssize_t ret;
297
298 init_sync_kiocb(&kiocb, filp);
299 ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
300 if (-EIOCBQUEUED == ret)
301 ret = wait_on_sync_kiocb(&kiocb);
302 return ret;
303}
304
305static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
306 size_t count, loff_t pos)
307{
308 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
309
310 BUG_ON(iocb->ki_pos != pos);
311 return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
312}
313
314
315/**
316 * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
317 * @opaque: opaque data used by the function
318 * @name: the name of the directory entry
319 * @length: the length of the name
320 * @offset: the entry's offset in the directory
321 * @inum: the inode number the entry points to
322 * @type: the type of inode the entry points to
323 *
324 * Returns: 0 on success, 1 if buffer full
325 */
326
327static int filldir_reg_func(void *opaque, const char *name, unsigned int length,
328 uint64_t offset, struct gfs2_inum *inum,
329 unsigned int type)
330{
331 struct filldir_reg *fdr = (struct filldir_reg *)opaque;
332 struct gfs2_sbd *sdp = fdr->fdr_sbd;
333 int error;
334
335 error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
336 inum->no_formal_ino, type);
337 if (error)
338 return 1;
339
340 if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
341 gfs2_glock_prefetch_num(sdp,
342 inum->no_addr, &gfs2_inode_glops,
343 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
344 gfs2_glock_prefetch_num(sdp,
345 inum->no_addr, &gfs2_iopen_glops,
346 LM_ST_SHARED, LM_FLAG_TRY);
347 }
348
349 return 0;
350}
351
352/**
353 * readdir_reg - Read directory entries from a directory
354 * @file: The directory to read from
355 * @dirent: Buffer for dirents
356 * @filldir: Function used to do the copying
357 *
358 * Returns: errno
359 */
360
361static int readdir_reg(struct file *file, void *dirent, filldir_t filldir)
362{
363 struct inode *dir = file->f_mapping->host;
364 struct gfs2_inode *dip = dir->u.generic_ip;
365 struct filldir_reg fdr;
366 struct gfs2_holder d_gh;
367 uint64_t offset = file->f_pos;
368 int error;
369
370 fdr.fdr_sbd = dip->i_sbd;
371 fdr.fdr_prefetch = 1;
372 fdr.fdr_filldir = filldir;
373 fdr.fdr_opaque = dirent;
374
375 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
376 error = gfs2_glock_nq_atime(&d_gh);
377 if (error) {
378 gfs2_holder_uninit(&d_gh);
379 return error;
380 }
381
382 error = gfs2_dir_read(dir, &offset, &fdr, filldir_reg_func);
383
384 gfs2_glock_dq_uninit(&d_gh);
385
386 file->f_pos = offset;
387
388 return error;
389}
390
391/**
392 * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read()
393 * @opaque: opaque data used by the function
394 * @name: the name of the directory entry
395 * @length: the length of the name
396 * @offset: the entry's offset in the directory
397 * @inum: the inode number the entry points to
398 * @type: the type of inode the entry points to
399 *
400 * For supporting NFS.
401 *
402 * Returns: 0 on success, 1 if buffer full
403 */
404
405static int filldir_bad_func(void *opaque, const char *name, unsigned int length,
406 uint64_t offset, struct gfs2_inum *inum,
407 unsigned int type)
408{
409 struct filldir_bad *fdb = (struct filldir_bad *)opaque;
410 struct gfs2_sbd *sdp = fdb->fdb_sbd;
411 struct filldir_bad_entry *fbe;
412
413 if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
414 fdb->fdb_name_off + length > fdb->fdb_name_size)
415 return 1;
416
417 fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
418 fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
419 memcpy(fbe->fbe_name, name, length);
420 fbe->fbe_length = length;
421 fbe->fbe_offset = offset;
422 fbe->fbe_inum = *inum;
423 fbe->fbe_type = type;
424
425 fdb->fdb_entry_off++;
426 fdb->fdb_name_off += length;
427
428 if (!(length == 1 && *name == '.')) {
429 gfs2_glock_prefetch_num(sdp,
430 inum->no_addr, &gfs2_inode_glops,
431 LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
432 gfs2_glock_prefetch_num(sdp,
433 inum->no_addr, &gfs2_iopen_glops,
434 LM_ST_SHARED, LM_FLAG_TRY);
435 }
436
437 return 0;
438}
439
440/**
441 * readdir_bad - Read directory entries from a directory
442 * @file: The directory to read from
443 * @dirent: Buffer for dirents
444 * @filldir: Function used to do the copying
445 *
446 * For supporting NFS.
447 *
448 * Returns: errno
449 */
450
451static int readdir_bad(struct file *file, void *dirent, filldir_t filldir)
452{
453 struct inode *dir = file->f_mapping->host;
454 struct gfs2_inode *dip = dir->u.generic_ip;
455 struct gfs2_sbd *sdp = dip->i_sbd;
456 struct filldir_reg fdr;
457 unsigned int entries, size;
458 struct filldir_bad *fdb;
459 struct gfs2_holder d_gh;
460 uint64_t offset = file->f_pos;
461 unsigned int x;
462 struct filldir_bad_entry *fbe;
463 int error;
464
465 entries = gfs2_tune_get(sdp, gt_entries_per_readdir);
466 size = sizeof(struct filldir_bad) +
467 entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE);
468
469 fdb = kzalloc(size, GFP_KERNEL);
470 if (!fdb)
471 return -ENOMEM;
472
473 fdb->fdb_sbd = sdp;
474 fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
475 fdb->fdb_entry_num = entries;
476 fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
477 entries * sizeof(struct filldir_bad_entry);
478 fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE;
479
480 gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
481 error = gfs2_glock_nq_atime(&d_gh);
482 if (error) {
483 gfs2_holder_uninit(&d_gh);
484 goto out;
485 }
486
487 error = gfs2_dir_read(dir, &offset, fdb, filldir_bad_func);
488
489 gfs2_glock_dq_uninit(&d_gh);
490
491 fdr.fdr_sbd = sdp;
492 fdr.fdr_prefetch = 0;
493 fdr.fdr_filldir = filldir;
494 fdr.fdr_opaque = dirent;
495
496 for (x = 0; x < fdb->fdb_entry_off; x++) {
497 fbe = &fdb->fdb_entry[x];
498
499 error = filldir_reg_func(&fdr,
500 fbe->fbe_name, fbe->fbe_length,
501 fbe->fbe_offset,
502 &fbe->fbe_inum, fbe->fbe_type);
503 if (error) {
504 file->f_pos = fbe->fbe_offset;
505 error = 0;
506 goto out;
507 }
508 }
509
510 file->f_pos = offset;
511
512 out:
513 kfree(fdb);
514
515 return error;
516}
517
518/**
519 * gfs2_readdir - Read directory entries from a directory
520 * @file: The directory to read from
521 * @dirent: Buffer for dirents
522 * @filldir: Function used to do the copying
523 *
524 * Returns: errno
525 */
526
527static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
528{
529 int error;
530
531 if (strcmp(current->comm, "nfsd") != 0)
532 error = readdir_reg(file, dirent, filldir);
533 else
534 error = readdir_bad(file, dirent, filldir);
535
536 return error;
537}
538
539static const u32 iflags_to_gfs2[32] = {
540 [iflag_Sync] = GFS2_DIF_SYNC,
541 [iflag_Immutable] = GFS2_DIF_IMMUTABLE,
542 [iflag_Append] = GFS2_DIF_APPENDONLY,
543 [iflag_NoAtime] = GFS2_DIF_NOATIME,
544 [iflag_Index] = GFS2_DIF_EXHASH,
545 [iflag_JournalData] = GFS2_DIF_JDATA,
546 [iflag_DirectIO] = GFS2_DIF_DIRECTIO,
547};
548
549static const u32 gfs2_to_iflags[32] = {
550 [gfs2fl_Sync] = IFLAG_SYNC,
551 [gfs2fl_Immutable] = IFLAG_IMMUTABLE,
552 [gfs2fl_AppendOnly] = IFLAG_APPEND,
553 [gfs2fl_NoAtime] = IFLAG_NOATIME,
554 [gfs2fl_ExHash] = IFLAG_INDEX,
555 [gfs2fl_Jdata] = IFLAG_JOURNAL_DATA,
556 [gfs2fl_Directio] = IFLAG_DIRECTIO,
557 [gfs2fl_InheritDirectio] = IFLAG_DIRECTIO,
558 [gfs2fl_InheritJdata] = IFLAG_JOURNAL_DATA,
559};
560
561static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
562{
563 struct inode *inode = filp->f_dentry->d_inode;
564 struct gfs2_inode *ip = inode->u.generic_ip;
565 struct gfs2_holder gh;
566 int error;
567 u32 iflags;
568
569 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
570 error = gfs2_glock_nq_m_atime(1, &gh);
571 if (error)
572 return error;
573
574 iflags = iflags_cvt(gfs2_to_iflags, ip->i_di.di_flags);
575 if (put_user(iflags, ptr))
576 error = -EFAULT;
577
578 gfs2_glock_dq_m(1, &gh);
579 gfs2_holder_uninit(&gh);
580 return error;
581}
582
/*
 * Flags that can be set by user space; do_gfs2_set_flags() rejects a
 * change to any flag outside this mask with -EINVAL.
 */
#define GFS2_FLAGS_USER_SET	(GFS2_DIF_SYNC |		\
				 GFS2_DIF_IMMUTABLE |		\
				 GFS2_DIF_APPENDONLY |		\
				 GFS2_DIF_NOATIME |		\
				 GFS2_DIF_SYSTEM |		\
				 GFS2_DIF_JDATA |		\
				 GFS2_DIF_INHERIT_JDATA |	\
				 GFS2_DIF_DIRECTIO |		\
				 GFS2_DIF_INHERIT_DIRECTIO)
593
594/**
595 * gfs2_set_flags - set flags on an inode
596 * @inode: The inode
597 * @flags: The flags to set
598 * @mask: Indicates which flags are valid
599 *
600 */
601static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
602{
603 struct inode *inode = filp->f_dentry->d_inode;
604 struct gfs2_inode *ip = inode->u.generic_ip;
605 struct gfs2_sbd *sdp = ip->i_sbd;
606 struct buffer_head *bh;
607 struct gfs2_holder gh;
608 int error;
609 u32 new_flags, flags;
610
611 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
612 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
613 if (error) {
614 gfs2_holder_uninit(&gh);
615 return error;
616 }
617
618 flags = ip->i_di.di_flags;
619 new_flags = (flags & ~mask) | (reqflags & mask);
620 if ((new_flags ^ flags) == 0)
621 goto out;
622
623 if (S_ISDIR(inode->i_mode)) {
624 if ((new_flags ^ flags) & GFS2_DIF_JDATA)
625 new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA);
626 if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO)
627 new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO);
628 }
629
630 error = -EINVAL;
631 if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
632 goto out;
633
634 error = -EPERM;
635 if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
636 goto out;
637 if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
638 goto out;
639 if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
640 !capable(CAP_LINUX_IMMUTABLE))
641 goto out;
642 if (!IS_IMMUTABLE(inode)) {
643 error = gfs2_repermission(inode, MAY_WRITE, NULL);
644 if (error)
645 goto out;
646 }
647
648 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
649 if (error)
650 goto out;
651 error = gfs2_meta_inode_buffer(ip, &bh);
652 if (error)
653 goto out_trans_end;
654 gfs2_trans_add_bh(ip->i_gl, bh, 1);
655 ip->i_di.di_flags = new_flags;
656 gfs2_dinode_out(&ip->i_di, bh->b_data);
657 brelse(bh);
658out_trans_end:
659 gfs2_trans_end(sdp);
660out:
661 gfs2_glock_dq_uninit(&gh);
662 return error;
663}
664
665static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
666{
667 u32 iflags, gfsflags;
668 if (get_user(iflags, ptr))
669 return -EFAULT;
670 gfsflags = iflags_cvt(iflags_to_gfs2, iflags);
671 return do_gfs2_set_flags(filp, gfsflags, ~0);
672}
673
674static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
675{
676 switch(cmd) {
677 case IFLAGS_GET_IOC:
678 return gfs2_get_flags(filp, (u32 __user *)arg);
679 case IFLAGS_SET_IOC:
680 return gfs2_set_flags(filp, (u32 __user *)arg);
681 }
682 return -ENOTTY;
683}
684
685
686/**
687 * gfs2_mmap -
688 * @file: The file to map
689 * @vma: The VMA which described the mapping
690 *
691 * Returns: 0 or error code
692 */
693
694static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
695{
696 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
697 struct gfs2_holder i_gh;
698 int error;
699
700 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
701 error = gfs2_glock_nq_atime(&i_gh);
702 if (error) {
703 gfs2_holder_uninit(&i_gh);
704 return error;
705 }
706
707 /* This is VM_MAYWRITE instead of VM_WRITE because a call
708 to mprotect() can turn on VM_WRITE later. */
709
710 if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
711 (VM_MAYSHARE | VM_MAYWRITE))
712 vma->vm_ops = &gfs2_vm_ops_sharewrite;
713 else
714 vma->vm_ops = &gfs2_vm_ops_private;
715
716 gfs2_glock_dq_uninit(&i_gh);
717
718 return error;
719}
720
721/**
722 * gfs2_open - open a file
723 * @inode: the inode to open
724 * @file: the struct file for this opening
725 *
726 * Returns: errno
727 */
728
729static int gfs2_open(struct inode *inode, struct file *file)
730{
731 struct gfs2_inode *ip = inode->u.generic_ip;
732 struct gfs2_holder i_gh;
733 struct gfs2_file *fp;
734 int error;
735
736 fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
737 if (!fp)
738 return -ENOMEM;
739
740 mutex_init(&fp->f_fl_mutex);
741
742 gfs2_assert_warn(ip->i_sbd, !file->private_data);
743 file->private_data = fp;
744
745 if (S_ISREG(ip->i_di.di_mode)) {
746 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
747 &i_gh);
748 if (error)
749 goto fail;
750
751 if (!(file->f_flags & O_LARGEFILE) &&
752 ip->i_di.di_size > MAX_NON_LFS) {
753 error = -EFBIG;
754 goto fail_gunlock;
755 }
756
757 /* Listen to the Direct I/O flag */
758
759 if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
760 file->f_flags |= O_DIRECT;
761
762 gfs2_glock_dq_uninit(&i_gh);
763 }
764
765 return 0;
766
767 fail_gunlock:
768 gfs2_glock_dq_uninit(&i_gh);
769
770 fail:
771 file->private_data = NULL;
772 kfree(fp);
773
774 return error;
775}
776
777/**
778 * gfs2_close - called to close a struct file
779 * @inode: the inode the struct file belongs to
780 * @file: the struct file being closed
781 *
782 * Returns: errno
783 */
784
785static int gfs2_close(struct inode *inode, struct file *file)
786{
787 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
788 struct gfs2_file *fp;
789
790 fp = file->private_data;
791 file->private_data = NULL;
792
793 if (gfs2_assert_warn(sdp, fp))
794 return -EIO;
795
796 kfree(fp);
797
798 return 0;
799}
800
801/**
802 * gfs2_fsync - sync the dirty data for a file (across the cluster)
803 * @file: the file that points to the dentry (we ignore this)
804 * @dentry: the dentry that points to the inode to sync
805 *
806 * Returns: errno
807 */
808
809static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
810{
811 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
812
813 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
814
815 return 0;
816}
817
818/**
819 * gfs2_lock - acquire/release a posix lock on a file
820 * @file: the file pointer
821 * @cmd: either modify or retrieve lock state, possibly wait
822 * @fl: type and range of lock
823 *
824 * Returns: errno
825 */
826
827static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
828{
829 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
830 struct gfs2_sbd *sdp = ip->i_sbd;
831 struct lm_lockname name =
832 { .ln_number = ip->i_num.no_addr,
833 .ln_type = LM_TYPE_PLOCK };
834
835 if (!(fl->fl_flags & FL_POSIX))
836 return -ENOLCK;
837 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
838 return -ENOLCK;
839
840 if (sdp->sd_args.ar_localflocks) {
841 if (IS_GETLK(cmd)) {
842 struct file_lock tmp;
843 int ret;
844 ret = posix_test_lock(file, fl, &tmp);
845 fl->fl_type = F_UNLCK;
846 if (ret)
847 memcpy(fl, &tmp, sizeof(struct file_lock));
848 return 0;
849 } else {
850 return posix_lock_file_wait(file, fl);
851 }
852 }
853
854 if (IS_GETLK(cmd))
855 return gfs2_lm_plock_get(sdp, &name, file, fl);
856 else if (fl->fl_type == F_UNLCK)
857 return gfs2_lm_punlock(sdp, &name, file, fl);
858 else
859 return gfs2_lm_plock(sdp, &name, file, cmd, fl);
860}
861
862/**
863 * gfs2_sendfile - Send bytes to a file or socket
864 * @in_file: The file to read from
865 * @out_file: The file to write to
866 * @count: The amount of data
867 * @offset: The beginning file offset
868 *
869 * Outputs: offset - updated according to number of bytes read
870 *
871 * Returns: The number of bytes sent, errno on failure
872 */
873
874static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
875 read_actor_t actor, void *target)
876{
877 return generic_file_sendfile(in_file, offset, count, actor, target);
878}
879
880static int do_flock(struct file *file, int cmd, struct file_lock *fl)
881{
882 struct gfs2_file *fp = file->private_data;
883 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
884 struct gfs2_inode *ip = file->f_dentry->d_inode->u.generic_ip;
885 struct gfs2_glock *gl;
886 unsigned int state;
887 int flags;
888 int error = 0;
889
890 state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
891 flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
892
893 mutex_lock(&fp->f_fl_mutex);
894
895 gl = fl_gh->gh_gl;
896 if (gl) {
897 if (fl_gh->gh_state == state)
898 goto out;
899 gfs2_glock_hold(gl);
900 flock_lock_file_wait(file,
901 &(struct file_lock){.fl_type = F_UNLCK});
902 gfs2_glock_dq_uninit(fl_gh);
903 } else {
904 error = gfs2_glock_get(ip->i_sbd,
905 ip->i_num.no_addr, &gfs2_flock_glops,
906 CREATE, &gl);
907 if (error)
908 goto out;
909 }
910
911 gfs2_holder_init(gl, state, flags, fl_gh);
912 gfs2_glock_put(gl);
913
914 error = gfs2_glock_nq(fl_gh);
915 if (error) {
916 gfs2_holder_uninit(fl_gh);
917 if (error == GLR_TRYFAILED)
918 error = -EAGAIN;
919 } else {
920 error = flock_lock_file_wait(file, fl);
921 gfs2_assert_warn(ip->i_sbd, !error);
922 }
923
924 out:
925 mutex_unlock(&fp->f_fl_mutex);
926
927 return error;
928}
929
930static void do_unflock(struct file *file, struct file_lock *fl)
931{
932 struct gfs2_file *fp = file->private_data;
933 struct gfs2_holder *fl_gh = &fp->f_fl_gh;
934
935 mutex_lock(&fp->f_fl_mutex);
936 flock_lock_file_wait(file, fl);
937 if (fl_gh->gh_gl)
938 gfs2_glock_dq_uninit(fl_gh);
939 mutex_unlock(&fp->f_fl_mutex);
940}
941
942/**
943 * gfs2_flock - acquire/release a flock lock on a file
944 * @file: the file pointer
945 * @cmd: either modify or retrieve lock state, possibly wait
946 * @fl: type and range of lock
947 *
948 * Returns: errno
949 */
950
951static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
952{
953 struct gfs2_inode *ip = file->f_mapping->host->u.generic_ip;
954 struct gfs2_sbd *sdp = ip->i_sbd;
955
956 if (!(fl->fl_flags & FL_FLOCK))
957 return -ENOLCK;
958 if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
959 return -ENOLCK;
960
961 if (sdp->sd_args.ar_localflocks)
962 return flock_lock_file_wait(file, fl);
963
964 if (fl->fl_type == F_UNLCK) {
965 do_unflock(file, fl);
966 return 0;
967 } else
968 return do_flock(file, cmd, fl);
969}
970
971struct file_operations gfs2_file_fops = {
972 .llseek = gfs2_llseek,
973 .read = gfs2_read,
974 .readv = gfs2_file_readv,
975 .aio_read = gfs2_file_aio_read,
976 .write = generic_file_write,
977 .writev = generic_file_writev,
978 .aio_write = generic_file_aio_write,
979 .unlocked_ioctl = gfs2_ioctl,
980 .mmap = gfs2_mmap,
981 .open = gfs2_open,
982 .release = gfs2_close,
983 .fsync = gfs2_fsync,
984 .lock = gfs2_lock,
985 .sendfile = gfs2_sendfile,
986 .flock = gfs2_flock,
987 .splice_read = generic_file_splice_read,
988 .splice_write = generic_file_splice_write,
989};
990
991struct file_operations gfs2_dir_fops = {
992 .readdir = gfs2_readdir,
993 .unlocked_ioctl = gfs2_ioctl,
994 .open = gfs2_open,
995 .release = gfs2_close,
996 .fsync = gfs2_fsync,
997 .lock = gfs2_lock,
998 .flock = gfs2_flock,
999};
1000
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
new file mode 100644
index 000000000000..a2edce38f5cb
--- /dev/null
+++ b/fs/gfs2/ops_file.h
@@ -0,0 +1,20 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FILE_DOT_H__
11#define __OPS_FILE_DOT_H__
12extern struct file gfs2_internal_file_sentinal;
13extern int gfs2_internal_read(struct gfs2_inode *ip,
14 struct file_ra_state *ra_state,
15 char *buf, loff_t *pos, unsigned size);
16
17extern struct file_operations gfs2_file_fops;
18extern struct file_operations gfs2_dir_fops;
19
20#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
new file mode 100644
index 000000000000..a45982045509
--- /dev/null
+++ b/fs/gfs2/ops_fstype.c
@@ -0,0 +1,901 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/vmalloc.h>
16#include <linux/blkdev.h>
17#include <linux/kthread.h>
18#include <linux/gfs2_ondisk.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "daemon.h"
24#include "glock.h"
25#include "glops.h"
26#include "inode.h"
27#include "lm.h"
28#include "mount.h"
29#include "ops_export.h"
30#include "ops_fstype.h"
31#include "ops_super.h"
32#include "recovery.h"
33#include "rgrp.h"
34#include "super.h"
35#include "unlinked.h"
36#include "sys.h"
37#include "util.h"
38
/*
 * Direction argument taken by the init_*() helpers in this file:
 * DO performs the set-up, UNDO tears a completed set-up down again.
 */
enum {
	DO = 0,
	UNDO = 1,
};

/* Dentry operations installed on the root dentry by init_sb(). */
extern struct dentry_operations gfs2_dops;
43
44static struct gfs2_sbd *init_sbd(struct super_block *sb)
45{
46 struct gfs2_sbd *sdp;
47 unsigned int x;
48
49 sdp = vmalloc(sizeof(struct gfs2_sbd));
50 if (!sdp)
51 return NULL;
52
53 memset(sdp, 0, sizeof(struct gfs2_sbd));
54
55 sb->s_fs_info = sdp;
56 sdp->sd_vfs = sb;
57
58 gfs2_tune_init(&sdp->sd_tune);
59
60 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
61 sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
62 INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
63 }
64 INIT_LIST_HEAD(&sdp->sd_reclaim_list);
65 spin_lock_init(&sdp->sd_reclaim_lock);
66 init_waitqueue_head(&sdp->sd_reclaim_wq);
67 mutex_init(&sdp->sd_invalidate_inodes_mutex);
68
69 mutex_init(&sdp->sd_inum_mutex);
70 spin_lock_init(&sdp->sd_statfs_spin);
71 mutex_init(&sdp->sd_statfs_mutex);
72
73 spin_lock_init(&sdp->sd_rindex_spin);
74 mutex_init(&sdp->sd_rindex_mutex);
75 INIT_LIST_HEAD(&sdp->sd_rindex_list);
76 INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
77 INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
78
79 INIT_LIST_HEAD(&sdp->sd_jindex_list);
80 spin_lock_init(&sdp->sd_jindex_spin);
81 mutex_init(&sdp->sd_jindex_mutex);
82
83 INIT_LIST_HEAD(&sdp->sd_unlinked_list);
84 spin_lock_init(&sdp->sd_unlinked_spin);
85 mutex_init(&sdp->sd_unlinked_mutex);
86
87 INIT_LIST_HEAD(&sdp->sd_quota_list);
88 spin_lock_init(&sdp->sd_quota_spin);
89 mutex_init(&sdp->sd_quota_mutex);
90
91 spin_lock_init(&sdp->sd_log_lock);
92
93 INIT_LIST_HEAD(&sdp->sd_log_le_gl);
94 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
95 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
96 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
97 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
98
99 mutex_init(&sdp->sd_log_reserve_mutex);
100 INIT_LIST_HEAD(&sdp->sd_ail1_list);
101 INIT_LIST_HEAD(&sdp->sd_ail2_list);
102
103 init_rwsem(&sdp->sd_log_flush_lock);
104 INIT_LIST_HEAD(&sdp->sd_log_flush_list);
105
106 INIT_LIST_HEAD(&sdp->sd_revoke_list);
107
108 mutex_init(&sdp->sd_freeze_lock);
109
110 return sdp;
111}
112
113static void init_vfs(struct super_block *sb, unsigned noatime)
114{
115 struct gfs2_sbd *sdp = sb->s_fs_info;
116
117 sb->s_magic = GFS2_MAGIC;
118 sb->s_op = &gfs2_super_ops;
119 sb->s_export_op = &gfs2_export_ops;
120 sb->s_maxbytes = MAX_LFS_FILESIZE;
121
122 if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
123 set_bit(noatime, &sdp->sd_flags);
124
125 /* Don't let the VFS update atimes. GFS2 handles this itself. */
126 sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
127}
128
129static int init_names(struct gfs2_sbd *sdp, int silent)
130{
131 struct gfs2_sb *sb = NULL;
132 char *proto, *table;
133 int error = 0;
134
135 proto = sdp->sd_args.ar_lockproto;
136 table = sdp->sd_args.ar_locktable;
137
138 /* Try to autodetect */
139
140 if (!proto[0] || !table[0]) {
141 struct buffer_head *bh;
142 bh = sb_getblk(sdp->sd_vfs,
143 GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
144 lock_buffer(bh);
145 clear_buffer_uptodate(bh);
146 clear_buffer_dirty(bh);
147 unlock_buffer(bh);
148 ll_rw_block(READ, 1, &bh);
149 wait_on_buffer(bh);
150
151 if (!buffer_uptodate(bh)) {
152 brelse(bh);
153 return -EIO;
154 }
155
156 sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL);
157 if (!sb) {
158 brelse(bh);
159 return -ENOMEM;
160 }
161 gfs2_sb_in(sb, bh->b_data);
162 brelse(bh);
163
164 error = gfs2_check_sb(sdp, sb, silent);
165 if (error)
166 goto out;
167
168 if (!proto[0])
169 proto = sb->sb_lockproto;
170 if (!table[0])
171 table = sb->sb_locktable;
172 }
173
174 if (!table[0])
175 table = sdp->sd_vfs->s_id;
176
177 snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
178 snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
179
180 out:
181 kfree(sb);
182
183 return error;
184}
185
186static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
187 int undo)
188{
189 struct task_struct *p;
190 int error = 0;
191
192 if (undo)
193 goto fail_trans;
194
195 p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
196 error = IS_ERR(p);
197 if (error) {
198 fs_err(sdp, "can't start scand thread: %d\n", error);
199 return error;
200 }
201 sdp->sd_scand_process = p;
202
203 for (sdp->sd_glockd_num = 0;
204 sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
205 sdp->sd_glockd_num++) {
206 p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
207 error = IS_ERR(p);
208 if (error) {
209 fs_err(sdp, "can't start glockd thread: %d\n", error);
210 goto fail;
211 }
212 sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
213 }
214
215 error = gfs2_glock_nq_num(sdp,
216 GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
217 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
218 mount_gh);
219 if (error) {
220 fs_err(sdp, "can't acquire mount glock: %d\n", error);
221 goto fail;
222 }
223
224 error = gfs2_glock_nq_num(sdp,
225 GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
226 LM_ST_SHARED,
227 LM_FLAG_NOEXP | GL_EXACT,
228 &sdp->sd_live_gh);
229 if (error) {
230 fs_err(sdp, "can't acquire live glock: %d\n", error);
231 goto fail_mount;
232 }
233
234 error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
235 CREATE, &sdp->sd_rename_gl);
236 if (error) {
237 fs_err(sdp, "can't create rename glock: %d\n", error);
238 goto fail_live;
239 }
240
241 error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
242 CREATE, &sdp->sd_trans_gl);
243 if (error) {
244 fs_err(sdp, "can't create transaction glock: %d\n", error);
245 goto fail_rename;
246 }
247 set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
248
249 return 0;
250
251 fail_trans:
252 gfs2_glock_put(sdp->sd_trans_gl);
253
254 fail_rename:
255 gfs2_glock_put(sdp->sd_rename_gl);
256
257 fail_live:
258 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
259
260 fail_mount:
261 gfs2_glock_dq_uninit(mount_gh);
262
263 fail:
264 while (sdp->sd_glockd_num--)
265 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
266
267 kthread_stop(sdp->sd_scand_process);
268
269 return error;
270}
271
272static struct inode *gfs2_lookup_root(struct gfs2_sbd *sdp,
273 const struct gfs2_inum *inum)
274{
275 int error;
276 struct gfs2_glock *gl;
277 struct gfs2_inode *ip;
278 struct inode *inode;
279
280 error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops,
281 CREATE, &gl);
282 if (!error) {
283 error = gfs2_inode_get(gl, inum, CREATE, &ip);
284 if (!error) {
285 gfs2_inode_min_init(ip, DT_DIR);
286 inode = gfs2_ip2v(ip);
287 gfs2_inode_put(ip);
288 gfs2_glock_put(gl);
289 return inode;
290 }
291 gfs2_glock_put(gl);
292 }
293 return ERR_PTR(error);
294}
295
296static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
297{
298 struct super_block *sb = sdp->sd_vfs;
299 struct gfs2_holder sb_gh;
300 struct gfs2_inum *inum;
301 struct inode *inode;
302 int error = 0;
303
304 if (undo) {
305 return 0;
306 }
307
308 error = gfs2_glock_nq_num(sdp,
309 GFS2_SB_LOCK, &gfs2_meta_glops,
310 LM_ST_SHARED, 0, &sb_gh);
311 if (error) {
312 fs_err(sdp, "can't acquire superblock glock: %d\n", error);
313 return error;
314 }
315
316 error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
317 if (error) {
318 fs_err(sdp, "can't read superblock: %d\n", error);
319 goto out;
320 }
321
322 /* Set up the buffer cache and SB for real */
323 if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
324 error = -EINVAL;
325 fs_err(sdp, "FS block size (%u) is too small for device "
326 "block size (%u)\n",
327 sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
328 goto out;
329 }
330 if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
331 error = -EINVAL;
332 fs_err(sdp, "FS block size (%u) is too big for machine "
333 "page size (%u)\n",
334 sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
335 goto out;
336 }
337
338 /* Get rid of buffers from the original block size */
339 sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
340 sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;
341
342 sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
343
344 /* Get the root inode */
345 inum = &sdp->sd_sb.sb_root_dir;
346 if (sb->s_type == &gfs2meta_fs_type)
347 inum = &sdp->sd_sb.sb_master_dir;
348 inode = gfs2_lookup_root(sdp, inum);
349 if (IS_ERR(inode)) {
350 error = PTR_ERR(inode);
351 fs_err(sdp, "can't read in root inode: %d\n", error);
352 goto out;
353 }
354
355 sb->s_root = d_alloc_root(inode);
356 if (!sb->s_root) {
357 fs_err(sdp, "can't get root dentry\n");
358 error = -ENOMEM;
359 iput(inode);
360 }
361 sb->s_root->d_op = &gfs2_dops;
362out:
363 gfs2_glock_dq_uninit(&sb_gh);
364 return error;
365}
366
367static int init_journal(struct gfs2_sbd *sdp, int undo)
368{
369 struct gfs2_holder ji_gh;
370 struct task_struct *p;
371 struct gfs2_inode *ip;
372 int jindex = 1;
373 int error = 0;
374
375 if (undo) {
376 jindex = 0;
377 goto fail_recoverd;
378 }
379
380 sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
381 if (IS_ERR(sdp->sd_jindex)) {
382 fs_err(sdp, "can't lookup journal index: %d\n", error);
383 return PTR_ERR(sdp->sd_jindex);
384 }
385 ip = sdp->sd_jindex->u.generic_ip;
386 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
387
388 /* Load in the journal index special file */
389
390 error = gfs2_jindex_hold(sdp, &ji_gh);
391 if (error) {
392 fs_err(sdp, "can't read journal index: %d\n", error);
393 goto fail;
394 }
395
396 error = -EINVAL;
397 if (!gfs2_jindex_size(sdp)) {
398 fs_err(sdp, "no journals!\n");
399 goto fail_jindex;
400 }
401
402 if (sdp->sd_args.ar_spectator) {
403 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
404 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
405 } else {
406 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
407 fs_err(sdp, "can't mount journal #%u\n",
408 sdp->sd_lockstruct.ls_jid);
409 fs_err(sdp, "there are only %u journals (0 - %u)\n",
410 gfs2_jindex_size(sdp),
411 gfs2_jindex_size(sdp) - 1);
412 goto fail_jindex;
413 }
414 sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
415
416 error = gfs2_glock_nq_num(sdp,
417 sdp->sd_lockstruct.ls_jid,
418 &gfs2_journal_glops,
419 LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
420 &sdp->sd_journal_gh);
421 if (error) {
422 fs_err(sdp, "can't acquire journal glock: %d\n", error);
423 goto fail_jindex;
424 }
425
426 ip = sdp->sd_jdesc->jd_inode->u.generic_ip;
427 error = gfs2_glock_nq_init(ip->i_gl,
428 LM_ST_SHARED,
429 LM_FLAG_NOEXP | GL_EXACT,
430 &sdp->sd_jinode_gh);
431 if (error) {
432 fs_err(sdp, "can't acquire journal inode glock: %d\n",
433 error);
434 goto fail_journal_gh;
435 }
436
437 error = gfs2_jdesc_check(sdp->sd_jdesc);
438 if (error) {
439 fs_err(sdp, "my journal (%u) is bad: %d\n",
440 sdp->sd_jdesc->jd_jid, error);
441 goto fail_jinode_gh;
442 }
443 sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
444 }
445
446 if (sdp->sd_lockstruct.ls_first) {
447 unsigned int x;
448 for (x = 0; x < sdp->sd_journals; x++) {
449 error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x));
450 if (error) {
451 fs_err(sdp, "error recovering journal %u: %d\n",
452 x, error);
453 goto fail_jinode_gh;
454 }
455 }
456
457 gfs2_lm_others_may_mount(sdp);
458 } else if (!sdp->sd_args.ar_spectator) {
459 error = gfs2_recover_journal(sdp->sd_jdesc);
460 if (error) {
461 fs_err(sdp, "error recovering my journal: %d\n", error);
462 goto fail_jinode_gh;
463 }
464 }
465
466 set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
467 gfs2_glock_dq_uninit(&ji_gh);
468 jindex = 0;
469
470 p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
471 error = IS_ERR(p);
472 if (error) {
473 fs_err(sdp, "can't start recoverd thread: %d\n", error);
474 goto fail_jinode_gh;
475 }
476 sdp->sd_recoverd_process = p;
477
478 return 0;
479
480 fail_recoverd:
481 kthread_stop(sdp->sd_recoverd_process);
482
483 fail_jinode_gh:
484 if (!sdp->sd_args.ar_spectator)
485 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
486
487 fail_journal_gh:
488 if (!sdp->sd_args.ar_spectator)
489 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
490
491 fail_jindex:
492 gfs2_jindex_free(sdp);
493 if (jindex)
494 gfs2_glock_dq_uninit(&ji_gh);
495
496 fail:
497 iput(sdp->sd_jindex);
498
499 return error;
500}
501
502
503static int init_inodes(struct gfs2_sbd *sdp, int undo)
504{
505 int error = 0;
506 struct gfs2_inode *ip;
507 struct inode *inode;
508
509 if (undo)
510 goto fail_qinode;
511
512 inode = gfs2_lookup_root(sdp, &sdp->sd_sb.sb_master_dir);
513 if (IS_ERR(inode)) {
514 error = PTR_ERR(inode);
515 fs_err(sdp, "can't read in master directory: %d\n", error);
516 goto fail;
517 }
518 sdp->sd_master_dir = inode;
519
520 error = init_journal(sdp, undo);
521 if (error)
522 goto fail_master;
523
524 /* Read in the master inode number inode */
525 sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
526 if (IS_ERR(sdp->sd_inum_inode)) {
527 error = PTR_ERR(sdp->sd_inum_inode);
528 fs_err(sdp, "can't read in inum inode: %d\n", error);
529 goto fail_journal;
530 }
531
532
533 /* Read in the master statfs inode */
534 sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
535 if (IS_ERR(sdp->sd_statfs_inode)) {
536 error = PTR_ERR(sdp->sd_statfs_inode);
537 fs_err(sdp, "can't read in statfs inode: %d\n", error);
538 goto fail_inum;
539 }
540
541 /* Read in the resource index inode */
542 sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
543 if (IS_ERR(sdp->sd_rindex)) {
544 error = PTR_ERR(sdp->sd_rindex);
545 fs_err(sdp, "can't get resource index inode: %d\n", error);
546 goto fail_statfs;
547 }
548 ip = sdp->sd_rindex->u.generic_ip;
549 set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
550 sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1;
551
552 /* Read in the quota inode */
553 sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
554 if (IS_ERR(sdp->sd_quota_inode)) {
555 error = PTR_ERR(sdp->sd_quota_inode);
556 fs_err(sdp, "can't get quota file inode: %d\n", error);
557 goto fail_rindex;
558 }
559 return 0;
560
561fail_qinode:
562 iput(sdp->sd_quota_inode);
563
564fail_rindex:
565 gfs2_clear_rgrpd(sdp);
566 iput(sdp->sd_rindex);
567
568fail_statfs:
569 iput(sdp->sd_statfs_inode);
570
571fail_inum:
572 iput(sdp->sd_inum_inode);
573fail_journal:
574 init_journal(sdp, UNDO);
575fail_master:
576 iput(sdp->sd_master_dir);
577fail:
578 return error;
579}
580
581static int init_per_node(struct gfs2_sbd *sdp, int undo)
582{
583 struct inode *pn = NULL;
584 char buf[30];
585 int error = 0;
586 struct gfs2_inode *ip;
587
588 if (sdp->sd_args.ar_spectator)
589 return 0;
590
591 if (undo)
592 goto fail_qc_gh;
593
594 pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
595 if (IS_ERR(pn)) {
596 error = PTR_ERR(pn);
597 fs_err(sdp, "can't find per_node directory: %d\n", error);
598 return error;
599 }
600
601 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
602 sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
603 if (IS_ERR(sdp->sd_ir_inode)) {
604 error = PTR_ERR(sdp->sd_ir_inode);
605 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
606 goto fail;
607 }
608
609 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
610 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
611 if (IS_ERR(sdp->sd_sc_inode)) {
612 error = PTR_ERR(sdp->sd_sc_inode);
613 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
614 goto fail_ir_i;
615 }
616
617 sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid);
618 sdp->sd_ut_inode = gfs2_lookup_simple(pn, buf);
619 if (IS_ERR(sdp->sd_ut_inode)) {
620 error = PTR_ERR(sdp->sd_ut_inode);
621 fs_err(sdp, "can't find local \"ut\" file: %d\n", error);
622 goto fail_sc_i;
623 }
624
625 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
626 sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
627 if (IS_ERR(sdp->sd_qc_inode)) {
628 error = PTR_ERR(sdp->sd_qc_inode);
629 fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
630 goto fail_ut_i;
631 }
632
633 iput(pn);
634 pn = NULL;
635
636 ip = sdp->sd_ir_inode->u.generic_ip;
637 error = gfs2_glock_nq_init(ip->i_gl,
638 LM_ST_EXCLUSIVE, 0,
639 &sdp->sd_ir_gh);
640 if (error) {
641 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
642 goto fail_qc_i;
643 }
644
645 ip = sdp->sd_sc_inode->u.generic_ip;
646 error = gfs2_glock_nq_init(ip->i_gl,
647 LM_ST_EXCLUSIVE, 0,
648 &sdp->sd_sc_gh);
649 if (error) {
650 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
651 goto fail_ir_gh;
652 }
653
654 ip = sdp->sd_ut_inode->u.generic_ip;
655 error = gfs2_glock_nq_init(ip->i_gl,
656 LM_ST_EXCLUSIVE, 0,
657 &sdp->sd_ut_gh);
658 if (error) {
659 fs_err(sdp, "can't lock local \"ut\" file: %d\n", error);
660 goto fail_sc_gh;
661 }
662
663 ip = sdp->sd_qc_inode->u.generic_ip;
664 error = gfs2_glock_nq_init(ip->i_gl,
665 LM_ST_EXCLUSIVE, 0,
666 &sdp->sd_qc_gh);
667 if (error) {
668 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
669 goto fail_ut_gh;
670 }
671
672 return 0;
673
674 fail_qc_gh:
675 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
676
677 fail_ut_gh:
678 gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
679
680 fail_sc_gh:
681 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
682
683 fail_ir_gh:
684 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
685
686 fail_qc_i:
687 iput(sdp->sd_qc_inode);
688
689 fail_ut_i:
690 iput(sdp->sd_ut_inode);
691
692 fail_sc_i:
693 iput(sdp->sd_sc_inode);
694
695 fail_ir_i:
696 iput(sdp->sd_ir_inode);
697
698 fail:
699 if (pn)
700 iput(pn);
701 return error;
702}
703
704static int init_threads(struct gfs2_sbd *sdp, int undo)
705{
706 struct task_struct *p;
707 int error = 0;
708
709 if (undo)
710 goto fail_inoded;
711
712 sdp->sd_log_flush_time = jiffies;
713 sdp->sd_jindex_refresh_time = jiffies;
714
715 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
716 error = IS_ERR(p);
717 if (error) {
718 fs_err(sdp, "can't start logd thread: %d\n", error);
719 return error;
720 }
721 sdp->sd_logd_process = p;
722
723 sdp->sd_statfs_sync_time = jiffies;
724 sdp->sd_quota_sync_time = jiffies;
725
726 p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
727 error = IS_ERR(p);
728 if (error) {
729 fs_err(sdp, "can't start quotad thread: %d\n", error);
730 goto fail;
731 }
732 sdp->sd_quotad_process = p;
733
734 p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded");
735 error = IS_ERR(p);
736 if (error) {
737 fs_err(sdp, "can't start inoded thread: %d\n", error);
738 goto fail_quotad;
739 }
740 sdp->sd_inoded_process = p;
741
742 return 0;
743
744 fail_inoded:
745 kthread_stop(sdp->sd_inoded_process);
746
747 fail_quotad:
748 kthread_stop(sdp->sd_quotad_process);
749
750 fail:
751 kthread_stop(sdp->sd_logd_process);
752
753 return error;
754}
755
756/**
757 * fill_super - Read in superblock
758 * @sb: The VFS superblock
759 * @data: Mount options
760 * @silent: Don't complain if it's not a GFS2 filesystem
761 *
762 * Returns: errno
763 */
764
765static int fill_super(struct super_block *sb, void *data, int silent)
766{
767 struct gfs2_sbd *sdp;
768 struct gfs2_holder mount_gh;
769 int error;
770
771 sdp = init_sbd(sb);
772 if (!sdp) {
773 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
774 return -ENOMEM;
775 }
776
777 error = gfs2_mount_args(sdp, (char *)data, 0);
778 if (error) {
779 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
780 goto fail;
781 }
782
783 init_vfs(sb, SDF_NOATIME);
784
785 /* Set up the buffer cache and fill in some fake block size values
786 to allow us to read-in the on-disk superblock. */
787 sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
788 sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
789 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
790 GFS2_BASIC_BLOCK_SHIFT;
791 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
792
793 error = init_names(sdp, silent);
794 if (error)
795 goto fail;
796
797 error = gfs2_sys_fs_add(sdp);
798 if (error)
799 goto fail;
800
801 error = gfs2_lm_mount(sdp, silent);
802 if (error)
803 goto fail_sys;
804
805 error = init_locking(sdp, &mount_gh, DO);
806 if (error)
807 goto fail_lm;
808
809 error = init_sb(sdp, silent, DO);
810 if (error)
811 goto fail_locking;
812
813 error = init_inodes(sdp, DO);
814 if (error)
815 goto fail_sb;
816
817 error = init_per_node(sdp, DO);
818 if (error)
819 goto fail_inodes;
820
821 error = gfs2_statfs_init(sdp);
822 if (error) {
823 fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
824 goto fail_per_node;
825 }
826
827 error = init_threads(sdp, DO);
828 if (error)
829 goto fail_per_node;
830
831 if (!(sb->s_flags & MS_RDONLY)) {
832 error = gfs2_make_fs_rw(sdp);
833 if (error) {
834 fs_err(sdp, "can't make FS RW: %d\n", error);
835 goto fail_threads;
836 }
837 }
838
839 gfs2_glock_dq_uninit(&mount_gh);
840
841 return 0;
842
843 fail_threads:
844 init_threads(sdp, UNDO);
845
846 fail_per_node:
847 init_per_node(sdp, UNDO);
848
849 fail_inodes:
850 init_inodes(sdp, UNDO);
851
852 fail_sb:
853 init_sb(sdp, 0, UNDO);
854
855 fail_locking:
856 init_locking(sdp, &mount_gh, UNDO);
857
858 fail_lm:
859 gfs2_gl_hash_clear(sdp, WAIT);
860 gfs2_lm_unmount(sdp);
861 while (invalidate_inodes(sb))
862 yield();
863
864 fail_sys:
865 gfs2_sys_fs_del(sdp);
866
867 fail:
868 vfree(sdp);
869 sb->s_fs_info = NULL;
870
871 return error;
872}
873
874static struct super_block *gfs2_get_sb(struct file_system_type *fs_type,
875 int flags, const char *dev_name,
876 void *data)
877{
878 return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
879}
880
/**
 * gfs2_kill_sb - .kill_sb method shared by the gfs2 and gfs2meta types
 * @sb: the VFS superblock to tear down
 *
 * Simply hands the superblock to kill_block_super().
 */
static void gfs2_kill_sb(struct super_block *sb)
{
	kill_block_super(sb);
}
885
886struct file_system_type gfs2_fs_type = {
887 .name = "gfs2",
888 .fs_flags = FS_REQUIRES_DEV,
889 .get_sb = gfs2_get_sb,
890 .kill_sb = gfs2_kill_sb,
891 .owner = THIS_MODULE,
892};
893
894struct file_system_type gfs2meta_fs_type = {
895 .name = "gfs2meta",
896 .fs_flags = FS_REQUIRES_DEV,
897 .get_sb = gfs2_get_sb,
898 .kill_sb = gfs2_kill_sb,
899 .owner = THIS_MODULE,
900};
901
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
new file mode 100644
index 000000000000..622f5760d6b2
--- /dev/null
+++ b/fs/gfs2/ops_fstype.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_FSTYPE_DOT_H__
11#define __OPS_FSTYPE_DOT_H__
12
/* Filesystem types defined in ops_fstype.c */
extern struct file_system_type gfs2_fs_type;		/* "gfs2" */
extern struct file_system_type gfs2meta_fs_type;	/* "gfs2meta" */
15
16#endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
new file mode 100644
index 000000000000..0c06f92368f2
--- /dev/null
+++ b/fs/gfs2/ops_inode.c
@@ -0,0 +1,1194 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/namei.h>
16#include <linux/utsname.h>
17#include <linux/mm.h>
18#include <linux/xattr.h>
19#include <linux/posix_acl.h>
20#include <linux/gfs2_ondisk.h>
21#include <linux/crc32.h>
22#include <asm/uaccess.h>
23
24#include "gfs2.h"
25#include "lm_interface.h"
26#include "incore.h"
27#include "acl.h"
28#include "bmap.h"
29#include "dir.h"
30#include "eaops.h"
31#include "eattr.h"
32#include "glock.h"
33#include "inode.h"
34#include "meta_io.h"
35#include "ops_dentry.h"
36#include "ops_inode.h"
37#include "page.h"
38#include "quota.h"
39#include "rgrp.h"
40#include "trans.h"
41#include "unlinked.h"
42#include "util.h"
43
44/**
45 * gfs2_create - Create a file
46 * @dir: The directory in which to create the file
47 * @dentry: The dentry of the new file
48 * @mode: The mode of the new file
49 *
50 * Returns: errno
51 */
52
53static int gfs2_create(struct inode *dir, struct dentry *dentry,
54 int mode, struct nameidata *nd)
55{
56 struct gfs2_inode *dip = dir->u.generic_ip;
57 struct gfs2_sbd *sdp = dip->i_sbd;
58 struct gfs2_holder ghs[2];
59 struct inode *inode;
60 int new = 1;
61
62 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
63
64 for (;;) {
65 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode);
66 if (!IS_ERR(inode)) {
67 gfs2_trans_end(sdp);
68 if (dip->i_alloc.al_rgd)
69 gfs2_inplace_release(dip);
70 gfs2_quota_unlock(dip);
71 gfs2_alloc_put(dip);
72 gfs2_glock_dq_uninit_m(2, ghs);
73 break;
74 } else if (PTR_ERR(inode) != -EEXIST ||
75 (nd->intent.open.flags & O_EXCL)) {
76 gfs2_holder_uninit(ghs);
77 return PTR_ERR(inode);
78 }
79
80 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
81 if (inode) {
82 if (!IS_ERR(inode)) {
83 new = 0;
84 gfs2_holder_uninit(ghs);
85 break;
86 } else {
87 gfs2_holder_uninit(ghs);
88 return PTR_ERR(inode);
89 }
90 }
91 }
92
93 d_instantiate(dentry, inode);
94 if (new)
95 mark_inode_dirty(inode);
96
97 return 0;
98}
99
100/**
101 * gfs2_lookup - Look up a filename in a directory and return its inode
102 * @dir: The directory inode
103 * @dentry: The dentry of the new inode
104 * @nd: passed from Linux VFS, ignored by us
105 *
106 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
107 *
108 * Returns: errno
109 */
110
111static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
112 struct nameidata *nd)
113{
114 struct inode *inode = NULL;
115
116 dentry->d_op = &gfs2_dops;
117
118 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd);
119 if (inode && IS_ERR(inode))
120 return ERR_PTR(PTR_ERR(inode));
121
122 if (inode)
123 return d_splice_alias(inode, dentry);
124 d_add(dentry, inode);
125
126 return NULL;
127}
128
129/**
130 * gfs2_link - Link to a file
131 * @old_dentry: The inode to link
132 * @dir: Add link to this directory
133 * @dentry: The name of the link
134 *
135 * Link the inode in "old_dentry" into the directory "dir" with the
136 * name in "dentry".
137 *
138 * Returns: errno
139 */
140
141static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
142 struct dentry *dentry)
143{
144 struct gfs2_inode *dip = dir->u.generic_ip;
145 struct gfs2_sbd *sdp = dip->i_sbd;
146 struct inode *inode = old_dentry->d_inode;
147 struct gfs2_inode *ip = inode->u.generic_ip;
148 struct gfs2_holder ghs[2];
149 int alloc_required;
150 int error;
151
152 if (S_ISDIR(ip->i_di.di_mode))
153 return -EPERM;
154
155 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
156 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
157
158 error = gfs2_glock_nq_m(2, ghs);
159 if (error)
160 goto out;
161
162 error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL);
163 if (error)
164 goto out_gunlock;
165
166 error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
167 switch (error) {
168 case -ENOENT:
169 break;
170 case 0:
171 error = -EEXIST;
172 default:
173 goto out_gunlock;
174 }
175
176 error = -EINVAL;
177 if (!dip->i_di.di_nlink)
178 goto out_gunlock;
179 error = -EFBIG;
180 if (dip->i_di.di_entries == (uint32_t)-1)
181 goto out_gunlock;
182 error = -EPERM;
183 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
184 goto out_gunlock;
185 error = -EINVAL;
186 if (!ip->i_di.di_nlink)
187 goto out_gunlock;
188 error = -EMLINK;
189 if (ip->i_di.di_nlink == (uint32_t)-1)
190 goto out_gunlock;
191
192 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
193 if (error < 0)
194 goto out_gunlock;
195 error = 0;
196
197 if (alloc_required) {
198 struct gfs2_alloc *al = gfs2_alloc_get(dip);
199
200 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
201 if (error)
202 goto out_alloc;
203
204 error = gfs2_quota_check(dip, dip->i_di.di_uid,
205 dip->i_di.di_gid);
206 if (error)
207 goto out_gunlock_q;
208
209 al->al_requested = sdp->sd_max_dirres;
210
211 error = gfs2_inplace_reserve(dip);
212 if (error)
213 goto out_gunlock_q;
214
215 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
216 al->al_rgd->rd_ri.ri_length +
217 2 * RES_DINODE + RES_STATFS +
218 RES_QUOTA, 0);
219 if (error)
220 goto out_ipres;
221 } else {
222 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
223 if (error)
224 goto out_ipres;
225 }
226
227 error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
228 IF2DT(ip->i_di.di_mode));
229 if (error)
230 goto out_end_trans;
231
232 error = gfs2_change_nlink(ip, +1);
233
234 out_end_trans:
235 gfs2_trans_end(sdp);
236
237 out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240
241 out_gunlock_q:
242 if (alloc_required)
243 gfs2_quota_unlock(dip);
244
245 out_alloc:
246 if (alloc_required)
247 gfs2_alloc_put(dip);
248
249 out_gunlock:
250 gfs2_glock_dq_m(2, ghs);
251
252 out:
253 gfs2_holder_uninit(ghs);
254 gfs2_holder_uninit(ghs + 1);
255
256 if (!error) {
257 atomic_inc(&inode->i_count);
258 d_instantiate(dentry, inode);
259 mark_inode_dirty(inode);
260 }
261
262 return error;
263}
264
265/**
266 * gfs2_unlink - Unlink a file
267 * @dir: The inode of the directory containing the file to unlink
268 * @dentry: The file itself
269 *
270 * Unlink a file. Call gfs2_unlinki()
271 *
272 * Returns: errno
273 */
274
275static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
276{
277 struct gfs2_inode *dip = dir->u.generic_ip;
278 struct gfs2_sbd *sdp = dip->i_sbd;
279 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
280 struct gfs2_unlinked *ul;
281 struct gfs2_holder ghs[2];
282 int error;
283
284 error = gfs2_unlinked_get(sdp, &ul);
285 if (error)
286 return error;
287
288 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
289 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
290
291 error = gfs2_glock_nq_m(2, ghs);
292 if (error)
293 goto out;
294
295 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
296 if (error)
297 goto out_gunlock;
298
299 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF +
300 RES_UNLINKED, 0);
301 if (error)
302 goto out_gunlock;
303
304 error = gfs2_unlinki(dip, &dentry->d_name, ip, ul);
305
306 gfs2_trans_end(sdp);
307
308 out_gunlock:
309 gfs2_glock_dq_m(2, ghs);
310
311 out:
312 gfs2_holder_uninit(ghs);
313 gfs2_holder_uninit(ghs + 1);
314
315 gfs2_unlinked_put(sdp, ul);
316
317 return error;
318}
319
320/**
321 * gfs2_symlink - Create a symlink
322 * @dir: The directory to create the symlink in
323 * @dentry: The dentry to put the symlink in
324 * @symname: The thing which the link points to
325 *
326 * Returns: errno
327 */
328
329static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
330 const char *symname)
331{
332 struct gfs2_inode *dip = dir->u.generic_ip, *ip;
333 struct gfs2_sbd *sdp = dip->i_sbd;
334 struct gfs2_holder ghs[2];
335 struct inode *inode;
336 struct buffer_head *dibh;
337 int size;
338 int error;
339
340 /* Must be stuffed with a null terminator for gfs2_follow_link() */
341 size = strlen(symname);
342 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
343 return -ENAMETOOLONG;
344
345 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
346
347 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO);
348 if (IS_ERR(inode)) {
349 gfs2_holder_uninit(ghs);
350 return PTR_ERR(inode);
351 }
352
353 ip = ghs[1].gh_gl->gl_object;
354
355 ip->i_di.di_size = size;
356
357 error = gfs2_meta_inode_buffer(ip, &dibh);
358
359 if (!gfs2_assert_withdraw(sdp, !error)) {
360 gfs2_dinode_out(&ip->i_di, dibh->b_data);
361 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
362 size);
363 brelse(dibh);
364 }
365
366 gfs2_trans_end(sdp);
367 if (dip->i_alloc.al_rgd)
368 gfs2_inplace_release(dip);
369 gfs2_quota_unlock(dip);
370 gfs2_alloc_put(dip);
371
372 gfs2_glock_dq_uninit_m(2, ghs);
373
374 d_instantiate(dentry, inode);
375 mark_inode_dirty(inode);
376
377 return 0;
378}
379
380/**
381 * gfs2_mkdir - Make a directory
382 * @dir: The parent directory of the new one
383 * @dentry: The dentry of the new directory
384 * @mode: The mode of the new directory
385 *
386 * Returns: errno
387 */
388
389static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
390{
391 struct gfs2_inode *dip = dir->u.generic_ip, *ip;
392 struct gfs2_sbd *sdp = dip->i_sbd;
393 struct gfs2_holder ghs[2];
394 struct inode *inode;
395 struct buffer_head *dibh;
396 int error;
397
398 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
399
400 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode);
401 if (IS_ERR(inode)) {
402 gfs2_holder_uninit(ghs);
403 return PTR_ERR(inode);
404 }
405
406 ip = ghs[1].gh_gl->gl_object;
407
408 ip->i_di.di_nlink = 2;
409 ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
410 ip->i_di.di_flags |= GFS2_DIF_JDATA;
411 ip->i_di.di_payload_format = GFS2_FORMAT_DE;
412 ip->i_di.di_entries = 2;
413
414 error = gfs2_meta_inode_buffer(ip, &dibh);
415
416 if (!gfs2_assert_withdraw(sdp, !error)) {
417 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
418 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
419 struct qstr str;
420
421 gfs2_str2qstr(&str, ".");
422 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
423 gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent);
424 dent->de_inum = di->di_num; /* already GFS2 endian */
425 dent->de_type = DT_DIR;
426 di->di_entries = cpu_to_be32(1);
427
428 gfs2_str2qstr(&str, "..");
429 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
430 gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
431
432 gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum);
433 dent->de_type = DT_DIR;
434
435 gfs2_dinode_out(&ip->i_di, (char *)di);
436
437 brelse(dibh);
438 }
439
440 error = gfs2_change_nlink(dip, +1);
441 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
442
443 gfs2_trans_end(sdp);
444 if (dip->i_alloc.al_rgd)
445 gfs2_inplace_release(dip);
446 gfs2_quota_unlock(dip);
447 gfs2_alloc_put(dip);
448
449 gfs2_glock_dq_uninit_m(2, ghs);
450
451 d_instantiate(dentry, inode);
452 mark_inode_dirty(inode);
453
454 return 0;
455}
456
457/**
458 * gfs2_rmdir - Remove a directory
459 * @dir: The parent directory of the directory to be removed
460 * @dentry: The dentry of the directory to remove
461 *
462 * Remove a directory. Call gfs2_rmdiri()
463 *
464 * Returns: errno
465 */
466
467static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
468{
469 struct gfs2_inode *dip = dir->u.generic_ip;
470 struct gfs2_sbd *sdp = dip->i_sbd;
471 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
472 struct gfs2_unlinked *ul;
473 struct gfs2_holder ghs[2];
474 int error;
475
476 error = gfs2_unlinked_get(sdp, &ul);
477 if (error)
478 return error;
479
480 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
481 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
482
483 error = gfs2_glock_nq_m(2, ghs);
484 if (error)
485 goto out;
486
487 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
488 if (error)
489 goto out_gunlock;
490
491 if (ip->i_di.di_entries < 2) {
492 if (gfs2_consist_inode(ip))
493 gfs2_dinode_print(&ip->i_di);
494 error = -EIO;
495 goto out_gunlock;
496 }
497 if (ip->i_di.di_entries > 2) {
498 error = -ENOTEMPTY;
499 goto out_gunlock;
500 }
501
502 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF +
503 RES_UNLINKED, 0);
504 if (error)
505 goto out_gunlock;
506
507 error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul);
508
509 gfs2_trans_end(sdp);
510
511 out_gunlock:
512 gfs2_glock_dq_m(2, ghs);
513
514 out:
515 gfs2_holder_uninit(ghs);
516 gfs2_holder_uninit(ghs + 1);
517
518 gfs2_unlinked_put(sdp, ul);
519
520 return error;
521}
522
523/**
524 * gfs2_mknod - Make a special file
525 * @dir: The directory in which the special file will reside
526 * @dentry: The dentry of the special file
527 * @mode: The mode of the special file
528 * @rdev: The device specification of the special file
529 *
530 */
531
532static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
533 dev_t dev)
534{
535 struct gfs2_inode *dip = dir->u.generic_ip, *ip;
536 struct gfs2_sbd *sdp = dip->i_sbd;
537 struct gfs2_holder ghs[2];
538 struct inode *inode;
539 struct buffer_head *dibh;
540 uint32_t major = 0, minor = 0;
541 int error;
542
543 switch (mode & S_IFMT) {
544 case S_IFBLK:
545 case S_IFCHR:
546 major = MAJOR(dev);
547 minor = MINOR(dev);
548 break;
549 case S_IFIFO:
550 case S_IFSOCK:
551 break;
552 default:
553 return -EOPNOTSUPP;
554 };
555
556 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
557
558 inode = gfs2_createi(ghs, &dentry->d_name, mode);
559 if (IS_ERR(inode)) {
560 gfs2_holder_uninit(ghs);
561 return PTR_ERR(inode);
562 }
563
564 ip = ghs[1].gh_gl->gl_object;
565
566 ip->i_di.di_major = major;
567 ip->i_di.di_minor = minor;
568
569 error = gfs2_meta_inode_buffer(ip, &dibh);
570
571 if (!gfs2_assert_withdraw(sdp, !error)) {
572 gfs2_dinode_out(&ip->i_di, dibh->b_data);
573 brelse(dibh);
574 }
575
576 gfs2_trans_end(sdp);
577 if (dip->i_alloc.al_rgd)
578 gfs2_inplace_release(dip);
579 gfs2_quota_unlock(dip);
580 gfs2_alloc_put(dip);
581
582 gfs2_glock_dq_uninit_m(2, ghs);
583
584 d_instantiate(dentry, inode);
585 mark_inode_dirty(inode);
586
587 return 0;
588}
589
590/**
591 * gfs2_rename - Rename a file
592 * @odir: Parent directory of old file name
593 * @odentry: The old dentry of the file
594 * @ndir: Parent directory of new file name
595 * @ndentry: The new dentry of the file
596 *
597 * Returns: errno
598 */
599
600static int gfs2_rename(struct inode *odir, struct dentry *odentry,
601 struct inode *ndir, struct dentry *ndentry)
602{
603 struct gfs2_inode *odip = odir->u.generic_ip;
604 struct gfs2_inode *ndip = ndir->u.generic_ip;
605 struct gfs2_inode *ip = odentry->d_inode->u.generic_ip;
606 struct gfs2_inode *nip = NULL;
607 struct gfs2_sbd *sdp = odip->i_sbd;
608 struct gfs2_unlinked *ul;
609 struct gfs2_holder ghs[4], r_gh;
610 unsigned int num_gh;
611 int dir_rename = 0;
612 int alloc_required;
613 unsigned int x;
614 int error;
615
616 if (ndentry->d_inode) {
617 nip = ndentry->d_inode->u.generic_ip;
618 if (ip == nip)
619 return 0;
620 }
621
622 error = gfs2_unlinked_get(sdp, &ul);
623 if (error)
624 return error;
625
626 /* Make sure we aren't trying to move a dirctory into it's subdir */
627
628 if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) {
629 dir_rename = 1;
630
631 error = gfs2_glock_nq_init(sdp->sd_rename_gl,
632 LM_ST_EXCLUSIVE, 0,
633 &r_gh);
634 if (error)
635 goto out;
636
637 error = gfs2_ok_to_move(ip, ndip);
638 if (error)
639 goto out_gunlock_r;
640 }
641
642 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
643 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
644 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
645 num_gh = 3;
646
647 if (nip)
648 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
649
650 error = gfs2_glock_nq_m(num_gh, ghs);
651 if (error)
652 goto out_uninit;
653
654 /* Check out the old directory */
655
656 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
657 if (error)
658 goto out_gunlock;
659
660 /* Check out the new directory */
661
662 if (nip) {
663 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
664 if (error)
665 goto out_gunlock;
666
667 if (S_ISDIR(nip->i_di.di_mode)) {
668 if (nip->i_di.di_entries < 2) {
669 if (gfs2_consist_inode(nip))
670 gfs2_dinode_print(&nip->i_di);
671 error = -EIO;
672 goto out_gunlock;
673 }
674 if (nip->i_di.di_entries > 2) {
675 error = -ENOTEMPTY;
676 goto out_gunlock;
677 }
678 }
679 } else {
680 error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL);
681 if (error)
682 goto out_gunlock;
683
684 error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
685 switch (error) {
686 case -ENOENT:
687 error = 0;
688 break;
689 case 0:
690 error = -EEXIST;
691 default:
692 goto out_gunlock;
693 };
694
695 if (odip != ndip) {
696 if (!ndip->i_di.di_nlink) {
697 error = -EINVAL;
698 goto out_gunlock;
699 }
700 if (ndip->i_di.di_entries == (uint32_t)-1) {
701 error = -EFBIG;
702 goto out_gunlock;
703 }
704 if (S_ISDIR(ip->i_di.di_mode) &&
705 ndip->i_di.di_nlink == (uint32_t)-1) {
706 error = -EMLINK;
707 goto out_gunlock;
708 }
709 }
710 }
711
712 /* Check out the dir to be renamed */
713
714 if (dir_rename) {
715 error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL);
716 if (error)
717 goto out_gunlock;
718 }
719
720 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
721 if (error < 0)
722 goto out_gunlock;
723 error = 0;
724
725 if (alloc_required) {
726 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
727
728 error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
729 if (error)
730 goto out_alloc;
731
732 error = gfs2_quota_check(ndip, ndip->i_di.di_uid,
733 ndip->i_di.di_gid);
734 if (error)
735 goto out_gunlock_q;
736
737 al->al_requested = sdp->sd_max_dirres;
738
739 error = gfs2_inplace_reserve(ndip);
740 if (error)
741 goto out_gunlock_q;
742
743 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
744 al->al_rgd->rd_ri.ri_length +
745 4 * RES_DINODE + 4 * RES_LEAF +
746 RES_UNLINKED + RES_STATFS +
747 RES_QUOTA, 0);
748 if (error)
749 goto out_ipreserv;
750 } else {
751 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
752 5 * RES_LEAF +
753 RES_UNLINKED, 0);
754 if (error)
755 goto out_gunlock;
756 }
757
758 /* Remove the target file, if it exists */
759
760 if (nip) {
761 if (S_ISDIR(nip->i_di.di_mode))
762 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul);
763 else
764 error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul);
765 if (error)
766 goto out_end_trans;
767 }
768
769 if (dir_rename) {
770 struct qstr name;
771 gfs2_str2qstr(&name, "..");
772
773 error = gfs2_change_nlink(ndip, +1);
774 if (error)
775 goto out_end_trans;
776 error = gfs2_change_nlink(odip, -1);
777 if (error)
778 goto out_end_trans;
779
780 error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
781 if (error)
782 goto out_end_trans;
783 } else {
784 struct buffer_head *dibh;
785 error = gfs2_meta_inode_buffer(ip, &dibh);
786 if (error)
787 goto out_end_trans;
788 ip->i_di.di_ctime = get_seconds();
789 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
790 gfs2_dinode_out(&ip->i_di, dibh->b_data);
791 brelse(dibh);
792 }
793
794 error = gfs2_dir_del(odip, &odentry->d_name);
795 if (error)
796 goto out_end_trans;
797
798 error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
799 IF2DT(ip->i_di.di_mode));
800 if (error)
801 goto out_end_trans;
802
803 out_end_trans:
804 gfs2_trans_end(sdp);
805
806 out_ipreserv:
807 if (alloc_required)
808 gfs2_inplace_release(ndip);
809
810 out_gunlock_q:
811 if (alloc_required)
812 gfs2_quota_unlock(ndip);
813
814 out_alloc:
815 if (alloc_required)
816 gfs2_alloc_put(ndip);
817
818 out_gunlock:
819 gfs2_glock_dq_m(num_gh, ghs);
820
821 out_uninit:
822 for (x = 0; x < num_gh; x++)
823 gfs2_holder_uninit(ghs + x);
824
825 out_gunlock_r:
826 if (dir_rename)
827 gfs2_glock_dq_uninit(&r_gh);
828
829 out:
830 gfs2_unlinked_put(sdp, ul);
831
832 return error;
833}
834
835/**
836 * gfs2_readlink - Read the value of a symlink
837 * @dentry: the symlink
838 * @buf: the buffer to read the symlink data into
839 * @size: the size of the buffer
840 *
841 * Returns: errno
842 */
843
844static int gfs2_readlink(struct dentry *dentry, char __user *user_buf,
845 int user_size)
846{
847 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
848 char array[GFS2_FAST_NAME_SIZE], *buf = array;
849 unsigned int len = GFS2_FAST_NAME_SIZE;
850 int error;
851
852 error = gfs2_readlinki(ip, &buf, &len);
853 if (error)
854 return error;
855
856 if (user_size > len - 1)
857 user_size = len - 1;
858
859 if (copy_to_user(user_buf, buf, user_size))
860 error = -EFAULT;
861 else
862 error = user_size;
863
864 if (buf != array)
865 kfree(buf);
866
867 return error;
868}
869
870/**
871 * gfs2_follow_link - Follow a symbolic link
872 * @dentry: The dentry of the link
873 * @nd: Data that we pass to vfs_follow_link()
874 *
875 * This can handle symlinks of any size. It is optimised for symlinks
876 * under GFS2_FAST_NAME_SIZE.
877 *
878 * Returns: 0 on success or error code
879 */
880
881static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
882{
883 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
884 char array[GFS2_FAST_NAME_SIZE], *buf = array;
885 unsigned int len = GFS2_FAST_NAME_SIZE;
886 int error;
887
888 error = gfs2_readlinki(ip, &buf, &len);
889 if (!error) {
890 error = vfs_follow_link(nd, buf);
891 if (buf != array)
892 kfree(buf);
893 }
894
895 return ERR_PTR(error);
896}
897
898/**
899 * gfs2_permission -
900 * @inode:
901 * @mask:
902 * @nd: passed from Linux VFS, ignored by us
903 *
904 * Returns: errno
905 */
906
907static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
908{
909 struct gfs2_inode *ip = inode->u.generic_ip;
910 struct gfs2_holder i_gh;
911 int error;
912
913 if (ip->i_vn == ip->i_gl->gl_vn)
914 return generic_permission(inode, mask, gfs2_check_acl);
915
916 error = gfs2_glock_nq_init(ip->i_gl,
917 LM_ST_SHARED, LM_FLAG_ANY,
918 &i_gh);
919 if (!error) {
920 error = generic_permission(inode, mask, gfs2_check_acl_locked);
921 gfs2_glock_dq_uninit(&i_gh);
922 }
923
924 return error;
925}
926
927static int setattr_size(struct inode *inode, struct iattr *attr)
928{
929 struct gfs2_inode *ip = inode->u.generic_ip;
930 int error;
931
932 if (attr->ia_size != ip->i_di.di_size) {
933 error = vmtruncate(inode, attr->ia_size);
934 if (error)
935 return error;
936 }
937
938 error = gfs2_truncatei(ip, attr->ia_size);
939 if (error)
940 return error;
941
942 return error;
943}
944
945static int setattr_chown(struct inode *inode, struct iattr *attr)
946{
947 struct gfs2_inode *ip = inode->u.generic_ip;
948 struct gfs2_sbd *sdp = ip->i_sbd;
949 struct buffer_head *dibh;
950 uint32_t ouid, ogid, nuid, ngid;
951 int error;
952
953 ouid = ip->i_di.di_uid;
954 ogid = ip->i_di.di_gid;
955 nuid = attr->ia_uid;
956 ngid = attr->ia_gid;
957
958 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
959 ouid = nuid = NO_QUOTA_CHANGE;
960 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
961 ogid = ngid = NO_QUOTA_CHANGE;
962
963 gfs2_alloc_get(ip);
964
965 error = gfs2_quota_lock(ip, nuid, ngid);
966 if (error)
967 goto out_alloc;
968
969 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
970 error = gfs2_quota_check(ip, nuid, ngid);
971 if (error)
972 goto out_gunlock_q;
973 }
974
975 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
976 if (error)
977 goto out_gunlock_q;
978
979 error = gfs2_meta_inode_buffer(ip, &dibh);
980 if (error)
981 goto out_end_trans;
982
983 error = inode_setattr(inode, attr);
984 gfs2_assert_warn(sdp, !error);
985 gfs2_inode_attr_out(ip);
986
987 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
988 gfs2_dinode_out(&ip->i_di, dibh->b_data);
989 brelse(dibh);
990
991 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
992 gfs2_quota_change(ip, -ip->i_di.di_blocks,
993 ouid, ogid);
994 gfs2_quota_change(ip, ip->i_di.di_blocks,
995 nuid, ngid);
996 }
997
998 out_end_trans:
999 gfs2_trans_end(sdp);
1000
1001 out_gunlock_q:
1002 gfs2_quota_unlock(ip);
1003
1004 out_alloc:
1005 gfs2_alloc_put(ip);
1006
1007 return error;
1008}
1009
1010/**
1011 * gfs2_setattr - Change attributes on an inode
1012 * @dentry: The dentry which is changing
1013 * @attr: The structure describing the change
1014 *
1015 * The VFS layer wants to change one or more of an inodes attributes. Write
1016 * that change out to disk.
1017 *
1018 * Returns: errno
1019 */
1020
1021static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1022{
1023 struct inode *inode = dentry->d_inode;
1024 struct gfs2_inode *ip = inode->u.generic_ip;
1025 struct gfs2_holder i_gh;
1026 int error;
1027
1028 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1029 if (error)
1030 return error;
1031
1032 error = -EPERM;
1033 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1034 goto out;
1035
1036 error = inode_change_ok(inode, attr);
1037 if (error)
1038 goto out;
1039
1040 if (attr->ia_valid & ATTR_SIZE)
1041 error = setattr_size(inode, attr);
1042 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1043 error = setattr_chown(inode, attr);
1044 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1045 error = gfs2_acl_chmod(ip, attr);
1046 else
1047 error = gfs2_setattr_simple(ip, attr);
1048
1049 out:
1050 gfs2_glock_dq_uninit(&i_gh);
1051
1052 if (!error)
1053 mark_inode_dirty(inode);
1054
1055 return error;
1056}
1057
1058/**
1059 * gfs2_getattr - Read out an inode's attributes
1060 * @mnt: ?
1061 * @dentry: The dentry to stat
1062 * @stat: The inode's stats
1063 *
1064 * Returns: errno
1065 */
1066
1067static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1068 struct kstat *stat)
1069{
1070 struct inode *inode = dentry->d_inode;
1071 struct gfs2_inode *ip = inode->u.generic_ip;
1072 struct gfs2_holder gh;
1073 int error;
1074
1075 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1076 if (!error) {
1077 generic_fillattr(inode, stat);
1078 gfs2_glock_dq_uninit(&gh);
1079 }
1080
1081 return error;
1082}
1083
1084static int gfs2_setxattr(struct dentry *dentry, const char *name,
1085 const void *data, size_t size, int flags)
1086{
1087 struct gfs2_inode *ip = dentry->d_inode->u.generic_ip;
1088 struct gfs2_ea_request er;
1089
1090 memset(&er, 0, sizeof(struct gfs2_ea_request));
1091 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1092 if (er.er_type == GFS2_EATYPE_UNUSED)
1093 return -EOPNOTSUPP;
1094 er.er_data = (char *)data;
1095 er.er_name_len = strlen(er.er_name);
1096 er.er_data_len = size;
1097 er.er_flags = flags;
1098
1099 gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE));
1100
1101 return gfs2_ea_set(ip, &er);
1102}
1103
1104static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1105 void *data, size_t size)
1106{
1107 struct gfs2_ea_request er;
1108
1109 memset(&er, 0, sizeof(struct gfs2_ea_request));
1110 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1111 if (er.er_type == GFS2_EATYPE_UNUSED)
1112 return -EOPNOTSUPP;
1113 er.er_data = data;
1114 er.er_name_len = strlen(er.er_name);
1115 er.er_data_len = size;
1116
1117 return gfs2_ea_get(dentry->d_inode->u.generic_ip, &er);
1118}
1119
1120static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1121{
1122 struct gfs2_ea_request er;
1123
1124 memset(&er, 0, sizeof(struct gfs2_ea_request));
1125 er.er_data = (size) ? buffer : NULL;
1126 er.er_data_len = size;
1127
1128 return gfs2_ea_list(dentry->d_inode->u.generic_ip, &er);
1129}
1130
1131static int gfs2_removexattr(struct dentry *dentry, const char *name)
1132{
1133 struct gfs2_ea_request er;
1134
1135 memset(&er, 0, sizeof(struct gfs2_ea_request));
1136 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1137 if (er.er_type == GFS2_EATYPE_UNUSED)
1138 return -EOPNOTSUPP;
1139 er.er_name_len = strlen(er.er_name);
1140
1141 return gfs2_ea_remove(dentry->d_inode->u.generic_ip, &er);
1142}
1143
1144struct inode_operations gfs2_file_iops = {
1145 .permission = gfs2_permission,
1146 .setattr = gfs2_setattr,
1147 .getattr = gfs2_getattr,
1148 .setxattr = gfs2_setxattr,
1149 .getxattr = gfs2_getxattr,
1150 .listxattr = gfs2_listxattr,
1151 .removexattr = gfs2_removexattr,
1152};
1153
1154struct inode_operations gfs2_dev_iops = {
1155 .permission = gfs2_permission,
1156 .setattr = gfs2_setattr,
1157 .getattr = gfs2_getattr,
1158 .setxattr = gfs2_setxattr,
1159 .getxattr = gfs2_getxattr,
1160 .listxattr = gfs2_listxattr,
1161 .removexattr = gfs2_removexattr,
1162};
1163
1164struct inode_operations gfs2_dir_iops = {
1165 .create = gfs2_create,
1166 .lookup = gfs2_lookup,
1167 .link = gfs2_link,
1168 .unlink = gfs2_unlink,
1169 .symlink = gfs2_symlink,
1170 .mkdir = gfs2_mkdir,
1171 .rmdir = gfs2_rmdir,
1172 .mknod = gfs2_mknod,
1173 .rename = gfs2_rename,
1174 .permission = gfs2_permission,
1175 .setattr = gfs2_setattr,
1176 .getattr = gfs2_getattr,
1177 .setxattr = gfs2_setxattr,
1178 .getxattr = gfs2_getxattr,
1179 .listxattr = gfs2_listxattr,
1180 .removexattr = gfs2_removexattr,
1181};
1182
1183struct inode_operations gfs2_symlink_iops = {
1184 .readlink = gfs2_readlink,
1185 .follow_link = gfs2_follow_link,
1186 .permission = gfs2_permission,
1187 .setattr = gfs2_setattr,
1188 .getattr = gfs2_getattr,
1189 .setxattr = gfs2_setxattr,
1190 .getxattr = gfs2_getxattr,
1191 .listxattr = gfs2_listxattr,
1192 .removexattr = gfs2_removexattr,
1193};
1194
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
new file mode 100644
index 000000000000..930aaae91377
--- /dev/null
+++ b/fs/gfs2/ops_inode.h
@@ -0,0 +1,18 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_INODE_DOT_H__
11#define __OPS_INODE_DOT_H__
12
13extern struct inode_operations gfs2_file_iops;
14extern struct inode_operations gfs2_dir_iops;
15extern struct inode_operations gfs2_symlink_iops;
16extern struct inode_operations gfs2_dev_iops;
17
18#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
new file mode 100644
index 000000000000..1c17acc946f9
--- /dev/null
+++ b/fs/gfs2/ops_super.c
@@ -0,0 +1,399 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/statfs.h>
16#include <linux/vmalloc.h>
17#include <linux/seq_file.h>
18#include <linux/mount.h>
19#include <linux/kthread.h>
20#include <linux/delay.h>
21#include <linux/gfs2_ondisk.h>
22
23#include "gfs2.h"
24#include "lm_interface.h"
25#include "incore.h"
26#include "glock.h"
27#include "inode.h"
28#include "lm.h"
29#include "log.h"
30#include "mount.h"
31#include "ops_super.h"
32#include "page.h"
33#include "quota.h"
34#include "recovery.h"
35#include "rgrp.h"
36#include "super.h"
37#include "sys.h"
38#include "util.h"
39
40/**
41 * gfs2_write_inode - Make sure the inode is stable on the disk
42 * @inode: The inode
43 * @sync: synchronous write flag
44 *
45 * Returns: errno
46 */
47
48static int gfs2_write_inode(struct inode *inode, int sync)
49{
50 struct gfs2_inode *ip = inode->u.generic_ip;
51
52 if (current->flags & PF_MEMALLOC)
53 return 0;
54 if (ip && sync)
55 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
56
57 return 0;
58}
59
60/**
61 * gfs2_put_super - Unmount the filesystem
62 * @sb: The VFS superblock
63 *
64 */
65
66static void gfs2_put_super(struct super_block *sb)
67{
68 struct gfs2_sbd *sdp = sb->s_fs_info;
69 int error;
70
71 if (!sdp)
72 return;
73
74 /* Unfreeze the filesystem, if we need to */
75
76 mutex_lock(&sdp->sd_freeze_lock);
77 if (sdp->sd_freeze_count)
78 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
79 mutex_unlock(&sdp->sd_freeze_lock);
80
81 kthread_stop(sdp->sd_inoded_process);
82 kthread_stop(sdp->sd_quotad_process);
83 kthread_stop(sdp->sd_logd_process);
84 kthread_stop(sdp->sd_recoverd_process);
85 while (sdp->sd_glockd_num--)
86 kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
87 kthread_stop(sdp->sd_scand_process);
88
89 if (!(sb->s_flags & MS_RDONLY)) {
90 error = gfs2_make_fs_ro(sdp);
91 if (error)
92 gfs2_io_error(sdp);
93 }
94 /* At this point, we're through modifying the disk */
95
96 /* Release stuff */
97
98 iput(sdp->sd_master_dir);
99 iput(sdp->sd_jindex);
100 iput(sdp->sd_inum_inode);
101 iput(sdp->sd_statfs_inode);
102 iput(sdp->sd_rindex);
103 iput(sdp->sd_quota_inode);
104
105 gfs2_glock_put(sdp->sd_rename_gl);
106 gfs2_glock_put(sdp->sd_trans_gl);
107
108 if (!sdp->sd_args.ar_spectator) {
109 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
110 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
111 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
112 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
113 gfs2_glock_dq_uninit(&sdp->sd_ut_gh);
114 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
115 iput(sdp->sd_ir_inode);
116 iput(sdp->sd_sc_inode);
117 iput(sdp->sd_ut_inode);
118 iput(sdp->sd_qc_inode);
119 }
120
121 gfs2_glock_dq_uninit(&sdp->sd_live_gh);
122 gfs2_clear_rgrpd(sdp);
123 gfs2_jindex_free(sdp);
124 /* Take apart glock structures and buffer lists */
125 gfs2_gl_hash_clear(sdp, WAIT);
126 /* Unmount the locking protocol */
127 gfs2_lm_unmount(sdp);
128
129 /* At this point, we're through participating in the lockspace */
130 gfs2_sys_fs_del(sdp);
131 vfree(sdp);
132 sb->s_fs_info = NULL;
133}
134
135/**
136 * gfs2_write_super - disk commit all incore transactions
137 * @sb: the filesystem
138 *
139 * This function is called every time sync(2) is called.
140 * After this exits, all dirty buffers are synced.
141 */
142
143static void gfs2_write_super(struct super_block *sb)
144{
145 struct gfs2_sbd *sdp = sb->s_fs_info;
146 gfs2_log_flush(sdp, NULL);
147}
148
149/**
150 * gfs2_write_super_lockfs - prevent further writes to the filesystem
151 * @sb: the VFS structure for the filesystem
152 *
153 */
154
155static void gfs2_write_super_lockfs(struct super_block *sb)
156{
157 struct gfs2_sbd *sdp = sb->s_fs_info;
158 int error;
159
160 for (;;) {
161 error = gfs2_freeze_fs(sdp);
162 if (!error)
163 break;
164
165 switch (error) {
166 case -EBUSY:
167 fs_err(sdp, "waiting for recovery before freeze\n");
168 break;
169
170 default:
171 fs_err(sdp, "error freezing FS: %d\n", error);
172 break;
173 }
174
175 fs_err(sdp, "retrying...\n");
176 msleep(1000);
177 }
178}
179
180/**
181 * gfs2_unlockfs - reallow writes to the filesystem
182 * @sb: the VFS structure for the filesystem
183 *
184 */
185
186static void gfs2_unlockfs(struct super_block *sb)
187{
188 struct gfs2_sbd *sdp = sb->s_fs_info;
189 gfs2_unfreeze_fs(sdp);
190}
191
192/**
193 * gfs2_statfs - Gather and return stats about the filesystem
194 * @sb: The superblock
195 * @statfsbuf: The buffer
196 *
197 * Returns: 0 on success or error code
198 */
199
200static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf)
201{
202 struct gfs2_sbd *sdp = sb->s_fs_info;
203 struct gfs2_statfs_change sc;
204 int error;
205
206 if (gfs2_tune_get(sdp, gt_statfs_slow))
207 error = gfs2_statfs_slow(sdp, &sc);
208 else
209 error = gfs2_statfs_i(sdp, &sc);
210
211 if (error)
212 return error;
213
214 memset(buf, 0, sizeof(struct kstatfs));
215
216 buf->f_type = GFS2_MAGIC;
217 buf->f_bsize = sdp->sd_sb.sb_bsize;
218 buf->f_blocks = sc.sc_total;
219 buf->f_bfree = sc.sc_free;
220 buf->f_bavail = sc.sc_free;
221 buf->f_files = sc.sc_dinodes + sc.sc_free;
222 buf->f_ffree = sc.sc_free;
223 buf->f_namelen = GFS2_FNAMESIZE;
224
225 return 0;
226}
227
228/**
229 * gfs2_remount_fs - called when the FS is remounted
230 * @sb: the filesystem
231 * @flags: the remount flags
232 * @data: extra data passed in (not used right now)
233 *
234 * Returns: errno
235 */
236
237static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
238{
239 struct gfs2_sbd *sdp = sb->s_fs_info;
240 int error;
241
242 error = gfs2_mount_args(sdp, data, 1);
243 if (error)
244 return error;
245
246 if (sdp->sd_args.ar_spectator)
247 *flags |= MS_RDONLY;
248 else {
249 if (*flags & MS_RDONLY) {
250 if (!(sb->s_flags & MS_RDONLY))
251 error = gfs2_make_fs_ro(sdp);
252 } else if (!(*flags & MS_RDONLY) &&
253 (sb->s_flags & MS_RDONLY)) {
254 error = gfs2_make_fs_rw(sdp);
255 }
256 }
257
258 if (*flags & (MS_NOATIME | MS_NODIRATIME))
259 set_bit(SDF_NOATIME, &sdp->sd_flags);
260 else
261 clear_bit(SDF_NOATIME, &sdp->sd_flags);
262
263 /* Don't let the VFS update atimes. GFS2 handles this itself. */
264 *flags |= MS_NOATIME | MS_NODIRATIME;
265
266 return error;
267}
268
269/**
270 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
271 * @inode: The VFS inode
272 *
273 */
274
275static void gfs2_clear_inode(struct inode *inode)
276{
277 struct gfs2_inode *ip = inode->u.generic_ip;
278
279 if (ip) {
280 spin_lock(&ip->i_spin);
281 ip->i_vnode = NULL;
282 inode->u.generic_ip = NULL;
283 spin_unlock(&ip->i_spin);
284
285 gfs2_glock_schedule_for_reclaim(ip->i_gl);
286 gfs2_inode_put(ip);
287 }
288}
289
290/**
291 * gfs2_show_options - Show mount options for /proc/mounts
292 * @s: seq_file structure
293 * @mnt: vfsmount
294 *
295 * Returns: 0 on success or error code
296 */
297
298static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
299{
300 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
301 struct gfs2_args *args = &sdp->sd_args;
302
303 if (args->ar_lockproto[0])
304 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
305 if (args->ar_locktable[0])
306 seq_printf(s, ",locktable=%s", args->ar_locktable);
307 if (args->ar_hostdata[0])
308 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
309 if (args->ar_spectator)
310 seq_printf(s, ",spectator");
311 if (args->ar_ignore_local_fs)
312 seq_printf(s, ",ignore_local_fs");
313 if (args->ar_localflocks)
314 seq_printf(s, ",localflocks");
315 if (args->ar_localcaching)
316 seq_printf(s, ",localcaching");
317 if (args->ar_debug)
318 seq_printf(s, ",debug");
319 if (args->ar_upgrade)
320 seq_printf(s, ",upgrade");
321 if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
322 seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
323 if (args->ar_posix_acl)
324 seq_printf(s, ",acl");
325 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
326 char *state;
327 switch (args->ar_quota) {
328 case GFS2_QUOTA_OFF:
329 state = "off";
330 break;
331 case GFS2_QUOTA_ACCOUNT:
332 state = "account";
333 break;
334 case GFS2_QUOTA_ON:
335 state = "on";
336 break;
337 default:
338 state = "unknown";
339 break;
340 }
341 seq_printf(s, ",quota=%s", state);
342 }
343 if (args->ar_suiddir)
344 seq_printf(s, ",suiddir");
345 if (args->ar_data != GFS2_DATA_DEFAULT) {
346 char *state;
347 switch (args->ar_data) {
348 case GFS2_DATA_WRITEBACK:
349 state = "writeback";
350 break;
351 case GFS2_DATA_ORDERED:
352 state = "ordered";
353 break;
354 default:
355 state = "unknown";
356 break;
357 }
358 seq_printf(s, ",data=%s", state);
359 }
360
361 return 0;
362}
363
364static struct inode *gfs2_alloc_inode(struct super_block *sb)
365{
366 struct gfs2_sbd *sdp = sb->s_fs_info;
367 struct gfs2_inode *ip;
368
369 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
370 if (ip) {
371 ip->i_flags = 0;
372 ip->i_gl = NULL;
373 ip->i_sbd = sdp;
374 ip->i_vnode = &ip->i_inode;
375 ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
376 ip->i_last_pfault = jiffies;
377 }
378 return &ip->i_inode;
379}
380
381static void gfs2_destroy_inode(struct inode *inode)
382{
383 kmem_cache_free(gfs2_inode_cachep, inode);
384}
385
386struct super_operations gfs2_super_ops = {
387 .alloc_inode = gfs2_alloc_inode,
388 .destroy_inode = gfs2_destroy_inode,
389 .write_inode = gfs2_write_inode,
390 .put_super = gfs2_put_super,
391 .write_super = gfs2_write_super,
392 .write_super_lockfs = gfs2_write_super_lockfs,
393 .unlockfs = gfs2_unlockfs,
394 .statfs = gfs2_statfs,
395 .remount_fs = gfs2_remount_fs,
396 .clear_inode = gfs2_clear_inode,
397 .show_options = gfs2_show_options,
398};
399
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
new file mode 100644
index 000000000000..a15ccc276113
--- /dev/null
+++ b/fs/gfs2/ops_super.h
@@ -0,0 +1,15 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_SUPER_DOT_H__
11#define __OPS_SUPER_DOT_H__
12
13extern struct super_operations gfs2_super_ops;
14
15#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
new file mode 100644
index 000000000000..263c1fb7bbaf
--- /dev/null
+++ b/fs/gfs2/ops_vm.c
@@ -0,0 +1,195 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/mm.h>
16#include <linux/pagemap.h>
17#include <linux/gfs2_ondisk.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "glock.h"
24#include "inode.h"
25#include "ops_vm.h"
26#include "page.h"
27#include "quota.h"
28#include "rgrp.h"
29#include "trans.h"
30#include "util.h"
31
32static void pfault_be_greedy(struct gfs2_inode *ip)
33{
34 unsigned int time;
35
36 spin_lock(&ip->i_spin);
37 time = ip->i_greedy;
38 ip->i_last_pfault = jiffies;
39 spin_unlock(&ip->i_spin);
40
41 gfs2_inode_hold(ip);
42 if (gfs2_glock_be_greedy(ip->i_gl, time))
43 gfs2_inode_put(ip);
44}
45
46static struct page *gfs2_private_nopage(struct vm_area_struct *area,
47 unsigned long address, int *type)
48{
49 struct gfs2_inode *ip = area->vm_file->f_mapping->host->u.generic_ip;
50 struct gfs2_holder i_gh;
51 struct page *result;
52 int error;
53
54 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
55 if (error)
56 return NULL;
57
58 set_bit(GIF_PAGED, &ip->i_flags);
59
60 result = filemap_nopage(area, address, type);
61
62 if (result && result != NOPAGE_OOM)
63 pfault_be_greedy(ip);
64
65 gfs2_glock_dq_uninit(&i_gh);
66
67 return result;
68}
69
70static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
71{
72 struct gfs2_sbd *sdp = ip->i_sbd;
73 unsigned long index = page->index;
74 uint64_t lblock = index << (PAGE_CACHE_SHIFT -
75 sdp->sd_sb.sb_bsize_shift);
76 unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
77 struct gfs2_alloc *al;
78 unsigned int data_blocks, ind_blocks;
79 unsigned int x;
80 int error;
81
82 al = gfs2_alloc_get(ip);
83
84 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
85 if (error)
86 goto out;
87
88 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
89 if (error)
90 goto out_gunlock_q;
91
92 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
93
94 al->al_requested = data_blocks + ind_blocks;
95
96 error = gfs2_inplace_reserve(ip);
97 if (error)
98 goto out_gunlock_q;
99
100 error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
101 ind_blocks + RES_DINODE +
102 RES_STATFS + RES_QUOTA, 0);
103 if (error)
104 goto out_ipres;
105
106 if (gfs2_is_stuffed(ip)) {
107 error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL);
108 if (error)
109 goto out_trans;
110 }
111
112 for (x = 0; x < blocks; ) {
113 uint64_t dblock;
114 unsigned int extlen;
115 int new = 1;
116
117 error = gfs2_extent_map(ip->i_vnode, lblock, &new, &dblock, &extlen);
118 if (error)
119 goto out_trans;
120
121 lblock += extlen;
122 x += extlen;
123 }
124
125 gfs2_assert_warn(sdp, al->al_alloced);
126
127 out_trans:
128 gfs2_trans_end(sdp);
129
130 out_ipres:
131 gfs2_inplace_release(ip);
132
133 out_gunlock_q:
134 gfs2_quota_unlock(ip);
135
136 out:
137 gfs2_alloc_put(ip);
138
139 return error;
140}
141
142static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
143 unsigned long address, int *type)
144{
145 struct gfs2_inode *ip = area->vm_file->f_mapping->host->u.generic_ip;
146 struct gfs2_holder i_gh;
147 struct page *result = NULL;
148 unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
149 area->vm_pgoff;
150 int alloc_required;
151 int error;
152
153 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
154 if (error)
155 return NULL;
156
157 set_bit(GIF_PAGED, &ip->i_flags);
158 set_bit(GIF_SW_PAGED, &ip->i_flags);
159
160 error = gfs2_write_alloc_required(ip,
161 (uint64_t)index << PAGE_CACHE_SHIFT,
162 PAGE_CACHE_SIZE, &alloc_required);
163 if (error)
164 goto out;
165
166 result = filemap_nopage(area, address, type);
167 if (!result || result == NOPAGE_OOM)
168 goto out;
169
170 if (alloc_required) {
171 error = alloc_page_backing(ip, result);
172 if (error) {
173 page_cache_release(result);
174 result = NULL;
175 goto out;
176 }
177 set_page_dirty(result);
178 }
179
180 pfault_be_greedy(ip);
181
182 out:
183 gfs2_glock_dq_uninit(&i_gh);
184
185 return result;
186}
187
188struct vm_operations_struct gfs2_vm_ops_private = {
189 .nopage = gfs2_private_nopage,
190};
191
192struct vm_operations_struct gfs2_vm_ops_sharewrite = {
193 .nopage = gfs2_sharewrite_nopage,
194};
195
diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h
new file mode 100644
index 000000000000..077cffcd4085
--- /dev/null
+++ b/fs/gfs2/ops_vm.h
@@ -0,0 +1,16 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __OPS_VM_DOT_H__
11#define __OPS_VM_DOT_H__
12
13extern struct vm_operations_struct gfs2_vm_ops_private;
14extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
15
16#endif /* __OPS_VM_DOT_H__ */
diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c
new file mode 100644
index 000000000000..cd93644c7d70
--- /dev/null
+++ b/fs/gfs2/page.c
@@ -0,0 +1,280 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/pagemap.h>
16#include <linux/mm.h>
17#include <linux/gfs2_ondisk.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "bmap.h"
23#include "inode.h"
24#include "page.h"
25#include "trans.h"
26#include "ops_address.h"
27#include "util.h"
28
29/**
30 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
31 * @gl: the glock
32 *
33 */
34
35void gfs2_pte_inval(struct gfs2_glock *gl)
36{
37 struct gfs2_inode *ip;
38 struct inode *inode;
39
40 ip = gl->gl_object;
41 if (!ip || !S_ISREG(ip->i_di.di_mode))
42 return;
43
44 if (!test_bit(GIF_PAGED, &ip->i_flags))
45 return;
46
47 inode = gfs2_ip2v_lookup(ip);
48 if (inode) {
49 unmap_shared_mapping_range(inode->i_mapping, 0, 0);
50 iput(inode);
51
52 if (test_bit(GIF_SW_PAGED, &ip->i_flags))
53 set_bit(GLF_DIRTY, &gl->gl_flags);
54 }
55
56 clear_bit(GIF_SW_PAGED, &ip->i_flags);
57}
58
59/**
60 * gfs2_page_inval - Invalidate all pages associated with a glock
61 * @gl: the glock
62 *
63 */
64
65void gfs2_page_inval(struct gfs2_glock *gl)
66{
67 struct gfs2_inode *ip;
68 struct inode *inode;
69
70 ip = gl->gl_object;
71 if (!ip || !S_ISREG(ip->i_di.di_mode))
72 return;
73
74 inode = gfs2_ip2v_lookup(ip);
75 if (inode) {
76 struct address_space *mapping = inode->i_mapping;
77
78 truncate_inode_pages(mapping, 0);
79 gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages);
80
81 iput(inode);
82 }
83
84 clear_bit(GIF_PAGED, &ip->i_flags);
85}
86
87/**
88 * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock
89 * @gl: the glock
90 * @flags: DIO_START | DIO_WAIT
91 *
92 * Syncs data (not metadata) for a regular file.
93 * No-op for all other types.
94 */
95
96void gfs2_page_sync(struct gfs2_glock *gl, int flags)
97{
98 struct gfs2_inode *ip;
99 struct inode *inode;
100
101 ip = gl->gl_object;
102 if (!ip || !S_ISREG(ip->i_di.di_mode))
103 return;
104
105 inode = gfs2_ip2v_lookup(ip);
106 if (inode) {
107 struct address_space *mapping = inode->i_mapping;
108 int error = 0;
109
110 if (flags & DIO_START)
111 filemap_fdatawrite(mapping);
112 if (!error && (flags & DIO_WAIT))
113 error = filemap_fdatawait(mapping);
114
115 /* Put back any errors cleared by filemap_fdatawait()
116 so they can be caught by someone who can pass them
117 up to user space. */
118
119 if (error == -ENOSPC)
120 set_bit(AS_ENOSPC, &mapping->flags);
121 else if (error)
122 set_bit(AS_EIO, &mapping->flags);
123
124 iput(inode);
125 }
126}
127
128/**
129 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
130 * @ip: the inode
131 * @dibh: the dinode buffer
132 * @block: the block number that was allocated
133 * @private: any locked page held by the caller process
134 *
135 * Returns: errno
136 */
137
138int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
139 uint64_t block, void *private)
140{
141 struct gfs2_sbd *sdp = ip->i_sbd;
142 struct inode *inode = ip->i_vnode;
143 struct page *page = (struct page *)private;
144 struct buffer_head *bh;
145 int release = 0;
146
147 if (!page || page->index) {
148 page = grab_cache_page(inode->i_mapping, 0);
149 if (!page)
150 return -ENOMEM;
151 release = 1;
152 }
153
154 if (!PageUptodate(page)) {
155 void *kaddr = kmap(page);
156
157 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
158 ip->i_di.di_size);
159 memset(kaddr + ip->i_di.di_size, 0,
160 PAGE_CACHE_SIZE - ip->i_di.di_size);
161 kunmap(page);
162
163 SetPageUptodate(page);
164 }
165
166 if (!page_has_buffers(page))
167 create_empty_buffers(page, 1 << inode->i_blkbits,
168 (1 << BH_Uptodate));
169
170 bh = page_buffers(page);
171
172 if (!buffer_mapped(bh))
173 map_bh(bh, inode->i_sb, block);
174
175 set_buffer_uptodate(bh);
176 if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
177 gfs2_trans_add_bh(ip->i_gl, bh, 0);
178 mark_buffer_dirty(bh);
179
180 if (release) {
181 unlock_page(page);
182 page_cache_release(page);
183 }
184
185 return 0;
186}
187
188/**
189 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
190 *
191 * This is partly borrowed from ext3.
192 */
193int gfs2_block_truncate_page(struct address_space *mapping)
194{
195 struct inode *inode = mapping->host;
196 struct gfs2_inode *ip = inode->u.generic_ip;
197 struct gfs2_sbd *sdp = ip->i_sbd;
198 loff_t from = inode->i_size;
199 unsigned long index = from >> PAGE_CACHE_SHIFT;
200 unsigned offset = from & (PAGE_CACHE_SIZE-1);
201 unsigned blocksize, iblock, length, pos;
202 struct buffer_head *bh;
203 struct page *page;
204 void *kaddr;
205 int err;
206
207 page = grab_cache_page(mapping, index);
208 if (!page)
209 return 0;
210
211 blocksize = inode->i_sb->s_blocksize;
212 length = blocksize - (offset & (blocksize - 1));
213 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
214
215 if (!page_has_buffers(page))
216 create_empty_buffers(page, blocksize, 0);
217
218 /* Find the buffer that contains "offset" */
219 bh = page_buffers(page);
220 pos = blocksize;
221 while (offset >= pos) {
222 bh = bh->b_this_page;
223 iblock++;
224 pos += blocksize;
225 }
226
227 err = 0;
228
229 if (!buffer_mapped(bh)) {
230 gfs2_get_block(inode, iblock, bh, 0);
231 /* unmapped? It's a hole - nothing to do */
232 if (!buffer_mapped(bh))
233 goto unlock;
234 }
235
236 /* Ok, it's mapped. Make sure it's up-to-date */
237 if (PageUptodate(page))
238 set_buffer_uptodate(bh);
239
240 if (!buffer_uptodate(bh)) {
241 err = -EIO;
242 ll_rw_block(READ, 1, &bh);
243 wait_on_buffer(bh);
244 /* Uhhuh. Read error. Complain and punt. */
245 if (!buffer_uptodate(bh))
246 goto unlock;
247 }
248
249 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
250 gfs2_trans_add_bh(ip->i_gl, bh, 0);
251
252 kaddr = kmap_atomic(page, KM_USER0);
253 memset(kaddr + offset, 0, length);
254 flush_dcache_page(page);
255 kunmap_atomic(kaddr, KM_USER0);
256
257unlock:
258 unlock_page(page);
259 page_cache_release(page);
260 return err;
261}
262
263void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
264 unsigned int from, unsigned int to)
265{
266 struct buffer_head *head = page_buffers(page);
267 unsigned int bsize = head->b_size;
268 struct buffer_head *bh;
269 unsigned int start, end;
270
271 for (bh = head, start = 0;
272 bh != head || !start;
273 bh = bh->b_this_page, start = end) {
274 end = start + bsize;
275 if (end <= from || start >= to)
276 continue;
277 gfs2_trans_add_bh(ip->i_gl, bh, 0);
278 }
279}
280
diff --git a/fs/gfs2/page.h b/fs/gfs2/page.h
new file mode 100644
index 000000000000..2c853a90ac04
--- /dev/null
+++ b/fs/gfs2/page.h
@@ -0,0 +1,23 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __PAGE_DOT_H__
11#define __PAGE_DOT_H__
12
13void gfs2_pte_inval(struct gfs2_glock *gl);
14void gfs2_page_inval(struct gfs2_glock *gl);
15void gfs2_page_sync(struct gfs2_glock *gl, int flags);
16
17int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
18 uint64_t block, void *private);
19int gfs2_block_truncate_page(struct address_space *mapping);
20void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
21 unsigned int from, unsigned int to);
22
23#endif /* __PAGE_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
new file mode 100644
index 000000000000..f752b0184690
--- /dev/null
+++ b/fs/gfs2/quota.c
@@ -0,0 +1,1305 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11 * Quota change tags are associated with each transaction that allocates or
12 * deallocates space. Those changes are accumulated locally to each node (in a
13 * per-node file) and then are periodically synced to the quota file. This
14 * avoids the bottleneck of constantly touching the quota file, but introduces
15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controllable.
18 * Since quota tags are part of transactions, there is no need for a quota check
19 * program to be run on node crashes or anything like that.
20 *
21 * There are a couple of knobs that let the administrator manage the quota
22 * fuzziness. "quota_quantum" sets the maximum time a quota change can be
23 * sitting on one node before being synced to the quota file. (The default is
24 * 60 seconds.) Another knob, "quota_scale" controls how quickly the frequency
25 * of quota file syncs increases as the user moves closer to their limit. The
26 * more frequent the syncs, the more accurate the quota enforcement, but that
27 * means that there is more contention between the nodes for the quota file.
28 * The default value is one. This sets the maximum theoretical quota overrun
29 * (with infinite nodes with infinite bandwidth) to twice the user's limit. (In
30 * practice, the maximum overrun you see should be much less.) A "quota_scale"
31 * number greater than one makes quota syncs more frequent and reduces the
32 * maximum overrun. Numbers less than one (but greater than zero) make quota
33 * syncs less frequent.
34 *
35 * GFS quotas also use per-ID Lock Value Blocks (LVBs) to cache the contents of
36 * the quota file, so it is not being constantly read.
37 */
38
39#include <linux/sched.h>
40#include <linux/slab.h>
41#include <linux/spinlock.h>
42#include <linux/completion.h>
43#include <linux/buffer_head.h>
44#include <linux/tty.h>
45#include <linux/sort.h>
46#include <linux/fs.h>
47#include <linux/gfs2_ondisk.h>
48
49#include "gfs2.h"
50#include "lm_interface.h"
51#include "incore.h"
52#include "bmap.h"
53#include "glock.h"
54#include "glops.h"
55#include "log.h"
56#include "lvb.h"
57#include "meta_io.h"
58#include "quota.h"
59#include "rgrp.h"
60#include "super.h"
61#include "trans.h"
62#include "inode.h"
63#include "ops_file.h"
64#include "ops_address.h"
65#include "util.h"
66
/* Values for the "user" argument of the quota-data helpers in this file. */
#define QUOTA_GROUP 0
#define QUOTA_USER 1
69
70static uint64_t qd2offset(struct gfs2_quota_data *qd)
71{
72 uint64_t offset;
73
74 offset = 2 * (uint64_t)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags);
75 offset *= sizeof(struct gfs2_quota);
76
77 return offset;
78}
79
80static int qd_alloc(struct gfs2_sbd *sdp, int user, uint32_t id,
81 struct gfs2_quota_data **qdp)
82{
83 struct gfs2_quota_data *qd;
84 int error;
85
86 qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL);
87 if (!qd)
88 return -ENOMEM;
89
90 qd->qd_count = 1;
91 qd->qd_id = id;
92 if (user)
93 set_bit(QDF_USER, &qd->qd_flags);
94 qd->qd_slot = -1;
95
96 error = gfs2_glock_get(sdp, 2 * (uint64_t)id + !user,
97 &gfs2_quota_glops, CREATE, &qd->qd_gl);
98 if (error)
99 goto fail;
100
101 error = gfs2_lvb_hold(qd->qd_gl);
102 gfs2_glock_put(qd->qd_gl);
103 if (error)
104 goto fail;
105
106 *qdp = qd;
107
108 return 0;
109
110 fail:
111 kfree(qd);
112 return error;
113}
114
115static int qd_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
116 struct gfs2_quota_data **qdp)
117{
118 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
119 int error, found;
120
121 *qdp = NULL;
122
123 for (;;) {
124 found = 0;
125 spin_lock(&sdp->sd_quota_spin);
126 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
127 if (qd->qd_id == id &&
128 !test_bit(QDF_USER, &qd->qd_flags) == !user) {
129 qd->qd_count++;
130 found = 1;
131 break;
132 }
133 }
134
135 if (!found)
136 qd = NULL;
137
138 if (!qd && new_qd) {
139 qd = new_qd;
140 list_add(&qd->qd_list, &sdp->sd_quota_list);
141 atomic_inc(&sdp->sd_quota_count);
142 new_qd = NULL;
143 }
144
145 spin_unlock(&sdp->sd_quota_spin);
146
147 if (qd || !create) {
148 if (new_qd) {
149 gfs2_lvb_unhold(new_qd->qd_gl);
150 kfree(new_qd);
151 }
152 *qdp = qd;
153 return 0;
154 }
155
156 error = qd_alloc(sdp, user, id, &new_qd);
157 if (error)
158 return error;
159 }
160}
161
162static void qd_hold(struct gfs2_quota_data *qd)
163{
164 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
165
166 spin_lock(&sdp->sd_quota_spin);
167 gfs2_assert(sdp, qd->qd_count);
168 qd->qd_count++;
169 spin_unlock(&sdp->sd_quota_spin);
170}
171
172static void qd_put(struct gfs2_quota_data *qd)
173{
174 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
175 spin_lock(&sdp->sd_quota_spin);
176 gfs2_assert(sdp, qd->qd_count);
177 if (!--qd->qd_count)
178 qd->qd_last_touched = jiffies;
179 spin_unlock(&sdp->sd_quota_spin);
180}
181
182static int slot_get(struct gfs2_quota_data *qd)
183{
184 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
185 unsigned int c, o = 0, b;
186 unsigned char byte = 0;
187
188 spin_lock(&sdp->sd_quota_spin);
189
190 if (qd->qd_slot_count++) {
191 spin_unlock(&sdp->sd_quota_spin);
192 return 0;
193 }
194
195 for (c = 0; c < sdp->sd_quota_chunks; c++)
196 for (o = 0; o < PAGE_SIZE; o++) {
197 byte = sdp->sd_quota_bitmap[c][o];
198 if (byte != 0xFF)
199 goto found;
200 }
201
202 goto fail;
203
204 found:
205 for (b = 0; b < 8; b++)
206 if (!(byte & (1 << b)))
207 break;
208 qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
209
210 if (qd->qd_slot >= sdp->sd_quota_slots)
211 goto fail;
212
213 sdp->sd_quota_bitmap[c][o] |= 1 << b;
214
215 spin_unlock(&sdp->sd_quota_spin);
216
217 return 0;
218
219 fail:
220 qd->qd_slot_count--;
221 spin_unlock(&sdp->sd_quota_spin);
222 return -ENOSPC;
223}
224
225static void slot_hold(struct gfs2_quota_data *qd)
226{
227 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
228
229 spin_lock(&sdp->sd_quota_spin);
230 gfs2_assert(sdp, qd->qd_slot_count);
231 qd->qd_slot_count++;
232 spin_unlock(&sdp->sd_quota_spin);
233}
234
235static void slot_put(struct gfs2_quota_data *qd)
236{
237 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
238
239 spin_lock(&sdp->sd_quota_spin);
240 gfs2_assert(sdp, qd->qd_slot_count);
241 if (!--qd->qd_slot_count) {
242 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0);
243 qd->qd_slot = -1;
244 }
245 spin_unlock(&sdp->sd_quota_spin);
246}
247
248static int bh_get(struct gfs2_quota_data *qd)
249{
250 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
251 struct gfs2_inode *ip = sdp->sd_qc_inode->u.generic_ip;
252 unsigned int block, offset;
253 uint64_t dblock;
254 int new = 0;
255 struct buffer_head *bh;
256 int error;
257 int boundary;
258
259 mutex_lock(&sdp->sd_quota_mutex);
260
261 if (qd->qd_bh_count++) {
262 mutex_unlock(&sdp->sd_quota_mutex);
263 return 0;
264 }
265
266 block = qd->qd_slot / sdp->sd_qc_per_block;
267 offset = qd->qd_slot % sdp->sd_qc_per_block;;
268
269 error = gfs2_block_map(ip->i_vnode, block, &new, &dblock, &boundary);
270 if (error)
271 goto fail;
272 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
273 if (error)
274 goto fail;
275 error = -EIO;
276 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC))
277 goto fail_brelse;
278
279 qd->qd_bh = bh;
280 qd->qd_bh_qc = (struct gfs2_quota_change *)
281 (bh->b_data + sizeof(struct gfs2_meta_header) +
282 offset * sizeof(struct gfs2_quota_change));
283
284 mutex_lock(&sdp->sd_quota_mutex);
285
286 return 0;
287
288 fail_brelse:
289 brelse(bh);
290
291 fail:
292 qd->qd_bh_count--;
293 mutex_unlock(&sdp->sd_quota_mutex);
294 return error;
295}
296
297static void bh_put(struct gfs2_quota_data *qd)
298{
299 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
300
301 mutex_lock(&sdp->sd_quota_mutex);
302 gfs2_assert(sdp, qd->qd_bh_count);
303 if (!--qd->qd_bh_count) {
304 brelse(qd->qd_bh);
305 qd->qd_bh = NULL;
306 qd->qd_bh_qc = NULL;
307 }
308 mutex_unlock(&sdp->sd_quota_mutex);
309}
310
311static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
312{
313 struct gfs2_quota_data *qd = NULL;
314 int error;
315 int found = 0;
316
317 *qdp = NULL;
318
319 if (sdp->sd_vfs->s_flags & MS_RDONLY)
320 return 0;
321
322 spin_lock(&sdp->sd_quota_spin);
323
324 list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
325 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
326 !test_bit(QDF_CHANGE, &qd->qd_flags) ||
327 qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
328 continue;
329
330 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
331
332 set_bit(QDF_LOCKED, &qd->qd_flags);
333 gfs2_assert_warn(sdp, qd->qd_count);
334 qd->qd_count++;
335 qd->qd_change_sync = qd->qd_change;
336 gfs2_assert_warn(sdp, qd->qd_slot_count);
337 qd->qd_slot_count++;
338 found = 1;
339
340 break;
341 }
342
343 if (!found)
344 qd = NULL;
345
346 spin_unlock(&sdp->sd_quota_spin);
347
348 if (qd) {
349 gfs2_assert_warn(sdp, qd->qd_change_sync);
350 error = bh_get(qd);
351 if (error) {
352 clear_bit(QDF_LOCKED, &qd->qd_flags);
353 slot_put(qd);
354 qd_put(qd);
355 return error;
356 }
357 }
358
359 *qdp = qd;
360
361 return 0;
362}
363
364static int qd_trylock(struct gfs2_quota_data *qd)
365{
366 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
367
368 if (sdp->sd_vfs->s_flags & MS_RDONLY)
369 return 0;
370
371 spin_lock(&sdp->sd_quota_spin);
372
373 if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
374 !test_bit(QDF_CHANGE, &qd->qd_flags)) {
375 spin_unlock(&sdp->sd_quota_spin);
376 return 0;
377 }
378
379 list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
380
381 set_bit(QDF_LOCKED, &qd->qd_flags);
382 gfs2_assert_warn(sdp, qd->qd_count);
383 qd->qd_count++;
384 qd->qd_change_sync = qd->qd_change;
385 gfs2_assert_warn(sdp, qd->qd_slot_count);
386 qd->qd_slot_count++;
387
388 spin_unlock(&sdp->sd_quota_spin);
389
390 gfs2_assert_warn(sdp, qd->qd_change_sync);
391 if (bh_get(qd)) {
392 clear_bit(QDF_LOCKED, &qd->qd_flags);
393 slot_put(qd);
394 qd_put(qd);
395 return 0;
396 }
397
398 return 1;
399}
400
401static void qd_unlock(struct gfs2_quota_data *qd)
402{
403 gfs2_assert_warn(qd->qd_gl->gl_sbd,
404 test_bit(QDF_LOCKED, &qd->qd_flags));
405 clear_bit(QDF_LOCKED, &qd->qd_flags);
406 bh_put(qd);
407 slot_put(qd);
408 qd_put(qd);
409}
410
411static int qdsb_get(struct gfs2_sbd *sdp, int user, uint32_t id, int create,
412 struct gfs2_quota_data **qdp)
413{
414 int error;
415
416 error = qd_get(sdp, user, id, create, qdp);
417 if (error)
418 return error;
419
420 error = slot_get(*qdp);
421 if (error)
422 goto fail;
423
424 error = bh_get(*qdp);
425 if (error)
426 goto fail_slot;
427
428 return 0;
429
430 fail_slot:
431 slot_put(*qdp);
432
433 fail:
434 qd_put(*qdp);
435 return error;
436}
437
/**
 * qdsb_put - drop the buffer, slot and structure references of a quota data
 * @qd: the quota data
 *
 * Releases in the reverse of the order used by qdsb_get().
 */
static void qdsb_put(struct gfs2_quota_data *qd)
{
	bh_put(qd);	/* buffer first */
	slot_put(qd);	/* then the slot */
	qd_put(qd);	/* finally the structure itself */
}
444
445int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
446{
447 struct gfs2_sbd *sdp = ip->i_sbd;
448 struct gfs2_alloc *al = &ip->i_alloc;
449 struct gfs2_quota_data **qd = al->al_qd;
450 int error;
451
452 if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
453 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
454 return -EIO;
455
456 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
457 return 0;
458
459 error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd);
460 if (error)
461 goto out;
462 al->al_qd_num++;
463 qd++;
464
465 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd);
466 if (error)
467 goto out;
468 al->al_qd_num++;
469 qd++;
470
471 if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) {
472 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd);
473 if (error)
474 goto out;
475 al->al_qd_num++;
476 qd++;
477 }
478
479 if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) {
480 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd);
481 if (error)
482 goto out;
483 al->al_qd_num++;
484 qd++;
485 }
486
487 out:
488 if (error)
489 gfs2_quota_unhold(ip);
490
491 return error;
492}
493
494void gfs2_quota_unhold(struct gfs2_inode *ip)
495{
496 struct gfs2_sbd *sdp = ip->i_sbd;
497 struct gfs2_alloc *al = &ip->i_alloc;
498 unsigned int x;
499
500 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
501
502 for (x = 0; x < al->al_qd_num; x++) {
503 qdsb_put(al->al_qd[x]);
504 al->al_qd[x] = NULL;
505 }
506 al->al_qd_num = 0;
507}
508
509static int sort_qd(const void *a, const void *b)
510{
511 struct gfs2_quota_data *qd_a = *(struct gfs2_quota_data **)a;
512 struct gfs2_quota_data *qd_b = *(struct gfs2_quota_data **)b;
513 int ret = 0;
514
515 if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
516 !test_bit(QDF_USER, &qd_b->qd_flags)) {
517 if (test_bit(QDF_USER, &qd_a->qd_flags))
518 ret = -1;
519 else
520 ret = 1;
521 } else {
522 if (qd_a->qd_id < qd_b->qd_id)
523 ret = -1;
524 else if (qd_a->qd_id > qd_b->qd_id)
525 ret = 1;
526 }
527
528 return ret;
529}
530
531static void do_qc(struct gfs2_quota_data *qd, int64_t change)
532{
533 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
534 struct gfs2_inode *ip = sdp->sd_qc_inode->u.generic_ip;
535 struct gfs2_quota_change *qc = qd->qd_bh_qc;
536 int64_t x;
537
538 mutex_lock(&sdp->sd_quota_mutex);
539 gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1);
540
541 if (!test_bit(QDF_CHANGE, &qd->qd_flags)) {
542 qc->qc_change = 0;
543 qc->qc_flags = 0;
544 if (test_bit(QDF_USER, &qd->qd_flags))
545 qc->qc_flags = cpu_to_be32(GFS2_QCF_USER);
546 qc->qc_id = cpu_to_be32(qd->qd_id);
547 }
548
549 x = qc->qc_change;
550 x = be64_to_cpu(x) + change;
551 qc->qc_change = cpu_to_be64(x);
552
553 spin_lock(&sdp->sd_quota_spin);
554 qd->qd_change = x;
555 spin_unlock(&sdp->sd_quota_spin);
556
557 if (!x) {
558 gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags));
559 clear_bit(QDF_CHANGE, &qd->qd_flags);
560 qc->qc_flags = 0;
561 qc->qc_id = 0;
562 slot_put(qd);
563 qd_put(qd);
564 } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) {
565 qd_hold(qd);
566 slot_hold(qd);
567 }
568
569 mutex_unlock(&sdp->sd_quota_mutex);
570}
571
572/**
573 * gfs2_adjust_quota
574 *
575 * This function was mostly borrowed from gfs2_block_truncate_page which was
576 * in turn mostly borrowed from ext3
577 */
578static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
579 int64_t change, struct gfs2_quota_data *qd)
580{
581 struct inode *inode = ip->i_vnode;
582 struct address_space *mapping = inode->i_mapping;
583 unsigned long index = loc >> PAGE_CACHE_SHIFT;
584 unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
585 unsigned blocksize, iblock, pos;
586 struct buffer_head *bh;
587 struct page *page;
588 void *kaddr;
589 __be64 *ptr;
590 u64 value;
591 int err = -EIO;
592
593 page = grab_cache_page(mapping, index);
594 if (!page)
595 return -ENOMEM;
596
597 blocksize = inode->i_sb->s_blocksize;
598 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
599
600 if (!page_has_buffers(page))
601 create_empty_buffers(page, blocksize, 0);
602
603 bh = page_buffers(page);
604 pos = blocksize;
605 while (offset >= pos) {
606 bh = bh->b_this_page;
607 iblock++;
608 pos += blocksize;
609 }
610
611 if (!buffer_mapped(bh)) {
612 gfs2_get_block(inode, iblock, bh, 1);
613 if (!buffer_mapped(bh))
614 goto unlock;
615 }
616
617 if (PageUptodate(page))
618 set_buffer_uptodate(bh);
619
620 if (!buffer_uptodate(bh)) {
621 ll_rw_block(READ, 1, &bh);
622 wait_on_buffer(bh);
623 if (!buffer_uptodate(bh))
624 goto unlock;
625 }
626
627 gfs2_trans_add_bh(ip->i_gl, bh, 0);
628
629 kaddr = kmap_atomic(page, KM_USER0);
630 ptr = (__be64 *)(kaddr + offset);
631 value = *ptr = cpu_to_be64(be64_to_cpu(*ptr) + change);
632 flush_dcache_page(page);
633 kunmap_atomic(kaddr, KM_USER0);
634 err = 0;
635 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
636#if 0
637 qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
638 qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
639#endif
640 qd->qd_qb.qb_value = cpu_to_be64(value);
641unlock:
642 unlock_page(page);
643 page_cache_release(page);
644 return err;
645}
646
647static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
648{
649 struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
650 struct gfs2_inode *ip = sdp->sd_quota_inode->u.generic_ip;
651 unsigned int data_blocks, ind_blocks;
652 struct file_ra_state ra_state;
653 struct gfs2_holder *ghs, i_gh;
654 unsigned int qx, x;
655 struct gfs2_quota_data *qd;
656 loff_t offset;
657 unsigned int nalloc = 0;
658 struct gfs2_alloc *al = NULL;
659 int error;
660
661 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
662 &data_blocks, &ind_blocks);
663
664 ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL);
665 if (!ghs)
666 return -ENOMEM;
667
668 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
669 for (qx = 0; qx < num_qd; qx++) {
670 error = gfs2_glock_nq_init(qda[qx]->qd_gl,
671 LM_ST_EXCLUSIVE,
672 GL_NOCACHE, &ghs[qx]);
673 if (error)
674 goto out;
675 }
676
677 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
678 if (error)
679 goto out;
680
681 for (x = 0; x < num_qd; x++) {
682 int alloc_required;
683
684 offset = qd2offset(qda[x]);
685 error = gfs2_write_alloc_required(ip, offset,
686 sizeof(struct gfs2_quota),
687 &alloc_required);
688 if (error)
689 goto out_gunlock;
690 if (alloc_required)
691 nalloc++;
692 }
693
694 if (nalloc) {
695 al = gfs2_alloc_get(ip);
696
697 al->al_requested = nalloc * (data_blocks + ind_blocks);
698
699 error = gfs2_inplace_reserve(ip);
700 if (error)
701 goto out_alloc;
702
703 error = gfs2_trans_begin(sdp,
704 al->al_rgd->rd_ri.ri_length +
705 num_qd * data_blocks +
706 nalloc * ind_blocks +
707 RES_DINODE + num_qd +
708 RES_STATFS, 0);
709 if (error)
710 goto out_ipres;
711 } else {
712 error = gfs2_trans_begin(sdp,
713 num_qd * data_blocks +
714 RES_DINODE + num_qd, 0);
715 if (error)
716 goto out_gunlock;
717 }
718
719 file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
720 for (x = 0; x < num_qd; x++) {
721 qd = qda[x];
722 offset = qd2offset(qd);
723 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
724 (struct gfs2_quota_data *)
725 qd->qd_gl->gl_lvb);
726 if (error)
727 goto out_end_trans;
728
729 do_qc(qd, -qd->qd_change_sync);
730 }
731
732 error = 0;
733
734 out_end_trans:
735 gfs2_trans_end(sdp);
736
737 out_ipres:
738 if (nalloc)
739 gfs2_inplace_release(ip);
740
741 out_alloc:
742 if (nalloc)
743 gfs2_alloc_put(ip);
744
745 out_gunlock:
746 gfs2_glock_dq_uninit(&i_gh);
747
748 out:
749 while (qx--)
750 gfs2_glock_dq_uninit(&ghs[qx]);
751 kfree(ghs);
752 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
753
754 return error;
755}
756
757static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
758 struct gfs2_holder *q_gh)
759{
760 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
761 struct gfs2_inode *ip = sdp->sd_quota_inode->u.generic_ip;
762 struct gfs2_holder i_gh;
763 struct gfs2_quota q;
764 char buf[sizeof(struct gfs2_quota)];
765 struct file_ra_state ra_state;
766 int error;
767
768 file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
769 restart:
770 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
771 if (error)
772 return error;
773
774 gfs2_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);
775
776 if (force_refresh || qd->qd_qb.qb_magic != GFS2_MAGIC) {
777 loff_t pos;
778 gfs2_glock_dq_uninit(q_gh);
779 error = gfs2_glock_nq_init(qd->qd_gl,
780 LM_ST_EXCLUSIVE, GL_NOCACHE,
781 q_gh);
782 if (error)
783 return error;
784
785 error = gfs2_glock_nq_init(ip->i_gl,
786 LM_ST_SHARED, 0,
787 &i_gh);
788 if (error)
789 goto fail;
790
791 memset(buf, 0, sizeof(struct gfs2_quota));
792 pos = qd2offset(qd);
793 error = gfs2_internal_read(ip,
794 &ra_state, buf,
795 &pos,
796 sizeof(struct gfs2_quota));
797 if (error < 0)
798 goto fail_gunlock;
799
800 gfs2_glock_dq_uninit(&i_gh);
801
802 gfs2_quota_in(&q, buf);
803
804 memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
805 qd->qd_qb.qb_magic = GFS2_MAGIC;
806 qd->qd_qb.qb_limit = q.qu_limit;
807 qd->qd_qb.qb_warn = q.qu_warn;
808 qd->qd_qb.qb_value = q.qu_value;
809
810 gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
811
812 if (gfs2_glock_is_blocking(qd->qd_gl)) {
813 gfs2_glock_dq_uninit(q_gh);
814 force_refresh = 0;
815 goto restart;
816 }
817 }
818
819 return 0;
820
821 fail_gunlock:
822 gfs2_glock_dq_uninit(&i_gh);
823
824 fail:
825 gfs2_glock_dq_uninit(q_gh);
826
827 return error;
828}
829
830int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
831{
832 struct gfs2_sbd *sdp = ip->i_sbd;
833 struct gfs2_alloc *al = &ip->i_alloc;
834 unsigned int x;
835 int error = 0;
836
837 gfs2_quota_hold(ip, uid, gid);
838
839 if (capable(CAP_SYS_RESOURCE) ||
840 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
841 return 0;
842
843 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
844 sort_qd, NULL);
845
846 for (x = 0; x < al->al_qd_num; x++) {
847 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
848 if (error)
849 break;
850 }
851
852 if (!error)
853 set_bit(GIF_QD_LOCKED, &ip->i_flags);
854 else {
855 while (x--)
856 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
857 gfs2_quota_unhold(ip);
858 }
859
860 return error;
861}
862
863static int need_sync(struct gfs2_quota_data *qd)
864{
865 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
866 struct gfs2_tune *gt = &sdp->sd_tune;
867 int64_t value;
868 unsigned int num, den;
869 int do_sync = 1;
870
871 if (!qd->qd_qb.qb_limit)
872 return 0;
873
874 spin_lock(&sdp->sd_quota_spin);
875 value = qd->qd_change;
876 spin_unlock(&sdp->sd_quota_spin);
877
878 spin_lock(&gt->gt_spin);
879 num = gt->gt_quota_scale_num;
880 den = gt->gt_quota_scale_den;
881 spin_unlock(&gt->gt_spin);
882
883 if (value < 0)
884 do_sync = 0;
885 else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
886 do_sync = 0;
887 else {
888 value *= gfs2_jindex_size(sdp) * num;
889 do_div(value, den);
890 value += qd->qd_qb.qb_value;
891 if (value < (int64_t)qd->qd_qb.qb_limit)
892 do_sync = 0;
893 }
894
895 return do_sync;
896}
897
898void gfs2_quota_unlock(struct gfs2_inode *ip)
899{
900 struct gfs2_alloc *al = &ip->i_alloc;
901 struct gfs2_quota_data *qda[4];
902 unsigned int count = 0;
903 unsigned int x;
904
905 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
906 goto out;
907
908 for (x = 0; x < al->al_qd_num; x++) {
909 struct gfs2_quota_data *qd;
910 int sync;
911
912 qd = al->al_qd[x];
913 sync = need_sync(qd);
914
915 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
916
917 if (sync && qd_trylock(qd))
918 qda[count++] = qd;
919 }
920
921 if (count) {
922 do_sync(count, qda);
923 for (x = 0; x < count; x++)
924 qd_unlock(qda[x]);
925 }
926
927 out:
928 gfs2_quota_unhold(ip);
929}
930
931#define MAX_LINE 256
932
933static int print_message(struct gfs2_quota_data *qd, char *type)
934{
935 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
936 char *line;
937 int len;
938
939 line = kmalloc(MAX_LINE, GFP_KERNEL);
940 if (!line)
941 return -ENOMEM;
942
943 len = snprintf(line, MAX_LINE-1,
944 "GFS2: fsid=%s: quota %s for %s %u\r\n",
945 sdp->sd_fsname, type,
946 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
947 qd->qd_id);
948 line[MAX_LINE-1] = 0;
949
950 if (current->signal) { /* Is this test still required? */
951 tty_write_message(current->signal->tty, line);
952 }
953
954 kfree(line);
955
956 return 0;
957}
958
959int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid)
960{
961 struct gfs2_sbd *sdp = ip->i_sbd;
962 struct gfs2_alloc *al = &ip->i_alloc;
963 struct gfs2_quota_data *qd;
964 int64_t value;
965 unsigned int x;
966 int error = 0;
967
968 if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
969 return 0;
970
971 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
972 return 0;
973
974 for (x = 0; x < al->al_qd_num; x++) {
975 qd = al->al_qd[x];
976
977 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
978 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
979 continue;
980
981 value = qd->qd_qb.qb_value;
982 spin_lock(&sdp->sd_quota_spin);
983 value += qd->qd_change;
984 spin_unlock(&sdp->sd_quota_spin);
985
986 if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
987 print_message(qd, "exceeded");
988 error = -EDQUOT;
989 break;
990 } else if (qd->qd_qb.qb_warn &&
991 (int64_t)qd->qd_qb.qb_warn < value &&
992 time_after_eq(jiffies, qd->qd_last_warn +
993 gfs2_tune_get(sdp,
994 gt_quota_warn_period) * HZ)) {
995 error = print_message(qd, "warning");
996 qd->qd_last_warn = jiffies;
997 }
998 }
999
1000 return error;
1001}
1002
1003void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
1004 uint32_t uid, uint32_t gid)
1005{
1006 struct gfs2_alloc *al = &ip->i_alloc;
1007 struct gfs2_quota_data *qd;
1008 unsigned int x;
1009 unsigned int found = 0;
1010
1011 if (gfs2_assert_warn(ip->i_sbd, change))
1012 return;
1013 if (ip->i_di.di_flags & GFS2_DIF_SYSTEM)
1014 return;
1015
1016 for (x = 0; x < al->al_qd_num; x++) {
1017 qd = al->al_qd[x];
1018
1019 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1020 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
1021 do_qc(qd, change);
1022 found++;
1023 }
1024 }
1025}
1026
1027int gfs2_quota_sync(struct gfs2_sbd *sdp)
1028{
1029 struct gfs2_quota_data **qda;
1030 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
1031 unsigned int num_qd;
1032 unsigned int x;
1033 int error = 0;
1034
1035 sdp->sd_quota_sync_gen++;
1036
1037 qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
1038 if (!qda)
1039 return -ENOMEM;
1040
1041 do {
1042 num_qd = 0;
1043
1044 for (;;) {
1045 error = qd_fish(sdp, qda + num_qd);
1046 if (error || !qda[num_qd])
1047 break;
1048 if (++num_qd == max_qd)
1049 break;
1050 }
1051
1052 if (num_qd) {
1053 if (!error)
1054 error = do_sync(num_qd, qda);
1055 if (!error)
1056 for (x = 0; x < num_qd; x++)
1057 qda[x]->qd_sync_gen =
1058 sdp->sd_quota_sync_gen;
1059
1060 for (x = 0; x < num_qd; x++)
1061 qd_unlock(qda[x]);
1062 }
1063 } while (!error && num_qd == max_qd);
1064
1065 kfree(qda);
1066
1067 return error;
1068}
1069
1070int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id)
1071{
1072 struct gfs2_quota_data *qd;
1073 struct gfs2_holder q_gh;
1074 int error;
1075
1076 error = qd_get(sdp, user, id, CREATE, &qd);
1077 if (error)
1078 return error;
1079
1080 error = do_glock(qd, FORCE, &q_gh);
1081 if (!error)
1082 gfs2_glock_dq_uninit(&q_gh);
1083
1084 qd_put(qd);
1085
1086 return error;
1087}
1088
#if 0
/**
 * gfs2_quota_read - read the current values of a quota (compiled out)
 * @sdp: the filesystem
 * @user: non-zero for a user quota, zero for a group quota
 * @id: the user or group id
 * @q: filled in with limit, warn level and value (including the local
 *     change not yet synced)
 *
 * Returns: 0 on success, -EACCES if the caller may not read this quota,
 *          other errno on failure
 */

int gfs2_quota_read(struct gfs2_sbd *sdp, int user, uint32_t id,
		    struct gfs2_quota *q)
{
	struct gfs2_quota_data *qd;
	struct gfs2_holder q_gh;
	int foreign;
	int error;

	/* Someone else's quota may only be read with CAP_SYS_ADMIN */
	foreign = (user) ? (id != current->fsuid) : (!in_group_p(id));
	if (foreign && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	error = qd_get(sdp, user, id, CREATE, &qd);
	if (error)
		return error;

	error = do_glock(qd, NO_FORCE, &q_gh);
	if (!error) {
		memset(q, 0, sizeof(struct gfs2_quota));
		q->qu_limit = qd->qd_qb.qb_limit;
		q->qu_warn = qd->qd_qb.qb_warn;
		q->qu_value = qd->qd_qb.qb_value;

		spin_lock(&sdp->sd_quota_spin);
		q->qu_value += qd->qd_change;
		spin_unlock(&sdp->sd_quota_spin);

		gfs2_glock_dq_uninit(&q_gh);
	}

	qd_put(qd);

	return error;
}
#endif  /*  0  */
1126
1127int gfs2_quota_init(struct gfs2_sbd *sdp)
1128{
1129 struct gfs2_inode *ip = sdp->sd_qc_inode->u.generic_ip;
1130 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
1131 unsigned int x, slot = 0;
1132 unsigned int found = 0;
1133 uint64_t dblock;
1134 uint32_t extlen = 0;
1135 int error;
1136
1137 if (!ip->i_di.di_size ||
1138 ip->i_di.di_size > (64 << 20) ||
1139 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
1140 gfs2_consist_inode(ip);
1141 return -EIO;
1142 }
1143 sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block;
1144 sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE);
1145
1146 error = -ENOMEM;
1147
1148 sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks,
1149 sizeof(unsigned char *), GFP_KERNEL);
1150 if (!sdp->sd_quota_bitmap)
1151 return error;
1152
1153 for (x = 0; x < sdp->sd_quota_chunks; x++) {
1154 sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
1155 if (!sdp->sd_quota_bitmap[x])
1156 goto fail;
1157 }
1158
1159 for (x = 0; x < blocks; x++) {
1160 struct buffer_head *bh;
1161 unsigned int y;
1162
1163 if (!extlen) {
1164 int new = 0;
1165 error = gfs2_extent_map(ip->i_vnode, x, &new, &dblock, &extlen);
1166 if (error)
1167 goto fail;
1168 }
1169 gfs2_meta_ra(ip->i_gl, dblock, extlen);
1170 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
1171 &bh);
1172 if (error)
1173 goto fail;
1174 error = -EIO;
1175 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) {
1176 brelse(bh);
1177 goto fail;
1178 }
1179
1180 for (y = 0;
1181 y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots;
1182 y++, slot++) {
1183 struct gfs2_quota_change qc;
1184 struct gfs2_quota_data *qd;
1185
1186 gfs2_quota_change_in(&qc, bh->b_data +
1187 sizeof(struct gfs2_meta_header) +
1188 y * sizeof(struct gfs2_quota_change));
1189 if (!qc.qc_change)
1190 continue;
1191
1192 error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER),
1193 qc.qc_id, &qd);
1194 if (error) {
1195 brelse(bh);
1196 goto fail;
1197 }
1198
1199 set_bit(QDF_CHANGE, &qd->qd_flags);
1200 qd->qd_change = qc.qc_change;
1201 qd->qd_slot = slot;
1202 qd->qd_slot_count = 1;
1203 qd->qd_last_touched = jiffies;
1204
1205 spin_lock(&sdp->sd_quota_spin);
1206 gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1);
1207 list_add(&qd->qd_list, &sdp->sd_quota_list);
1208 atomic_inc(&sdp->sd_quota_count);
1209 spin_unlock(&sdp->sd_quota_spin);
1210
1211 found++;
1212 }
1213
1214 brelse(bh);
1215 dblock++;
1216 extlen--;
1217 }
1218
1219 if (found)
1220 fs_info(sdp, "found %u quota changes\n", found);
1221
1222 return 0;
1223
1224 fail:
1225 gfs2_quota_cleanup(sdp);
1226 return error;
1227}
1228
1229void gfs2_quota_scan(struct gfs2_sbd *sdp)
1230{
1231 struct gfs2_quota_data *qd, *safe;
1232 LIST_HEAD(dead);
1233
1234 spin_lock(&sdp->sd_quota_spin);
1235 list_for_each_entry_safe(qd, safe, &sdp->sd_quota_list, qd_list) {
1236 if (!qd->qd_count &&
1237 time_after_eq(jiffies, qd->qd_last_touched +
1238 gfs2_tune_get(sdp, gt_quota_cache_secs) * HZ)) {
1239 list_move(&qd->qd_list, &dead);
1240 gfs2_assert_warn(sdp,
1241 atomic_read(&sdp->sd_quota_count) > 0);
1242 atomic_dec(&sdp->sd_quota_count);
1243 }
1244 }
1245 spin_unlock(&sdp->sd_quota_spin);
1246
1247 while (!list_empty(&dead)) {
1248 qd = list_entry(dead.next, struct gfs2_quota_data, qd_list);
1249 list_del(&qd->qd_list);
1250
1251 gfs2_assert_warn(sdp, !qd->qd_change);
1252 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1253 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1254
1255 gfs2_lvb_unhold(qd->qd_gl);
1256 kfree(qd);
1257 }
1258}
1259
1260void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
1261{
1262 struct list_head *head = &sdp->sd_quota_list;
1263 struct gfs2_quota_data *qd;
1264 unsigned int x;
1265
1266 spin_lock(&sdp->sd_quota_spin);
1267 while (!list_empty(head)) {
1268 qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
1269
1270 if (qd->qd_count > 1 ||
1271 (qd->qd_count && !test_bit(QDF_CHANGE, &qd->qd_flags))) {
1272 list_move(&qd->qd_list, head);
1273 spin_unlock(&sdp->sd_quota_spin);
1274 schedule();
1275 spin_lock(&sdp->sd_quota_spin);
1276 continue;
1277 }
1278
1279 list_del(&qd->qd_list);
1280 atomic_dec(&sdp->sd_quota_count);
1281 spin_unlock(&sdp->sd_quota_spin);
1282
1283 if (!qd->qd_count) {
1284 gfs2_assert_warn(sdp, !qd->qd_change);
1285 gfs2_assert_warn(sdp, !qd->qd_slot_count);
1286 } else
1287 gfs2_assert_warn(sdp, qd->qd_slot_count == 1);
1288 gfs2_assert_warn(sdp, !qd->qd_bh_count);
1289
1290 gfs2_lvb_unhold(qd->qd_gl);
1291 kfree(qd);
1292
1293 spin_lock(&sdp->sd_quota_spin);
1294 }
1295 spin_unlock(&sdp->sd_quota_spin);
1296
1297 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
1298
1299 if (sdp->sd_quota_bitmap) {
1300 for (x = 0; x < sdp->sd_quota_chunks; x++)
1301 kfree(sdp->sd_quota_bitmap[x]);
1302 kfree(sdp->sd_quota_bitmap);
1303 }
1304}
1305
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
new file mode 100644
index 000000000000..af05492f9644
--- /dev/null
+++ b/fs/gfs2/quota.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __QUOTA_DOT_H__
11#define __QUOTA_DOT_H__
12
13#define NO_QUOTA_CHANGE ((uint32_t)-1)
14
15int gfs2_quota_hold(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
16void gfs2_quota_unhold(struct gfs2_inode *ip);
17
18int gfs2_quota_lock(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
19void gfs2_quota_unlock(struct gfs2_inode *ip);
20
21int gfs2_quota_check(struct gfs2_inode *ip, uint32_t uid, uint32_t gid);
22void gfs2_quota_change(struct gfs2_inode *ip, int64_t change,
23 uint32_t uid, uint32_t gid);
24
25int gfs2_quota_sync(struct gfs2_sbd *sdp);
26int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, uint32_t id);
27
28int gfs2_quota_init(struct gfs2_sbd *sdp);
29void gfs2_quota_scan(struct gfs2_sbd *sdp);
30void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
31
32#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
new file mode 100644
index 000000000000..c504ac1b831d
--- /dev/null
+++ b/fs/gfs2/recovery.c
@@ -0,0 +1,576 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/crc32.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "bmap.h"
22#include "glock.h"
23#include "glops.h"
24#include "lm.h"
25#include "lops.h"
26#include "meta_io.h"
27#include "recovery.h"
28#include "super.h"
29#include "util.h"
30#include "dir.h"
31
32int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
33 struct buffer_head **bh)
34{
35 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
36 struct gfs2_glock *gl = ip->i_gl;
37 int new = 0;
38 uint64_t dblock;
39 uint32_t extlen;
40 int error;
41
42 error = gfs2_extent_map(ip->i_vnode, blk, &new, &dblock, &extlen);
43 if (error)
44 return error;
45 if (!dblock) {
46 gfs2_consist_inode(ip);
47 return -EIO;
48 }
49
50 gfs2_meta_ra(gl, dblock, extlen);
51 error = gfs2_meta_read(gl, dblock, DIO_START | DIO_WAIT, bh);
52
53 return error;
54}
55
56int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
57{
58 struct list_head *head = &sdp->sd_revoke_list;
59 struct gfs2_revoke_replay *rr;
60 int found = 0;
61
62 list_for_each_entry(rr, head, rr_list) {
63 if (rr->rr_blkno == blkno) {
64 found = 1;
65 break;
66 }
67 }
68
69 if (found) {
70 rr->rr_where = where;
71 return 0;
72 }
73
74 rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL);
75 if (!rr)
76 return -ENOMEM;
77
78 rr->rr_blkno = blkno;
79 rr->rr_where = where;
80 list_add(&rr->rr_list, head);
81
82 return 1;
83}
84
85int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
86{
87 struct gfs2_revoke_replay *rr;
88 int wrap, a, b, revoke;
89 int found = 0;
90
91 list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
92 if (rr->rr_blkno == blkno) {
93 found = 1;
94 break;
95 }
96 }
97
98 if (!found)
99 return 0;
100
101 wrap = (rr->rr_where < sdp->sd_replay_tail);
102 a = (sdp->sd_replay_tail < where);
103 b = (where < rr->rr_where);
104 revoke = (wrap) ? (a || b) : (a && b);
105
106 return revoke;
107}
108
109void gfs2_revoke_clean(struct gfs2_sbd *sdp)
110{
111 struct list_head *head = &sdp->sd_revoke_list;
112 struct gfs2_revoke_replay *rr;
113
114 while (!list_empty(head)) {
115 rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
116 list_del(&rr->rr_list);
117 kfree(rr);
118 }
119}
120
121/**
122 * get_log_header - read the log header for a given segment
123 * @jd: the journal
124 * @blk: the block to look at
125 * @lh: the log header to return
126 *
127 * Read the log header for a given segement in a given journal. Do a few
128 * sanity checks on it.
129 *
130 * Returns: 0 on success,
131 * 1 if the header was invalid or incomplete,
132 * errno on error
133 */
134
135static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
136 struct gfs2_log_header *head)
137{
138 struct buffer_head *bh;
139 struct gfs2_log_header lh;
140 uint32_t hash;
141 int error;
142
143 error = gfs2_replay_read_block(jd, blk, &bh);
144 if (error)
145 return error;
146
147 memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header));
148 lh.lh_hash = 0;
149 hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header));
150 gfs2_log_header_in(&lh, bh->b_data);
151
152 brelse(bh);
153
154 if (lh.lh_header.mh_magic != GFS2_MAGIC ||
155 lh.lh_header.mh_type != GFS2_METATYPE_LH ||
156 lh.lh_blkno != blk ||
157 lh.lh_hash != hash)
158 return 1;
159
160 *head = lh;
161
162 return 0;
163}
164
165/**
166 * find_good_lh - find a good log header
167 * @jd: the journal
168 * @blk: the segment to start searching from
169 * @lh: the log header to fill in
170 * @forward: if true search forward in the log, else search backward
171 *
172 * Call get_log_header() to get a log header for a segment, but if the
173 * segment is bad, either scan forward or backward until we find a good one.
174 *
175 * Returns: errno
176 */
177
178static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
179 struct gfs2_log_header *head)
180{
181 unsigned int orig_blk = *blk;
182 int error;
183
184 for (;;) {
185 error = get_log_header(jd, *blk, head);
186 if (error <= 0)
187 return error;
188
189 if (++*blk == jd->jd_blocks)
190 *blk = 0;
191
192 if (*blk == orig_blk) {
193 gfs2_consist_inode(jd->jd_inode->u.generic_ip);
194 return -EIO;
195 }
196 }
197}
198
199/**
200 * jhead_scan - make sure we've found the head of the log
201 * @jd: the journal
202 * @head: this is filled in with the log descriptor of the head
203 *
204 * At this point, seg and lh should be either the head of the log or just
205 * before. Scan forward until we find the head.
206 *
207 * Returns: errno
208 */
209
210static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
211{
212 unsigned int blk = head->lh_blkno;
213 struct gfs2_log_header lh;
214 int error;
215
216 for (;;) {
217 if (++blk == jd->jd_blocks)
218 blk = 0;
219
220 error = get_log_header(jd, blk, &lh);
221 if (error < 0)
222 return error;
223 if (error == 1)
224 continue;
225
226 if (lh.lh_sequence == head->lh_sequence) {
227 gfs2_consist_inode(jd->jd_inode->u.generic_ip);
228 return -EIO;
229 }
230 if (lh.lh_sequence < head->lh_sequence)
231 break;
232
233 *head = lh;
234 }
235
236 return 0;
237}
238
239/**
240 * gfs2_find_jhead - find the head of a log
241 * @jd: the journal
242 * @head: the log descriptor for the head of the log is returned here
243 *
244 * Do a binary search of a journal and find the valid log entry with the
245 * highest sequence number. (i.e. the log head)
246 *
247 * Returns: errno
248 */
249
250int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
251{
252 struct gfs2_log_header lh_1, lh_m;
253 uint32_t blk_1, blk_2, blk_m;
254 int error;
255
256 blk_1 = 0;
257 blk_2 = jd->jd_blocks - 1;
258
259 for (;;) {
260 blk_m = (blk_1 + blk_2) / 2;
261
262 error = find_good_lh(jd, &blk_1, &lh_1);
263 if (error)
264 return error;
265
266 error = find_good_lh(jd, &blk_m, &lh_m);
267 if (error)
268 return error;
269
270 if (blk_1 == blk_m || blk_m == blk_2)
271 break;
272
273 if (lh_1.lh_sequence <= lh_m.lh_sequence)
274 blk_1 = blk_m;
275 else
276 blk_2 = blk_m;
277 }
278
279 error = jhead_scan(jd, &lh_1);
280 if (error)
281 return error;
282
283 *head = lh_1;
284
285 return error;
286}
287
288/**
289 * foreach_descriptor - go through the active part of the log
290 * @jd: the journal
291 * @start: the first log header in the active region
292 * @end: the last log header (don't process the contents of this entry))
293 *
294 * Call a given function once for every log descriptor in the active
295 * portion of the log.
296 *
297 * Returns: errno
298 */
299
300static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
301 unsigned int end, int pass)
302{
303 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
304 struct gfs2_sbd *sdp = ip->i_sbd;
305 struct buffer_head *bh;
306 struct gfs2_log_descriptor *ld;
307 int error = 0;
308 u32 length;
309 __be64 *ptr;
310 unsigned int offset = sizeof(struct gfs2_log_descriptor);
311 offset += (sizeof(__be64)-1);
312 offset &= ~(sizeof(__be64)-1);
313
314 while (start != end) {
315 error = gfs2_replay_read_block(jd, start, &bh);
316 if (error)
317 return error;
318 if (gfs2_meta_check(sdp, bh)) {
319 brelse(bh);
320 return -EIO;
321 }
322 ld = (struct gfs2_log_descriptor *)bh->b_data;
323 length = be32_to_cpu(ld->ld_length);
324
325 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
326 struct gfs2_log_header lh;
327 error = get_log_header(jd, start, &lh);
328 if (!error) {
329 gfs2_replay_incr_blk(sdp, &start);
330 continue;
331 }
332 if (error == 1) {
333 gfs2_consist_inode(jd->jd_inode->u.generic_ip);
334 error = -EIO;
335 }
336 brelse(bh);
337 return error;
338 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
339 brelse(bh);
340 return -EIO;
341 }
342 ptr = (__be64 *)(bh->b_data + offset);
343 error = lops_scan_elements(jd, start, ld, ptr, pass);
344 if (error) {
345 brelse(bh);
346 return error;
347 }
348
349 while (length--)
350 gfs2_replay_incr_blk(sdp, &start);
351
352 brelse(bh);
353 }
354
355 return 0;
356}
357
358/**
359 * clean_journal - mark a dirty journal as being clean
360 * @sdp: the filesystem
361 * @jd: the journal
362 * @gl: the journal's glock
363 * @head: the head journal to start from
364 *
365 * Returns: errno
366 */
367
368static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head)
369{
370 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
371 struct gfs2_sbd *sdp = ip->i_sbd;
372 unsigned int lblock;
373 int new = 0;
374 uint64_t dblock;
375 struct gfs2_log_header *lh;
376 uint32_t hash;
377 struct buffer_head *bh;
378 int error;
379 int boundary;
380
381 lblock = head->lh_blkno;
382 gfs2_replay_incr_blk(sdp, &lblock);
383 error = gfs2_block_map(ip->i_vnode, lblock, &new, &dblock, &boundary);
384 if (error)
385 return error;
386 if (!dblock) {
387 gfs2_consist_inode(ip);
388 return -EIO;
389 }
390
391 bh = sb_getblk(sdp->sd_vfs, dblock);
392 lock_buffer(bh);
393 memset(bh->b_data, 0, bh->b_size);
394 set_buffer_uptodate(bh);
395 clear_buffer_dirty(bh);
396 unlock_buffer(bh);
397
398 lh = (struct gfs2_log_header *)bh->b_data;
399 memset(lh, 0, sizeof(struct gfs2_log_header));
400 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
401 lh->lh_header.mh_type = cpu_to_be16(GFS2_METATYPE_LH);
402 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
403 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
404 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
405 lh->lh_blkno = cpu_to_be32(lblock);
406 hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
407 lh->lh_hash = cpu_to_be32(hash);
408
409 set_buffer_dirty(bh);
410 if (sync_dirty_buffer(bh))
411 gfs2_io_error_bh(sdp, bh);
412 brelse(bh);
413
414 return error;
415}
416
417/**
418 * gfs2_recover_journal - recovery a given journal
419 * @jd: the struct gfs2_jdesc describing the journal
420 *
421 * Acquire the journal's lock, check to see if the journal is clean, and
422 * do recovery if necessary.
423 *
424 * Returns: errno
425 */
426
427int gfs2_recover_journal(struct gfs2_jdesc *jd)
428{
429 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
430 struct gfs2_sbd *sdp = ip->i_sbd;
431 struct gfs2_log_header head;
432 struct gfs2_holder j_gh, ji_gh, t_gh;
433 unsigned long t;
434 int ro = 0;
435 unsigned int pass;
436 int error;
437
438 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
439 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
440 jd->jd_jid);
441
442 /* Aquire the journal lock so we can do recovery */
443
444 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
445 LM_ST_EXCLUSIVE,
446 LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
447 &j_gh);
448 switch (error) {
449 case 0:
450 break;
451
452 case GLR_TRYFAILED:
453 fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
454 error = 0;
455
456 default:
457 goto fail;
458 };
459
460 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
461 LM_FLAG_NOEXP, &ji_gh);
462 if (error)
463 goto fail_gunlock_j;
464 } else {
465 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
466 }
467
468 fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
469
470 error = gfs2_jdesc_check(jd);
471 if (error)
472 goto fail_gunlock_ji;
473
474 error = gfs2_find_jhead(jd, &head);
475 if (error)
476 goto fail_gunlock_ji;
477
478 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
479 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
480 jd->jd_jid);
481
482 t = jiffies;
483
484 /* Acquire a shared hold on the transaction lock */
485
486 error = gfs2_glock_nq_init(sdp->sd_trans_gl,
487 LM_ST_SHARED,
488 LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
489 GL_NOCANCEL | GL_NOCACHE,
490 &t_gh);
491 if (error)
492 goto fail_gunlock_ji;
493
494 if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
495 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
496 ro = 1;
497 } else {
498 if (sdp->sd_vfs->s_flags & MS_RDONLY)
499 ro = 1;
500 }
501
502 if (ro) {
503 fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
504 jd->jd_jid);
505 error = -EROFS;
506 goto fail_gunlock_tr;
507 }
508
509 fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
510
511 for (pass = 0; pass < 2; pass++) {
512 lops_before_scan(jd, &head, pass);
513 error = foreach_descriptor(jd, head.lh_tail,
514 head.lh_blkno, pass);
515 lops_after_scan(jd, error, pass);
516 if (error)
517 goto fail_gunlock_tr;
518 }
519
520 error = clean_journal(jd, &head);
521 if (error)
522 goto fail_gunlock_tr;
523
524 gfs2_glock_dq_uninit(&t_gh);
525 t = DIV_ROUND_UP(jiffies - t, HZ);
526 fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
527 jd->jd_jid, t);
528 }
529
530 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
531 gfs2_glock_dq_uninit(&ji_gh);
532
533 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
534
535 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
536 gfs2_glock_dq_uninit(&j_gh);
537
538 fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
539 return 0;
540
541fail_gunlock_tr:
542 gfs2_glock_dq_uninit(&t_gh);
543fail_gunlock_ji:
544 if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
545 gfs2_glock_dq_uninit(&ji_gh);
546fail_gunlock_j:
547 gfs2_glock_dq_uninit(&j_gh);
548 }
549
550 fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
551
552fail:
553 gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
554 return error;
555}
556
557/**
558 * gfs2_check_journals - Recover any dirty journals
559 * @sdp: the filesystem
560 *
561 */
562
563void gfs2_check_journals(struct gfs2_sbd *sdp)
564{
565 struct gfs2_jdesc *jd;
566
567 for (;;) {
568 jd = gfs2_jdesc_find_dirty(sdp);
569 if (!jd)
570 break;
571
572 if (jd != sdp->sd_jdesc)
573 gfs2_recover_journal(jd);
574 }
575}
576
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
new file mode 100644
index 000000000000..ac0f1d6ce456
--- /dev/null
+++ b/fs/gfs2/recovery.h
@@ -0,0 +1,32 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RECOVERY_DOT_H__
11#define __RECOVERY_DOT_H__
12
13static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
14{
15 if (++*blk == sdp->sd_jdesc->jd_blocks)
16 *blk = 0;
17}
18
19int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
20 struct buffer_head **bh);
21
22int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
23int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
24void gfs2_revoke_clean(struct gfs2_sbd *sdp);
25
26int gfs2_find_jhead(struct gfs2_jdesc *jd,
27 struct gfs2_log_header *head);
28int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
29void gfs2_check_journals(struct gfs2_sbd *sdp);
30
31#endif /* __RECOVERY_DOT_H__ */
32
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
new file mode 100644
index 000000000000..691e6f3ce43b
--- /dev/null
+++ b/fs/gfs2/rgrp.c
@@ -0,0 +1,1524 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/fs.h>
16#include <linux/gfs2_ondisk.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "glock.h"
22#include "glops.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "quota.h"
26#include "rgrp.h"
27#include "super.h"
28#include "trans.h"
29#include "ops_file.h"
30#include "util.h"
31
32#define BFITNOENT 0xFFFFFFFF
33
34/*
35 * These routines are used by the resource group routines (rgrp.c)
36 * to keep track of block allocation. Each block is represented by two
37 * bits. One bit indicates whether or not the block is used. (1=used,
38 * 0=free) The other bit indicates whether or not the block contains a
39 * dinode or not. (1=dinode, 0=not-dinode) So, each byte represents
40 * GFS2_NBBY (i.e. 4) blocks.
41 */
42
/*
 * Permitted bitmap state transitions, looked up by gfs2_setbit() as
 * valid_change[new_state * 4 + cur_state]: one row per new state, one
 * column per current state.  A non-zero entry means the change is allowed.
 */
static const char valid_change[16] = {
	/* current:  0  1  2  3 */
	/* new 0 */  0, 1, 0, 1,
	/* new 1 */  1, 0, 0, 0,
	/* new 2 */  0, 0, 0, 0,
	/* new 3 */  1, 0, 0, 0
};
50
51/**
52 * gfs2_setbit - Set a bit in the bitmaps
53 * @buffer: the buffer that holds the bitmaps
54 * @buflen: the length (in bytes) of the buffer
55 * @block: the block to set
56 * @new_state: the new state of the block
57 *
58 */
59
60static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
61 unsigned int buflen, uint32_t block,
62 unsigned char new_state)
63{
64 unsigned char *byte, *end, cur_state;
65 unsigned int bit;
66
67 byte = buffer + (block / GFS2_NBBY);
68 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
69 end = buffer + buflen;
70
71 gfs2_assert(rgd->rd_sbd, byte < end);
72
73 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
74
75 if (valid_change[new_state * 4 + cur_state]) {
76 *byte ^= cur_state << bit;
77 *byte |= new_state << bit;
78 } else
79 gfs2_consist_rgrpd(rgd);
80}
81
82/**
83 * gfs2_testbit - test a bit in the bitmaps
84 * @buffer: the buffer that holds the bitmaps
85 * @buflen: the length (in bytes) of the buffer
86 * @block: the block to read
87 *
88 */
89
90static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
91 unsigned int buflen, uint32_t block)
92{
93 unsigned char *byte, *end, cur_state;
94 unsigned int bit;
95
96 byte = buffer + (block / GFS2_NBBY);
97 bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
98 end = buffer + buflen;
99
100 gfs2_assert(rgd->rd_sbd, byte < end);
101
102 cur_state = (*byte >> bit) & GFS2_BIT_MASK;
103
104 return cur_state;
105}
106
107/**
108 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
109 * a block in a given allocation state.
110 * @buffer: the buffer that holds the bitmaps
111 * @buflen: the length (in bytes) of the buffer
112 * @goal: start search at this block's bit-pair (within @buffer)
113 * @old_state: GFS2_BLKST_XXX the state of the block we're looking for;
114 * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0)
115 *
116 * Scope of @goal and returned block number is only within this bitmap buffer,
117 * not entire rgrp or filesystem. @buffer will be offset from the actual
118 * beginning of a bitmap block buffer, skipping any header structures.
119 *
120 * Return: the block number (bitmap buffer scope) that was found
121 */
122
123static uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer,
124 unsigned int buflen, uint32_t goal,
125 unsigned char old_state)
126{
127 unsigned char *byte, *end, alloc;
128 uint32_t blk = goal;
129 unsigned int bit;
130
131 byte = buffer + (goal / GFS2_NBBY);
132 bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
133 end = buffer + buflen;
134 alloc = (old_state & 1) ? 0 : 0x55;
135
136 while (byte < end) {
137 if ((*byte & 0x55) == alloc) {
138 blk += (8 - bit) >> 1;
139
140 bit = 0;
141 byte++;
142
143 continue;
144 }
145
146 if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
147 return blk;
148
149 bit += GFS2_BIT_SIZE;
150 if (bit >= 8) {
151 bit = 0;
152 byte++;
153 }
154
155 blk++;
156 }
157
158 return BFITNOENT;
159}
160
/**
 * gfs2_bitcount - count the number of bit-pairs in a certain state
 * @rgd: the resource group (not used by the count itself)
 * @buffer: the buffer that holds the bitmaps
 * @buflen: the length (in bytes) of the buffer
 * @state: the state of the block we're looking for
 *
 * Each byte holds four bit-pairs; every pair equal to @state adds one to
 * the result.
 *
 * Returns: The number of bit-pairs in @state
 */

static uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer,
			      unsigned int buflen, unsigned char state)
{
	static const unsigned char pair_mask[4] = { 0x03, 0x0C, 0x30, 0xC0 };
	unsigned char wanted[4];
	const unsigned char *cursor;
	const unsigned char *limit = buffer + buflen;
	uint32_t total = 0;
	unsigned int slot;

	/* @state pre-shifted into each of the four positions of a byte */
	for (slot = 0; slot < 4; slot++)
		wanted[slot] = state << (2 * slot);

	for (cursor = buffer; cursor < limit; cursor++)
		for (slot = 0; slot < 4; slot++)
			if ((*cursor & pair_mask[slot]) == wanted[slot])
				total++;

	return total;
}
193
194/**
195 * gfs2_rgrp_verify - Verify that a resource group is consistent
196 * @sdp: the filesystem
197 * @rgd: the rgrp
198 *
199 */
200
201void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
202{
203 struct gfs2_sbd *sdp = rgd->rd_sbd;
204 struct gfs2_bitmap *bi = NULL;
205 uint32_t length = rgd->rd_ri.ri_length;
206 uint32_t count[4], tmp;
207 int buf, x;
208
209 memset(count, 0, 4 * sizeof(uint32_t));
210
211 /* Count # blocks in each of 4 possible allocation states */
212 for (buf = 0; buf < length; buf++) {
213 bi = rgd->rd_bits + buf;
214 for (x = 0; x < 4; x++)
215 count[x] += gfs2_bitcount(rgd,
216 bi->bi_bh->b_data +
217 bi->bi_offset,
218 bi->bi_len, x);
219 }
220
221 if (count[0] != rgd->rd_rg.rg_free) {
222 if (gfs2_consist_rgrpd(rgd))
223 fs_err(sdp, "free data mismatch: %u != %u\n",
224 count[0], rgd->rd_rg.rg_free);
225 return;
226 }
227
228 tmp = rgd->rd_ri.ri_data -
229 rgd->rd_rg.rg_free -
230 rgd->rd_rg.rg_dinodes;
231 if (count[1] != tmp) {
232 if (gfs2_consist_rgrpd(rgd))
233 fs_err(sdp, "used data mismatch: %u != %u\n",
234 count[1], tmp);
235 return;
236 }
237
238 if (count[2]) {
239 if (gfs2_consist_rgrpd(rgd))
240 fs_err(sdp, "free metadata mismatch: %u != 0\n",
241 count[2]);
242 return;
243 }
244
245 if (count[3] != rgd->rd_rg.rg_dinodes) {
246 if (gfs2_consist_rgrpd(rgd))
247 fs_err(sdp, "used metadata mismatch: %u != %u\n",
248 count[3], rgd->rd_rg.rg_dinodes);
249 return;
250 }
251}
252
253static inline int rgrp_contains_block(struct gfs2_rindex *ri, uint64_t block)
254{
255 uint64_t first = ri->ri_data0;
256 uint64_t last = first + ri->ri_data;
257 return !!(first <= block && block < last);
258}
259
260/**
261 * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
262 * @sdp: The GFS2 superblock
263 * @n: The data block number
264 *
265 * Returns: The resource group, or NULL if not found
266 */
267
268struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk)
269{
270 struct gfs2_rgrpd *rgd;
271
272 spin_lock(&sdp->sd_rindex_spin);
273
274 list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
275 if (rgrp_contains_block(&rgd->rd_ri, blk)) {
276 list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
277 spin_unlock(&sdp->sd_rindex_spin);
278 return rgd;
279 }
280 }
281
282 spin_unlock(&sdp->sd_rindex_spin);
283
284 return NULL;
285}
286
287/**
288 * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem
289 * @sdp: The GFS2 superblock
290 *
291 * Returns: The first rgrp in the filesystem
292 */
293
294struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
295{
296 gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
297 return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
298}
299
300/**
301 * gfs2_rgrpd_get_next - get the next RG
302 * @rgd: A RG
303 *
304 * Returns: The next rgrp
305 */
306
307struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
308{
309 if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
310 return NULL;
311 return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
312}
313
314static void clear_rgrpdi(struct gfs2_sbd *sdp)
315{
316 struct list_head *head;
317 struct gfs2_rgrpd *rgd;
318 struct gfs2_glock *gl;
319
320 spin_lock(&sdp->sd_rindex_spin);
321 sdp->sd_rindex_forward = NULL;
322 head = &sdp->sd_rindex_recent_list;
323 while (!list_empty(head)) {
324 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
325 list_del(&rgd->rd_recent);
326 }
327 spin_unlock(&sdp->sd_rindex_spin);
328
329 head = &sdp->sd_rindex_list;
330 while (!list_empty(head)) {
331 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
332 gl = rgd->rd_gl;
333
334 list_del(&rgd->rd_list);
335 list_del(&rgd->rd_list_mru);
336
337 if (gl) {
338 gl->gl_object = NULL;
339 gfs2_glock_put(gl);
340 }
341
342 kfree(rgd->rd_bits);
343 kfree(rgd);
344 }
345}
346
347void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
348{
349 mutex_lock(&sdp->sd_rindex_mutex);
350 clear_rgrpdi(sdp);
351 mutex_unlock(&sdp->sd_rindex_mutex);
352}
353
354/**
355 * gfs2_compute_bitstructs - Compute the bitmap sizes
356 * @rgd: The resource group descriptor
357 *
358 * Calculates bitmap descriptors, one for each block that contains bitmap data
359 *
360 * Returns: errno
361 */
362
363static int compute_bitstructs(struct gfs2_rgrpd *rgd)
364{
365 struct gfs2_sbd *sdp = rgd->rd_sbd;
366 struct gfs2_bitmap *bi;
367 uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
368 uint32_t bytes_left, bytes;
369 int x;
370
371 rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_KERNEL);
372 if (!rgd->rd_bits)
373 return -ENOMEM;
374
375 bytes_left = rgd->rd_ri.ri_bitbytes;
376
377 for (x = 0; x < length; x++) {
378 bi = rgd->rd_bits + x;
379
380 /* small rgrp; bitmap stored completely in header block */
381 if (length == 1) {
382 bytes = bytes_left;
383 bi->bi_offset = sizeof(struct gfs2_rgrp);
384 bi->bi_start = 0;
385 bi->bi_len = bytes;
386 /* header block */
387 } else if (x == 0) {
388 bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
389 bi->bi_offset = sizeof(struct gfs2_rgrp);
390 bi->bi_start = 0;
391 bi->bi_len = bytes;
392 /* last block */
393 } else if (x + 1 == length) {
394 bytes = bytes_left;
395 bi->bi_offset = sizeof(struct gfs2_meta_header);
396 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
397 bi->bi_len = bytes;
398 /* other blocks */
399 } else {
400 bytes = sdp->sd_sb.sb_bsize -
401 sizeof(struct gfs2_meta_header);
402 bi->bi_offset = sizeof(struct gfs2_meta_header);
403 bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
404 bi->bi_len = bytes;
405 }
406
407 bytes_left -= bytes;
408 }
409
410 if (bytes_left) {
411 gfs2_consist_rgrpd(rgd);
412 return -EIO;
413 }
414 bi = rgd->rd_bits + (length - 1);
415 if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
416 if (gfs2_consist_rgrpd(rgd)) {
417 gfs2_rindex_print(&rgd->rd_ri);
418 fs_err(sdp, "start=%u len=%u offset=%u\n",
419 bi->bi_start, bi->bi_len, bi->bi_offset);
420 }
421 return -EIO;
422 }
423
424 return 0;
425}
426
427/**
428 * gfs2_ri_update - Pull in a new resource index from the disk
429 * @gl: The glock covering the rindex inode
430 *
431 * Returns: 0 on successful update, error code otherwise
432 */
433
434static int gfs2_ri_update(struct gfs2_inode *ip)
435{
436 struct gfs2_sbd *sdp = ip->i_sbd;
437 struct inode *inode = ip->i_vnode;
438 struct gfs2_rgrpd *rgd;
439 char buf[sizeof(struct gfs2_rindex)];
440 struct file_ra_state ra_state;
441 uint64_t junk = ip->i_di.di_size;
442 int error;
443
444 if (do_div(junk, sizeof(struct gfs2_rindex))) {
445 gfs2_consist_inode(ip);
446 return -EIO;
447 }
448
449 clear_rgrpdi(sdp);
450
451 file_ra_state_init(&ra_state, inode->i_mapping);
452 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
453 loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
454 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
455 sizeof(struct gfs2_rindex));
456 if (!error)
457 break;
458 if (error != sizeof(struct gfs2_rindex)) {
459 if (error > 0)
460 error = -EIO;
461 goto fail;
462 }
463
464 rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_KERNEL);
465 error = -ENOMEM;
466 if (!rgd)
467 goto fail;
468
469 mutex_init(&rgd->rd_mutex);
470 lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
471 rgd->rd_sbd = sdp;
472
473 list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
474 list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
475
476 gfs2_rindex_in(&rgd->rd_ri, buf);
477
478 error = compute_bitstructs(rgd);
479 if (error)
480 goto fail;
481
482 error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
483 &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
484 if (error)
485 goto fail;
486
487 rgd->rd_gl->gl_object = rgd;
488 rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
489 }
490
491 sdp->sd_rindex_vn = ip->i_gl->gl_vn;
492
493 return 0;
494
495 fail:
496 clear_rgrpdi(sdp);
497
498 return error;
499}
500
501/**
502 * gfs2_rindex_hold - Grab a lock on the rindex
503 * @sdp: The GFS2 superblock
504 * @ri_gh: the glock holder
505 *
506 * We grab a lock on the rindex inode to make sure that it doesn't
507 * change whilst we are performing an operation. We keep this lock
508 * for quite long periods of time compared to other locks. This
509 * doesn't matter, since it is shared and it is very, very rarely
510 * accessed in the exclusive mode (i.e. only when expanding the filesystem).
511 *
512 * This makes sure that we're using the latest copy of the resource index
513 * special file, which might have been updated if someone expanded the
514 * filesystem (via gfs2_grow utility), which adds new resource groups.
515 *
516 * Returns: 0 on success, error code otherwise
517 */
518
519int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
520{
521 struct gfs2_inode *ip = sdp->sd_rindex->u.generic_ip;
522 struct gfs2_glock *gl = ip->i_gl;
523 int error;
524
525 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
526 if (error)
527 return error;
528
529 /* Read new copy from disk if we don't have the latest */
530 if (sdp->sd_rindex_vn != gl->gl_vn) {
531 mutex_lock(&sdp->sd_rindex_mutex);
532 if (sdp->sd_rindex_vn != gl->gl_vn) {
533 error = gfs2_ri_update(ip);
534 if (error)
535 gfs2_glock_dq_uninit(ri_gh);
536 }
537 mutex_unlock(&sdp->sd_rindex_mutex);
538 }
539
540 return error;
541}
542
543/**
544 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
545 * @rgd: the struct gfs2_rgrpd describing the RG to read in
546 *
547 * Read in all of a Resource Group's header and bitmap blocks.
548 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
549 *
550 * Returns: errno
551 */
552
553int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
554{
555 struct gfs2_sbd *sdp = rgd->rd_sbd;
556 struct gfs2_glock *gl = rgd->rd_gl;
557 unsigned int length = rgd->rd_ri.ri_length;
558 struct gfs2_bitmap *bi;
559 unsigned int x, y;
560 int error;
561
562 mutex_lock(&rgd->rd_mutex);
563
564 spin_lock(&sdp->sd_rindex_spin);
565 if (rgd->rd_bh_count) {
566 rgd->rd_bh_count++;
567 spin_unlock(&sdp->sd_rindex_spin);
568 mutex_unlock(&rgd->rd_mutex);
569 return 0;
570 }
571 spin_unlock(&sdp->sd_rindex_spin);
572
573 for (x = 0; x < length; x++) {
574 bi = rgd->rd_bits + x;
575 error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, DIO_START,
576 &bi->bi_bh);
577 if (error)
578 goto fail;
579 }
580
581 for (y = length; y--;) {
582 bi = rgd->rd_bits + y;
583 error = gfs2_meta_reread(sdp, bi->bi_bh, DIO_WAIT);
584 if (error)
585 goto fail;
586 if (gfs2_metatype_check(sdp, bi->bi_bh,
587 (y) ? GFS2_METATYPE_RB :
588 GFS2_METATYPE_RG)) {
589 error = -EIO;
590 goto fail;
591 }
592 }
593
594 if (rgd->rd_rg_vn != gl->gl_vn) {
595 gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data);
596 rgd->rd_rg_vn = gl->gl_vn;
597 }
598
599 spin_lock(&sdp->sd_rindex_spin);
600 rgd->rd_free_clone = rgd->rd_rg.rg_free;
601 rgd->rd_bh_count++;
602 spin_unlock(&sdp->sd_rindex_spin);
603
604 mutex_unlock(&rgd->rd_mutex);
605
606 return 0;
607
608 fail:
609 while (x--) {
610 bi = rgd->rd_bits + x;
611 brelse(bi->bi_bh);
612 bi->bi_bh = NULL;
613 gfs2_assert_warn(sdp, !bi->bi_clone);
614 }
615 mutex_unlock(&rgd->rd_mutex);
616
617 return error;
618}
619
620void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
621{
622 struct gfs2_sbd *sdp = rgd->rd_sbd;
623
624 spin_lock(&sdp->sd_rindex_spin);
625 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
626 rgd->rd_bh_count++;
627 spin_unlock(&sdp->sd_rindex_spin);
628}
629
630/**
631 * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
632 * @rgd: the struct gfs2_rgrpd describing the RG to read in
633 *
634 */
635
636void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
637{
638 struct gfs2_sbd *sdp = rgd->rd_sbd;
639 int x, length = rgd->rd_ri.ri_length;
640
641 spin_lock(&sdp->sd_rindex_spin);
642 gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
643 if (--rgd->rd_bh_count) {
644 spin_unlock(&sdp->sd_rindex_spin);
645 return;
646 }
647
648 for (x = 0; x < length; x++) {
649 struct gfs2_bitmap *bi = rgd->rd_bits + x;
650 kfree(bi->bi_clone);
651 bi->bi_clone = NULL;
652 brelse(bi->bi_bh);
653 bi->bi_bh = NULL;
654 }
655
656 spin_unlock(&sdp->sd_rindex_spin);
657}
658
659void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
660{
661 struct gfs2_sbd *sdp = rgd->rd_sbd;
662 unsigned int length = rgd->rd_ri.ri_length;
663 unsigned int x;
664
665 for (x = 0; x < length; x++) {
666 struct gfs2_bitmap *bi = rgd->rd_bits + x;
667 if (!bi->bi_clone)
668 continue;
669 memcpy(bi->bi_clone + bi->bi_offset,
670 bi->bi_bh->b_data + bi->bi_offset,
671 bi->bi_len);
672 }
673
674 spin_lock(&sdp->sd_rindex_spin);
675 rgd->rd_free_clone = rgd->rd_rg.rg_free;
676 spin_unlock(&sdp->sd_rindex_spin);
677}
678
679/**
680 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
681 * @ip: the incore GFS2 inode structure
682 *
683 * Returns: the struct gfs2_alloc
684 */
685
686struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
687{
688 struct gfs2_alloc *al = &ip->i_alloc;
689
690 /* FIXME: Should assert that the correct locks are held here... */
691 memset(al, 0, sizeof(*al));
692 return al;
693}
694
/**
 * gfs2_alloc_put - throw away the struct gfs2_alloc for an inode
 * @ip: the inode
 *
 * Currently does nothing.
 */

void gfs2_alloc_put(struct gfs2_inode *ip)
{
}
705
706/**
707 * try_rgrp_fit - See if a given reservation will fit in a given RG
708 * @rgd: the RG data
709 * @al: the struct gfs2_alloc structure describing the reservation
710 *
711 * If there's room for the requested blocks to be allocated from the RG:
712 * Sets the $al_reserved_data field in @al.
713 * Sets the $al_reserved_meta field in @al.
714 * Sets the $al_rgd field in @al.
715 *
716 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
717 */
718
719static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
720{
721 struct gfs2_sbd *sdp = rgd->rd_sbd;
722 int ret = 0;
723
724 spin_lock(&sdp->sd_rindex_spin);
725 if (rgd->rd_free_clone >= al->al_requested) {
726 al->al_rgd = rgd;
727 ret = 1;
728 }
729 spin_unlock(&sdp->sd_rindex_spin);
730
731 return ret;
732}
733
734/**
735 * recent_rgrp_first - get first RG from "recent" list
736 * @sdp: The GFS2 superblock
737 * @rglast: address of the rgrp used last
738 *
739 * Returns: The first rgrp in the recent list
740 */
741
742static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
743 uint64_t rglast)
744{
745 struct gfs2_rgrpd *rgd = NULL;
746
747 spin_lock(&sdp->sd_rindex_spin);
748
749 if (list_empty(&sdp->sd_rindex_recent_list))
750 goto out;
751
752 if (!rglast)
753 goto first;
754
755 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
756 if (rgd->rd_ri.ri_addr == rglast)
757 goto out;
758 }
759
760 first:
761 rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd,
762 rd_recent);
763
764 out:
765 spin_unlock(&sdp->sd_rindex_spin);
766
767 return rgd;
768}
769
770/**
771 * recent_rgrp_next - get next RG from "recent" list
772 * @cur_rgd: current rgrp
773 * @remove:
774 *
775 * Returns: The next rgrp in the recent list
776 */
777
778static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd,
779 int remove)
780{
781 struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
782 struct list_head *head;
783 struct gfs2_rgrpd *rgd;
784
785 spin_lock(&sdp->sd_rindex_spin);
786
787 head = &sdp->sd_rindex_recent_list;
788
789 list_for_each_entry(rgd, head, rd_recent) {
790 if (rgd == cur_rgd) {
791 if (cur_rgd->rd_recent.next != head)
792 rgd = list_entry(cur_rgd->rd_recent.next,
793 struct gfs2_rgrpd, rd_recent);
794 else
795 rgd = NULL;
796
797 if (remove)
798 list_del(&cur_rgd->rd_recent);
799
800 goto out;
801 }
802 }
803
804 rgd = NULL;
805 if (!list_empty(head))
806 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent);
807
808 out:
809 spin_unlock(&sdp->sd_rindex_spin);
810
811 return rgd;
812}
813
814/**
815 * recent_rgrp_add - add an RG to tail of "recent" list
816 * @new_rgd: The rgrp to add
817 *
818 */
819
820static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd)
821{
822 struct gfs2_sbd *sdp = new_rgd->rd_sbd;
823 struct gfs2_rgrpd *rgd;
824 unsigned int count = 0;
825 unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp);
826
827 spin_lock(&sdp->sd_rindex_spin);
828
829 list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
830 if (rgd == new_rgd)
831 goto out;
832
833 if (++count >= max)
834 goto out;
835 }
836 list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list);
837
838 out:
839 spin_unlock(&sdp->sd_rindex_spin);
840}
841
842/**
843 * forward_rgrp_get - get an rgrp to try next from full list
844 * @sdp: The GFS2 superblock
845 *
846 * Returns: The rgrp to try next
847 */
848
849static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
850{
851 struct gfs2_rgrpd *rgd;
852 unsigned int journals = gfs2_jindex_size(sdp);
853 unsigned int rg = 0, x;
854
855 spin_lock(&sdp->sd_rindex_spin);
856
857 rgd = sdp->sd_rindex_forward;
858 if (!rgd) {
859 if (sdp->sd_rgrps >= journals)
860 rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
861
862 for (x = 0, rgd = gfs2_rgrpd_get_first(sdp);
863 x < rg;
864 x++, rgd = gfs2_rgrpd_get_next(rgd))
865 /* Do Nothing */;
866
867 sdp->sd_rindex_forward = rgd;
868 }
869
870 spin_unlock(&sdp->sd_rindex_spin);
871
872 return rgd;
873}
874
875/**
876 * forward_rgrp_set - set the forward rgrp pointer
877 * @sdp: the filesystem
878 * @rgd: The new forward rgrp
879 *
880 */
881
882static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
883{
884 spin_lock(&sdp->sd_rindex_spin);
885 sdp->sd_rindex_forward = rgd;
886 spin_unlock(&sdp->sd_rindex_spin);
887}
888
889/**
890 * get_local_rgrp - Choose and lock a rgrp for allocation
891 * @ip: the inode to reserve space for
892 * @rgp: the chosen and locked rgrp
893 *
894 * Try to acquire rgrp in way which avoids contending with others.
895 *
896 * Returns: errno
897 */
898
899static int get_local_rgrp(struct gfs2_inode *ip)
900{
901 struct gfs2_sbd *sdp = ip->i_sbd;
902 struct gfs2_rgrpd *rgd, *begin = NULL;
903 struct gfs2_alloc *al = &ip->i_alloc;
904 int flags = LM_FLAG_TRY;
905 int skipped = 0;
906 int loops = 0;
907 int error;
908
909 /* Try recently successful rgrps */
910
911 rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
912
913 while (rgd) {
914 error = gfs2_glock_nq_init(rgd->rd_gl,
915 LM_ST_EXCLUSIVE, LM_FLAG_TRY,
916 &al->al_rgd_gh);
917 switch (error) {
918 case 0:
919 if (try_rgrp_fit(rgd, al))
920 goto out;
921 gfs2_glock_dq_uninit(&al->al_rgd_gh);
922 rgd = recent_rgrp_next(rgd, 1);
923 break;
924
925 case GLR_TRYFAILED:
926 rgd = recent_rgrp_next(rgd, 0);
927 break;
928
929 default:
930 return error;
931 }
932 }
933
934 /* Go through full list of rgrps */
935
936 begin = rgd = forward_rgrp_get(sdp);
937
938 for (;;) {
939 error = gfs2_glock_nq_init(rgd->rd_gl,
940 LM_ST_EXCLUSIVE, flags,
941 &al->al_rgd_gh);
942 switch (error) {
943 case 0:
944 if (try_rgrp_fit(rgd, al))
945 goto out;
946 gfs2_glock_dq_uninit(&al->al_rgd_gh);
947 break;
948
949 case GLR_TRYFAILED:
950 skipped++;
951 break;
952
953 default:
954 return error;
955 }
956
957 rgd = gfs2_rgrpd_get_next(rgd);
958 if (!rgd)
959 rgd = gfs2_rgrpd_get_first(sdp);
960
961 if (rgd == begin) {
962 if (++loops >= 2 || !skipped)
963 return -ENOSPC;
964 flags = 0;
965 }
966 }
967
968 out:
969 ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
970
971 if (begin) {
972 recent_rgrp_add(rgd);
973 rgd = gfs2_rgrpd_get_next(rgd);
974 if (!rgd)
975 rgd = gfs2_rgrpd_get_first(sdp);
976 forward_rgrp_set(sdp, rgd);
977 }
978
979 return 0;
980}
981
982/**
983 * gfs2_inplace_reserve_i - Reserve space in the filesystem
984 * @ip: the inode to reserve space for
985 *
986 * Returns: errno
987 */
988
989int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
990{
991 struct gfs2_sbd *sdp = ip->i_sbd;
992 struct gfs2_alloc *al = &ip->i_alloc;
993 int error;
994
995 if (gfs2_assert_warn(sdp, al->al_requested))
996 return -EINVAL;
997
998 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
999 if (error)
1000 return error;
1001
1002 error = get_local_rgrp(ip);
1003 if (error) {
1004 gfs2_glock_dq_uninit(&al->al_ri_gh);
1005 return error;
1006 }
1007
1008 al->al_file = file;
1009 al->al_line = line;
1010
1011 return 0;
1012}
1013
1014/**
1015 * gfs2_inplace_release - release an inplace reservation
1016 * @ip: the inode the reservation was taken out on
1017 *
1018 * Release a reservation made by gfs2_inplace_reserve().
1019 */
1020
1021void gfs2_inplace_release(struct gfs2_inode *ip)
1022{
1023 struct gfs2_sbd *sdp = ip->i_sbd;
1024 struct gfs2_alloc *al = &ip->i_alloc;
1025
1026 if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
1027 fs_warn(sdp, "al_alloced = %u, al_requested = %u "
1028 "al_file = %s, al_line = %u\n",
1029 al->al_alloced, al->al_requested, al->al_file,
1030 al->al_line);
1031
1032 al->al_rgd = NULL;
1033 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1034 gfs2_glock_dq_uninit(&al->al_ri_gh);
1035}
1036
1037/**
1038 * gfs2_get_block_type - Check a block in a RG is of given type
1039 * @rgd: the resource group holding the block
1040 * @block: the block number
1041 *
1042 * Returns: The block type (GFS2_BLKST_*)
1043 */
1044
1045unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block)
1046{
1047 struct gfs2_bitmap *bi = NULL;
1048 uint32_t length, rgrp_block, buf_block;
1049 unsigned int buf;
1050 unsigned char type;
1051
1052 length = rgd->rd_ri.ri_length;
1053 rgrp_block = block - rgd->rd_ri.ri_data0;
1054
1055 for (buf = 0; buf < length; buf++) {
1056 bi = rgd->rd_bits + buf;
1057 if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1058 break;
1059 }
1060
1061 gfs2_assert(rgd->rd_sbd, buf < length);
1062 buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
1063
1064 type = gfs2_testbit(rgd,
1065 bi->bi_bh->b_data + bi->bi_offset,
1066 bi->bi_len, buf_block);
1067
1068 return type;
1069}
1070
1071/**
1072 * rgblk_search - find a block in @old_state, change allocation
1073 * state to @new_state
1074 * @rgd: the resource group descriptor
1075 * @goal: the goal block within the RG (start here to search for avail block)
1076 * @old_state: GFS2_BLKST_XXX the before-allocation state to find
1077 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1078 *
1079 * Walk rgrp's bitmap to find bits that represent a block in @old_state.
1080 * Add the found bitmap buffer to the transaction.
1081 * Set the found bits to @new_state to change block's allocation state.
1082 *
1083 * This function never fails, because we wouldn't call it unless we
1084 * know (from reservation results, etc.) that a block is available.
1085 *
1086 * Scope of @goal and returned block is just within rgrp, not the whole
1087 * filesystem.
1088 *
1089 * Returns: the block number allocated
1090 */
1091
1092static uint32_t rgblk_search(struct gfs2_rgrpd *rgd, uint32_t goal,
1093 unsigned char old_state, unsigned char new_state)
1094{
1095 struct gfs2_bitmap *bi = NULL;
1096 uint32_t length = rgd->rd_ri.ri_length;
1097 uint32_t blk = 0;
1098 unsigned int buf, x;
1099
1100 /* Find bitmap block that contains bits for goal block */
1101 for (buf = 0; buf < length; buf++) {
1102 bi = rgd->rd_bits + buf;
1103 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1104 break;
1105 }
1106
1107 gfs2_assert(rgd->rd_sbd, buf < length);
1108
1109 /* Convert scope of "goal" from rgrp-wide to within found bit block */
1110 goal -= bi->bi_start * GFS2_NBBY;
1111
1112 /* Search (up to entire) bitmap in this rgrp for allocatable block.
1113 "x <= length", instead of "x < length", because we typically start
1114 the search in the middle of a bit block, but if we can't find an
1115 allocatable block anywhere else, we want to be able wrap around and
1116 search in the first part of our first-searched bit block. */
1117 for (x = 0; x <= length; x++) {
1118 if (bi->bi_clone)
1119 blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset,
1120 bi->bi_len, goal, old_state);
1121 else
1122 blk = gfs2_bitfit(rgd,
1123 bi->bi_bh->b_data + bi->bi_offset,
1124 bi->bi_len, goal, old_state);
1125 if (blk != BFITNOENT)
1126 break;
1127
1128 /* Try next bitmap block (wrap back to rgrp header if at end) */
1129 buf = (buf + 1) % length;
1130 bi = rgd->rd_bits + buf;
1131 goal = 0;
1132 }
1133
1134 if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
1135 blk = 0;
1136
1137 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1138 gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
1139 bi->bi_len, blk, new_state);
1140 if (bi->bi_clone)
1141 gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
1142 bi->bi_len, blk, new_state);
1143
1144 return bi->bi_start * GFS2_NBBY + blk;
1145}
1146
1147/**
1148 * rgblk_free - Change alloc state of given block(s)
1149 * @sdp: the filesystem
1150 * @bstart: the start of a run of blocks to free
1151 * @blen: the length of the block run (all must lie within ONE RG!)
1152 * @new_state: GFS2_BLKST_XXX the after-allocation block state
1153 *
1154 * Returns: Resource group containing the block(s)
1155 */
1156
1157static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, uint64_t bstart,
1158 uint32_t blen, unsigned char new_state)
1159{
1160 struct gfs2_rgrpd *rgd;
1161 struct gfs2_bitmap *bi = NULL;
1162 uint32_t length, rgrp_blk, buf_blk;
1163 unsigned int buf;
1164
1165 rgd = gfs2_blk2rgrpd(sdp, bstart);
1166 if (!rgd) {
1167 if (gfs2_consist(sdp))
1168 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
1169 return NULL;
1170 }
1171
1172 length = rgd->rd_ri.ri_length;
1173
1174 rgrp_blk = bstart - rgd->rd_ri.ri_data0;
1175
1176 while (blen--) {
1177 for (buf = 0; buf < length; buf++) {
1178 bi = rgd->rd_bits + buf;
1179 if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
1180 break;
1181 }
1182
1183 gfs2_assert(rgd->rd_sbd, buf < length);
1184
1185 buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
1186 rgrp_blk++;
1187
1188 if (!bi->bi_clone) {
1189 bi->bi_clone = kmalloc(bi->bi_bh->b_size,
1190 GFP_KERNEL | __GFP_NOFAIL);
1191 memcpy(bi->bi_clone + bi->bi_offset,
1192 bi->bi_bh->b_data + bi->bi_offset,
1193 bi->bi_len);
1194 }
1195 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1196 gfs2_setbit(rgd,
1197 bi->bi_bh->b_data + bi->bi_offset,
1198 bi->bi_len, buf_blk, new_state);
1199 }
1200
1201 return rgd;
1202}
1203
1204/**
1205 * gfs2_alloc_data - Allocate a data block
1206 * @ip: the inode to allocate the data block for
1207 *
1208 * Returns: the allocated block
1209 */
1210
1211uint64_t gfs2_alloc_data(struct gfs2_inode *ip)
1212{
1213 struct gfs2_sbd *sdp = ip->i_sbd;
1214 struct gfs2_alloc *al = &ip->i_alloc;
1215 struct gfs2_rgrpd *rgd = al->al_rgd;
1216 uint32_t goal, blk;
1217 uint64_t block;
1218
1219 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
1220 goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
1221 else
1222 goal = rgd->rd_last_alloc_data;
1223
1224 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1225 rgd->rd_last_alloc_data = blk;
1226
1227 block = rgd->rd_ri.ri_data0 + blk;
1228 ip->i_di.di_goal_data = block;
1229
1230 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1231 rgd->rd_rg.rg_free--;
1232
1233 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1234 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1235
1236 al->al_alloced++;
1237
1238 gfs2_statfs_change(sdp, 0, -1, 0);
1239 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1240
1241 spin_lock(&sdp->sd_rindex_spin);
1242 rgd->rd_free_clone--;
1243 spin_unlock(&sdp->sd_rindex_spin);
1244
1245 return block;
1246}
1247
1248/**
1249 * gfs2_alloc_meta - Allocate a metadata block
1250 * @ip: the inode to allocate the metadata block for
1251 *
1252 * Returns: the allocated block
1253 */
1254
1255uint64_t gfs2_alloc_meta(struct gfs2_inode *ip)
1256{
1257 struct gfs2_sbd *sdp = ip->i_sbd;
1258 struct gfs2_alloc *al = &ip->i_alloc;
1259 struct gfs2_rgrpd *rgd = al->al_rgd;
1260 uint32_t goal, blk;
1261 uint64_t block;
1262
1263 if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
1264 goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
1265 else
1266 goal = rgd->rd_last_alloc_meta;
1267
1268 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
1269 rgd->rd_last_alloc_meta = blk;
1270
1271 block = rgd->rd_ri.ri_data0 + blk;
1272 ip->i_di.di_goal_meta = block;
1273
1274 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1275 rgd->rd_rg.rg_free--;
1276
1277 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1278 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1279
1280 al->al_alloced++;
1281
1282 gfs2_statfs_change(sdp, 0, -1, 0);
1283 gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid);
1284 gfs2_trans_add_unrevoke(sdp, block);
1285
1286 spin_lock(&sdp->sd_rindex_spin);
1287 rgd->rd_free_clone--;
1288 spin_unlock(&sdp->sd_rindex_spin);
1289
1290 return block;
1291}
1292
1293/**
1294 * gfs2_alloc_di - Allocate a dinode
1295 * @dip: the directory that the inode is going in
1296 *
1297 * Returns: the block allocated
1298 */
1299
1300uint64_t gfs2_alloc_di(struct gfs2_inode *dip)
1301{
1302 struct gfs2_sbd *sdp = dip->i_sbd;
1303 struct gfs2_alloc *al = &dip->i_alloc;
1304 struct gfs2_rgrpd *rgd = al->al_rgd;
1305 uint32_t blk;
1306 uint64_t block;
1307
1308 blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
1309 GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
1310
1311 rgd->rd_last_alloc_meta = blk;
1312
1313 block = rgd->rd_ri.ri_data0 + blk;
1314
1315 gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
1316 rgd->rd_rg.rg_free--;
1317 rgd->rd_rg.rg_dinodes++;
1318
1319 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1320 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1321
1322 al->al_alloced++;
1323
1324 gfs2_statfs_change(sdp, 0, -1, +1);
1325 gfs2_trans_add_unrevoke(sdp, block);
1326
1327 spin_lock(&sdp->sd_rindex_spin);
1328 rgd->rd_free_clone--;
1329 spin_unlock(&sdp->sd_rindex_spin);
1330
1331 return block;
1332}
1333
1334/**
1335 * gfs2_free_data - free a contiguous run of data block(s)
1336 * @ip: the inode these blocks are being freed from
1337 * @bstart: first block of a run of contiguous blocks
1338 * @blen: the length of the block run
1339 *
1340 */
1341
1342void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1343{
1344 struct gfs2_sbd *sdp = ip->i_sbd;
1345 struct gfs2_rgrpd *rgd;
1346
1347 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1348 if (!rgd)
1349 return;
1350
1351 rgd->rd_rg.rg_free += blen;
1352
1353 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1354 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1355
1356 gfs2_trans_add_rg(rgd);
1357
1358 gfs2_statfs_change(sdp, 0, +blen, 0);
1359 gfs2_quota_change(ip, -(int64_t)blen,
1360 ip->i_di.di_uid, ip->i_di.di_gid);
1361}
1362
1363/**
1364 * gfs2_free_meta - free a contiguous run of data block(s)
1365 * @ip: the inode these blocks are being freed from
1366 * @bstart: first block of a run of contiguous blocks
1367 * @blen: the length of the block run
1368 *
1369 */
1370
1371void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen)
1372{
1373 struct gfs2_sbd *sdp = ip->i_sbd;
1374 struct gfs2_rgrpd *rgd;
1375
1376 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1377 if (!rgd)
1378 return;
1379
1380 rgd->rd_rg.rg_free += blen;
1381
1382 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1383 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1384
1385 gfs2_trans_add_rg(rgd);
1386
1387 gfs2_statfs_change(sdp, 0, +blen, 0);
1388 gfs2_quota_change(ip, -(int64_t)blen,
1389 ip->i_di.di_uid, ip->i_di.di_gid);
1390 gfs2_meta_wipe(ip, bstart, blen);
1391}
1392
1393void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno)
1394{
1395 struct gfs2_sbd *sdp = rgd->rd_sbd;
1396 struct gfs2_rgrpd *tmp_rgd;
1397
1398 tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
1399 if (!tmp_rgd)
1400 return;
1401 gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
1402
1403 if (!rgd->rd_rg.rg_dinodes)
1404 gfs2_consist_rgrpd(rgd);
1405 rgd->rd_rg.rg_dinodes--;
1406 rgd->rd_rg.rg_free++;
1407
1408 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1409 gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data);
1410
1411 gfs2_statfs_change(sdp, 0, +1, -1);
1412 gfs2_trans_add_rg(rgd);
1413}
1414
1415/**
1416 * gfs2_free_uninit_di - free a dinode block
1417 * @rgd: the resource group that contains the dinode
1418 * @ip: the inode
1419 *
1420 */
1421
1422void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1423{
1424 gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
1425 gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid);
1426 gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
1427}
1428
1429/**
1430 * gfs2_rlist_add - add a RG to a list of RGs
1431 * @sdp: the filesystem
1432 * @rlist: the list of resource groups
1433 * @block: the block
1434 *
1435 * Figure out what RG a block belongs to and add that RG to the list
1436 *
1437 * FIXME: Don't use NOFAIL
1438 *
1439 */
1440
1441void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
1442 uint64_t block)
1443{
1444 struct gfs2_rgrpd *rgd;
1445 struct gfs2_rgrpd **tmp;
1446 unsigned int new_space;
1447 unsigned int x;
1448
1449 if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
1450 return;
1451
1452 rgd = gfs2_blk2rgrpd(sdp, block);
1453 if (!rgd) {
1454 if (gfs2_consist(sdp))
1455 fs_err(sdp, "block = %llu\n", (unsigned long long)block);
1456 return;
1457 }
1458
1459 for (x = 0; x < rlist->rl_rgrps; x++)
1460 if (rlist->rl_rgd[x] == rgd)
1461 return;
1462
1463 if (rlist->rl_rgrps == rlist->rl_space) {
1464 new_space = rlist->rl_space + 10;
1465
1466 tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *),
1467 GFP_KERNEL | __GFP_NOFAIL);
1468
1469 if (rlist->rl_rgd) {
1470 memcpy(tmp, rlist->rl_rgd,
1471 rlist->rl_space * sizeof(struct gfs2_rgrpd *));
1472 kfree(rlist->rl_rgd);
1473 }
1474
1475 rlist->rl_space = new_space;
1476 rlist->rl_rgd = tmp;
1477 }
1478
1479 rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
1480}
1481
1482/**
1483 * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
1484 * and initialize an array of glock holders for them
1485 * @rlist: the list of resource groups
1486 * @state: the lock state to acquire the RG lock in
1487 * @flags: the modifier flags for the holder structures
1488 *
1489 * FIXME: Don't use NOFAIL
1490 *
1491 */
1492
1493void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
1494 int flags)
1495{
1496 unsigned int x;
1497
1498 rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
1499 GFP_KERNEL | __GFP_NOFAIL);
1500 for (x = 0; x < rlist->rl_rgrps; x++)
1501 gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
1502 state, flags,
1503 &rlist->rl_ghs[x]);
1504}
1505
1506/**
1507 * gfs2_rlist_free - free a resource group list
1508 * @list: the list of resource groups
1509 *
1510 */
1511
1512void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1513{
1514 unsigned int x;
1515
1516 kfree(rlist->rl_rgd);
1517
1518 if (rlist->rl_ghs) {
1519 for (x = 0; x < rlist->rl_rgrps; x++)
1520 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1521 kfree(rlist->rl_ghs);
1522 }
1523}
1524
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
new file mode 100644
index 000000000000..d2db3719cc0f
--- /dev/null
+++ b/fs/gfs2/rgrp.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __RGRP_DOT_H__
11#define __RGRP_DOT_H__
12
13void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
14
15struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);
16struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
17struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
18
19void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
20int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
21
22int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
23void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
24void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
25
26void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
27
28struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
29void gfs2_alloc_put(struct gfs2_inode *ip);
30
31int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
32 char *file, unsigned int line);
33#define gfs2_inplace_reserve(ip) \
34gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
35
36void gfs2_inplace_release(struct gfs2_inode *ip);
37
38unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block);
39
40uint64_t gfs2_alloc_data(struct gfs2_inode *ip);
41uint64_t gfs2_alloc_meta(struct gfs2_inode *ip);
42uint64_t gfs2_alloc_di(struct gfs2_inode *ip);
43
44void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
45void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen);
46void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno);
47void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
48
/*
 * A growable list of resource groups, built with gfs2_rlist_add(), given
 * glock holders by gfs2_rlist_alloc() and torn down by gfs2_rlist_free().
 */
struct gfs2_rgrp_list {
	unsigned int rl_rgrps;		/* number of entries in use in rl_rgd */
	unsigned int rl_space;		/* number of entries allocated in rl_rgd */
	struct gfs2_rgrpd **rl_rgd;	/* array of resource group pointers */
	struct gfs2_holder *rl_ghs;	/* one holder per listed RG (set by gfs2_rlist_alloc) */
};
55
56void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
57 uint64_t block);
58void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
59 int flags);
60void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
61
62#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
new file mode 100644
index 000000000000..a943a505bc5a
--- /dev/null
+++ b/fs/gfs2/super.c
@@ -0,0 +1,945 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "bmap.h"
22#include "dir.h"
23#include "format.h"
24#include "glock.h"
25#include "glops.h"
26#include "inode.h"
27#include "log.h"
28#include "meta_io.h"
29#include "quota.h"
30#include "recovery.h"
31#include "rgrp.h"
32#include "super.h"
33#include "trans.h"
34#include "unlinked.h"
35#include "util.h"
36
37/**
38 * gfs2_tune_init - Fill a gfs2_tune structure with default values
39 * @gt: tune
40 *
41 */
42
43void gfs2_tune_init(struct gfs2_tune *gt)
44{
45 spin_lock_init(&gt->gt_spin);
46
47 gt->gt_ilimit = 100;
48 gt->gt_ilimit_tries = 3;
49 gt->gt_ilimit_min = 1;
50 gt->gt_demote_secs = 300;
51 gt->gt_incore_log_blocks = 1024;
52 gt->gt_log_flush_secs = 60;
53 gt->gt_jindex_refresh_secs = 60;
54 gt->gt_scand_secs = 15;
55 gt->gt_recoverd_secs = 60;
56 gt->gt_logd_secs = 1;
57 gt->gt_quotad_secs = 5;
58 gt->gt_inoded_secs = 15;
59 gt->gt_quota_simul_sync = 64;
60 gt->gt_quota_warn_period = 10;
61 gt->gt_quota_scale_num = 1;
62 gt->gt_quota_scale_den = 1;
63 gt->gt_quota_cache_secs = 300;
64 gt->gt_quota_quantum = 60;
65 gt->gt_atime_quantum = 3600;
66 gt->gt_new_files_jdata = 0;
67 gt->gt_new_files_directio = 0;
68 gt->gt_max_atomic_write = 4 << 20;
69 gt->gt_max_readahead = 1 << 18;
70 gt->gt_lockdump_size = 131072;
71 gt->gt_stall_secs = 600;
72 gt->gt_complain_secs = 10;
73 gt->gt_reclaim_limit = 5000;
74 gt->gt_entries_per_readdir = 32;
75 gt->gt_prefetch_secs = 10;
76 gt->gt_greedy_default = HZ / 10;
77 gt->gt_greedy_quantum = HZ / 40;
78 gt->gt_greedy_max = HZ / 4;
79 gt->gt_statfs_quantum = 30;
80 gt->gt_statfs_slow = 0;
81}
82
83/**
84 * gfs2_check_sb - Check superblock
85 * @sdp: the filesystem
86 * @sb: The superblock
87 * @silent: Don't print a message if the check fails
88 *
89 * Checks the version code of the FS is one that we understand how to
90 * read and that the sizes of the various on-disk structures have not
91 * changed.
92 */
93
94int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent)
95{
96 unsigned int x;
97
98 if (sb->sb_header.mh_magic != GFS2_MAGIC ||
99 sb->sb_header.mh_type != GFS2_METATYPE_SB) {
100 if (!silent)
101 printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
102 return -EINVAL;
103 }
104
105 /* If format numbers match exactly, we're done. */
106
107 if (sb->sb_fs_format == GFS2_FORMAT_FS &&
108 sb->sb_multihost_format == GFS2_FORMAT_MULTI)
109 return 0;
110
111 if (sb->sb_fs_format != GFS2_FORMAT_FS) {
112 for (x = 0; gfs2_old_fs_formats[x]; x++)
113 if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
114 break;
115
116 if (!gfs2_old_fs_formats[x]) {
117 printk(KERN_WARNING
118 "GFS2: code version (%u, %u) is incompatible "
119 "with ondisk format (%u, %u)\n",
120 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
121 sb->sb_fs_format, sb->sb_multihost_format);
122 printk(KERN_WARNING
123 "GFS2: I don't know how to upgrade this FS\n");
124 return -EINVAL;
125 }
126 }
127
128 if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
129 for (x = 0; gfs2_old_multihost_formats[x]; x++)
130 if (gfs2_old_multihost_formats[x] ==
131 sb->sb_multihost_format)
132 break;
133
134 if (!gfs2_old_multihost_formats[x]) {
135 printk(KERN_WARNING
136 "GFS2: code version (%u, %u) is incompatible "
137 "with ondisk format (%u, %u)\n",
138 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
139 sb->sb_fs_format, sb->sb_multihost_format);
140 printk(KERN_WARNING
141 "GFS2: I don't know how to upgrade this FS\n");
142 return -EINVAL;
143 }
144 }
145
146 if (!sdp->sd_args.ar_upgrade) {
147 printk(KERN_WARNING
148 "GFS2: code version (%u, %u) is incompatible "
149 "with ondisk format (%u, %u)\n",
150 GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
151 sb->sb_fs_format, sb->sb_multihost_format);
152 printk(KERN_INFO
153 "GFS2: Use the \"upgrade\" mount option to upgrade "
154 "the FS\n");
155 printk(KERN_INFO "GFS2: See the manual for more details\n");
156 return -EINVAL;
157 }
158
159 return 0;
160}
161
162/**
163 * gfs2_read_sb - Read super block
164 * @sdp: The GFS2 superblock
165 * @gl: the glock for the superblock (assumed to be held)
166 * @silent: Don't print message if mount fails
167 *
168 */
169
170int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
171{
172 struct buffer_head *bh;
173 uint32_t hash_blocks, ind_blocks, leaf_blocks;
174 uint32_t tmp_blocks;
175 unsigned int x;
176 int error;
177
178 error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift,
179 DIO_FORCE | DIO_START | DIO_WAIT, &bh);
180 if (error) {
181 if (!silent)
182 fs_err(sdp, "can't read superblock\n");
183 return error;
184 }
185
186 gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size);
187 gfs2_sb_in(&sdp->sd_sb, bh->b_data);
188 brelse(bh);
189
190 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
191 if (error)
192 return error;
193
194 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
195 GFS2_BASIC_BLOCK_SHIFT;
196 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
197 sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
198 sizeof(struct gfs2_dinode)) / sizeof(uint64_t);
199 sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
200 sizeof(struct gfs2_meta_header)) / sizeof(uint64_t);
201 sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
202 sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
203 sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
204 sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
205 sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize -
206 sizeof(struct gfs2_meta_header)) /
207 sizeof(struct gfs2_unlinked_tag);
208 sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
209 sizeof(struct gfs2_meta_header)) /
210 sizeof(struct gfs2_quota_change);
211
212 /* Compute maximum reservation required to add a entry to a directory */
213
214 hash_blocks = DIV_ROUND_UP(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH),
215 sdp->sd_jbsize);
216
217 ind_blocks = 0;
218 for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
219 tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
220 ind_blocks += tmp_blocks;
221 }
222
223 leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
224
225 sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
226
227 sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
228 sizeof(struct gfs2_dinode);
229 sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
230 for (x = 2;; x++) {
231 uint64_t space, d;
232 uint32_t m;
233
234 space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
235 d = space;
236 m = do_div(d, sdp->sd_inptrs);
237
238 if (d != sdp->sd_heightsize[x - 1] || m)
239 break;
240 sdp->sd_heightsize[x] = space;
241 }
242 sdp->sd_max_height = x;
243 gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
244
245 sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
246 sizeof(struct gfs2_dinode);
247 sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
248 for (x = 2;; x++) {
249 uint64_t space, d;
250 uint32_t m;
251
252 space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
253 d = space;
254 m = do_div(d, sdp->sd_inptrs);
255
256 if (d != sdp->sd_jheightsize[x - 1] || m)
257 break;
258 sdp->sd_jheightsize[x] = space;
259 }
260 sdp->sd_max_jheight = x;
261 gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
262
263 return 0;
264}
265
266/**
267 * gfs2_jindex_hold - Grab a lock on the jindex
268 * @sdp: The GFS2 superblock
269 * @ji_gh: the holder for the jindex glock
270 *
271 * This is very similar to the gfs2_rindex_hold() function, except that
272 * in general we hold the jindex lock for longer periods of time and
273 * we grab it far less frequently (in general) then the rgrp lock.
274 *
275 * Returns: errno
276 */
277
278int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
279{
280 struct gfs2_inode *dip = sdp->sd_jindex->u.generic_ip;
281 struct qstr name;
282 char buf[20];
283 struct gfs2_jdesc *jd;
284 int error;
285
286 name.name = buf;
287
288 mutex_lock(&sdp->sd_jindex_mutex);
289
290 for (;;) {
291 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
292 GL_LOCAL_EXCL, ji_gh);
293 if (error)
294 break;
295
296 name.len = sprintf(buf, "journal%u", sdp->sd_journals);
297 name.hash = gfs2_disk_hash(name.name, name.len);
298
299 error = gfs2_dir_search(sdp->sd_jindex,
300 &name, NULL, NULL);
301 if (error == -ENOENT) {
302 error = 0;
303 break;
304 }
305
306 gfs2_glock_dq_uninit(ji_gh);
307
308 if (error)
309 break;
310
311 error = -ENOMEM;
312 jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
313 if (!jd)
314 break;
315
316 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
317 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
318 if (!jd->jd_inode)
319 error = -ENOENT;
320 else
321 error = PTR_ERR(jd->jd_inode);
322 kfree(jd);
323 break;
324 }
325
326 spin_lock(&sdp->sd_jindex_spin);
327 jd->jd_jid = sdp->sd_journals++;
328 list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
329 spin_unlock(&sdp->sd_jindex_spin);
330 }
331
332 mutex_unlock(&sdp->sd_jindex_mutex);
333
334 return error;
335}
336
337/**
338 * gfs2_jindex_free - Clear all the journal index information
339 * @sdp: The GFS2 superblock
340 *
341 */
342
343void gfs2_jindex_free(struct gfs2_sbd *sdp)
344{
345 struct list_head list;
346 struct gfs2_jdesc *jd;
347
348 spin_lock(&sdp->sd_jindex_spin);
349 list_add(&list, &sdp->sd_jindex_list);
350 list_del_init(&sdp->sd_jindex_list);
351 sdp->sd_journals = 0;
352 spin_unlock(&sdp->sd_jindex_spin);
353
354 while (!list_empty(&list)) {
355 jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
356 list_del(&jd->jd_list);
357 iput(jd->jd_inode);
358 kfree(jd);
359 }
360}
361
362static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
363{
364 struct gfs2_jdesc *jd;
365 int found = 0;
366
367 list_for_each_entry(jd, head, jd_list) {
368 if (jd->jd_jid == jid) {
369 found = 1;
370 break;
371 }
372 }
373
374 if (!found)
375 jd = NULL;
376
377 return jd;
378}
379
380struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
381{
382 struct gfs2_jdesc *jd;
383
384 spin_lock(&sdp->sd_jindex_spin);
385 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
386 spin_unlock(&sdp->sd_jindex_spin);
387
388 return jd;
389}
390
391void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
392{
393 struct gfs2_jdesc *jd;
394
395 spin_lock(&sdp->sd_jindex_spin);
396 jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
397 if (jd)
398 jd->jd_dirty = 1;
399 spin_unlock(&sdp->sd_jindex_spin);
400}
401
402struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
403{
404 struct gfs2_jdesc *jd;
405 int found = 0;
406
407 spin_lock(&sdp->sd_jindex_spin);
408
409 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
410 if (jd->jd_dirty) {
411 jd->jd_dirty = 0;
412 found = 1;
413 break;
414 }
415 }
416 spin_unlock(&sdp->sd_jindex_spin);
417
418 if (!found)
419 jd = NULL;
420
421 return jd;
422}
423
424int gfs2_jdesc_check(struct gfs2_jdesc *jd)
425{
426 struct gfs2_inode *ip = jd->jd_inode->u.generic_ip;
427 struct gfs2_sbd *sdp = ip->i_sbd;
428 int ar;
429 int error;
430
431 if (ip->i_di.di_size < (8 << 20) ||
432 ip->i_di.di_size > (1 << 30) ||
433 (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
434 gfs2_consist_inode(ip);
435 return -EIO;
436 }
437 jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
438
439 error = gfs2_write_alloc_required(ip,
440 0, ip->i_di.di_size,
441 &ar);
442 if (!error && ar) {
443 gfs2_consist_inode(ip);
444 error = -EIO;
445 }
446
447 return error;
448}
449
450/**
451 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
452 * @sdp: the filesystem
453 *
454 * Returns: errno
455 */
456
457int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
458{
459 struct gfs2_inode *ip = sdp->sd_jdesc->jd_inode->u.generic_ip;
460 struct gfs2_glock *j_gl = ip->i_gl;
461 struct gfs2_holder t_gh;
462 struct gfs2_log_header head;
463 int error;
464
465 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
466 GL_LOCAL_EXCL, &t_gh);
467 if (error)
468 return error;
469
470 gfs2_meta_cache_flush(ip);
471 j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
472
473 error = gfs2_find_jhead(sdp->sd_jdesc, &head);
474 if (error)
475 goto fail;
476
477 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
478 gfs2_consist(sdp);
479 error = -EIO;
480 goto fail;
481 }
482
483 /* Initialize some head of the log stuff */
484 sdp->sd_log_sequence = head.lh_sequence + 1;
485 gfs2_log_pointers_init(sdp, head.lh_blkno);
486
487 error = gfs2_unlinked_init(sdp);
488 if (error)
489 goto fail;
490 error = gfs2_quota_init(sdp);
491 if (error)
492 goto fail_unlinked;
493
494 set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
495
496 gfs2_glock_dq_uninit(&t_gh);
497
498 return 0;
499
500 fail_unlinked:
501 gfs2_unlinked_cleanup(sdp);
502
503 fail:
504 t_gh.gh_flags |= GL_NOCACHE;
505 gfs2_glock_dq_uninit(&t_gh);
506
507 return error;
508}
509
510/**
511 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
512 * @sdp: the filesystem
513 *
514 * Returns: errno
515 */
516
517int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
518{
519 struct gfs2_holder t_gh;
520 int error;
521
522 gfs2_unlinked_dealloc(sdp);
523 gfs2_quota_sync(sdp);
524 gfs2_statfs_sync(sdp);
525
526 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
527 GL_LOCAL_EXCL | GL_NOCACHE,
528 &t_gh);
529 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
530 return error;
531
532 gfs2_meta_syncfs(sdp);
533 gfs2_log_shutdown(sdp);
534
535 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
536
537 if (t_gh.gh_gl)
538 gfs2_glock_dq_uninit(&t_gh);
539
540 gfs2_unlinked_cleanup(sdp);
541 gfs2_quota_cleanup(sdp);
542
543 return error;
544}
545
546int gfs2_statfs_init(struct gfs2_sbd *sdp)
547{
548 struct gfs2_inode *m_ip = sdp->sd_statfs_inode->u.generic_ip;
549 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
550 struct gfs2_inode *l_ip = sdp->sd_sc_inode->u.generic_ip;
551 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
552 struct buffer_head *m_bh, *l_bh;
553 struct gfs2_holder gh;
554 int error;
555
556 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
557 &gh);
558 if (error)
559 return error;
560
561 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
562 if (error)
563 goto out;
564
565 if (sdp->sd_args.ar_spectator) {
566 spin_lock(&sdp->sd_statfs_spin);
567 gfs2_statfs_change_in(m_sc, m_bh->b_data +
568 sizeof(struct gfs2_dinode));
569 spin_unlock(&sdp->sd_statfs_spin);
570 } else {
571 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
572 if (error)
573 goto out_m_bh;
574
575 spin_lock(&sdp->sd_statfs_spin);
576 gfs2_statfs_change_in(m_sc, m_bh->b_data +
577 sizeof(struct gfs2_dinode));
578 gfs2_statfs_change_in(l_sc, l_bh->b_data +
579 sizeof(struct gfs2_dinode));
580 spin_unlock(&sdp->sd_statfs_spin);
581
582 brelse(l_bh);
583 }
584
585 out_m_bh:
586 brelse(m_bh);
587
588 out:
589 gfs2_glock_dq_uninit(&gh);
590
591 return 0;
592}
593
594void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free,
595 int64_t dinodes)
596{
597 struct gfs2_inode *l_ip = sdp->sd_sc_inode->u.generic_ip;
598 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
599 struct buffer_head *l_bh;
600 int error;
601
602 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
603 if (error)
604 return;
605
606 mutex_lock(&sdp->sd_statfs_mutex);
607 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
608 mutex_unlock(&sdp->sd_statfs_mutex);
609
610 spin_lock(&sdp->sd_statfs_spin);
611 l_sc->sc_total += total;
612 l_sc->sc_free += free;
613 l_sc->sc_dinodes += dinodes;
614 gfs2_statfs_change_out(l_sc, l_bh->b_data +
615 sizeof(struct gfs2_dinode));
616 spin_unlock(&sdp->sd_statfs_spin);
617
618 brelse(l_bh);
619}
620
621int gfs2_statfs_sync(struct gfs2_sbd *sdp)
622{
623 struct gfs2_inode *m_ip = sdp->sd_statfs_inode->u.generic_ip;
624 struct gfs2_inode *l_ip = sdp->sd_sc_inode->u.generic_ip;
625 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
626 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
627 struct gfs2_holder gh;
628 struct buffer_head *m_bh, *l_bh;
629 int error;
630
631 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
632 &gh);
633 if (error)
634 return error;
635
636 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
637 if (error)
638 goto out;
639
640 spin_lock(&sdp->sd_statfs_spin);
641 gfs2_statfs_change_in(m_sc, m_bh->b_data +
642 sizeof(struct gfs2_dinode));
643 if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
644 spin_unlock(&sdp->sd_statfs_spin);
645 goto out_bh;
646 }
647 spin_unlock(&sdp->sd_statfs_spin);
648
649 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
650 if (error)
651 goto out_bh;
652
653 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
654 if (error)
655 goto out_bh2;
656
657 mutex_lock(&sdp->sd_statfs_mutex);
658 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
659 mutex_unlock(&sdp->sd_statfs_mutex);
660
661 spin_lock(&sdp->sd_statfs_spin);
662 m_sc->sc_total += l_sc->sc_total;
663 m_sc->sc_free += l_sc->sc_free;
664 m_sc->sc_dinodes += l_sc->sc_dinodes;
665 memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
666 memset(l_bh->b_data + sizeof(struct gfs2_dinode),
667 0, sizeof(struct gfs2_statfs_change));
668 spin_unlock(&sdp->sd_statfs_spin);
669
670 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
671 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
672
673 gfs2_trans_end(sdp);
674
675 out_bh2:
676 brelse(l_bh);
677
678 out_bh:
679 brelse(m_bh);
680
681 out:
682 gfs2_glock_dq_uninit(&gh);
683
684 return error;
685}
686
687/**
688 * gfs2_statfs_i - Do a statfs
689 * @sdp: the filesystem
690 * @sg: the sg structure
691 *
692 * Returns: errno
693 */
694
695int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
696{
697 struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master;
698 struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local;
699
700 spin_lock(&sdp->sd_statfs_spin);
701
702 *sc = *m_sc;
703 sc->sc_total += l_sc->sc_total;
704 sc->sc_free += l_sc->sc_free;
705 sc->sc_dinodes += l_sc->sc_dinodes;
706
707 spin_unlock(&sdp->sd_statfs_spin);
708
709 if (sc->sc_free < 0)
710 sc->sc_free = 0;
711 if (sc->sc_free > sc->sc_total)
712 sc->sc_free = sc->sc_total;
713 if (sc->sc_dinodes < 0)
714 sc->sc_dinodes = 0;
715
716 return 0;
717}
718
719/**
720 * statfs_fill - fill in the sg for a given RG
721 * @rgd: the RG
722 * @sc: the sc structure
723 *
724 * Returns: 0 on success, -ESTALE if the LVB is invalid
725 */
726
727static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
728 struct gfs2_statfs_change *sc)
729{
730 gfs2_rgrp_verify(rgd);
731 sc->sc_total += rgd->rd_ri.ri_data;
732 sc->sc_free += rgd->rd_rg.rg_free;
733 sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
734 return 0;
735}
736
737/**
738 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
739 * @sdp: the filesystem
740 * @sc: the sc info that will be returned
741 *
742 * Any error (other than a signal) will cause this routine to fall back
743 * to the synchronous version.
744 *
745 * FIXME: This really shouldn't busy wait like this.
746 *
747 * Returns: errno
748 */
749
750int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc)
751{
752 struct gfs2_holder ri_gh;
753 struct gfs2_rgrpd *rgd_next;
754 struct gfs2_holder *gha, *gh;
755 unsigned int slots = 64;
756 unsigned int x;
757 int done;
758 int error = 0, err;
759
760 memset(sc, 0, sizeof(struct gfs2_statfs_change));
761 gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
762 if (!gha)
763 return -ENOMEM;
764
765 error = gfs2_rindex_hold(sdp, &ri_gh);
766 if (error)
767 goto out;
768
769 rgd_next = gfs2_rgrpd_get_first(sdp);
770
771 for (;;) {
772 done = 1;
773
774 for (x = 0; x < slots; x++) {
775 gh = gha + x;
776
777 if (gh->gh_gl && gfs2_glock_poll(gh)) {
778 err = gfs2_glock_wait(gh);
779 if (err) {
780 gfs2_holder_uninit(gh);
781 error = err;
782 } else {
783 if (!error)
784 error = statfs_slow_fill(
785 gh->gh_gl->gl_object, sc);
786 gfs2_glock_dq_uninit(gh);
787 }
788 }
789
790 if (gh->gh_gl)
791 done = 0;
792 else if (rgd_next && !error) {
793 error = gfs2_glock_nq_init(rgd_next->rd_gl,
794 LM_ST_SHARED,
795 GL_ASYNC,
796 gh);
797 rgd_next = gfs2_rgrpd_get_next(rgd_next);
798 done = 0;
799 }
800
801 if (signal_pending(current))
802 error = -ERESTARTSYS;
803 }
804
805 if (done)
806 break;
807
808 yield();
809 }
810
811 gfs2_glock_dq_uninit(&ri_gh);
812
813 out:
814 kfree(gha);
815
816 return error;
817}
818
/*
 * One entry per journal on the local list that gfs2_lock_fs_check_clean()
 * builds while it takes a shared glock on every journal inode.
 */
struct lfcc {
	struct list_head list;	/* link on gfs2_lock_fs_check_clean()'s local list */
	struct gfs2_holder gh;	/* holder for that journal inode's glock */
};
823
824/**
825 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
826 * journals are clean
827 * @sdp: the file system
828 * @state: the state to put the transaction lock into
829 * @t_gh: the hold on the transaction lock
830 *
831 * Returns: errno
832 */
833
834static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
835 struct gfs2_holder *t_gh)
836{
837 struct gfs2_inode *ip;
838 struct gfs2_holder ji_gh;
839 struct gfs2_jdesc *jd;
840 struct lfcc *lfcc;
841 LIST_HEAD(list);
842 struct gfs2_log_header lh;
843 int error;
844
845 error = gfs2_jindex_hold(sdp, &ji_gh);
846 if (error)
847 return error;
848
849 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
850 lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
851 if (!lfcc) {
852 error = -ENOMEM;
853 goto out;
854 }
855 ip = jd->jd_inode->u.generic_ip;
856 error = gfs2_glock_nq_init(ip->i_gl,
857 LM_ST_SHARED, 0,
858 &lfcc->gh);
859 if (error) {
860 kfree(lfcc);
861 goto out;
862 }
863 list_add(&lfcc->list, &list);
864 }
865
866 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
867 LM_FLAG_PRIORITY | GL_NOCACHE,
868 t_gh);
869
870 list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
871 error = gfs2_jdesc_check(jd);
872 if (error)
873 break;
874 error = gfs2_find_jhead(jd, &lh);
875 if (error)
876 break;
877 if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
878 error = -EBUSY;
879 break;
880 }
881 }
882
883 if (error)
884 gfs2_glock_dq_uninit(t_gh);
885
886 out:
887 while (!list_empty(&list)) {
888 lfcc = list_entry(list.next, struct lfcc, list);
889 list_del(&lfcc->list);
890 gfs2_glock_dq_uninit(&lfcc->gh);
891 kfree(lfcc);
892 }
893 gfs2_glock_dq_uninit(&ji_gh);
894
895 return error;
896}
897
898/**
899 * gfs2_freeze_fs - freezes the file system
900 * @sdp: the file system
901 *
902 * This function flushes data and meta data for all machines by
903 * aquiring the transaction log exclusively. All journals are
904 * ensured to be in a clean state as well.
905 *
906 * Returns: errno
907 */
908
909int gfs2_freeze_fs(struct gfs2_sbd *sdp)
910{
911 int error = 0;
912
913 mutex_lock(&sdp->sd_freeze_lock);
914
915 if (!sdp->sd_freeze_count++) {
916 error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
917 if (error)
918 sdp->sd_freeze_count--;
919 }
920
921 mutex_unlock(&sdp->sd_freeze_lock);
922
923 return error;
924}
925
926/**
927 * gfs2_unfreeze_fs - unfreezes the file system
928 * @sdp: the file system
929 *
930 * This function allows the file system to proceed by unlocking
931 * the exclusively held transaction lock. Other GFS2 nodes are
932 * now free to acquire the lock shared and go on with their lives.
933 *
934 */
935
936void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
937{
938 mutex_lock(&sdp->sd_freeze_lock);
939
940 if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
941 gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
942
943 mutex_unlock(&sdp->sd_freeze_lock);
944}
945
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
new file mode 100644
index 000000000000..df2495230402
--- /dev/null
+++ b/fs/gfs2/super.h
@@ -0,0 +1,52 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SUPER_DOT_H__
11#define __SUPER_DOT_H__
12
13void gfs2_tune_init(struct gfs2_tune *gt);
14
15int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent);
16int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
17
18static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
19{
20 unsigned int x;
21 spin_lock(&sdp->sd_jindex_spin);
22 x = sdp->sd_journals;
23 spin_unlock(&sdp->sd_jindex_spin);
24 return x;
25}
26
27int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
28void gfs2_jindex_free(struct gfs2_sbd *sdp);
29
30struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
31void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
32struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
33int gfs2_jdesc_check(struct gfs2_jdesc *jd);
34
35int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
36 struct gfs2_inode **ipp);
37
38int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
39int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
40
41int gfs2_statfs_init(struct gfs2_sbd *sdp);
42void gfs2_statfs_change(struct gfs2_sbd *sdp,
43 int64_t total, int64_t free, int64_t dinodes);
44int gfs2_statfs_sync(struct gfs2_sbd *sdp);
45int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
46int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc);
47
48int gfs2_freeze_fs(struct gfs2_sbd *sdp);
49void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
50
51#endif /* __SUPER_DOT_H__ */
52
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
new file mode 100644
index 000000000000..d32a2c54daee
--- /dev/null
+++ b/fs/gfs2/sys.c
@@ -0,0 +1,581 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/kobject.h>
17#include <linux/gfs2_ondisk.h>
18#include <asm/uaccess.h>
19
20#include "gfs2.h"
21#include "lm_interface.h"
22#include "incore.h"
23#include "lm.h"
24#include "sys.h"
25#include "super.h"
26#include "glock.h"
27#include "quota.h"
28#include "util.h"
29
30char *gfs2_sys_margs;
31spinlock_t gfs2_sys_margs_lock;
32
33static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
34{
35 return sprintf(buf, "%s\n", sdp->sd_vfs->s_id);
36}
37
38static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
39{
40 return sprintf(buf, "%s\n", sdp->sd_fsname);
41}
42
43static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
44{
45 unsigned int count;
46
47 mutex_lock(&sdp->sd_freeze_lock);
48 count = sdp->sd_freeze_count;
49 mutex_unlock(&sdp->sd_freeze_lock);
50
51 return sprintf(buf, "%u\n", count);
52}
53
54static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
55{
56 ssize_t ret = len;
57 int error = 0;
58 int n = simple_strtol(buf, NULL, 0);
59
60 if (!capable(CAP_SYS_ADMIN))
61 return -EACCES;
62
63 switch (n) {
64 case 0:
65 gfs2_unfreeze_fs(sdp);
66 break;
67 case 1:
68 error = gfs2_freeze_fs(sdp);
69 break;
70 default:
71 ret = -EINVAL;
72 }
73
74 if (error)
75 fs_warn(sdp, "freeze %d error %d", n, error);
76
77 return ret;
78}
79
80static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
81{
82 unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);
83 return sprintf(buf, "%u\n", b);
84}
85
86static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
87{
88 if (!capable(CAP_SYS_ADMIN))
89 return -EACCES;
90
91 if (simple_strtol(buf, NULL, 0) != 1)
92 return -EINVAL;
93
94 gfs2_lm_withdraw(sdp,
95 "GFS2: fsid=%s: withdrawing from cluster at user's request\n",
96 sdp->sd_fsname);
97 return len;
98}
99
100static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
101 size_t len)
102{
103 if (!capable(CAP_SYS_ADMIN))
104 return -EACCES;
105
106 if (simple_strtol(buf, NULL, 0) != 1)
107 return -EINVAL;
108
109 gfs2_statfs_sync(sdp);
110 return len;
111}
112
113static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
114{
115 if (!capable(CAP_SYS_ADMIN))
116 return -EACCES;
117
118 if (simple_strtol(buf, NULL, 0) != 1)
119 return -EINVAL;
120
121 gfs2_gl_hash_clear(sdp, NO_WAIT);
122 return len;
123}
124
125static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
126 size_t len)
127{
128 if (!capable(CAP_SYS_ADMIN))
129 return -EACCES;
130
131 if (simple_strtol(buf, NULL, 0) != 1)
132 return -EINVAL;
133
134 gfs2_quota_sync(sdp);
135 return len;
136}
137
138static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
139 size_t len)
140{
141 uint32_t id;
142
143 if (!capable(CAP_SYS_ADMIN))
144 return -EACCES;
145
146 id = simple_strtoul(buf, NULL, 0);
147
148 gfs2_quota_refresh(sdp, 1, id);
149 return len;
150}
151
152static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
153 size_t len)
154{
155 uint32_t id;
156
157 if (!capable(CAP_SYS_ADMIN))
158 return -EACCES;
159
160 id = simple_strtoul(buf, NULL, 0);
161
162 gfs2_quota_refresh(sdp, 0, id);
163 return len;
164}
165
166struct gfs2_attr {
167 struct attribute attr;
168 ssize_t (*show)(struct gfs2_sbd *, char *);
169 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
170};
171
172#define GFS2_ATTR(name, mode, show, store) \
173static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
174
175GFS2_ATTR(id, 0444, id_show, NULL);
176GFS2_ATTR(fsname, 0444, fsname_show, NULL);
177GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
178GFS2_ATTR(shrink, 0200, NULL, shrink_store);
179GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
180GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
181GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
182GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
183GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
184
185static struct attribute *gfs2_attrs[] = {
186 &gfs2_attr_id.attr,
187 &gfs2_attr_fsname.attr,
188 &gfs2_attr_freeze.attr,
189 &gfs2_attr_shrink.attr,
190 &gfs2_attr_withdraw.attr,
191 &gfs2_attr_statfs_sync.attr,
192 &gfs2_attr_quota_sync.attr,
193 &gfs2_attr_quota_refresh_user.attr,
194 &gfs2_attr_quota_refresh_group.attr,
195 NULL,
196};
197
198static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
199 char *buf)
200{
201 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
202 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
203 return a->show ? a->show(sdp, buf) : 0;
204}
205
206static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
207 const char *buf, size_t len)
208{
209 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
210 struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);
211 return a->store ? a->store(sdp, buf, len) : len;
212}
213
214static struct sysfs_ops gfs2_attr_ops = {
215 .show = gfs2_attr_show,
216 .store = gfs2_attr_store,
217};
218
219static struct kobj_type gfs2_ktype = {
220 .default_attrs = gfs2_attrs,
221 .sysfs_ops = &gfs2_attr_ops,
222};
223
224static struct kset gfs2_kset = {
225 .subsys = &fs_subsys,
226 .kobj = {.name = "gfs2",},
227 .ktype = &gfs2_ktype,
228};
229
230/*
231 * display struct lm_lockstruct fields
232 */
233
234struct lockstruct_attr {
235 struct attribute attr;
236 ssize_t (*show)(struct gfs2_sbd *, char *);
237};
238
239#define LOCKSTRUCT_ATTR(name, fmt) \
240static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
241{ \
242 return sprintf(buf, fmt, sdp->sd_lockstruct.ls_##name); \
243} \
244static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
245
246LOCKSTRUCT_ATTR(jid, "%u\n");
247LOCKSTRUCT_ATTR(first, "%u\n");
248LOCKSTRUCT_ATTR(lvb_size, "%u\n");
249LOCKSTRUCT_ATTR(flags, "%d\n");
250
251static struct attribute *lockstruct_attrs[] = {
252 &lockstruct_attr_jid.attr,
253 &lockstruct_attr_first.attr,
254 &lockstruct_attr_lvb_size.attr,
255 &lockstruct_attr_flags.attr,
256 NULL
257};
258
259/*
260 * display struct gfs2_args fields
261 */
262
263struct args_attr {
264 struct attribute attr;
265 ssize_t (*show)(struct gfs2_sbd *, char *);
266};
267
268#define ARGS_ATTR(name, fmt) \
269static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
270{ \
271 return sprintf(buf, fmt, sdp->sd_args.ar_##name); \
272} \
273static struct args_attr args_attr_##name = __ATTR_RO(name)
274
275ARGS_ATTR(lockproto, "%s\n");
276ARGS_ATTR(locktable, "%s\n");
277ARGS_ATTR(hostdata, "%s\n");
278ARGS_ATTR(spectator, "%d\n");
279ARGS_ATTR(ignore_local_fs, "%d\n");
280ARGS_ATTR(localcaching, "%d\n");
281ARGS_ATTR(localflocks, "%d\n");
282ARGS_ATTR(debug, "%d\n");
283ARGS_ATTR(upgrade, "%d\n");
284ARGS_ATTR(num_glockd, "%u\n");
285ARGS_ATTR(posix_acl, "%d\n");
286ARGS_ATTR(quota, "%u\n");
287ARGS_ATTR(suiddir, "%d\n");
288ARGS_ATTR(data, "%d\n");
289
290/* one oddball doesn't fit the macro mold */
291static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
292{
293 return sprintf(buf, "%d\n", !!test_bit(SDF_NOATIME, &sdp->sd_flags));
294}
295static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
296
297static struct attribute *args_attrs[] = {
298 &args_attr_lockproto.attr,
299 &args_attr_locktable.attr,
300 &args_attr_hostdata.attr,
301 &args_attr_spectator.attr,
302 &args_attr_ignore_local_fs.attr,
303 &args_attr_localcaching.attr,
304 &args_attr_localflocks.attr,
305 &args_attr_debug.attr,
306 &args_attr_upgrade.attr,
307 &args_attr_num_glockd.attr,
308 &args_attr_posix_acl.attr,
309 &args_attr_quota.attr,
310 &args_attr_suiddir.attr,
311 &args_attr_data.attr,
312 &args_attr_noatime.attr,
313 NULL
314};
315
316/*
317 * display counters from superblock
318 */
319
320struct counters_attr {
321 struct attribute attr;
322 ssize_t (*show)(struct gfs2_sbd *, char *);
323};
324
325#define COUNTERS_ATTR(name, fmt) \
326static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
327{ \
328 return sprintf(buf, fmt, (unsigned int)atomic_read(&sdp->sd_##name)); \
329} \
330static struct counters_attr counters_attr_##name = __ATTR_RO(name)
331
332COUNTERS_ATTR(glock_count, "%u\n");
333COUNTERS_ATTR(glock_held_count, "%u\n");
334COUNTERS_ATTR(inode_count, "%u\n");
335COUNTERS_ATTR(reclaimed, "%u\n");
336
337static struct attribute *counters_attrs[] = {
338 &counters_attr_glock_count.attr,
339 &counters_attr_glock_held_count.attr,
340 &counters_attr_inode_count.attr,
341 &counters_attr_reclaimed.attr,
342 NULL
343};
344
345/*
346 * get and set struct gfs2_tune fields
347 */
348
349static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
350{
351 return sprintf(buf, "%u %u\n", sdp->sd_tune.gt_quota_scale_num,
352 sdp->sd_tune.gt_quota_scale_den);
353}
354
355static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
356 size_t len)
357{
358 struct gfs2_tune *gt = &sdp->sd_tune;
359 unsigned int x, y;
360
361 if (!capable(CAP_SYS_ADMIN))
362 return -EACCES;
363
364 if (sscanf(buf, "%u %u", &x, &y) != 2 || !y)
365 return -EINVAL;
366
367 spin_lock(&gt->gt_spin);
368 gt->gt_quota_scale_num = x;
369 gt->gt_quota_scale_den = y;
370 spin_unlock(&gt->gt_spin);
371 return len;
372}
373
374static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
375 int check_zero, const char *buf, size_t len)
376{
377 struct gfs2_tune *gt = &sdp->sd_tune;
378 unsigned int x;
379
380 if (!capable(CAP_SYS_ADMIN))
381 return -EACCES;
382
383 x = simple_strtoul(buf, NULL, 0);
384
385 if (check_zero && !x)
386 return -EINVAL;
387
388 spin_lock(&gt->gt_spin);
389 *field = x;
390 spin_unlock(&gt->gt_spin);
391 return len;
392}
393
394struct tune_attr {
395 struct attribute attr;
396 ssize_t (*show)(struct gfs2_sbd *, char *);
397 ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
398};
399
400#define TUNE_ATTR_3(name, show, store) \
401static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
402
403#define TUNE_ATTR_2(name, store) \
404static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
405{ \
406 return sprintf(buf, "%u\n", sdp->sd_tune.gt_##name); \
407} \
408TUNE_ATTR_3(name, name##_show, store)
409
410#define TUNE_ATTR(name, check_zero) \
411static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
412{ \
413 return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
414} \
415TUNE_ATTR_2(name, name##_store)
416
417#define TUNE_ATTR_DAEMON(name, process) \
418static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
419{ \
420 ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
421 wake_up_process(sdp->sd_##process); \
422 return r; \
423} \
424TUNE_ATTR_2(name, name##_store)
425
426TUNE_ATTR(ilimit, 0);
427TUNE_ATTR(ilimit_tries, 0);
428TUNE_ATTR(ilimit_min, 0);
429TUNE_ATTR(demote_secs, 0);
430TUNE_ATTR(incore_log_blocks, 0);
431TUNE_ATTR(log_flush_secs, 0);
432TUNE_ATTR(jindex_refresh_secs, 0);
433TUNE_ATTR(quota_warn_period, 0);
434TUNE_ATTR(quota_quantum, 0);
435TUNE_ATTR(atime_quantum, 0);
436TUNE_ATTR(max_readahead, 0);
437TUNE_ATTR(complain_secs, 0);
438TUNE_ATTR(reclaim_limit, 0);
439TUNE_ATTR(prefetch_secs, 0);
440TUNE_ATTR(statfs_slow, 0);
441TUNE_ATTR(new_files_jdata, 0);
442TUNE_ATTR(new_files_directio, 0);
443TUNE_ATTR(quota_simul_sync, 1);
444TUNE_ATTR(quota_cache_secs, 1);
445TUNE_ATTR(max_atomic_write, 1);
446TUNE_ATTR(stall_secs, 1);
447TUNE_ATTR(entries_per_readdir, 1);
448TUNE_ATTR(greedy_default, 1);
449TUNE_ATTR(greedy_quantum, 1);
450TUNE_ATTR(greedy_max, 1);
451TUNE_ATTR(statfs_quantum, 1);
452TUNE_ATTR_DAEMON(scand_secs, scand_process);
453TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
454TUNE_ATTR_DAEMON(logd_secs, logd_process);
455TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
456TUNE_ATTR_DAEMON(inoded_secs, inoded_process);
457TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
458
459static struct attribute *tune_attrs[] = {
460 &tune_attr_ilimit.attr,
461 &tune_attr_ilimit_tries.attr,
462 &tune_attr_ilimit_min.attr,
463 &tune_attr_demote_secs.attr,
464 &tune_attr_incore_log_blocks.attr,
465 &tune_attr_log_flush_secs.attr,
466 &tune_attr_jindex_refresh_secs.attr,
467 &tune_attr_quota_warn_period.attr,
468 &tune_attr_quota_quantum.attr,
469 &tune_attr_atime_quantum.attr,
470 &tune_attr_max_readahead.attr,
471 &tune_attr_complain_secs.attr,
472 &tune_attr_reclaim_limit.attr,
473 &tune_attr_prefetch_secs.attr,
474 &tune_attr_statfs_slow.attr,
475 &tune_attr_quota_simul_sync.attr,
476 &tune_attr_quota_cache_secs.attr,
477 &tune_attr_max_atomic_write.attr,
478 &tune_attr_stall_secs.attr,
479 &tune_attr_entries_per_readdir.attr,
480 &tune_attr_greedy_default.attr,
481 &tune_attr_greedy_quantum.attr,
482 &tune_attr_greedy_max.attr,
483 &tune_attr_statfs_quantum.attr,
484 &tune_attr_scand_secs.attr,
485 &tune_attr_recoverd_secs.attr,
486 &tune_attr_logd_secs.attr,
487 &tune_attr_quotad_secs.attr,
488 &tune_attr_inoded_secs.attr,
489 &tune_attr_quota_scale.attr,
490 &tune_attr_new_files_jdata.attr,
491 &tune_attr_new_files_directio.attr,
492 NULL
493};
494
495static struct attribute_group lockstruct_group = {
496 .name = "lockstruct",
497 .attrs = lockstruct_attrs
498};
499
500static struct attribute_group counters_group = {
501 .name = "counters",
502 .attrs = counters_attrs
503};
504
505static struct attribute_group args_group = {
506 .name = "args",
507 .attrs = args_attrs
508};
509
510static struct attribute_group tune_group = {
511 .name = "tune",
512 .attrs = tune_attrs
513};
514
515int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
516{
517 int error;
518
519 sdp->sd_kobj.kset = &gfs2_kset;
520 sdp->sd_kobj.ktype = &gfs2_ktype;
521
522 error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
523 if (error)
524 goto fail;
525
526 error = kobject_register(&sdp->sd_kobj);
527 if (error)
528 goto fail;
529
530 error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
531 if (error)
532 goto fail_reg;
533
534 error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
535 if (error)
536 goto fail_lockstruct;
537
538 error = sysfs_create_group(&sdp->sd_kobj, &args_group);
539 if (error)
540 goto fail_counters;
541
542 error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
543 if (error)
544 goto fail_args;
545
546 return 0;
547
548 fail_args:
549 sysfs_remove_group(&sdp->sd_kobj, &args_group);
550 fail_counters:
551 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
552 fail_lockstruct:
553 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
554 fail_reg:
555 kobject_unregister(&sdp->sd_kobj);
556 fail:
557 return error;
558}
559
560void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
561{
562 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
563 sysfs_remove_group(&sdp->sd_kobj, &args_group);
564 sysfs_remove_group(&sdp->sd_kobj, &counters_group);
565 sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
566 kobject_unregister(&sdp->sd_kobj);
567}
568
569int gfs2_sys_init(void)
570{
571 gfs2_sys_margs = NULL;
572 spin_lock_init(&gfs2_sys_margs_lock);
573 return kset_register(&gfs2_kset);
574}
575
576void gfs2_sys_uninit(void)
577{
578 kfree(gfs2_sys_margs);
579 kset_unregister(&gfs2_kset);
580}
581
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
new file mode 100644
index 000000000000..c46a700e801e
--- /dev/null
+++ b/fs/gfs2/sys.h
@@ -0,0 +1,24 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __SYS_DOT_H__
11#define __SYS_DOT_H__
12
13/* Allow args to be passed to GFS2 when using an initial ram disk */
14extern char *gfs2_sys_margs;
15extern spinlock_t gfs2_sys_margs_lock;
16
17int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
18void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
19
20int gfs2_sys_init(void);
21void gfs2_sys_uninit(void);
22
23#endif /* __SYS_DOT_H__ */
24
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
new file mode 100644
index 000000000000..05e0b72d56ff
--- /dev/null
+++ b/fs/gfs2/trans.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/gfs2_ondisk.h>
16#include <linux/kallsyms.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "glock.h"
22#include "log.h"
23#include "lops.h"
24#include "meta_io.h"
25#include "trans.h"
26#include "util.h"
27
28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
29 unsigned int revokes)
30{
31 struct gfs2_trans *tr;
32 int error;
33
34 BUG_ON(current->journal_info);
35 BUG_ON(blocks == 0 && revokes == 0);
36
37 tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
38 if (!tr)
39 return -ENOMEM;
40
41 tr->tr_ip = (unsigned long)__builtin_return_address(0);
42 tr->tr_blocks = blocks;
43 tr->tr_revokes = revokes;
44 tr->tr_reserved = 1;
45 if (blocks)
46 tr->tr_reserved += 6 + blocks;
47 if (revokes)
48 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
49 sizeof(uint64_t));
50 INIT_LIST_HEAD(&tr->tr_list_buf);
51
52 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
53
54 error = gfs2_glock_nq(&tr->tr_t_gh);
55 if (error)
56 goto fail_holder_uninit;
57
58 if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
59 tr->tr_t_gh.gh_flags |= GL_NOCACHE;
60 error = -EROFS;
61 goto fail_gunlock;
62 }
63
64 error = gfs2_log_reserve(sdp, tr->tr_reserved);
65 if (error)
66 goto fail_gunlock;
67
68 current->journal_info = tr;
69
70 return 0;
71
72fail_gunlock:
73 gfs2_glock_dq(&tr->tr_t_gh);
74
75fail_holder_uninit:
76 gfs2_holder_uninit(&tr->tr_t_gh);
77 kfree(tr);
78
79 return error;
80}
81
82void gfs2_trans_end(struct gfs2_sbd *sdp)
83{
84 struct gfs2_trans *tr = current->journal_info;
85
86 BUG_ON(!tr);
87 current->journal_info = NULL;
88
89 if (!tr->tr_touched) {
90 gfs2_log_release(sdp, tr->tr_reserved);
91 gfs2_glock_dq(&tr->tr_t_gh);
92 gfs2_holder_uninit(&tr->tr_t_gh);
93 kfree(tr);
94 return;
95 }
96
97 if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) {
98 fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ",
99 tr->tr_num_buf, tr->tr_blocks);
100 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
101 }
102 if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) {
103 fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ",
104 tr->tr_num_revoke, tr->tr_revokes);
105 print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
106 }
107
108 gfs2_log_commit(sdp, tr);
109 gfs2_glock_dq(&tr->tr_t_gh);
110 gfs2_holder_uninit(&tr->tr_t_gh);
111 kfree(tr);
112
113 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
114 gfs2_log_flush(sdp, NULL);
115}
116
117void gfs2_trans_add_gl(struct gfs2_glock *gl)
118{
119 lops_add(gl->gl_sbd, &gl->gl_le);
120}
121
122/**
123 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
124 * @gl: the glock the buffer belongs to
125 * @bh: The buffer to add
126 * @meta: True in the case of adding metadata
127 *
128 */
129
130void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
131{
132 struct gfs2_sbd *sdp = gl->gl_sbd;
133 struct gfs2_bufdata *bd;
134
135 bd = bh->b_private;
136 if (bd)
137 gfs2_assert(sdp, bd->bd_gl == gl);
138 else {
139 gfs2_attach_bufdata(gl, bh, meta);
140 bd = bh->b_private;
141 }
142 lops_add(sdp, &bd->bd_le);
143}
144
145void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
146{
147 struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
148 GFP_NOFS | __GFP_NOFAIL);
149 lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
150 rv->rv_blkno = blkno;
151 lops_add(sdp, &rv->rv_le);
152}
153
154void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno)
155{
156 struct gfs2_revoke *rv;
157 int found = 0;
158
159 gfs2_log_lock(sdp);
160
161 list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
162 if (rv->rv_blkno == blkno) {
163 list_del(&rv->rv_le.le_list);
164 gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
165 sdp->sd_log_num_revoke--;
166 found = 1;
167 break;
168 }
169 }
170
171 gfs2_log_unlock(sdp);
172
173 if (found) {
174 struct gfs2_trans *tr = current->journal_info;
175 kfree(rv);
176 tr->tr_num_revoke_rm++;
177 }
178}
179
180void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
181{
182 lops_add(rgd->rd_sbd, &rgd->rd_le);
183}
184
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
new file mode 100644
index 000000000000..60ef163dd9bb
--- /dev/null
+++ b/fs/gfs2/trans.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __TRANS_DOT_H__
11#define __TRANS_DOT_H__
12
13#define RES_DINODE 1
14#define RES_INDIRECT 1
15#define RES_JDATA 1
16#define RES_DATA 1
17#define RES_LEAF 1
18#define RES_RG_BIT 2
19#define RES_EATTR 1
20#define RES_UNLINKED 1
21#define RES_STATFS 1
22#define RES_QUOTA 2
23
24int gfs2_trans_begin(struct gfs2_sbd *sdp,
25 unsigned int blocks, unsigned int revokes);
26
27void gfs2_trans_end(struct gfs2_sbd *sdp);
28
29void gfs2_trans_add_gl(struct gfs2_glock *gl);
30void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
31void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
32void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
33void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
34
35#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/unlinked.c b/fs/gfs2/unlinked.c
new file mode 100644
index 000000000000..b92d73002055
--- /dev/null
+++ b/fs/gfs2/unlinked.c
@@ -0,0 +1,459 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/kthread.h>
16#include <linux/gfs2_ondisk.h>
17
18#include "gfs2.h"
19#include "lm_interface.h"
20#include "incore.h"
21#include "bmap.h"
22#include "inode.h"
23#include "meta_io.h"
24#include "trans.h"
25#include "unlinked.h"
26#include "util.h"
27
28static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot,
29 struct gfs2_unlinked_tag *ut)
30{
31 struct gfs2_inode *ip = sdp->sd_ut_inode->u.generic_ip;
32 unsigned int block, offset;
33 uint64_t dblock;
34 int new = 0;
35 struct buffer_head *bh;
36 int error;
37 int boundary;
38
39 block = slot / sdp->sd_ut_per_block;
40 offset = slot % sdp->sd_ut_per_block;
41
42 error = gfs2_block_map(ip->i_vnode, block, &new, &dblock, &boundary);
43 if (error)
44 return error;
45 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh);
46 if (error)
47 return error;
48 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
49 error = -EIO;
50 goto out;
51 }
52
53 mutex_lock(&sdp->sd_unlinked_mutex);
54 gfs2_trans_add_bh(ip->i_gl, bh, 1);
55 gfs2_unlinked_tag_out(ut, bh->b_data +
56 sizeof(struct gfs2_meta_header) +
57 offset * sizeof(struct gfs2_unlinked_tag));
58 mutex_unlock(&sdp->sd_unlinked_mutex);
59
60 out:
61 brelse(bh);
62
63 return error;
64}
65
66static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
67{
68 spin_lock(&sdp->sd_unlinked_spin);
69 list_add(&ul->ul_list, &sdp->sd_unlinked_list);
70 gfs2_assert(sdp, ul->ul_count);
71 ul->ul_count++;
72 atomic_inc(&sdp->sd_unlinked_count);
73 spin_unlock(&sdp->sd_unlinked_spin);
74}
75
76static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
77{
78 spin_lock(&sdp->sd_unlinked_spin);
79 list_del_init(&ul->ul_list);
80 gfs2_assert(sdp, ul->ul_count > 1);
81 ul->ul_count--;
82 gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0);
83 atomic_dec(&sdp->sd_unlinked_count);
84 spin_unlock(&sdp->sd_unlinked_spin);
85}
86
87static struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp)
88{
89 struct list_head *head;
90 struct gfs2_unlinked *ul;
91 int found = 0;
92
93 if (sdp->sd_vfs->s_flags & MS_RDONLY)
94 return NULL;
95
96 spin_lock(&sdp->sd_unlinked_spin);
97
98 head = &sdp->sd_unlinked_list;
99
100 list_for_each_entry(ul, head, ul_list) {
101 if (test_bit(ULF_LOCKED, &ul->ul_flags))
102 continue;
103
104 list_move_tail(&ul->ul_list, head);
105 ul->ul_count++;
106 set_bit(ULF_LOCKED, &ul->ul_flags);
107 found = 1;
108
109 break;
110 }
111
112 if (!found)
113 ul = NULL;
114
115 spin_unlock(&sdp->sd_unlinked_spin);
116
117 return ul;
118}
119
120/**
121 * enforce_limit - limit the number of inodes waiting to be deallocated
122 * @sdp: the filesystem
123 *
124 * Returns: errno
125 */
126
127static void enforce_limit(struct gfs2_sbd *sdp)
128{
129 unsigned int tries = 0, min = 0;
130 int error;
131
132 if (atomic_read(&sdp->sd_unlinked_count) <
133 gfs2_tune_get(sdp, gt_ilimit))
134 return;
135
136 tries = gfs2_tune_get(sdp, gt_ilimit_tries);
137 min = gfs2_tune_get(sdp, gt_ilimit_min);
138
139 while (tries--) {
140 struct gfs2_unlinked *ul = ul_fish(sdp);
141 if (!ul)
142 break;
143 error = gfs2_inode_dealloc(sdp, ul);
144 gfs2_unlinked_put(sdp, ul);
145
146 if (!error) {
147 if (!--min)
148 break;
149 } else if (error != 1)
150 break;
151 }
152}
153
154static struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp)
155{
156 struct gfs2_unlinked *ul;
157
158 ul = kzalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL);
159 if (ul) {
160 INIT_LIST_HEAD(&ul->ul_list);
161 ul->ul_count = 1;
162 set_bit(ULF_LOCKED, &ul->ul_flags);
163 }
164
165 return ul;
166}
167
168int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul)
169{
170 unsigned int c, o = 0, b;
171 unsigned char byte = 0;
172
173 enforce_limit(sdp);
174
175 *ul = ul_alloc(sdp);
176 if (!*ul)
177 return -ENOMEM;
178
179 spin_lock(&sdp->sd_unlinked_spin);
180
181 for (c = 0; c < sdp->sd_unlinked_chunks; c++)
182 for (o = 0; o < PAGE_SIZE; o++) {
183 byte = sdp->sd_unlinked_bitmap[c][o];
184 if (byte != 0xFF)
185 goto found;
186 }
187
188 goto fail;
189
190found:
191 for (b = 0; b < 8; b++)
192 if (!(byte & (1 << b)))
193 break;
194 (*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b;
195
196 if ((*ul)->ul_slot >= sdp->sd_unlinked_slots)
197 goto fail;
198
199 sdp->sd_unlinked_bitmap[c][o] |= 1 << b;
200
201 spin_unlock(&sdp->sd_unlinked_spin);
202
203 return 0;
204
205fail:
206 spin_unlock(&sdp->sd_unlinked_spin);
207 kfree(*ul);
208 return -ENOSPC;
209}
210
211void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
212{
213 gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags));
214
215 spin_lock(&sdp->sd_unlinked_spin);
216 gfs2_assert(sdp, ul->ul_count);
217 ul->ul_count--;
218 if (!ul->ul_count) {
219 gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0);
220 spin_unlock(&sdp->sd_unlinked_spin);
221 kfree(ul);
222 } else
223 spin_unlock(&sdp->sd_unlinked_spin);
224}
225
226int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
227{
228 int error;
229
230 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
231 gfs2_assert_warn(sdp, list_empty(&ul->ul_list));
232
233 error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
234 if (!error)
235 ul_hash(sdp, ul);
236
237 return error;
238}
239
240int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
241{
242 int error;
243
244 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
245 gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
246
247 error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut);
248
249 return error;
250}
251
252int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
253{
254 struct gfs2_unlinked_tag ut;
255 int error;
256
257 gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags));
258 gfs2_assert_warn(sdp, !list_empty(&ul->ul_list));
259
260 memset(&ut, 0, sizeof(struct gfs2_unlinked_tag));
261
262 error = munge_ondisk(sdp, ul->ul_slot, &ut);
263 if (error)
264 return error;
265
266 ul_unhash(sdp, ul);
267
268 return 0;
269}
270
271/**
272 * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated
273 * @sdp: the filesystem
274 *
275 * Returns: errno
276 */
277
278int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp)
279{
280 unsigned int hits, strikes;
281 int error;
282
283 for (;;) {
284 hits = 0;
285 strikes = 0;
286
287 for (;;) {
288 struct gfs2_unlinked *ul = ul_fish(sdp);
289 if (!ul)
290 return 0;
291 error = gfs2_inode_dealloc(sdp, ul);
292 gfs2_unlinked_put(sdp, ul);
293
294 if (!error) {
295 hits++;
296 if (strikes)
297 strikes--;
298 } else if (error == 1) {
299 strikes++;
300 if (strikes >=
301 atomic_read(&sdp->sd_unlinked_count)) {
302 error = 0;
303 break;
304 }
305 } else
306 return error;
307 }
308
309 if (!hits || kthread_should_stop())
310 break;
311
312 cond_resched();
313 }
314
315 return 0;
316}
317
318int gfs2_unlinked_init(struct gfs2_sbd *sdp)
319{
320 struct gfs2_inode *ip = sdp->sd_ut_inode->u.generic_ip;
321 unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
322 unsigned int x, slot = 0;
323 unsigned int found = 0;
324 uint64_t dblock;
325 uint32_t extlen = 0;
326 int error;
327
328 if (!ip->i_di.di_size ||
329 ip->i_di.di_size > (64 << 20) ||
330 ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) {
331 gfs2_consist_inode(ip);
332 return -EIO;
333 }
334 sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block;
335 sdp->sd_unlinked_chunks = DIV_ROUND_UP(sdp->sd_unlinked_slots,
336 8 * PAGE_SIZE);
337
338 error = -ENOMEM;
339
340 sdp->sd_unlinked_bitmap = kcalloc(sdp->sd_unlinked_chunks,
341 sizeof(unsigned char *),
342 GFP_KERNEL);
343 if (!sdp->sd_unlinked_bitmap)
344 return error;
345
346 for (x = 0; x < sdp->sd_unlinked_chunks; x++) {
347 sdp->sd_unlinked_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL);
348 if (!sdp->sd_unlinked_bitmap[x])
349 goto fail;
350 }
351
352 for (x = 0; x < blocks; x++) {
353 struct buffer_head *bh;
354 unsigned int y;
355
356 if (!extlen) {
357 int new = 0;
358 error = gfs2_extent_map(ip->i_vnode, x, &new, &dblock, &extlen);
359 if (error)
360 goto fail;
361 }
362 gfs2_meta_ra(ip->i_gl, dblock, extlen);
363 error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT,
364 &bh);
365 if (error)
366 goto fail;
367 error = -EIO;
368 if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) {
369 brelse(bh);
370 goto fail;
371 }
372
373 for (y = 0;
374 y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots;
375 y++, slot++) {
376 struct gfs2_unlinked_tag ut;
377 struct gfs2_unlinked *ul;
378
379 gfs2_unlinked_tag_in(&ut, bh->b_data +
380 sizeof(struct gfs2_meta_header) +
381 y * sizeof(struct gfs2_unlinked_tag));
382 if (!ut.ut_inum.no_addr)
383 continue;
384
385 error = -ENOMEM;
386 ul = ul_alloc(sdp);
387 if (!ul) {
388 brelse(bh);
389 goto fail;
390 }
391 ul->ul_ut = ut;
392 ul->ul_slot = slot;
393
394 spin_lock(&sdp->sd_unlinked_spin);
395 gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1);
396 spin_unlock(&sdp->sd_unlinked_spin);
397 ul_hash(sdp, ul);
398
399 gfs2_unlinked_put(sdp, ul);
400 found++;
401 }
402
403 brelse(bh);
404 dblock++;
405 extlen--;
406 }
407
408 if (found)
409 fs_info(sdp, "found %u unlinked inodes\n", found);
410
411 return 0;
412
413fail:
414 gfs2_unlinked_cleanup(sdp);
415 return error;
416}
417
418/**
419 * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures
420 * @sdp: the filesystem
421 *
422 */
423
424void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp)
425{
426 struct list_head *head = &sdp->sd_unlinked_list;
427 struct gfs2_unlinked *ul;
428 unsigned int x;
429
430 spin_lock(&sdp->sd_unlinked_spin);
431 while (!list_empty(head)) {
432 ul = list_entry(head->next, struct gfs2_unlinked, ul_list);
433
434 if (ul->ul_count > 1) {
435 list_move_tail(&ul->ul_list, head);
436 spin_unlock(&sdp->sd_unlinked_spin);
437 schedule();
438 spin_lock(&sdp->sd_unlinked_spin);
439 continue;
440 }
441
442 list_del_init(&ul->ul_list);
443 atomic_dec(&sdp->sd_unlinked_count);
444
445 gfs2_assert_warn(sdp, ul->ul_count == 1);
446 gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags));
447 kfree(ul);
448 }
449 spin_unlock(&sdp->sd_unlinked_spin);
450
451 gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count));
452
453 if (sdp->sd_unlinked_bitmap) {
454 for (x = 0; x < sdp->sd_unlinked_chunks; x++)
455 kfree(sdp->sd_unlinked_bitmap[x]);
456 kfree(sdp->sd_unlinked_bitmap);
457 }
458}
459
diff --git a/fs/gfs2/unlinked.h b/fs/gfs2/unlinked.h
new file mode 100644
index 000000000000..159cf5ffe47e
--- /dev/null
+++ b/fs/gfs2/unlinked.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UNLINKED_DOT_H__
11#define __UNLINKED_DOT_H__
12
13int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul);
14void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
15
16int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
17int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
18int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul);
19
20int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp);
21
22int gfs2_unlinked_init(struct gfs2_sbd *sdp);
23void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp);
24
25#endif /* __UNLINKED_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
new file mode 100644
index 000000000000..88974e9824f7
--- /dev/null
+++ b/fs/gfs2/util.c
@@ -0,0 +1,245 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
15#include <linux/crc32.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "lm_interface.h"
21#include "incore.h"
22#include "glock.h"
23#include "lm.h"
24#include "util.h"
25
26kmem_cache_t *gfs2_glock_cachep __read_mostly;
27kmem_cache_t *gfs2_inode_cachep __read_mostly;
28kmem_cache_t *gfs2_bufdata_cachep __read_mostly;
29
30void gfs2_assert_i(struct gfs2_sbd *sdp)
31{
32 printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
33 sdp->sd_fsname);
34}
35
36/**
37 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
38 * Returns: -1 if this call withdrew the machine,
39 * -2 if it was already withdrawn
40 */
41
42int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
43 const char *function, char *file, unsigned int line)
44{
45 int me;
46 me = gfs2_lm_withdraw(sdp,
47 "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
48 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
49 sdp->sd_fsname, assertion,
50 sdp->sd_fsname, function, file, line);
51 dump_stack();
52 return (me) ? -1 : -2;
53}
54
55/**
56 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
57 * Returns: -1 if we printed something
58 * -2 if we didn't
59 */
60
61int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
62 const char *function, char *file, unsigned int line)
63{
64 if (time_before(jiffies,
65 sdp->sd_last_warning +
66 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
67 return -2;
68
69 printk(KERN_WARNING
70 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
71 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
72 sdp->sd_fsname, assertion,
73 sdp->sd_fsname, function, file, line);
74
75 if (sdp->sd_args.ar_debug)
76 BUG();
77 else
78 dump_stack();
79
80 sdp->sd_last_warning = jiffies;
81
82 return -1;
83}
84
85/**
86 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
87 * Returns: -1 if this call withdrew the machine,
88 * 0 if it was already withdrawn
89 */
90
91int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
92 char *file, unsigned int line)
93{
94 int rv;
95 rv = gfs2_lm_withdraw(sdp,
96 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
97 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
98 sdp->sd_fsname,
99 sdp->sd_fsname, function, file, line);
100 return rv;
101}
102
103/**
104 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
105 * Returns: -1 if this call withdrew the machine,
106 * 0 if it was already withdrawn
107 */
108
109int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
110 const char *function, char *file, unsigned int line)
111{
112 struct gfs2_sbd *sdp = ip->i_sbd;
113 int rv;
114 rv = gfs2_lm_withdraw(sdp,
115 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
116 "GFS2: fsid=%s: inode = %llu %llu\n"
117 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
118 sdp->sd_fsname,
119 sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
120 (unsigned long long)ip->i_num.no_addr,
121 sdp->sd_fsname, function, file, line);
122 return rv;
123}
124
125/**
126 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
127 * Returns: -1 if this call withdrew the machine,
128 * 0 if it was already withdrawn
129 */
130
131int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
132 const char *function, char *file, unsigned int line)
133{
134 struct gfs2_sbd *sdp = rgd->rd_sbd;
135 int rv;
136 rv = gfs2_lm_withdraw(sdp,
137 "GFS2: fsid=%s: fatal: filesystem consistency error\n"
138 "GFS2: fsid=%s: RG = %llu\n"
139 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
140 sdp->sd_fsname,
141 sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
142 sdp->sd_fsname, function, file, line);
143 return rv;
144}
145
146/**
147 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
148 * Returns: -1 if this call withdrew the machine,
149 * -2 if it was already withdrawn
150 */
151
152int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
153 const char *type, const char *function, char *file,
154 unsigned int line)
155{
156 int me;
157 me = gfs2_lm_withdraw(sdp,
158 "GFS2: fsid=%s: fatal: invalid metadata block\n"
159 "GFS2: fsid=%s: bh = %llu (%s)\n"
160 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
161 sdp->sd_fsname,
162 sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type,
163 sdp->sd_fsname, function, file, line);
164 return (me) ? -1 : -2;
165}
166
167/**
168 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
169 * Returns: -1 if this call withdrew the machine,
170 * -2 if it was already withdrawn
171 */
172
173int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
174 uint16_t type, uint16_t t, const char *function,
175 char *file, unsigned int line)
176{
177 int me;
178 me = gfs2_lm_withdraw(sdp,
179 "GFS2: fsid=%s: fatal: invalid metadata block\n"
180 "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
181 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
182 sdp->sd_fsname,
183 sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t,
184 sdp->sd_fsname, function, file, line);
185 return (me) ? -1 : -2;
186}
187
188/**
189 * gfs2_io_error_i - Flag an I/O error and withdraw
190 * Returns: -1 if this call withdrew the machine,
191 * 0 if it was already withdrawn
192 */
193
194int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
195 unsigned int line)
196{
197 int rv;
198 rv = gfs2_lm_withdraw(sdp,
199 "GFS2: fsid=%s: fatal: I/O error\n"
200 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
201 sdp->sd_fsname,
202 sdp->sd_fsname, function, file, line);
203 return rv;
204}
205
206/**
207 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
208 * Returns: -1 if this call withdrew the machine,
209 * 0 if it was already withdrawn
210 */
211
212int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
213 const char *function, char *file, unsigned int line)
214{
215 int rv;
216 rv = gfs2_lm_withdraw(sdp,
217 "GFS2: fsid=%s: fatal: I/O error\n"
218 "GFS2: fsid=%s: block = %llu\n"
219 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
220 sdp->sd_fsname,
221 sdp->sd_fsname, (unsigned long long)bh->b_blocknr,
222 sdp->sd_fsname, function, file, line);
223 return rv;
224}
225
226void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
227 unsigned int bit, int new_value)
228{
229 unsigned int c, o, b = bit;
230 int old_value;
231
232 c = b / (8 * PAGE_SIZE);
233 b %= 8 * PAGE_SIZE;
234 o = b / 8;
235 b %= 8;
236
237 old_value = (bitmap[c][o] & (1 << b));
238 gfs2_assert_withdraw(sdp, !old_value != !new_value);
239
240 if (new_value)
241 bitmap[c][o] |= 1 << b;
242 else
243 bitmap[c][o] &= ~(1 << b);
244}
245
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
new file mode 100644
index 000000000000..8216d28bd816
--- /dev/null
+++ b/fs/gfs2/util.h
@@ -0,0 +1,169 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10#ifndef __UTIL_DOT_H__
11#define __UTIL_DOT_H__
12
13
/*
 * printk() wrappers that prefix every message with "GFS2: fsid=<name>: ",
 * where <name> is the sd_fsname of the filesystem passed as @fs.
 */
#define fs_printk(level, fs, fmt, args...)				\
	printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## args)

/* KERN_INFO variant */
#define fs_info(fs, fmt, args...)					\
	fs_printk(KERN_INFO , fs , fmt , ## args)

/* KERN_WARNING variant */
#define fs_warn(fs, fmt, args...)					\
	fs_printk(KERN_WARNING , fs , fmt , ## args)

/* KERN_ERR variant */
#define fs_err(fs, fmt, args...)					\
	fs_printk(KERN_ERR, fs , fmt , ## args)
25
26
/* Prints the "fatal assertion failed" message for gfs2_assert(). */
void gfs2_assert_i(struct gfs2_sbd *sdp);

/*
 * gfs2_assert - fatal assertion: if @assertion is false, report it through
 * gfs2_assert_i() and then hit BUG().
 */
#define gfs2_assert(sdp, assertion)					\
do {									\
	if (unlikely(!(assertion))) {					\
		gfs2_assert_i(sdp);					\
		BUG();							\
	}								\
} while (0)
36
37
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
			   const char *function, char *file,
			   unsigned int line);

/*
 * gfs2_assert_withdraw - evaluates to 0 when @assertion holds; otherwise
 * calls gfs2_assert_withdraw_i() with the assertion text and call site and
 * evaluates to its return value.
 */
#define gfs2_assert_withdraw(sdp, assertion)				\
(unlikely(!(assertion)) ?						\
	gfs2_assert_withdraw_i((sdp), #assertion,			\
			       __FUNCTION__, __FILE__, __LINE__) : 0)
44
45
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
		       const char *function, char *file,
		       unsigned int line);

/*
 * gfs2_assert_warn - evaluates to 0 when @assertion holds; otherwise calls
 * gfs2_assert_warn_i() with the assertion text and call site and evaluates
 * to its return value.
 */
#define gfs2_assert_warn(sdp, assertion)				\
(unlikely(!(assertion)) ?						\
	gfs2_assert_warn_i((sdp), #assertion,				\
			   __FUNCTION__, __FILE__, __LINE__) : 0)
52
53
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
		   const char *function, char *file,
		   unsigned int line);

/*
 * gfs2_consist - report a filesystem consistency error from the call site;
 * always passes 0 for cluster_wide.
 */
#define gfs2_consist(sdp)						\
	gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)
59
60
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
			 const char *function, char *file,
			 unsigned int line);

/*
 * gfs2_consist_inode - report an inode consistency error from the call
 * site; always passes 0 for cluster_wide.
 */
#define gfs2_consist_inode(ip)						\
	gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)
66
67
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
			 const char *function, char *file,
			 unsigned int line);

/*
 * gfs2_consist_rgrpd - report a resource group consistency error from the
 * call site; always passes 0 for cluster_wide.
 */
#define gfs2_consist_rgrpd(rgd)						\
	gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
73
74
75int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
76 const char *type, const char *function,
77 char *file, unsigned int line);
78
79static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
80 struct buffer_head *bh,
81 const char *function,
82 char *file, unsigned int line)
83{
84 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
85 uint32_t magic = mh->mh_magic;
86 magic = be32_to_cpu(magic);
87 if (unlikely(magic != GFS2_MAGIC))
88 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
89 file, line);
90 return 0;
91}
92
93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
95
96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
98 uint16_t type, uint16_t t,
99 const char *function,
100 char *file, unsigned int line);
101
102static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
103 struct buffer_head *bh,
104 uint16_t type,
105 const char *function,
106 char *file, unsigned int line)
107{
108 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
109 uint32_t magic = mh->mh_magic;
110 uint16_t t = be32_to_cpu(mh->mh_type);
111 magic = be32_to_cpu(magic);
112 if (unlikely(magic != GFS2_MAGIC))
113 return gfs2_meta_check_ii(sdp, bh, "magic number", function,
114 file, line);
115 if (unlikely(t != type))
116 return gfs2_metatype_check_ii(sdp, bh, type, t, function,
117 file, line);
118 return 0;
119}
120
121#define gfs2_metatype_check(sdp, bh, type) \
122gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
123
124static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type,
125 uint16_t format)
126{
127 struct gfs2_meta_header *mh;
128 mh = (struct gfs2_meta_header *)bh->b_data;
129 mh->mh_type = cpu_to_be32(type);
130 mh->mh_format = cpu_to_be32(format);
131}
132
133
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);

/*
 * gfs2_io_error - report an I/O error from the call site.
 *
 * Expands to a plain expression (no trailing semicolon) so that it can be
 * used in an un-braced if/else and its return value can be tested.
 */
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)
139
140
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);

/*
 * gfs2_io_error_bh - report an I/O error on buffer @bh from the call site.
 *
 * Expands to a plain expression (no trailing semicolon) so that it can be
 * used in an un-braced if/else and its return value can be tested.
 */
#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
146
147
148extern kmem_cache_t *gfs2_glock_cachep;
149extern kmem_cache_t *gfs2_inode_cachep;
150extern kmem_cache_t *gfs2_bufdata_cachep;
151
152static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
153 unsigned int *p)
154{
155 unsigned int x;
156 spin_lock(&gt->gt_spin);
157 x = *p;
158 spin_unlock(&gt->gt_spin);
159 return x;
160}
161
162#define gfs2_tune_get(sdp, field) \
163gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
164
165void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
166 unsigned int bit, int new_value);
167
168#endif /* __UTIL_DOT_H__ */
169